#include "mfcpch.h"
#include "stderr.h"
#include "blobbox.h"
#include "lmedsq.h"
#include "statistc.h"
#include "drawtord.h"
#include "blkocc.h"
#include "sortflts.h"
#include "oldbasel.h"
#include "tordmain.h"
#include "underlin.h"
#include "makerow.h"
#include "tprintf.h"
Go to the source code of this file.
#define EXTERN |
(C) Copyright 1992, Hewlett-Packard Ltd. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Definition at line 40 of file makerow.cpp.
#define MAX_HEIGHT_MODES 12 |
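Size of the modes[] array that compute_row_xheight() passes to compute_height_modes(); a "mode" here is one of the biggest piles of the row's blob-height histogram. (Original note: "FIX: What's a mode?")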
Definition at line 123 of file makerow.cpp.
Referenced by compute_row_xheight().
void adjust_row_limits | ( | TO_BLOCK * | block | ) |
Tidy limits.
Change the limits of rows to suit the default fractions(tweeks).
Definition at line 1150 of file makerow.cpp.
References FALSE, TO_ROW::intercept(), TO_ROW::max_y(), TO_ROW::min_y(), TO_ROW::set_limits(), ymax, and ymin.
Referenced by expand_rows().
01152 { 01153 TO_ROW *row; //current row 01154 float size; //size of row 01155 float ymax; //top of row 01156 float ymin; //bottom of row 01157 TO_ROW_IT row_it = block->get_rows (); 01158 01159 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 01160 row = row_it.data (); 01161 size = row->max_y () - row->min_y (); 01162 size /= textord_merge_x + textord_merge_asc + textord_merge_desc; 01163 ymax = size * (textord_merge_x + textord_merge_asc); 01164 ymin = -size * textord_merge_desc; 01165 row->set_limits (row->intercept () + ymin, row->intercept () + ymax); 01166 row->merged = FALSE; 01167 } 01168 }
void assign_blobs_to_rows | ( | TO_BLOCK * | block, | |
float * | gradient, | |||
int | pass, | |||
BOOL8 | reject_misses, | |||
BOOL8 | make_new_rows, | |||
BOOL8 | drawing_skew | |||
) |
Find lines.
Make enough rows to allocate all the given blobs to one. If a block skew is given, use that, else attempt to track it.
Definition at line 2275 of file makerow.cpp.
References TO_ROW::add_blob(), ASSIGN, blob_x_order(), cprintf(), draw2d, f, TO_ROW::initial_min_y(), TO_ROW::max_y(), TO_ROW::min_y(), most_overlapping_row(), move2d, NEW_ROW, NULL, REJECT, to_win, and tprintf().
Referenced by cleanup_rows(), expand_rows(), and make_initial_textrows().
02282 { 02283 OVERLAP_STATE overlap_result; //what to do with it 02284 float ycoord; //current y 02285 float top, bottom; //of blob 02286 float g_length = 1.0f; //from gradient 02287 INT16 row_count; //no of rows 02288 INT16 left_x; //left edge 02289 INT16 last_x; //previous edge 02290 float block_skew; //y delta 02291 float smooth_factor; //for new coords 02292 float near_dist; //dist to nearest row 02293 ICOORD testpt; //testing only 02294 BLOBNBOX *blob; //current blob 02295 TO_ROW *row; //current row 02296 TO_ROW *dest_row; //row to put blob in 02297 //iterators 02298 BLOBNBOX_IT blob_it = &block->blobs; 02299 TO_ROW_IT row_it = block->get_rows (); 02300 02301 #ifdef TEXT_VERBOSE 02302 // gets a 'l', see ccmain/tesseractmain.dox 02303 cprintf("l"); 02304 #endif 02305 ycoord = 02306 (block->block->bounding_box ().bottom () + 02307 block->block->bounding_box ().top ()) / 2.0f; 02308 if (gradient != NULL) 02309 g_length = sqrt (1 + *gradient * *gradient); 02310 #ifndef GRAPHICS_DISABLED 02311 if (drawing_skew) 02312 move2d (to_win, block->block->bounding_box ().left (), ycoord); 02313 #endif 02314 testpt = ICOORD (textord_test_x, textord_test_y); 02315 blob_it.sort (blob_x_order); 02316 smooth_factor = 1.0; 02317 block_skew = 0.0f; 02318 row_count = row_it.length (); //might have rows 02319 if (!blob_it.empty ()) { 02320 left_x = blob_it.data ()->bounding_box ().left (); 02321 } 02322 else { 02323 left_x = block->block->bounding_box ().left (); 02324 } 02325 last_x = left_x; 02326 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { 02327 blob = blob_it.data (); 02328 if (gradient != NULL) { 02329 block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom () 02330 + *gradient / g_length * blob->bounding_box ().left (); 02331 } 02332 else if (blob->bounding_box ().left () - last_x > block->line_size / 2 02333 && last_x - left_x > block->line_size * 2 02334 && textord_interpolating_skew) { 02335 // tprintf("Interpolating skew from 
%g",block_skew); 02336 block_skew *= (float) (blob->bounding_box ().left () - left_x) 02337 / (last_x - left_x); 02338 // tprintf("to %g\n",block_skew); 02339 } 02340 last_x = blob->bounding_box ().left (); 02341 top = blob->bounding_box ().top () - block_skew; 02342 bottom = blob->bounding_box ().bottom () - block_skew; 02343 #ifndef GRAPHICS_DISABLED 02344 if (drawing_skew) 02345 draw2d (to_win, blob->bounding_box ().left (), ycoord + block_skew); 02346 #endif 02347 if (!row_it.empty ()) { 02348 for (row_it.move_to_first (); 02349 !row_it.at_last () && row_it.data ()->min_y () > top; 02350 row_it.forward ()); 02351 row = row_it.data (); 02352 if (row->min_y () <= top && row->max_y () >= bottom) { 02353 //any overlap 02354 dest_row = row; 02355 overlap_result = most_overlapping_row (&row_it, dest_row, 02356 top, bottom, 02357 block->line_size, 02358 blob->bounding_box (). 02359 contains (testpt)); 02360 if (overlap_result == NEW_ROW && !reject_misses) 02361 overlap_result = ASSIGN; 02362 } 02363 else { 02364 overlap_result = NEW_ROW; 02365 if (!make_new_rows) { 02366 near_dist = row_it.data_relative (-1)->min_y () - top; 02367 //below bottom 02368 if (bottom < row->min_y ()) { 02369 if (row->min_y () - bottom <= 02370 (block->line_spacing - 02371 block->line_size) * textord_merge_desc) { 02372 //done it 02373 overlap_result = ASSIGN; 02374 dest_row = row; 02375 } 02376 } 02377 else if (near_dist > 0 02378 && near_dist < bottom - row->max_y ()) { 02379 row_it.backward (); 02380 dest_row = row_it.data (); 02381 if (dest_row->min_y () - bottom <= 02382 (block->line_spacing - 02383 block->line_size) * textord_merge_desc) { 02384 //done it 02385 overlap_result = ASSIGN; 02386 } 02387 } 02388 else { 02389 if (top - row->max_y () <= 02390 (block->line_spacing - 02391 block->line_size) * (textord_merge_x + 02392 textord_merge_asc)) { 02393 //done it 02394 overlap_result = ASSIGN; 02395 dest_row = row; 02396 } 02397 } 02398 } 02399 } 02400 if (overlap_result == ASSIGN) 
02401 dest_row->add_blob (blob_it.extract (), top, bottom, 02402 block->line_size); 02403 if (overlap_result == NEW_ROW) { 02404 if (make_new_rows && top - bottom < block->max_blob_size) { 02405 dest_row = 02406 new TO_ROW (blob_it.extract (), top, bottom, 02407 block->line_size); 02408 row_count++; 02409 if (bottom > row_it.data ()->min_y ()) 02410 row_it.add_before_then_move (dest_row); 02411 //insert in right place 02412 else 02413 row_it.add_after_then_move (dest_row); 02414 smooth_factor = 02415 1.0 / (row_count * textord_skew_lag + 02416 textord_skewsmooth_offset); 02417 } 02418 else 02419 overlap_result = REJECT; 02420 } 02421 } 02422 else if (make_new_rows && top - bottom < block->max_blob_size) { 02423 overlap_result = NEW_ROW; 02424 dest_row = 02425 new TO_ROW (blob_it.extract (), top, bottom, block->line_size); 02426 row_count++; 02427 row_it.add_after_then_move (dest_row); 02428 smooth_factor = 1.0 / (row_count * textord_skew_lag + 1); 02429 } 02430 else 02431 overlap_result = REJECT; 02432 if (blob->bounding_box ().contains (testpt)) { 02433 if (overlap_result != REJECT) { 02434 tprintf ("Test blob assigned to row at (%g,%g) on pass %d\n", 02435 dest_row->min_y (), dest_row->max_y (), pass); 02436 } 02437 else { 02438 tprintf ("Test blob assigned to no row on pass %d\n", pass); 02439 } 02440 } 02441 if (overlap_result != REJECT) { 02442 while (!row_it.at_first () 02443 && row_it.data ()->min_y () > 02444 row_it.data_relative (-1)->min_y ()) { 02445 row = row_it.extract (); 02446 row_it.backward (); 02447 row_it.add_before_then_move (row); 02448 } 02449 while (!row_it.at_last () 02450 && row_it.data ()->min_y () < 02451 row_it.data_relative (1)->min_y ()) { 02452 row = row_it.extract (); 02453 row_it.forward (); 02454 //keep rows in order 02455 row_it.add_after_then_move (row); 02456 } 02457 block_skew = (1 - smooth_factor) * block_skew 02458 + smooth_factor * (blob->bounding_box ().bottom () - 02459 dest_row->initial_min_y ()); 02460 } 02461 } 02462 
for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 02463 if (row_it.data ()->blob_list ()->empty ()) 02464 delete row_it.extract (); //discard empty rows 02465 } 02466 }
int blob_x_order | ( | const void * | item1, | |
const void * | item2 | |||
) |
Sort function.
Sort function to sort blobs in x from page's left-hand side.
Definition at line 2579 of file makerow.cpp.
Referenced by assign_blobs_to_rows(), cleanup_rows(), and most_overlapping_row().
02581 { 02582 //converted ptr 02583 BLOBNBOX *blob1 = *(BLOBNBOX **) item1; 02584 //converted ptr 02585 BLOBNBOX *blob2 = *(BLOBNBOX **) item2; 02586 02587 if (blob1->bounding_box ().left () < blob2->bounding_box ().left ()) 02588 return -1; 02589 else if (blob1->bounding_box ().left () > blob2->bounding_box ().left ()) 02590 return 1; 02591 else 02592 return 0; 02593 }
void cleanup_rows | ( | ICOORD | page_tr, | |
TO_BLOCK * | block, | |||
float | gradient, | |||
FCOORD | rotation, | |||
INT32 | block_edge, | |||
BOOL8 | testing_on | |||
) |
Find lines.
Remove overlapping rows and fit all the blobs to what's left.
Definition at line 507 of file makerow.cpp.
References assign_blobs_to_rows(), blob_x_order(), compute_block_xheight(), CORAL, create_to_win(), delete_non_dropout_rows(), draw_meanlines(), expand_rows(), FALSE, fit_parallel_rows(), MAGENTA, make_spline_rows(), NO_WINDOW, plot_blob_list(), pre_associate_blobs(), restore_underlined_blobs(), rotation, separate_underlines(), to_win, tprintf(), TRUE, vigorous_noise_removal(), WHITE, and YELLOW.
Referenced by make_rows().
00514 { 00515 //iterators 00516 BLOBNBOX_IT blob_it = &block->blobs; 00517 TO_ROW_IT row_it = block->get_rows (); 00518 00519 #ifndef GRAPHICS_DISABLED 00520 if (textord_show_parallel_rows && testing_on) { 00521 if (to_win == NO_WINDOW) 00522 create_to_win(page_tr); 00523 } 00524 #endif 00525 //get row coords 00526 fit_parallel_rows(block, 00527 gradient, 00528 rotation, 00529 block_edge, 00530 textord_show_parallel_rows &&testing_on); 00531 delete_non_dropout_rows(block, 00532 gradient, 00533 rotation, 00534 block_edge, 00535 textord_show_parallel_rows &&testing_on); 00536 expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on); 00537 blob_it.set_to_list (&block->blobs); 00538 row_it.set_to_list (block->get_rows ()); 00539 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) 00540 blob_it.add_list_after (row_it.data ()->blob_list ()); 00541 //give blobs back 00542 assign_blobs_to_rows (block, &gradient, 1, TRUE, TRUE, FALSE); 00543 //now new rows must be genuine 00544 blob_it.set_to_list (&block->blobs); 00545 blob_it.add_list_after (&block->large_blobs); 00546 assign_blobs_to_rows (block, &gradient, 2, TRUE, TRUE, FALSE); 00547 //safe to use big ones now 00548 blob_it.set_to_list (&block->blobs); 00549 //throw all blobs in 00550 blob_it.add_list_after (&block->noise_blobs); 00551 blob_it.add_list_after (&block->small_blobs); 00552 assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE); 00553 //no rows for noise 00554 row_it.set_to_list (block->get_rows ()); 00555 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) 00556 row_it.data ()->blob_list ()->sort (blob_x_order); 00557 fit_parallel_rows(block, gradient, rotation, block_edge, FALSE); 00558 // if () new in v1.03 00559 if (textord_heavy_nr) { 00560 vigorous_noise_removal(block); 00561 } 00562 separate_underlines(block, gradient, rotation, testing_on); 00563 pre_associate_blobs(page_tr, block, rotation, testing_on); 00564 00565 #ifndef 
GRAPHICS_DISABLED 00566 if (textord_show_final_rows && testing_on) { 00567 if (to_win == NO_WINDOW) 00568 create_to_win(page_tr); 00569 } 00570 #endif 00571 00572 fit_parallel_rows(block, gradient, rotation, block_edge, FALSE); 00573 // textord_show_final_rows && testing_on); 00574 make_spline_rows(block, 00575 gradient, 00576 rotation, 00577 block_edge, 00578 textord_show_final_rows &&testing_on); 00579 if (!textord_old_xheight || !textord_old_baselines) 00580 compute_block_xheight(block, gradient); 00581 if (textord_restore_underlines) 00582 //fix underlines 00583 restore_underlined_blobs(block); 00584 #ifndef GRAPHICS_DISABLED 00585 if (textord_show_final_rows && testing_on) { 00586 plot_blob_list (to_win, &block->blobs, MAGENTA, WHITE); 00587 //show discarded blobs 00588 plot_blob_list (to_win, &block->underlines, YELLOW, CORAL); 00589 } 00590 if (textord_show_final_rows && testing_on && block->blobs.length () > 0) 00591 tprintf ("%d blobs discarded as noise\n", block->blobs.length ()); 00592 if (textord_show_final_rows && testing_on) { 00593 draw_meanlines(block, gradient, block_edge, WHITE, rotation); 00594 } 00595 #endif 00596 }
void compute_block_xheight | ( | TO_BLOCK * | block, | |
float | gradient | |||
) |
Find lines xheight.
Compute the xheight of the individual rows, then correlate them and interpret ascenderless lines, correcting xheights.
Definition at line 1267 of file makerow.cpp.
References STATS::add(), TO_ROW::ascrise, compute_row_xheight(), correct_row_xheight(), TO_ROW::descdrop, STATS::ile(), median_block_xheight(), STATS::set_range(), and TO_ROW::xheight.
Referenced by cleanup_rows().
01270 { 01271 TO_ROW *row; //current row 01272 int xh_count, desc_count; //no of samples 01273 float block_median; //median blob size 01274 int asc_count, cap_count; 01275 INT32 min_size, max_size; //limits on xheight 01276 INT32 evidence; //no of samples on row 01277 float xh_sum, desc_sum; //for averages 01278 float asc_sum, cap_sum; 01279 TO_ROW_IT row_it = block->get_rows (); 01280 STATS row_heights; //block evidence 01281 01282 if (row_it.empty ()) 01283 return; //no rows 01284 block_median = median_block_xheight (block, gradient); 01285 block_median *= 2; 01286 if (block_median < block->line_size) 01287 block_median = block->line_size; 01288 // tprintf("Block median=%g, linesize=%g\n", 01289 // block_median,block->line_size); 01290 max_size = (INT32) ceil (block_median); 01291 min_size = (INT32) floor (block_median * textord_minxh); 01292 row_heights.set_range (min_size, max_size + 1); 01293 xh_count = desc_count = asc_count = cap_count = 0; 01294 xh_sum = desc_sum = asc_sum = cap_sum = 0.0f; 01295 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 01296 row = row_it.data (); 01297 evidence = compute_row_xheight (row, min_size, max_size, gradient); 01298 if (row->xheight > 0 && row->ascrise > 0) { 01299 row_heights.add ((INT32) row->xheight, evidence); 01300 xh_count += evidence; 01301 asc_sum += row->ascrise; 01302 asc_count++; 01303 } 01304 else if (row->xheight > 0) { 01305 cap_sum += row->xheight; //assume just caps 01306 cap_count++; 01307 } 01308 if (row->descdrop != 0) { 01309 desc_sum += row->descdrop; 01310 desc_count++; 01311 } 01312 } 01313 if (xh_count > 0) { 01314 //median 01315 xh_sum = row_heights.ile (0.5); 01316 asc_sum /= asc_count; 01317 } 01318 else if (cap_count > 0) { 01319 cap_sum /= cap_count; //must assume caps 01320 xh_sum = 01321 cap_sum * textord_merge_x / (textord_merge_x + textord_merge_asc); 01322 asc_sum = 01323 cap_sum * textord_merge_asc / (textord_merge_x + textord_merge_asc); 01324 } 01325 else { 
01326 //default sizes 01327 xh_sum = block_median * textord_merge_x; 01328 asc_sum = block_median * textord_merge_asc; 01329 } 01330 if (desc_count > 0) { 01331 desc_sum /= desc_count; 01332 } 01333 else { 01334 desc_sum = xh_sum * textord_merge_desc / textord_merge_x; 01335 } 01336 // tprintf("Block average x height=%g, count=%d, asc=%g/%d, desc=%g/%d,cap=%g/%d\n", 01337 // xh_sum,xh_count,asc_sum,asc_count,desc_sum,desc_count, 01338 // cap_sum,cap_count); 01339 if (xh_sum < textord_min_xheight) 01340 xh_sum = (float) textord_min_xheight; 01341 block->xheight = xh_sum; 01342 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 01343 correct_row_xheight (row_it.data (), xh_sum, asc_sum, desc_sum); 01344 } 01345 }
Project blobs.
Compute the distance from each coordinate to the nearest dropout.
Definition at line 963 of file makerow.cpp.
Referenced by delete_non_dropout_rows().
00967 { 00968 INT32 line_index; //of thresholds line 00969 INT32 distance; //from prev dropout 00970 INT32 next_dist; //to next dropout 00971 INT32 back_index; //for back filling 00972 INT32 prev_threshold; //before overwrite 00973 00974 distance = -line_count; 00975 line_index = 0; 00976 do { 00977 do { 00978 distance--; 00979 prev_threshold = thresholds[line_index]; 00980 //distance from prev 00981 thresholds[line_index] = distance; 00982 line_index++; 00983 } 00984 while (line_index < line_count 00985 && (occupation[line_index] < thresholds[line_index] 00986 || occupation[line_index - 1] >= prev_threshold)); 00987 if (line_index < line_count) { 00988 back_index = line_index - 1; 00989 next_dist = 1; 00990 while (next_dist < -distance && back_index >= 0) { 00991 thresholds[back_index] = next_dist; 00992 back_index--; 00993 next_dist++; 00994 distance++; 00995 } 00996 distance = 1; 00997 } 00998 } 00999 while (line_index < line_count); 01000 }
/**
 * Find lines.
 *
 * Find the top maxmodes values in the input array and put their indices in
 * the output in the order in which they occurred.
 *
 * Heights from min_height to max_height are scanned in increasing order.
 * Non-empty piles are appended to modes[] until it is full; after that a
 * pile at least as big as the smallest one kept evicts that smallest entry
 * and is appended at the end.
 *
 * Originally defined at line 1557 of makerow.cpp; called by
 * compute_row_xheight().
 *
 * @param heights     histogram queried through pile_count()
 * @param min_height  first height considered
 * @param max_height  last height considered
 * @param modes       output array, at least maxmodes entries
 * @param maxmodes    capacity of modes[]
 * @return number of entries written to modes[]
 */
INT32 compute_height_modes(STATS *heights,
                           INT32 min_height,
                           INT32 max_height,
                           INT32 *modes,
                           INT32 maxmodes) {
  const INT32 span = max_height + 1 - min_height;
  INT32 kept = 0;                   // entries stored in modes[]
  INT32 weakest_count = MAX_INT32;  // pile size of the smallest kept entry
  INT32 weakest_slot = -1;          // its position in modes[]

  for (INT32 offset = 0; offset < span; offset++) {
    const INT32 height = min_height + offset;
    const INT32 pile = heights->pile_count(height);
    if (pile <= 0)
      continue;                     // empty pile

    if (kept < maxmodes) {
      // Still room: append, remembering the smallest pile seen so far.
      if (pile < weakest_count) {
        weakest_count = pile;
        weakest_slot = kept;
      }
      modes[kept++] = height;
      continue;
    }
    if (pile < weakest_count)
      continue;                     // smaller than everything kept

    // Evict the weakest entry by shuffling the later ones up, then put the
    // new height on the end.
    for (; weakest_slot < maxmodes - 1; weakest_slot++)
      modes[weakest_slot] = modes[weakest_slot + 1];
    modes[maxmodes - 1] = height;

    if (pile == weakest_count) {
      // The new entry is the new smallest.
      weakest_slot = maxmodes - 1;
    }
    else {
      // Rescan the kept entries for the smallest pile.
      weakest_count = heights->pile_count(modes[0]);
      weakest_slot = 0;
      for (INT32 slot = 1; slot < maxmodes; slot++) {
        const INT32 slot_pile = heights->pile_count(modes[slot]);
        if (slot_pile < weakest_count) {
          weakest_count = slot_pile;
          weakest_slot = slot;
        }
      }
    }
  }
  return kept;
}
void compute_line_occupation | ( | TO_BLOCK * | block, | |
float | gradient, | |||
INT32 | min_y, | |||
INT32 | max_y, | |||
INT32 * | occupation, | |||
INT32 * | deltas | |||
) |
Project blobs.
Compute the pixel projection back on the y axis given the global skew. Also compute the 1st derivative.
Definition at line 813 of file makerow.cpp.
References TO_ROW::blob_list(), BOX::bottom(), INT32FORMAT, BOX::left(), BOX::right(), BOX::rotate(), rotation, and BOX::top().
Referenced by delete_non_dropout_rows().
00820 { 00821 INT32 line_count; //maxy-miny+1 00822 INT32 line_index; //of scan line 00823 float top, bottom; //coords of blob 00824 INT32 width; //of blob 00825 INT32 idx; //for -O3 bug on some platforms 00826 TO_ROW *row; //current row 00827 TO_ROW_IT row_it = block->get_rows (); 00828 BLOBNBOX *blob; //current blob 00829 BLOBNBOX_IT blob_it; //iterator 00830 float length; //of skew vector 00831 BOX blob_box; //bounding box 00832 FCOORD rotation; //inverse of skew 00833 00834 line_count = max_y - min_y + 1; 00835 length = sqrt (gradient * gradient + 1); 00836 rotation = FCOORD (1 / length, -gradient / length); 00837 for (line_index = 0; line_index < line_count; line_index++) 00838 deltas[line_index] = 0; 00839 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00840 row = row_it.data (); 00841 blob_it.set_to_list (row->blob_list ()); 00842 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); 00843 blob_it.forward ()) { 00844 blob = blob_it.data (); 00845 blob_box = blob->bounding_box (); 00846 blob_box.rotate (rotation);//de-skew it 00847 top = blob_box.top (); 00848 bottom = blob_box.bottom (); 00849 width = 00850 (INT32) floor ((FLOAT32) (blob_box.right () - blob_box.left ())); 00851 if ((INT32) floor (bottom) < min_y 00852 || (INT32) floor (bottom) - min_y >= line_count) 00853 fprintf (stderr, 00854 "Bad y coord of bottom, " INT32FORMAT "(" INT32FORMAT "," 00855 INT32FORMAT ")\n", (INT32) floor (bottom), min_y, max_y); 00856 //count transitions 00857 //for -O3 bug on some platforms 00858 //deltas[(INT32) floor (bottom) - min_y] += width; 00859 idx = (INT32) floor (bottom) - min_y; 00860 deltas[idx] += width; 00861 if ((INT32) floor (top) < min_y 00862 || (INT32) floor (top) - min_y >= line_count) 00863 fprintf (stderr, 00864 "Bad y coord of top, " INT32FORMAT "(" INT32FORMAT "," 00865 INT32FORMAT ")\n", (INT32) floor (top), min_y, max_y); 00866 //deltas[(INT32) floor (top) - min_y] -= width; 00867 idx = (INT32) floor (top) - min_y; 
00868 deltas[idx] -= width; 00869 } 00870 } 00871 occupation[0] = deltas[0]; 00872 for (line_index = 1; line_index < line_count; line_index++) 00873 occupation[line_index] = occupation[line_index - 1] + deltas[line_index]; 00874 }
void compute_occupation_threshold | ( | INT32 | low_window, | |
INT32 | high_window, | |||
INT32 | line_count, | |||
INT32 * | occupation, | |||
INT32 * | thresholds | |||
) |
Project blobs.
Compute thresholds for textline or not for the occupation array.
Definition at line 882 of file makerow.cpp.
Referenced by delete_non_dropout_rows().
00888 { 00889 INT32 line_index; //of thresholds line 00890 INT32 low_index; //in occupation 00891 INT32 high_index; //in occupation 00892 INT32 sum; //current average 00893 INT32 divisor; //to get thresholds 00894 INT32 min_index; //of min occ 00895 INT32 min_occ; //min in locality 00896 INT32 test_index; //for finding min 00897 00898 divisor = 00899 (INT32) ceil ((low_window + high_window) / textord_occupancy_threshold); 00900 if (low_window + high_window < line_count) { 00901 for (sum = 0, high_index = 0; high_index < low_window; high_index++) 00902 sum += occupation[high_index]; 00903 for (low_index = 0; low_index < high_window; low_index++, high_index++) 00904 sum += occupation[high_index]; 00905 min_occ = occupation[0]; 00906 min_index = 0; 00907 for (test_index = 1; test_index < high_index; test_index++) { 00908 if (occupation[test_index] <= min_occ) { 00909 min_occ = occupation[test_index]; 00910 min_index = test_index; //find min in region 00911 } 00912 } 00913 for (line_index = 0; line_index < low_window; line_index++) 00914 thresholds[line_index] = (sum - min_occ) / divisor + min_occ; 00915 //same out to end 00916 for (low_index = 0; high_index < line_count; low_index++, high_index++) { 00917 sum -= occupation[low_index]; 00918 sum += occupation[high_index]; 00919 if (occupation[high_index] <= min_occ) { 00920 //find min in region 00921 min_occ = occupation[high_index]; 00922 min_index = high_index; 00923 } 00924 //lost min from region 00925 if (min_index <= low_index) { 00926 min_occ = occupation[low_index + 1]; 00927 min_index = low_index + 1; 00928 for (test_index = low_index + 2; test_index <= high_index; 00929 test_index++) { 00930 if (occupation[test_index] <= min_occ) { 00931 min_occ = occupation[test_index]; 00932 //find min in region 00933 min_index = test_index; 00934 } 00935 } 00936 } 00937 thresholds[line_index++] = (sum - min_occ) / divisor + min_occ; 00938 } 00939 } 00940 else { 00941 min_occ = occupation[0]; 00942 min_index = 0; 00943 for 
(sum = 0, low_index = 0; low_index < line_count; low_index++) { 00944 if (occupation[low_index] < min_occ) { 00945 min_occ = occupation[low_index]; 00946 min_index = low_index; 00947 } 00948 sum += occupation[low_index]; 00949 } 00950 line_index = 0; 00951 } 00952 for (; line_index < line_count; line_index++) 00953 thresholds[line_index] = (sum - min_occ) / divisor + min_occ; 00954 //same out to end 00955 }
void compute_page_skew | ( | TO_BLOCK_LIST * | blocks, | |
float & | page_m, | |||
float & | page_err | |||
) |
Get average gradient.
Compute the skew over a full page by averaging the gradients over all the lines. Get the error of the same row.
Definition at line 251 of file makerow.cpp.
References ABORT, alloc_mem(), blob_count, TO_ROW::blob_list(), choose_nth_item(), cprintf(), ERRCODE::error(), free_mem(), TO_ROW::line_error(), TO_ROW::line_m(), MEMORY_OUT, and NULL.
Referenced by make_rows().
00255 { 00256 INT32 row_count; //total rows 00257 INT32 blob_count; //total_blobs 00258 INT32 row_err; //integer error 00259 float *gradients; //of rows 00260 float *errors; //of rows 00261 INT32 row_index; //of total 00262 TO_ROW *row; //current row 00263 TO_BLOCK_IT block_it = blocks; //iterator 00264 TO_ROW_IT row_it; 00265 00266 row_count = 0; 00267 blob_count = 0; 00268 00269 #ifdef TEXT_VERBOSE 00270 // gets a 'q', see ccmain/tesseractmain.dox 00271 cprintf("q"); 00272 #endif 00273 for (block_it.mark_cycle_pt (); !block_it.cycled_list (); 00274 block_it.forward ()) { 00275 row_count += block_it.data ()->get_rows ()->length (); 00276 //count up rows 00277 row_it.set_to_list (block_it.data ()->get_rows ()); 00278 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) 00279 blob_count += row_it.data ()->blob_list ()->length (); 00280 } 00281 if (row_count == 0) { 00282 page_m = 0.0f; 00283 page_err = 0.0f; 00284 return; 00285 } 00286 gradients = (float *) alloc_mem (blob_count * sizeof (float)); 00287 //get mem 00288 errors = (float *) alloc_mem (blob_count * sizeof (float)); 00289 if (gradients == NULL || errors == NULL) 00290 MEMORY_OUT.error ("compute_page_skew", ABORT, NULL); 00291 00292 row_index = 0; 00293 for (block_it.mark_cycle_pt (); !block_it.cycled_list (); 00294 block_it.forward ()) { 00295 row_it.set_to_list (block_it.data ()->get_rows ()); 00296 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00297 row = row_it.data (); 00298 blob_count = row->blob_list ()->length (); 00299 row_err = (INT32) ceil (row->line_error ()); 00300 if (row_err <= 0) 00301 row_err = 1; 00302 if (textord_biased_skewcalc) { 00303 blob_count /= row_err; 00304 for (blob_count /= row_err; blob_count > 0; blob_count--) { 00305 gradients[row_index] = row->line_m (); 00306 errors[row_index] = row->line_error (); 00307 row_index++; 00308 } 00309 } 00310 else if (blob_count >= textord_min_blobs_in_row) { 00311 //get gradient 00312 
gradients[row_index] = row->line_m (); 00313 errors[row_index] = row->line_error (); 00314 row_index++; 00315 } 00316 } 00317 } 00318 if (row_index == 0) { 00319 //desperate 00320 for (block_it.mark_cycle_pt (); !block_it.cycled_list (); 00321 block_it.forward ()) { 00322 row_it.set_to_list (block_it.data ()->get_rows ()); 00323 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); 00324 row_it.forward ()) { 00325 row = row_it.data (); 00326 gradients[row_index] = row->line_m (); 00327 errors[row_index] = row->line_error (); 00328 row_index++; 00329 } 00330 } 00331 } 00332 row_count = row_index; 00333 row_index = choose_nth_item ((INT32) (row_count * textord_skew_ile), 00334 gradients, row_count); 00335 page_m = gradients[row_index]; 00336 row_index = choose_nth_item ((INT32) (row_count * textord_skew_ile), 00337 errors, row_count); 00338 page_err = errors[row_index]; 00339 free_mem(gradients); 00340 free_mem(errors); 00341 }
Find row descender drop.
Estimate the descdrop of this row.
Definition at line 1511 of file makerow.cpp.
References STATS::add(), blob_count, TO_ROW::blob_list(), f, STATS::mode(), TO_ROW::parallel_c(), STATS::pile_count(), and TO_ROW::xheight.
Referenced by compute_row_xheight().
01514 { 01515 INT32 min_height = (INT32) floor (row->xheight * textord_descx_ratio_min); 01516 INT32 max_height = (INT32) floor (row->xheight * textord_descx_ratio_max); 01517 float xcentre; //centre of blob 01518 float height; //height of blob 01519 BLOBNBOX_IT blob_it = row->blob_list (); 01520 BLOBNBOX *blob; //current blob 01521 INT32 blob_count; //blobs in block 01522 INT32 blob_index; //current blob 01523 STATS heights (min_height, max_height + 1); 01524 01525 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { 01526 blob = blob_it.data (); 01527 if (!blob->joined_to_prev ()) { 01528 xcentre = 01529 (blob->bounding_box ().left () + 01530 blob->bounding_box ().right ()) / 2.0f; 01531 height = 01532 gradient * xcentre + row->parallel_c () - 01533 blob->bounding_box ().bottom (); 01534 if (height >= min_height && height <= max_height) 01535 heights.add ((INT32) floor (height + 0.5), 1); 01536 } 01537 } 01538 blob_index = heights.mode (); //find mode 01539 //get count of mode 01540 blob_count = heights.pile_count (blob_index); 01541 return blob_count > 0 ? -blob_index : 0; 01542 }
Find lines stats.
Compute the linespacing and offset.
Definition at line 1176 of file makerow.cpp.
References ABORT, alloc_mem(), choose_nth_item(), ERRCODE::error(), free_mem(), TO_ROW::intercept(), MEMORY_OUT, NULL, TO_ROW::parallel_c(), row_spacing_order(), TO_ROW::spacing, and tprintf().
Referenced by expand_rows().
01179 { 01180 INT32 row_index; //of median 01181 TO_ROW *row; //current row 01182 TO_ROW *prev_row; //previous row 01183 float iqr; //inter quartile range 01184 TO_ROW_IT row_it = block->get_rows (); 01185 //number of rows 01186 INT16 rowcount = row_it.length (); 01187 TO_ROW **rows; //for choose nth 01188 01189 rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *)); 01190 if (rows == NULL) 01191 MEMORY_OUT.error ("compute_row_stats", ABORT, NULL); 01192 rowcount = 0; 01193 prev_row = NULL; 01194 row_it.move_to_last (); //start at bottom 01195 do { 01196 row = row_it.data (); 01197 if (prev_row != NULL) { 01198 rows[rowcount++] = prev_row; 01199 prev_row->spacing = row->intercept () - prev_row->intercept (); 01200 if (testing_on) 01201 tprintf ("Row at %g yields spacing of %g\n", 01202 row->intercept (), prev_row->spacing); 01203 } 01204 prev_row = row; 01205 row_it.backward (); 01206 } 01207 while (!row_it.at_last ()); 01208 block->key_row = prev_row; 01209 block->baseline_offset = 01210 fmod (prev_row->parallel_c (), block->line_spacing); 01211 if (testing_on) 01212 tprintf ("Blob based spacing=(%g,%g), offset=%g", 01213 block->line_size, block->line_spacing, block->baseline_offset); 01214 if (rowcount > 0) { 01215 row_index = choose_nth_item (rowcount * 3 / 4, rows, rowcount, 01216 sizeof (TO_ROW *), row_spacing_order); 01217 iqr = rows[row_index]->spacing; 01218 row_index = choose_nth_item (rowcount / 4, rows, rowcount, 01219 sizeof (TO_ROW *), row_spacing_order); 01220 iqr -= rows[row_index]->spacing; 01221 row_index = choose_nth_item (rowcount / 2, rows, rowcount, 01222 sizeof (TO_ROW *), row_spacing_order); 01223 block->key_row = rows[row_index]; 01224 if (testing_on) 01225 tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr); 01226 if (rowcount > 2 01227 && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) { 01228 if (!textord_new_initial_xheight) { 01229 if (rows[row_index]->spacing < block->line_spacing 01230 && 
rows[row_index]->spacing > block->line_size) 01231 //within range 01232 block->line_size = rows[row_index]->spacing; 01233 //spacing=size 01234 else if (rows[row_index]->spacing > block->line_spacing) 01235 block->line_size = block->line_spacing; 01236 //too big so use max 01237 } 01238 else { 01239 if (rows[row_index]->spacing < block->line_spacing) 01240 block->line_size = rows[row_index]->spacing; 01241 else 01242 block->line_size = block->line_spacing; 01243 //too big so use max 01244 } 01245 if (block->line_size < textord_min_xheight) 01246 block->line_size = (float) textord_min_xheight; 01247 block->line_spacing = rows[row_index]->spacing; 01248 block->max_blob_size = 01249 block->line_spacing * textord_excess_blobsize; 01250 } 01251 block->baseline_offset = fmod (rows[row_index]->intercept (), 01252 block->line_spacing); 01253 } 01254 if (testing_on) 01255 tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n", 01256 block->line_size, block->line_spacing, block->baseline_offset); 01257 free_mem(rows); 01258 }
Find row xheight.
Estimate the xheight of this row. Compute the ascender rise and descender drop at the same time.
Definition at line 1408 of file makerow.cpp.
References STATS::add(), TO_ROW::ascrise, TO_ROW::baseline, blob_count, TO_ROW::blob_list(), compute_height_modes(), compute_row_descdrop(), TO_ROW::descdrop, f, FALSE, STATS::get_total(), MAX_HEIGHT_MODES, MAX_INT32, STATS::mode(), TO_ROW::parallel_c(), STATS::pile_count(), tprintf(), TRUE, TO_ROW::xheight, and QSPLINE::y().
Referenced by compute_block_xheight().
01413 { 01414 BOOL8 in_best_pile; //control of mode size 01415 INT32 prev_size; //previous size 01416 float xcentre; //centre of blob 01417 float height; //height of blob 01418 BLOBNBOX_IT blob_it = row->blob_list (); 01419 BLOBNBOX *blob; //current blob 01420 INT32 blob_count; //blobs in block 01421 INT32 x; //xheight index 01422 INT32 asc; //ascender index 01423 INT32 blob_index; //current blob 01424 INT32 mode_count; //no of modes 01425 INT32 best_count; //count of best x so far 01426 float ratio; //size ratio 01427 INT32 modes[MAX_HEIGHT_MODES]; //biggest piles 01428 STATS heights (min_height, max_height + 1); 01429 01430 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { 01431 blob = blob_it.data (); 01432 if (!blob->joined_to_prev ()) { 01433 xcentre = 01434 (blob->bounding_box ().left () + 01435 blob->bounding_box ().right ()) / 2.0f; 01436 height = blob->bounding_box ().top (); 01437 if (textord_fix_xheight_bug) 01438 height -= row->baseline.y (xcentre); 01439 else 01440 height -= gradient * xcentre + row->parallel_c (); 01441 if (height >= min_height && height <= max_height 01442 && (!textord_xheight_tweak || height > textord_min_xheight)) 01443 heights.add ((INT32) floor (height + 0.5), 1); 01444 } 01445 } 01446 blob_index = heights.mode (); //find mode 01447 //get count of mode 01448 blob_count = heights.pile_count (blob_index); 01449 if (textord_debug_xheights) 01450 tprintf ("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d,%d\n", 01451 min_height, max_height, blob_index, blob_count, 01452 heights.get_total (), row->blob_list ()->length ()); 01453 row->ascrise = 0.0f; 01454 row->xheight = 0.0f; 01455 row->descdrop = 0.0f; //undefined; 01456 in_best_pile = FALSE; 01457 prev_size = -MAX_INT32; 01458 best_count = 0; 01459 if (blob_count > 0) { 01460 //get biggest ones 01461 mode_count = compute_height_modes (&heights, min_height, max_height, modes, MAX_HEIGHT_MODES); 01462 for (x = 0; x < mode_count - 1; x++) { 01463 if 
(modes[x] != prev_size + 1) 01464 in_best_pile = FALSE; //had empty height 01465 if (heights.pile_count (modes[x]) 01466 >= blob_count * textord_xheight_mode_fraction 01467 && (in_best_pile || heights.pile_count (modes[x]) > best_count)) { 01468 for (asc = x + 1; asc < mode_count; asc++) { 01469 ratio = (float) modes[asc] / modes[x]; 01470 if (textord_ascx_ratio_min < ratio 01471 && ratio < textord_ascx_ratio_max 01472 && heights.pile_count (modes[asc]) 01473 >= blob_count * textord_ascheight_mode_fraction) { 01474 if (heights.pile_count (modes[x]) > best_count) { 01475 in_best_pile = TRUE; 01476 best_count = heights.pile_count (modes[x]); 01477 } 01478 // tprintf("X=%d, asc=%d, count=%d, ratio=%g\n", 01479 // modes[x],modes[asc]-modes[x], 01480 // heights.pile_count(modes[x]), 01481 // ratio); 01482 prev_size = modes[x]; 01483 row->xheight = (float) modes[x]; 01484 row->ascrise = (float) (modes[asc] - modes[x]); 01485 } 01486 } 01487 } 01488 } 01489 if (row->xheight == 0) { 01490 //single mode 01491 row->xheight = (float) blob_index; 01492 row->ascrise = 0.0f; 01493 if (textord_debug_xheights) 01494 tprintf ("Single mode xheight set to %g\n", row->xheight); 01495 } 01496 else if (textord_debug_xheights) 01497 tprintf ("Multi-mode xheight set to %g, asc=%g\n", 01498 row->xheight, row->ascrise); 01499 row->descdrop = (float) compute_row_descdrop (row, gradient); 01500 //find descenders 01501 } 01502 return best_count; 01503 }
void correct_row_xheight | ( | TO_ROW * | row, | |
float | xheight, | |||
float | ascrise, | |||
float | descdrop | |||
) |
Fix bad row xht values.
Adjust the xheight etc of this row if not within reasonable limits of the average for the block.
Definition at line 1623 of file makerow.cpp.
References TO_ROW::all_caps, TO_ROW::ascrise, TO_ROW::descdrop, FALSE, TRUE, and TO_ROW::xheight.
Referenced by compute_block_xheight().
01627 { 01628 if (textord_row_xheights) { 01629 if (row->xheight <= 0) 01630 row->xheight = xheight; 01631 if (row->ascrise < row->xheight * (textord_ascx_ratio_min - 1)) { 01632 if (row->xheight >= xheight * (1 - textord_xheight_error_margin) 01633 && row->xheight <= xheight * (1 + textord_xheight_error_margin)) { 01634 row->all_caps = FALSE; 01635 row->ascrise = ascrise; 01636 } 01637 else if (row->xheight >= 01638 (xheight + ascrise) * (1 - textord_xheight_error_margin) 01639 && row->xheight <= 01640 (xheight + ascrise) * (1 + textord_xheight_error_margin)) { 01641 row->all_caps = TRUE; 01642 //it was caps 01643 row->ascrise = row->xheight - xheight; 01644 row->xheight = xheight; 01645 } 01646 else { 01647 row->all_caps = TRUE; 01648 row->ascrise = row->xheight * ascrise / (xheight + ascrise); 01649 row->xheight -= row->ascrise; 01650 } 01651 } 01652 else 01653 row->all_caps = FALSE; 01654 row->ascrise = ascrise; 01655 if (row->descdrop >= -row->xheight * (textord_ascx_ratio_min - 1)) 01656 row->descdrop = descdrop; 01657 } 01658 else { 01659 if (row->xheight < xheight * (1 - textord_xheight_error_margin) 01660 || row->xheight > xheight * (1 + textord_xheight_error_margin)) 01661 row->xheight = xheight; //set to average 01662 row->all_caps = row->ascrise <= 0; 01663 if (row->ascrise < ascrise * (1 - textord_xheight_error_margin) 01664 || row->ascrise > ascrise * (1 + textord_xheight_error_margin)) 01665 row->ascrise = ascrise; //set to average 01666 if (row->descdrop < descdrop * (1 - textord_xheight_error_margin) 01667 || row->descdrop > descdrop * (1 + textord_xheight_error_margin)) 01668 row->descdrop = descdrop; //set to average 01669 } 01670 }
void delete_non_dropout_rows | ( | TO_BLOCK * | block, | |
float | gradient, | |||
FCOORD | rotation, | |||
INT32 | block_edge, | |||
BOOL8 | testing_on | |||
) |
Find lines.
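Compute the line occupation profile of the deskewed block, delete every row for which find_best_dropout_row() returns TRUE, and return the blobs of all rows to the block's blob list.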
Definition at line 604 of file makerow.cpp.
References ABORT, alloc_mem(), BOX::bottom(), compute_dropout_distances(), compute_line_occupation(), compute_occupation_threshold(), cprintf(), deskew_block_coords(), draw_occupation(), ERRCODE::error(), find_best_dropout_row(), free_mem(), TO_ROW::intercept(), MEMORY_OUT, NULL, plot_parallel_row(), rotation, BOX::top(), and WHITE.
Referenced by cleanup_rows().
00610 { 00611 BOX block_box; //deskewed block 00612 INT32 *deltas; //change in occupation 00613 INT32 *occupation; //of pixel coords 00614 INT32 max_y; //in block 00615 INT32 min_y; 00616 INT32 line_index; //of scan line 00617 INT32 line_count; //no of scan lines 00618 INT32 distance; //to drop-out 00619 INT32 xleft; //of block 00620 INT32 ybottom; //of block 00621 TO_ROW *row; //current row 00622 TO_ROW_IT row_it = block->get_rows (); 00623 BLOBNBOX_IT blob_it = &block->blobs; 00624 00625 #ifdef TEXT_VERBOSE 00626 // gets a 'n', see ccmain/tesseractmain.dox 00627 cprintf("n"); 00628 #endif 00629 if (row_it.length () == 0) 00630 return; //empty block 00631 block_box = deskew_block_coords (block, gradient); 00632 xleft = block->block->bounding_box ().left (); 00633 ybottom = block->block->bounding_box ().bottom (); 00634 min_y = block_box.bottom () - 1; 00635 max_y = block_box.top () + 1; 00636 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00637 line_index = (INT32) floor (row_it.data ()->intercept ()); 00638 if (line_index <= min_y) 00639 min_y = line_index - 1; 00640 if (line_index >= max_y) 00641 max_y = line_index + 1; 00642 } 00643 line_count = max_y - min_y + 1; 00644 if (line_count <= 0) 00645 return; //empty block 00646 deltas = (INT32 *) alloc_mem (line_count * sizeof (INT32)); 00647 occupation = (INT32 *) alloc_mem (line_count * sizeof (INT32)); 00648 if (deltas == NULL || occupation == NULL) 00649 MEMORY_OUT.error ("compute_line_spacing", ABORT, NULL); 00650 00651 compute_line_occupation(block, gradient, min_y, max_y, occupation, deltas); 00652 compute_occupation_threshold ((INT32) 00653 ceil (block->line_spacing * 00654 (textord_merge_desc + 00655 textord_merge_asc)), 00656 (INT32) ceil (block->line_spacing * 00657 (textord_merge_x + 00658 textord_merge_asc)), 00659 max_y - min_y + 1, occupation, deltas); 00660 #ifndef GRAPHICS_DISABLED 00661 if (testing_on) { 00662 draw_occupation(xleft, ybottom, min_y, max_y, occupation, 
deltas); 00663 } 00664 #endif 00665 compute_dropout_distances(occupation, deltas, line_count); 00666 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00667 row = row_it.data (); 00668 line_index = (INT32) floor (row->intercept ()); 00669 distance = deltas[line_index - min_y]; 00670 if (find_best_dropout_row (row, distance, block->line_spacing / 2, 00671 line_index, &row_it, testing_on)) { 00672 #ifndef GRAPHICS_DISABLED 00673 if (testing_on) 00674 plot_parallel_row(row, gradient, block_edge, WHITE, rotation); 00675 #endif 00676 blob_it.add_list_after (row_it.data ()->blob_list ()); 00677 delete row_it.extract (); //too far away 00678 } 00679 } 00680 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00681 blob_it.add_list_after (row_it.data ()->blob_list ()); 00682 } 00683 00684 free_mem(deltas); 00685 free_mem(occupation); 00686 }
Block box.
Compute the bounding box of all the blobs in the block if they were deskewed without actually doing it.
Definition at line 777 of file makerow.cpp.
References TO_ROW::blob_list(), BOX::rotate(), and rotation.
Referenced by delete_non_dropout_rows().
00780 { 00781 BOX result; //block bounds 00782 BOX blob_box; //of block 00783 FCOORD rotation; //deskew vector 00784 float length; //of gradient vector 00785 TO_ROW_IT row_it = block->get_rows (); 00786 TO_ROW *row; //current row 00787 BLOBNBOX *blob; //current blob 00788 BLOBNBOX_IT blob_it; //iterator 00789 00790 length = sqrt (gradient * gradient + 1); 00791 rotation = FCOORD (1 / length, -gradient / length); 00792 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00793 row = row_it.data (); 00794 blob_it.set_to_list (row->blob_list ()); 00795 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); 00796 blob_it.forward ()) { 00797 blob = blob_it.data (); 00798 blob_box = blob->bounding_box (); 00799 blob_box.rotate (rotation);//de-skew it 00800 result += blob_box; 00801 } 00802 } 00803 return result; 00804 }
Return true if the dot looks like it is part of 'i'.
Doesn't work for any other diacritical.
To be considered part of an 'i' it:
Definition at line 360 of file makerow.cpp.
References BOX::bottom(), BOX::height(), BOX::left(), TO_ROW::line_c(), TO_ROW::line_m(), MAX, MIN, C_BLOB::out_list(), BOX::right(), BOX::top(), BOX::width(), and ICOORD::y().
Referenced by vigorous_noise_removal().
00360 { 00361 const BOX& ibox = i->bounding_box(); 00362 const BOX& dotbox = dot->bounding_box(); 00363 00364 // Must overlap horizontally by enough and be high enough. 00365 int overlap = MIN(dotbox.right(), ibox.right()) - 00366 MAX(dotbox.left(), ibox.left()); 00367 if (ibox.height() <= 2 * dotbox.height() || 00368 (overlap * 2 < ibox.width() && overlap < dotbox.width())) 00369 return false; 00370 00371 // The i or ! must be tall and thin to be good. 00372 if (ibox.height() > ibox.width() * 2) 00373 return true; 00374 00375 // It might still be tall and thin, but it might be joined to something. 00376 // So search the outline for a piece of large height close to the edges 00377 // of the dot. 00378 const double kHeightFraction = 0.6; 00379 double target_height = MIN(dotbox.bottom(), ibox.top()); 00380 target_height -= row->line_m()*dotbox.left() + row->line_c(); 00381 target_height *= kHeightFraction; 00382 int left_min = dotbox.left() - dotbox.width(); 00383 int middle = (dotbox.left() + dotbox.right())/2; 00384 int right_max = dotbox.right() + dotbox.width(); 00385 int left_miny = 0; 00386 int left_maxy = 0; 00387 int right_miny = 0; 00388 int right_maxy = 0; 00389 bool found_left = false; 00390 bool found_right = false; 00391 bool in_left = false; 00392 bool in_right = false; 00393 C_BLOB* blob = i->cblob(); 00394 C_OUTLINE_IT o_it = blob->out_list(); 00395 for (o_it.mark_cycle_pt(); !o_it.cycled_list(); o_it.forward()) { 00396 C_OUTLINE* outline = o_it.data(); 00397 int length = outline->pathlength(); 00398 ICOORD pos = outline->start_pos(); 00399 for (int step = 0; step < length; pos += outline->step(step++)) { 00400 int x = pos.x(); 00401 int y = pos.y(); 00402 if (x >= left_min && x < middle && !found_left) { 00403 // We are in the left part so find min and max y. 
00404 if (in_left) { 00405 if (y > left_maxy) left_maxy = y; 00406 if (y < left_miny) left_miny = y; 00407 } else { 00408 left_maxy = left_miny = y; 00409 in_left = true; 00410 } 00411 } else if (in_left) { 00412 // We just left the left so look for size. 00413 if (left_maxy - left_miny > target_height) { 00414 if (found_right) 00415 return true; 00416 found_left = true; 00417 } 00418 in_left = false; 00419 } 00420 if (x <= right_max && x > middle && !found_right) { 00421 // We are in the right part so find min and max y. 00422 if (in_right) { 00423 if (y > right_maxy) right_maxy = y; 00424 if (y < right_miny) right_miny = y; 00425 } else { 00426 right_maxy = right_miny = y; 00427 in_right = true; 00428 } 00429 } else if (in_right) { 00430 // We just left the right so look for size. 00431 if (right_maxy - right_miny > target_height) { 00432 if (found_left) 00433 return true; 00434 found_right = true; 00435 } 00436 in_right = false; 00437 } 00438 } 00439 } 00440 return false; 00441 }
void expand_rows | ( | ICOORD | page_tr, | |
TO_BLOCK * | block, | |||
float | gradient, | |||
FCOORD | rotation, | |||
INT32 | block_edge, | |||
BOOL8 | testing_on | |||
) |
Find lines.
Expand each row to the least of its allowed size and touching its neighbours. If the expansion would entirely swallow a neighbouring row then do so.
It seems to handle testing/random/falling_y_position.tif ok but not testing/random/angled_test4.tif - is this function responsible for the former working?
Definition at line 1015 of file makerow.cpp.
References adjust_row_limits(), assign_blobs_to_rows(), TO_ROW::blob_list(), compute_row_stats(), cprintf(), create_to_win(), FALSE, fit_parallel_rows(), TO_ROW::intercept(), TO_ROW::max_y(), TO_ROW::min_y(), NO_WINDOW, plot_parallel_row(), rotation, TO_ROW::set_limits(), to_win, TRUE, and WHITE.
Referenced by cleanup_rows().
01022 { 01023 BOOL8 swallowed_row; //eaten a neighbour 01024 float y_max, y_min; //new row limits 01025 float y_bottom, y_top; //allowed limits 01026 TO_ROW *test_row; //next row 01027 TO_ROW *row; //current row 01028 //iterators 01029 BLOBNBOX_IT blob_it = &block->blobs; 01030 TO_ROW_IT row_it = block->get_rows (); 01031 01032 #ifdef TEXT_VERBOSE 01033 // gets a 'x', see ccmain/tesseractmain.dox 01034 cprintf("x"); 01035 #endif 01036 #ifndef GRAPHICS_DISABLED 01037 if (textord_show_expanded_rows && testing_on) { 01038 if (to_win == NO_WINDOW) 01039 create_to_win(page_tr); 01040 } 01041 #endif 01042 01043 adjust_row_limits(block); //shift min,max. 01044 if (textord_new_initial_xheight) { 01045 if (block->get_rows ()->length () == 0) 01046 return; 01047 compute_row_stats(block, textord_show_expanded_rows &&testing_on); 01048 } 01049 assign_blobs_to_rows (block, &gradient, 4, TRUE, FALSE, FALSE); 01050 //get real membership 01051 if (block->get_rows ()->length () == 0) 01052 return; 01053 fit_parallel_rows(block, 01054 gradient, 01055 rotation, 01056 block_edge, 01057 textord_show_expanded_rows &&testing_on); 01058 if (!textord_new_initial_xheight) 01059 compute_row_stats(block, textord_show_expanded_rows &&testing_on); 01060 row_it.move_to_last (); 01061 do { 01062 row = row_it.data (); 01063 y_max = row->max_y (); //get current limits 01064 y_min = row->min_y (); 01065 y_bottom = row->intercept () - block->line_size * textord_merge_desc; 01066 y_top = row->intercept () + block->line_size 01067 * (textord_merge_x + textord_merge_asc); 01068 if (y_min > y_bottom) { //expansion allowed 01069 //expandable 01070 swallowed_row = TRUE; 01071 while (swallowed_row && !row_it.at_last ()) { 01072 swallowed_row = FALSE; 01073 //get next one 01074 test_row = row_it.data_relative (1); 01075 //overlaps space 01076 if (test_row->max_y () > y_bottom) { 01077 if (test_row->min_y () > y_bottom) { 01078 row_it.forward (); 01079 #ifndef GRAPHICS_DISABLED 01080 if 
(textord_show_expanded_rows && testing_on) 01081 plot_parallel_row(test_row, 01082 gradient, 01083 block_edge, 01084 WHITE, 01085 rotation); 01086 #endif 01087 blob_it.set_to_list (row->blob_list ()); 01088 blob_it.add_list_after (test_row->blob_list ()); 01089 //swallow complete row 01090 delete row_it.extract (); 01091 row_it.backward (); 01092 swallowed_row = TRUE; 01093 } 01094 else if (test_row->max_y () < y_min) 01095 //shorter limit 01096 y_bottom = test_row->max_y (); 01097 else 01098 y_bottom = y_min; //can't expand it 01099 } 01100 } 01101 y_min = y_bottom; //expand it 01102 } 01103 if (y_max < y_top) { //expansion allowed 01104 swallowed_row = TRUE; 01105 while (swallowed_row && !row_it.at_first ()) { 01106 swallowed_row = FALSE; 01107 //get one above 01108 test_row = row_it.data_relative (-1); 01109 if (test_row->min_y () < y_top) { 01110 if (test_row->max_y () < y_top) { 01111 row_it.backward (); 01112 blob_it.set_to_list (row->blob_list ()); 01113 #ifndef GRAPHICS_DISABLED 01114 if (textord_show_expanded_rows && testing_on) 01115 plot_parallel_row(test_row, 01116 gradient, 01117 block_edge, 01118 WHITE, 01119 rotation); 01120 #endif 01121 blob_it.add_list_after (test_row->blob_list ()); 01122 //swallow complete row 01123 delete row_it.extract (); 01124 row_it.forward (); 01125 swallowed_row = TRUE; 01126 } 01127 else if (test_row->min_y () < y_max) 01128 //shorter limit 01129 y_top = test_row->min_y (); 01130 else 01131 y_top = y_max; //can't expand it 01132 01133 } 01134 } 01135 y_max = y_top; 01136 } 01137 //new limits 01138 row->set_limits (y_min, y_max); 01139 row_it.backward (); 01140 } 01141 while (!row_it.at_last ()); 01142 }
BOOL8 find_best_dropout_row | ( | TO_ROW * | row, | |
INT32 | distance, | |||
float | dist_limit, | |||
INT32 | line_index, | |||
TO_ROW_IT * | row_it, | |||
BOOL8 | testing_on | |||
) |
Find neighbours.
Definition at line 697 of file makerow.cpp.
References TO_ROW::believability(), FALSE, TO_ROW::intercept(), TO_ROW::parallel_c(), tprintf(), and TRUE.
Referenced by delete_non_dropout_rows().
00704 { 00705 INT32 next_index; //of neigbouring row 00706 INT32 row_offset; //from current row 00707 INT32 abs_dist; //absolute distance 00708 INT8 row_inc; //increment to row_index 00709 TO_ROW *next_row; //nextious row 00710 00711 if (testing_on) 00712 tprintf ("Row at %g(%g), dropout dist=%d,", 00713 row->intercept (), row->parallel_c (), distance); 00714 if (distance < 0) { 00715 row_inc = 1; 00716 abs_dist = -distance; 00717 } 00718 else { 00719 row_inc = -1; 00720 abs_dist = distance; 00721 } 00722 if (abs_dist > dist_limit) { 00723 if (testing_on) { 00724 tprintf (" too far - deleting\n"); 00725 } 00726 return TRUE; 00727 } 00728 if (distance < 0 && !row_it->at_last () 00729 || distance >= 0 && !row_it->at_first ()) { 00730 row_offset = row_inc; 00731 do { 00732 next_row = row_it->data_relative (row_offset); 00733 next_index = (INT32) floor (next_row->intercept ()); 00734 if (distance < 0 00735 && next_index < line_index 00736 && next_index > line_index + distance + distance 00737 || distance >= 0 00738 && next_index > line_index 00739 && next_index < line_index + distance + distance) { 00740 if (testing_on) { 00741 tprintf (" nearer neighbour (%d) at %g\n", 00742 line_index + distance - next_index, 00743 next_row->intercept ()); 00744 } 00745 return TRUE; //other is nearer 00746 } 00747 else if (next_index == line_index 00748 || next_index == line_index + distance + distance) { 00749 if (row->believability () <= next_row->believability ()) { 00750 if (testing_on) { 00751 tprintf (" equal but more believable at %g (%g/%g)\n", 00752 next_row->intercept (), 00753 row->believability (), 00754 next_row->believability ()); 00755 } 00756 return TRUE; //other is more believable 00757 } 00758 } 00759 row_offset += row_inc; 00760 } 00761 while ((next_index == line_index 00762 || next_index == line_index + distance + distance) 00763 && row_offset < row_it->length ()); 00764 if (testing_on) 00765 tprintf (" keeping\n"); 00766 } 00767 return FALSE; 00768 }
void fit_lms_line | ( | TO_ROW * | row | ) |
Fit line.
Fit an LMS line to a row.
Definition at line 227 of file makerow.cpp.
References TO_ROW::blob_list(), BOX::bottom(), BOX::left(), BOX::right(), and TO_ROW::set_line().
Referenced by make_initial_textrows().
00229 { 00230 float m, c; //fitted line 00231 BOX box; //blob box 00232 LMS lms (row->blob_list ()->length ()); 00233 //blobs 00234 BLOBNBOX_IT blob_it = row->blob_list (); 00235 00236 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { 00237 box = blob_it.data ()->bounding_box (); 00238 lms.add (FCOORD ((box.left () + box.right ()) / 2.0, box.bottom ())); 00239 } 00240 lms.fit (m, c); 00241 row->set_line (m, c, lms.error ()); 00242 }
void fit_parallel_lms | ( | float | gradient, | |
TO_ROW * | row | |||
) |
Fit parallel line.
Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.
Definition at line 1920 of file makerow.cpp.
References TO_ROW::blob_list(), BOX::bottom(), cprintf(), BOX::left(), BOX::right(), TO_ROW::set_line(), and TO_ROW::set_parallel_line().
Referenced by fit_parallel_rows().
01923 { 01924 float c; //fitted line 01925 int blobcount; //no of blobs 01926 BOX box; //blob box 01927 LMS lms (row->blob_list ()->length ()); 01928 //blobs 01929 BLOBNBOX_IT blob_it = row->blob_list (); 01930 01931 #ifdef TEXT_VERBOSE 01932 // gets a 'm', see ccmain/tesseractmain.dox 01933 cprintf("m"); 01934 #endif 01935 blobcount = 0; 01936 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { 01937 if (!blob_it.data ()->joined_to_prev ()) { 01938 box = blob_it.data ()->bounding_box (); 01939 lms. 01940 add (FCOORD ((box.left () + box.right ()) / 2.0, box.bottom ())); 01941 blobcount++; 01942 } 01943 } 01944 lms.constrained_fit (gradient, c); 01945 row->set_parallel_line (gradient, c, lms.error ()); 01946 if (textord_straight_baselines && blobcount > lms_line_trials) { 01947 lms.fit (gradient, c); 01948 } 01949 //set the other too 01950 row->set_line (gradient, c, lms.error ()); 01951 }
void fit_parallel_rows | ( | TO_BLOCK * | block, | |
float | gradient, | |||
FCOORD | rotation, | |||
INT32 | block_edge, | |||
BOOL8 | testing_on | |||
) |
Find lines.
Re-fit the rows in the block to the given gradient.
Definition at line 1879 of file makerow.cpp.
References fit_parallel_lms(), MAGENTA, plot_parallel_row(), RED, rotation, and row_y_order().
Referenced by cleanup_rows(), and expand_rows().
01885 { 01886 #ifndef GRAPHICS_DISABLED 01887 COLOUR colour; //of row 01888 #endif 01889 TO_ROW_IT row_it = block->get_rows (); 01890 01891 row_it.move_to_first (); 01892 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 01893 if (row_it.data ()->blob_list ()->empty ()) 01894 delete row_it.extract (); //nothing in it 01895 else 01896 fit_parallel_lms (gradient, row_it.data ()); 01897 } 01898 #ifndef GRAPHICS_DISABLED 01899 if (testing_on) { 01900 colour = RED; 01901 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 01902 plot_parallel_row (row_it.data (), gradient, 01903 block_edge, colour, rotation); 01904 colour = (COLOUR) (colour + 1); 01905 if (colour > MAGENTA) 01906 colour = RED; 01907 } 01908 } 01909 #endif 01910 row_it.sort (row_y_order); //may have gone out of order 01911 }
double* linear_spline_baseline | ( | TO_ROW * | row, | |
TO_BLOCK * | block, | |||
INT32 & | segments, | |||
INT32 | xstarts[] | |||
) |
Split baseline.
Definition at line 2183 of file makerow.cpp.
References alloc_mem(), TO_ROW::blob_list(), BOX::bottom(), box_next_pre_chopped(), BOX::left(), BOX::right(), and tprintf().
Referenced by make_baseline_spline().
02188 { 02189 int blobcount; //no of blobs 02190 int blobindex; //current blob 02191 int index1, index2; //blob numbers 02192 int blobs_per_segment; //blobs in each 02193 BOX box; //blob box 02194 BOX new_box; //new_it box 02195 float middle; //xcentre of blob 02196 //blobs 02197 BLOBNBOX_IT blob_it = row->blob_list (); 02198 BLOBNBOX_IT new_it = blob_it; //front end 02199 float b, c; //fitted curve 02200 LMS lms (row->blob_list ()->length ()); 02201 double *coeffs; //quadratic coeffs 02202 INT32 segment; //current segment 02203 02204 box = box_next_pre_chopped (&blob_it); 02205 xstarts[0] = box.left (); 02206 blobcount = 1; 02207 while (!blob_it.at_first ()) { 02208 blobcount++; 02209 box = box_next_pre_chopped (&blob_it); 02210 } 02211 segments = blobcount / textord_spline_medianwin; 02212 if (segments < 1) 02213 segments = 1; 02214 blobs_per_segment = blobcount / segments; 02215 coeffs = (double *) alloc_mem (segments * 3 * sizeof (double)); 02216 if (textord_oldbl_debug) 02217 tprintf 02218 ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n", 02219 blobcount, box.left (), box.bottom (), segments, blobs_per_segment); 02220 segment = 1; 02221 for (index2 = 0; index2 < blobs_per_segment / 2; index2++) 02222 box_next_pre_chopped(&new_it); 02223 index1 = 0; 02224 blobindex = index2; 02225 do { 02226 blobindex += blobs_per_segment; 02227 lms.clear (); 02228 while (index1 < blobindex || segment == segments && index1 < blobcount) { 02229 box = box_next_pre_chopped (&blob_it); 02230 middle = (box.left () + box.right ()) / 2.0; 02231 lms.add (FCOORD (middle, box.bottom ())); 02232 index1++; 02233 if (index1 == blobindex - blobs_per_segment / 2 02234 || index1 == blobcount - 1) { 02235 xstarts[segment] = box.left (); 02236 } 02237 } 02238 lms.fit (b, c); 02239 coeffs[segment * 3 - 3] = 0; 02240 coeffs[segment * 3 - 2] = b; 02241 coeffs[segment * 3 - 1] = c; 02242 segment++; 02243 if (segment > segments) 02244 break; 02245 02246 blobindex += 
blobs_per_segment; 02247 lms.clear (); 02248 while (index2 < blobindex || segment == segments && index2 < blobcount) { 02249 new_box = box_next_pre_chopped (&new_it); 02250 middle = (new_box.left () + new_box.right ()) / 2.0; 02251 lms.add (FCOORD (middle, new_box.bottom ())); 02252 index2++; 02253 if (index2 == blobindex - blobs_per_segment / 2 02254 || index2 == blobcount - 1) { 02255 xstarts[segment] = new_box.left (); 02256 } 02257 } 02258 lms.fit (b, c); 02259 coeffs[segment * 3 - 3] = 0; 02260 coeffs[segment * 3 - 2] = b; 02261 coeffs[segment * 3 - 1] = c; 02262 segment++; 02263 } 02264 while (segment <= segments); 02265 return coeffs; 02266 }
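Fit a spline baseline to a row.
Segment the row and fit a quadratic or linear piece to each segment, or fall back to the row's already fitted straight line, then store the result as the row's baseline.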
Definition at line 2011 of file makerow.cpp.
References alloc_mem(), TO_ROW::blob_list(), BOX::bottom(), free_mem(), BOX::left(), linear_spline_baseline(), BOX::right(), and segment_baseline().
Referenced by make_spline_rows().
02014 { 02015 float b, c; //fitted curve 02016 float middle; //x middle of blob 02017 BOX box; //blob box 02018 LMS lms (row->blob_list ()->length ()); 02019 //blobs 02020 BLOBNBOX_IT blob_it = row->blob_list (); 02021 INT32 *xstarts; //spline boundaries 02022 double *coeffs; //quadratic coeffs 02023 INT32 segments; //no of segments 02024 INT32 segment; //current segment 02025 02026 xstarts = 02027 (INT32 *) alloc_mem ((row->blob_list ()->length () + 1) * sizeof (INT32)); 02028 if (segment_baseline (row, block, segments, xstarts) 02029 && !textord_straight_baselines && !textord_parallel_baselines) { 02030 if (textord_quadratic_baselines) { 02031 coeffs = (double *) alloc_mem (segments * 3 * sizeof (double)); 02032 for (segment = 0; segment < segments; segment++) { 02033 lms.clear (); 02034 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); 02035 blob_it.forward ()) { 02036 if (!blob_it.data ()->joined_to_prev ()) { 02037 box = blob_it.data ()->bounding_box (); 02038 middle = (box.left () + box.right ()) / 2.0; 02039 if (middle >= xstarts[segment] 02040 && middle < xstarts[segment + 1]) { 02041 lms.add (FCOORD (middle, box.bottom ())); 02042 } 02043 } 02044 } 02045 if (textord_quadratic_baselines) 02046 lms.fit_quadratic (block->line_size * 02047 textord_spline_outlier_fraction, 02048 coeffs[segment * 3], b, c); 02049 else { 02050 lms.fit (b, c); 02051 coeffs[segment * 3] = 0; 02052 } 02053 coeffs[segment * 3 + 1] = b; 02054 coeffs[segment * 3 + 2] = c; 02055 } 02056 } 02057 else 02058 coeffs = linear_spline_baseline (row, block, segments, xstarts); 02059 } 02060 else { 02061 xstarts[1] = xstarts[segments]; 02062 segments = 1; 02063 coeffs = (double *) alloc_mem (3 * sizeof (double)); 02064 coeffs[0] = 0; 02065 coeffs[1] = row->line_m (); 02066 coeffs[2] = row->line_c (); 02067 } 02068 row->baseline = QSPLINE (segments, xstarts, coeffs); 02069 free_mem(coeffs); 02070 free_mem(xstarts); 02071 }
Find lines.
Arrange the good blobs into rows of text.
Definition at line 187 of file makerow.cpp.
References assign_blobs_to_rows(), create_to_win(), fit_lms_line(), MAGENTA, NO_WINDOW, NULL, plot_to_row(), RED, rotation, to_win, and TRUE.
Referenced by make_rows().
00192 { 00193 TO_ROW_IT row_it = block->get_rows (); 00194 00195 #ifndef GRAPHICS_DISABLED 00196 COLOUR colour; //of row 00197 00198 if (textord_show_initial_rows && testing_on) { 00199 if (to_win == NO_WINDOW) 00200 create_to_win(page_tr); 00201 } 00202 #endif 00203 //guess skew 00204 assign_blobs_to_rows (block, NULL, 0, TRUE, TRUE, textord_show_initial_rows && testing_on); 00205 row_it.move_to_first (); 00206 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) 00207 fit_lms_line (row_it.data ()); 00208 #ifndef GRAPHICS_DISABLED 00209 if (textord_show_initial_rows && testing_on) { 00210 colour = RED; 00211 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00212 plot_to_row (row_it.data (), colour, rotation); 00213 colour = (COLOUR) (colour + 1); 00214 if (colour > MAGENTA) 00215 colour = RED; 00216 } 00217 } 00218 #endif 00219 }
float make_rows | ( | ICOORD | page_tr, | |
BLOCK_LIST * | blocks, | |||
TO_BLOCK_LIST * | land_blocks, | |||
TO_BLOCK_LIST * | port_blocks | |||
) |
Make rows.
Arrange the blobs into rows.
Definition at line 130 of file makerow.cpp.
References cleanup_rows(), compute_page_skew(), cprintf(), f, and make_initial_textrows().
Referenced by textord_page().
00135 { 00136 float port_m; //global skew 00137 float port_err; //global noise 00138 //float land_m; //global skew 00139 //float land_err; //global noise 00140 TO_BLOCK_IT block_it; //iterator 00141 00142 //don't do landscape for now 00143 // block_it.set_to_list(land_blocks); 00144 // for (block_it.mark_cycle_pt();!block_it.cycled_list();block_it.forward()) 00145 // make_initial_textrows(page_tr,block_it.data(),FCOORD(0,-1), 00146 // (BOOL8)textord_test_landscape); 00147 00148 #ifdef TEXT_VERBOSE 00149 // gets a 'r', see ccmain/tesseractmain.dox 00150 cprintf("r\n"); 00151 #endif 00152 block_it.set_to_list (port_blocks); 00153 for (block_it.mark_cycle_pt (); !block_it.cycled_list (); 00154 block_it.forward ()) 00155 make_initial_textrows (page_tr, block_it.data (), FCOORD (1.0f, 0.0f), 00156 !(BOOL8) textord_test_landscape); 00157 //compute globally 00158 compute_page_skew(port_blocks, port_m, port_err); 00159 // compute_page_skew(land_blocks,land_m,land_err); //compute globally 00160 // tprintf("Portrait skew gradient=%g, error=%g.\n", 00161 // port_m,port_err); 00162 // tprintf("Landscape skew gradient=%g, error=%g.\n", 00163 // land_m,land_err); 00164 block_it.set_to_list (port_blocks); 00165 for (block_it.mark_cycle_pt (); !block_it.cycled_list (); 00166 block_it.forward ()) { 00167 cleanup_rows (page_tr, block_it.data (), port_m, FCOORD (1.0f, 0.0f), 00168 block_it.data ()->block->bounding_box ().left (), 00169 !(BOOL8) textord_test_landscape); 00170 } 00171 block_it.set_to_list (land_blocks); 00172 // for (block_it.mark_cycle_pt();!block_it.cycled_list();block_it.forward()) 00173 // { 00174 // cleanup_rows(page_tr,block_it.data(),land_m,FCOORD(0,-1), 00175 // -block_it.data()->block->bounding_box().top(), 00176 // (BOOL8)textord_test_landscape); 00177 // } 00178 return port_m; //global skew 00179 }
void make_spline_rows | ( | TO_BLOCK * | block, | |
float | gradient, | |||
FCOORD | rotation, | |||
INT32 | block_edge, | |||
BOOL8 | testing_on | |||
) |
Find lines.
Re-fit the rows in the block to the given gradient.
Definition at line 1959 of file makerow.cpp.
References MAGENTA, make_baseline_spline(), make_old_baselines(), RED, and to_win.
Referenced by cleanup_rows().
01965 { 01966 COLOUR colour; //of row 01967 TO_ROW_IT row_it = block->get_rows (); 01968 01969 row_it.move_to_first (); 01970 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 01971 if (row_it.data ()->blob_list ()->empty ()) 01972 delete row_it.extract (); //nothing in it 01973 else 01974 make_baseline_spline (row_it.data (), block); 01975 } 01976 if (textord_old_baselines) { 01977 #ifndef GRAPHICS_DISABLED 01978 if (testing_on) { 01979 colour = RED; 01980 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); 01981 row_it.forward ()) { 01982 row_it.data ()->baseline.plot (to_win, colour); 01983 colour = (COLOUR) (colour + 1); 01984 if (colour > MAGENTA) 01985 colour = RED; 01986 } 01987 } 01988 #endif 01989 make_old_baselines(block, testing_on); 01990 } 01991 #ifndef GRAPHICS_DISABLED 01992 if (testing_on) { 01993 colour = RED; 01994 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 01995 row_it.data ()->baseline.plot (to_win, colour); 01996 colour = (COLOUR) (colour + 1); 01997 if (colour > MAGENTA) 01998 colour = RED; 01999 } 02000 } 02001 #endif 02002 }
float median_block_xheight | ( | TO_BLOCK * | block, | |
float | gradient | |||
) |
Find lines.
Compute the linespacing and offset.
Definition at line 1353 of file makerow.cpp.
References ABORT, alloc_mem(), ASSERT_HOST, blob_count, TO_ROW::blob_list(), choose_nth_item(), ERRCODE::error(), f, free_mem(), MEMORY_OUT, NULL, and TO_ROW::parallel_c().
Referenced by compute_block_xheight().
01356 { 01357 TO_ROW *row; //current row 01358 float result; //output size 01359 float xcentre; //centre of blob 01360 TO_ROW_IT row_it = block->get_rows (); 01361 BLOBNBOX_IT blob_it; 01362 BLOBNBOX *blob; //current blob 01363 float *heights; //for choose nth 01364 INT32 blob_count; //blobs in block 01365 INT32 blob_index; //current blob 01366 01367 blob_count = 0; 01368 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) 01369 blob_count += row_it.data ()->blob_list ()->length (); 01370 heights = (float *) alloc_mem (blob_count * sizeof (float)); 01371 if (heights == NULL) 01372 MEMORY_OUT.error ("compute_row_stats", ABORT, NULL); 01373 01374 blob_index = 0; 01375 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 01376 row = row_it.data (); 01377 blob_it.set_to_list (row->blob_list ()); 01378 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); 01379 blob_it.forward ()) { 01380 blob = blob_it.data (); 01381 if (!blob->joined_to_prev ()) { 01382 xcentre = 01383 (blob->bounding_box ().left () + 01384 blob->bounding_box ().right ()) / 2.0f; 01385 heights[blob_index] = 01386 blob->bounding_box ().top () - gradient * xcentre - 01387 row->parallel_c (); 01388 if (heights[blob_index] > 0) 01389 blob_index++; 01390 } 01391 } 01392 } 01393 ASSERT_HOST (blob_index > 0); //dont expect 0 01394 blob_count = blob_index; 01395 blob_index = choose_nth_item (blob_count / 2, heights, blob_count); 01396 result = heights[blob_index]; 01397 free_mem(heights); 01398 return result; 01399 }
/**
 * Find best row: pick the row that overlaps the y-range [bottom, top] most,
 * starting at the row under the iterator and walking forward.
 *
 * The overlap with the starting row is computed first.  The iterator is then
 * advanced while the row it reaches still intersects [bottom, top].  For each
 * such row:
 *  - if the union of the current best row's limits and this row's limits is
 *    no taller than rowsize, the best row's blobs are appended to this row,
 *    the list is sorted with blob_x_order, one element before this row is
 *    extracted and deleted, and this row is forced to become the best row;
 *  - if both the best overlap so far and this row's overlap are at least
 *    rowsize - 1, the result becomes REJECT;
 *  - if this row's overlap is larger than the best so far, it becomes the
 *    best row.
 * Finally the iterator is moved back until it points at the best row, and an
 * ASSIGN result is changed to NEW_ROW when the overlap is too small (see the
 * last test in the body).
 *
 * @param row_it        Iterator; on return it points at the chosen row.
 * @param best_row      Output row (results returned are for this row).
 * @param top           Top of blob.
 * @param bottom        Bottom of blob.
 * @param rowsize       Max row size.
 * @param testing_blob  TRUE or FALSE, test stuff: enables tprintf tracing.
 * @return              ASSIGN, REJECT or NEW_ROW.
 *
 * NOTE(review): the merge branch steps back exactly one element before
 * calling extract().  That removes `row` only if `row` is the element
 * immediately before test_row; if `row` was kept from an earlier pass of the
 * loop, a different element appears to be deleted.  Confirm against callers.
 *
 * Doxygen page: definition at line 2482 of file makerow.cpp.
 * Referenced by assign_blobs_to_rows(), and restore_underlined_blobs().
 */
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it,
                                   TO_ROW *&best_row,
                                   float top,
                                   float bottom,
                                   float rowsize,
                                   BOOL8 testing_blob) {
  OVERLAP_STATE result;          //result of tests
  float overlap;                 //of blob & row
  float bestover;                //best overlap found so far
  float merge_top, merge_bottom; //size of merged row
  ICOORD testpt;                 //testing only (not used in this body)
  TO_ROW *row;                   //current best row
  TO_ROW *test_row;              //for multiple overlaps
  BLOBNBOX_IT blob_it;           //for merging rows

  result = ASSIGN;
  row = row_it->data ();
  //Overlap = blob height minus whatever sticks out above or below the row.
  bestover = top - bottom;
  if (top > row->max_y ())
    bestover -= top - row->max_y ();
  if (bottom < row->min_y ())
    //compute overlap
    bestover -= row->min_y () - bottom;
  if (testing_blob) {
    tprintf ("Test blob y=(%g,%g), row=(%f,%f), overlap=%f\n",
             bottom, top, row->min_y (), row->max_y (), bestover);
  }
  test_row = row;
  do {
    if (!row_it->at_last ()) {
      row_it->forward ();
      test_row = row_it->data ();
      //Only consider rows whose limits intersect the blob's y-range.
      if (test_row->min_y () <= top && test_row->max_y () >= bottom) {
        //Limits the two rows would have if merged.
        merge_top =
          test_row->max_y () >
          row->max_y ()? test_row->max_y () : row->max_y ();
        merge_bottom =
          test_row->min_y () <
          row->min_y ()? test_row->min_y () : row->min_y ();
        if (merge_top - merge_bottom <= rowsize) {
          if (testing_blob) {
            tprintf ("Merging rows at (%g,%g), (%g,%g)\n",
                     row->min_y (), row->max_y (),
                     test_row->min_y (), test_row->max_y ());
          }
          //test_row absorbs row: widen its limits and take over the blobs.
          test_row->set_limits (merge_bottom, merge_top);
          blob_it.set_to_list (test_row->blob_list ());
          blob_it.add_list_after (row->blob_list ());
          blob_it.sort (blob_x_order);
          //Step back one element, remove and delete it, then return to
          //test_row.
          row_it->backward ();
          delete row_it->extract ();
          row_it->forward ();
          bestover = -1.0f;      //force replacement
        }
        //Same overlap computation as above, now against test_row.
        overlap = top - bottom;
        if (top > test_row->max_y ())
          overlap -= top - test_row->max_y ();
        if (bottom < test_row->min_y ())
          overlap -= test_row->min_y () - bottom;
        //Two rows both overlapping by (almost) a full rowsize.
        if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
          result = REJECT;
        }
        if (overlap > bestover) {
          bestover = overlap;    //find biggest overlap
          row = test_row;
        }
        if (testing_blob) {
          tprintf
            ("Test blob y=(%g,%g), row=(%f,%f), overlap=%f->%f\n",
             bottom, top, test_row->min_y (), test_row->max_y (),
             overlap, bestover);
        }
      }
    }
  }
  //Keep going while there are more rows and the last one tested intersected.
  while (!row_it->at_last ()
         && test_row->min_y () <= top && test_row->max_y () >= bottom);
  while (row_it->data () != row)
    row_it->backward ();         //make it point to row

  //doesn't overlap much
  if (top - bottom - bestover > rowsize * textord_merge_x
      && (!textord_fix_makerow_bug || bestover < rowsize * textord_merge_x)
      && result == ASSIGN)
    result = NEW_ROW;            //doesn't overlap enough
  best_row = row;
  return result;
}
Make rough chars.
Associate overlapping blobs and fake chop wide blobs.
Definition at line 1780 of file makerow.cpp.
References BOX::bottom(), create_to_win(), INT_HOLLOW, interior_style, BOX::left(), MAGENTA, NO_WINDOW, perimeter_color_index, rectangle, RED, BOX::right(), BOX::rotate(), rotation, to_win, BOX::top(), TRUE, and BOX::width().
Referenced by cleanup_rows().
01785 { 01786 #ifndef GRAPHICS_DISABLED 01787 COLOUR colour; //of boxes 01788 #endif 01789 INT16 overlap; //of adjacent boxes 01790 BLOBNBOX *blob; //current blob 01791 BLOBNBOX *nextblob; //next in list 01792 BOX blob_box; 01793 BOX next_box; //next blob 01794 FCOORD blob_rotation; //inverse of rotation 01795 BLOBNBOX_IT blob_it; //iterator 01796 BLOBNBOX_IT start_it; //iterator 01797 TO_ROW_IT row_it = block->get_rows (); 01798 01799 #ifndef GRAPHICS_DISABLED 01800 colour = RED; 01801 #endif 01802 01803 blob_rotation = FCOORD (rotation.x (), -rotation.y ()); 01804 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 01805 //get blobs 01806 blob_it.set_to_list (row_it.data ()->blob_list ()); 01807 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); 01808 blob_it.forward ()) { 01809 blob = blob_it.data (); 01810 blob_box = blob->bounding_box (); 01811 start_it = blob_it; //save start point 01812 // if (testing_on && textord_show_final_blobs) 01813 // { 01814 // tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n", 01815 // blob_box.left(),blob_box.bottom(), 01816 // blob_box.right(),blob_box.top(), 01817 // (void*)blob,blob_it.length()); 01818 // } 01819 do { 01820 if (!blob_it.at_last ()) { 01821 nextblob = blob_it.data_relative (1); 01822 next_box = nextblob->bounding_box (); 01823 overlap = next_box.width (); 01824 if (blob_box.left () > next_box.left ()) 01825 overlap -= blob_box.left () - next_box.left (); 01826 if (blob_box.right () < next_box.right ()) 01827 overlap -= next_box.right () - blob_box.right (); 01828 if (overlap >= next_box.width () / 2 01829 || overlap >= blob_box.width () / 2) { 01830 //merge new blob 01831 blob->merge (nextblob); 01832 //get bigger box 01833 blob_box = blob->bounding_box (); 01834 blob_it.forward (); 01835 } 01836 else 01837 overlap = -1; //no overlap 01838 } 01839 else 01840 overlap = -1; //no overlap 01841 } 01842 while (overlap >= 0); 01843 blob->chop (&start_it, &blob_it, 01844 blob_rotation, 
01845 block->line_size * textord_merge_x * 01846 textord_chop_width); 01847 //attempt chop 01848 } 01849 #ifndef GRAPHICS_DISABLED 01850 if (testing_on && textord_show_final_blobs) { 01851 if (to_win == NO_WINDOW) 01852 create_to_win(page_tr); 01853 perimeter_color_index(to_win, colour); 01854 interior_style(to_win, INT_HOLLOW, TRUE); 01855 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); 01856 blob_it.forward ()) { 01857 blob = blob_it.data (); 01858 blob_box = blob->bounding_box (); 01859 blob_box.rotate (rotation); 01860 if (!blob->joined_to_prev ()) { 01861 rectangle (to_win, blob_box.left (), blob_box.bottom (), 01862 blob_box.right (), blob_box.top ()); 01863 } 01864 } 01865 colour = (COLOUR) (colour + 1); 01866 if (colour > MAGENTA) 01867 colour = RED; 01868 } 01869 #endif 01870 } 01871 }
int row_spacing_order | ( | const void * | item1, | |
const void * | item2 | |||
) |
Sort function.
Qsort style function to compare 2 TO_ROWS based on their spacing value.
Definition at line 2623 of file makerow.cpp.
References TO_ROW::spacing.
Referenced by compute_row_stats().
02625 { 02626 //converted ptr 02627 TO_ROW *row1 = *(TO_ROW **) item1; 02628 //converted ptr 02629 TO_ROW *row2 = *(TO_ROW **) item2; 02630 02631 if (row1->spacing < row2->spacing) 02632 return -1; 02633 else if (row1->spacing > row2->spacing) 02634 return 1; 02635 else 02636 return 0; 02637 }
int row_y_order | ( | const void * | item1, | |
const void * | item2 | |||
) |
Sort function.
Sort function to sort rows in y from page's top.
Definition at line 2601 of file makerow.cpp.
References TO_ROW::parallel_c().
Referenced by fit_parallel_rows().
02603 { 02604 //converted ptr 02605 TO_ROW *row1 = *(TO_ROW **) item1; 02606 //converted ptr 02607 TO_ROW *row2 = *(TO_ROW **) item2; 02608 02609 if (row1->parallel_c () > row2->parallel_c ()) 02610 return -1; 02611 else if (row1->parallel_c () < row2->parallel_c ()) 02612 return 1; 02613 else 02614 return 0; 02615 }
Split baseline.
Definition at line 2083 of file makerow.cpp.
References TO_ROW::blob_list(), BOX::bottom(), box_next_pre_chopped(), FALSE, BOX::left(), TO_ROW::line_c(), TO_ROW::line_m(), BOX::right(), tprintf(), and TRUE.
Referenced by make_baseline_spline().
02088 { 02089 BOOL8 needs_curve; //needs curved line 02090 int blobcount; //no of blobs 02091 int blobindex; //current blob 02092 int last_state; //above, on , below 02093 int state; //of current blob 02094 float yshift; //from baseline 02095 BOX box; //blob box 02096 BOX new_box; //new_it box 02097 float middle; //xcentre of blob 02098 //blobs 02099 BLOBNBOX_IT blob_it = row->blob_list (); 02100 BLOBNBOX_IT new_it = blob_it; //front end 02101 SORTED_FLOATS yshifts; //shifts from baseline 02102 02103 needs_curve = FALSE; 02104 box = box_next_pre_chopped (&blob_it); 02105 xstarts[0] = box.left (); 02106 segments = 1; 02107 blobcount = row->blob_list ()->length (); 02108 if (textord_oldbl_debug) 02109 tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n", 02110 blobcount, box.left (), box.bottom ()); 02111 if (blobcount <= textord_spline_medianwin 02112 || blobcount < textord_spline_minblobs) { 02113 blob_it.move_to_last (); 02114 box = blob_it.data ()->bounding_box (); 02115 xstarts[1] = box.right (); 02116 return FALSE; 02117 } 02118 last_state = 0; 02119 new_it.mark_cycle_pt (); 02120 for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) { 02121 new_box = box_next_pre_chopped (&new_it); 02122 middle = (new_box.left () + new_box.right ()) / 2.0; 02123 yshift = new_box.bottom () - row->line_m () * middle - row->line_c (); 02124 //record shift 02125 yshifts.add (yshift, blobindex); 02126 if (new_it.cycled_list ()) { 02127 xstarts[1] = new_box.right (); 02128 return FALSE; 02129 } 02130 } 02131 for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++) 02132 box = box_next_pre_chopped (&blob_it); 02133 do { 02134 new_box = box_next_pre_chopped (&new_it); 02135 //get middle one 02136 yshift = yshifts[textord_spline_medianwin / 2]; 02137 if (yshift > textord_spline_shift_fraction * block->line_size) 02138 state = 1; 02139 else if (-yshift > textord_spline_shift_fraction * block->line_size) 02140 state = -1; 02141 else 02142 state = 0; 
02143 if (state != 0) 02144 needs_curve = TRUE; 02145 // tprintf("State=%d, prev=%d, shift=%g\n", 02146 // state,last_state,yshift); 02147 if (state != last_state && blobcount > textord_spline_minblobs) { 02148 xstarts[segments++] = box.left (); 02149 blobcount = 0; 02150 } 02151 last_state = state; 02152 yshifts.remove (blobindex - textord_spline_medianwin); 02153 box = box_next_pre_chopped (&blob_it); 02154 middle = (new_box.left () + new_box.right ()) / 2.0; 02155 yshift = new_box.bottom () - row->line_m () * middle - row->line_c (); 02156 yshifts.add (yshift, blobindex); 02157 blobindex++; 02158 blobcount++; 02159 } 02160 while (!new_it.cycled_list ()); 02161 if (blobcount > textord_spline_minblobs || segments == 1) { 02162 xstarts[segments] = new_box.right (); 02163 } 02164 else { 02165 xstarts[--segments] = new_box.right (); 02166 } 02167 if (textord_oldbl_debug) 02168 tprintf ("Made %d segments on row at (%d,%d)\n", 02169 segments, box.right (), box.bottom ()); 02170 return needs_curve; 02171 }
Make rough characters.
Test wide objects for being potential underlines; if so, put them in a separate list in the block.
Definition at line 1679 of file makerow.cpp.
References TO_ROW::blob_list(), BOX::bottom(), C_BLOB::bounding_box(), PBLOB::bounding_box(), cprintf(), crotate_cblob(), TO_ROW::intercept(), BOX::left(), NULL, BOX::right(), rotate_blob(), rotate_cblob(), rotation, test_underline(), BOX::top(), tprintf(), and BOX::width().
Referenced by cleanup_rows().
01684 { 01685 BLOBNBOX *blob; //current blob 01686 PBLOB *poly_blob; //rotated blob 01687 C_BLOB *rotated_blob; //rotated blob 01688 TO_ROW *row; //current row 01689 float length; //of g_vec 01690 BOX blob_box; 01691 FCOORD blob_rotation; //inverse of rotation 01692 FCOORD g_vec; //skew rotation 01693 BLOBNBOX_IT blob_it; //iterator 01694 //iterator 01695 BLOBNBOX_IT under_it = &block->underlines; 01696 TO_ROW_IT row_it = block->get_rows (); 01697 01698 #ifdef TEXT_VERBOSE 01699 // gets a 'u', see ccmain/tesseractmain.dox 01700 cprintf("u"); 01701 #endif 01702 //length of vector 01703 length = sqrt (1 + gradient * gradient); 01704 g_vec = FCOORD (1 / length, -gradient / length); 01705 blob_rotation = FCOORD (rotation.x (), -rotation.y ()); 01706 blob_rotation.rotate (g_vec); //unoding everything 01707 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 01708 row = row_it.data (); 01709 //get blobs 01710 blob_it.set_to_list (row->blob_list ()); 01711 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); 01712 blob_it.forward ()) { 01713 blob = blob_it.data (); 01714 blob_box = blob->bounding_box (); 01715 if (blob_box.width () > block->line_size * textord_underline_width) { 01716 if (textord_cblob_blockocc && blob->cblob () != NULL) { 01717 rotated_blob = crotate_cblob (blob->cblob (), 01718 blob_rotation); 01719 if (test_underline (testing_on && textord_show_final_rows, 01720 rotated_blob, (INT16) row->intercept (), 01721 (INT16) (block->line_size * 01722 (textord_merge_x + 01723 textord_merge_asc / 2.0f)))) { 01724 under_it.add_after_then_move (blob_it.extract ()); 01725 if (testing_on && textord_show_final_rows) { 01726 tprintf ("Underlined blob at (%d,%d)->(%d,%d) ", 01727 rotated_blob->bounding_box ().left (), 01728 rotated_blob->bounding_box ().bottom (), 01729 rotated_blob->bounding_box ().right (), 01730 rotated_blob->bounding_box ().top ()); 01731 tprintf ("(Was (%d,%d)->(%d,%d))\n", 01732 blob_box.left (), blob_box.bottom (), 01733 
blob_box.right (), blob_box.top ()); 01734 } 01735 } 01736 delete rotated_blob; 01737 } 01738 else { 01739 if (blob->blob () != NULL) { 01740 // if (testing_on && textord_show_final_rows) 01741 // tprintf("Rotating by (%g,%g)\n", 01742 // blob_rotation.x(),blob_rotation.y()); 01743 poly_blob = rotate_blob (blob->blob (), blob_rotation); 01744 } 01745 else 01746 poly_blob = rotate_cblob (blob->cblob (), 01747 block->line_size, 01748 blob_rotation); 01749 if (test_underline 01750 (testing_on 01751 && textord_show_final_rows, poly_blob, 01752 row->intercept (), 01753 block->line_size * (textord_merge_x + 01754 textord_merge_asc / 2))) { 01755 if (testing_on && textord_show_final_rows) { 01756 tprintf ("Underlined blob at (%d,%d)->(%d,%d) ", 01757 poly_blob->bounding_box ().left (), 01758 poly_blob->bounding_box ().bottom (), 01759 poly_blob->bounding_box ().right (), 01760 poly_blob->bounding_box ().top ()); 01761 tprintf ("(Was (%d,%d)->(%d,%d))\n", 01762 blob_box.left (), blob_box.bottom (), 01763 blob_box.right (), blob_box.top ()); 01764 } 01765 under_it.add_after_then_move (blob_it.extract ()); 01766 } 01767 delete poly_blob; 01768 } 01769 } 01770 } 01771 } 01772 }
static void vigorous_noise_removal | ( | TO_BLOCK * | block | ) | [static] |
Wipe out tiny blobs (but check for dot above 'i').
Iterate row, estimating xheight and looking for blobs smaller than kNoiseSize * that xheight. If found, first exclude the possibility it's the dot above the letter 'i' and if so, nuke it.
Definition at line 453 of file makerow.cpp.
References STATS::add(), dot_of_i(), BOX::height(), kMinSize, kNoiseSize, STATS::median(), and NULL.
Referenced by cleanup_rows().
00453 { 00454 TO_ROW_IT row_it = block->get_rows (); 00455 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00456 TO_ROW* row = row_it.data(); 00457 BLOBNBOX_IT b_it = row->blob_list(); 00458 // Estimate the xheight on the row. 00459 int max_height = 0; 00460 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { 00461 BLOBNBOX* blob = b_it.data(); 00462 if (blob->bounding_box().height() > max_height) 00463 max_height = blob->bounding_box().height(); 00464 } 00465 STATS hstats(0, max_height + 1); 00466 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { 00467 BLOBNBOX* blob = b_it.data(); 00468 int height = blob->bounding_box().height(); 00469 if (height >= kMinSize) 00470 hstats.add(blob->bounding_box().height(), 1); 00471 } 00472 float xheight = hstats.median(); 00473 // Delete small objects. 00474 BLOBNBOX* prev = NULL; 00475 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { 00476 BLOBNBOX* blob = b_it.data(); 00477 const BOX& box = blob->bounding_box(); 00478 if (box.height() < kNoiseSize * xheight) { 00479 // Small so delete unless it looks like an i dot. 00480 if (prev != NULL) { 00481 if (dot_of_i(blob, prev, row)) 00482 continue; // Looks OK. 00483 } 00484 if (!b_it.at_last()) { 00485 BLOBNBOX* next = b_it.data_relative(1); 00486 if (dot_of_i(blob, next, row)) 00487 continue; // Looks OK. 00488 } 00489 // It might be noise so get rid of it. 00490 if (blob->blob() != NULL) 00491 delete blob->blob(); 00492 if (blob->cblob() != NULL) 00493 delete blob->cblob(); 00494 delete b_it.extract(); 00495 } else { 00496 prev = blob; 00497 } 00498 } 00499 } 00500 }
const int kMinSize = 8 |
Min pixels to be xheight.
Definition at line 346 of file makerow.cpp.
Referenced by vigorous_noise_removal().
const double kNoiseSize = 0.5 |
Fraction of xheight.
Definition at line 344 of file makerow.cpp.
Referenced by vigorous_noise_removal().