textord/makerow.cpp File Reference

#include "mfcpch.h"
#include "stderr.h"
#include "blobbox.h"
#include "lmedsq.h"
#include "statistc.h"
#include "drawtord.h"
#include "blkocc.h"
#include "sortflts.h"
#include "oldbasel.h"
#include "tordmain.h"
#include "underlin.h"
#include "makerow.h"
#include "tprintf.h"

Go to the source code of this file.

Defines

Functions

Variables


Define Documentation

#define EXTERN

Note:
File: makerow.cpp (Formerly makerows.c)
Code to arrange blobs into rows of text.
Author:
Ray Smith
Date:
Sep 21 14:34:48 BST 1992
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.

Definition at line 40 of file makerow.cpp.

#define MAX_HEIGHT_MODES   12

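FIX: What's a mode? (In compute_height_modes() a mode is a height value whose pile count in the heights STATS is among the largest; at most this many are kept.)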

Definition at line 123 of file makerow.cpp.

Referenced by compute_row_xheight().


Function Documentation

void adjust_row_limits ( TO_BLOCK *  block  ) 

Tidy limits.

Change the limits of rows to suit the default fractions (tweaks).

Definition at line 1150 of file makerow.cpp.

References FALSE, TO_ROW::intercept(), TO_ROW::max_y(), TO_ROW::min_y(), TO_ROW::set_limits(), ymax, and ymin.

Referenced by expand_rows().

01152                         {  //reset every row's limits about its intercept using the textord_merge_* fractions
01153   TO_ROW *row;                   //row being adjusted
01154   float size;                    //row extent per unit of merge fraction
01155   float ymax;                    //new upper limit, relative to intercept
01156   float ymin;                    //new lower limit, relative to intercept
01157   TO_ROW_IT row_it = block->get_rows ();
01158 
01159   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01160     row = row_it.data ();
01161     size = row->max_y () - row->min_y ();  //current extent of the row
01162     size /= textord_merge_x + textord_merge_asc + textord_merge_desc;  //divide by the sum of the three fractions
01163     ymax = size * (textord_merge_x + textord_merge_asc);  //x plus ascender share goes above
01164     ymin = -size * textord_merge_desc;  //descender share goes below
01165     row->set_limits (row->intercept () + ymin, row->intercept () + ymax);
01166     row->merged = FALSE;           //clear the row's merged flag
01167   }
01168 }

void assign_blobs_to_rows ( TO_BLOCK *  block,
float *  gradient,
int  pass,
BOOL8  reject_misses,
BOOL8  make_new_rows,
BOOL8  drawing_skew 
)

Find lines.

Make enough rows to allocate all the given blobs to one. If a block skew is given, use that, else attempt to track it.

Definition at line 2275 of file makerow.cpp.

References TO_ROW::add_blob(), ASSIGN, blob_x_order(), cprintf(), draw2d, f, TO_ROW::initial_min_y(), TO_ROW::max_y(), TO_ROW::min_y(), most_overlapping_row(), move2d, NEW_ROW, NULL, REJECT, to_win, and tprintf().

Referenced by cleanup_rows(), expand_rows(), and make_initial_textrows().

02282                            {  //walk the block's blobs in x order, putting each into an existing row, a new row, or rejecting it
02283   OVERLAP_STATE overlap_result;  //ASSIGN, NEW_ROW or REJECT for current blob
02284   float ycoord;                  //vertical centre of block; only used for skew drawing
02285   float top, bottom;             //blob top/bottom with block_skew subtracted
02286   float g_length = 1.0f;         //sqrt(1 + gradient^2) when a gradient is given
02287   INT16 row_count;               //no of rows
02288   INT16 left_x;                  //left edge where the scan starts
02289   INT16 last_x;                  //left edge of previous blob
02290   float block_skew;              //current y offset attributed to skew
02291   float smooth_factor;           //weight of newest sample in skew update
02292   float near_dist;               //previous row's min_y minus blob top
02293   ICOORD testpt;                 //debug point from textord_test_x/y
02294   BLOBNBOX *blob;                //current blob
02295   TO_ROW *row;                   //current row
02296   TO_ROW *dest_row;              //row to put blob in
02297                                  //iterators
02298   BLOBNBOX_IT blob_it = &block->blobs;
02299   TO_ROW_IT row_it = block->get_rows ();
02300 
02301 #ifdef TEXT_VERBOSE
02302   // gets a 'l', see ccmain/tesseractmain.dox
02303   cprintf("l");
02304 #endif
02305   ycoord =
02306     (block->block->bounding_box ().bottom () +
02307     block->block->bounding_box ().top ()) / 2.0f;
02308   if (gradient != NULL)
02309     g_length = sqrt (1 + *gradient * *gradient);  //length of the (1, gradient) vector
02310 #ifndef GRAPHICS_DISABLED
02311   if (drawing_skew)
02312     move2d (to_win, block->block->bounding_box ().left (), ycoord);
02313 #endif
02314   testpt = ICOORD (textord_test_x, textord_test_y);
02315   blob_it.sort (blob_x_order);   //process blobs by left edge
02316   smooth_factor = 1.0;
02317   block_skew = 0.0f;
02318   row_count = row_it.length ();  //might have rows
02319   if (!blob_it.empty ()) {
02320     left_x = blob_it.data ()->bounding_box ().left ();
02321   }
02322   else {
02323     left_x = block->block->bounding_box ().left ();
02324   }
02325   last_x = left_x;
02326   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
02327     blob = blob_it.data ();
02328     if (gradient != NULL) {
02329       block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom ()
02330         + *gradient / g_length * blob->bounding_box ().left ();  //skew taken directly from the supplied gradient
02331     }
02332     else if (blob->bounding_box ().left () - last_x > block->line_size / 2
02333       && last_x - left_x > block->line_size * 2
02334     && textord_interpolating_skew) {
02335       //                      tprintf("Interpolating skew from %g",block_skew);
02336       block_skew *= (float) (blob->bounding_box ().left () - left_x)
02337         / (last_x - left_x);     //scale tracked skew across the x gap since the previous blob
02338       //                      tprintf("to %g\n",block_skew);
02339     }
02340     last_x = blob->bounding_box ().left ();
02341     top = blob->bounding_box ().top () - block_skew;
02342     bottom = blob->bounding_box ().bottom () - block_skew;
02343 #ifndef GRAPHICS_DISABLED
02344     if (drawing_skew)
02345       draw2d (to_win, blob->bounding_box ().left (), ycoord + block_skew);
02346 #endif
02347     if (!row_it.empty ()) {
02348       for (row_it.move_to_first ();
02349         !row_it.at_last () && row_it.data ()->min_y () > top;
02350         row_it.forward ());      //empty body: stop at first row with min_y <= top, or at the last row
02351       row = row_it.data ();
02352       if (row->min_y () <= top && row->max_y () >= bottom) {
02353       //any overlap
02354         dest_row = row;
02355         overlap_result = most_overlapping_row (&row_it, dest_row,
02356           top, bottom,
02357           block->line_size,
02358           blob->bounding_box ().
02359           contains (testpt));
02360         if (overlap_result == NEW_ROW && !reject_misses)
02361           overlap_result = ASSIGN;  //misses are tolerated: keep the blob in dest_row
02362       }
02363       else {
02364         overlap_result = NEW_ROW;
02365         if (!make_new_rows) {      //no new rows allowed: try to attach to a nearby row instead
02366           near_dist = row_it.data_relative (-1)->min_y () - top;
02367                                  //below bottom
02368           if (bottom < row->min_y ()) {
02369             if (row->min_y () - bottom <=
02370               (block->line_spacing -
02371             block->line_size) * textord_merge_desc) {
02372                                  //done it
02373               overlap_result = ASSIGN;
02374               dest_row = row;
02375             }
02376           }
02377           else if (near_dist > 0
02378           && near_dist < bottom - row->max_y ()) {
02379             row_it.backward ();    //previous row is the closer one
02380             dest_row = row_it.data ();
02381             if (dest_row->min_y () - bottom <=
02382               (block->line_spacing -
02383             block->line_size) * textord_merge_desc) {
02384                                  //done it
02385               overlap_result = ASSIGN;
02386             }
02387           }
02388           else {
02389             if (top - row->max_y () <=
02390               (block->line_spacing -
02391               block->line_size) * (textord_merge_x +
02392             textord_merge_asc)) {
02393                                  //done it
02394               overlap_result = ASSIGN;
02395               dest_row = row;
02396             }
02397           }
02398         }
02399       }
02400       if (overlap_result == ASSIGN)
02401         dest_row->add_blob (blob_it.extract (), top, bottom,
02402           block->line_size);     //blob moves from the block list into the row
02403       if (overlap_result == NEW_ROW) {
02404         if (make_new_rows && top - bottom < block->max_blob_size) {
02405           dest_row =
02406             new TO_ROW (blob_it.extract (), top, bottom,
02407             block->line_size);
02408           row_count++;
02409           if (bottom > row_it.data ()->min_y ())
02410             row_it.add_before_then_move (dest_row);
02411           //insert in right place
02412           else
02413             row_it.add_after_then_move (dest_row);
02414           smooth_factor =
02415             1.0 / (row_count * textord_skew_lag +
02416             textord_skewsmooth_offset);  //more rows give a smaller weight to each new sample
02417         }
02418         else
02419           overlap_result = REJECT;  //blob stays in the block's list
02420       }
02421     }
02422     else if (make_new_rows && top - bottom < block->max_blob_size) {
02423       overlap_result = NEW_ROW;    //no rows yet: this blob starts the first one
02424       dest_row =
02425         new TO_ROW (blob_it.extract (), top, bottom, block->line_size);
02426       row_count++;
02427       row_it.add_after_then_move (dest_row);
02428       smooth_factor = 1.0 / (row_count * textord_skew_lag + 1);
02429     }
02430     else
02431       overlap_result = REJECT;
02432     if (blob->bounding_box ().contains (testpt)) {  //debug report for the blob under the test point
02433       if (overlap_result != REJECT) {
02434         tprintf ("Test blob assigned to row at (%g,%g) on pass %d\n",
02435           dest_row->min_y (), dest_row->max_y (), pass);
02436       }
02437       else {
02438         tprintf ("Test blob assigned to no row on pass %d\n", pass);
02439       }
02440     }
02441     if (overlap_result != REJECT) {
02442       while (!row_it.at_first ()
02443         && row_it.data ()->min_y () >
02444       row_it.data_relative (-1)->min_y ()) {
02445         row = row_it.extract ();   //current row now sits too late in the list: move it one place back
02446         row_it.backward ();
02447         row_it.add_before_then_move (row);
02448       }
02449       while (!row_it.at_last ()
02450         && row_it.data ()->min_y () <
02451       row_it.data_relative (1)->min_y ()) {
02452         row = row_it.extract ();   //current row now sits too early in the list: move it one place forward
02453         row_it.forward ();
02454                                  //keep rows in order
02455         row_it.add_after_then_move (row);
02456       }
02457       block_skew = (1 - smooth_factor) * block_skew
02458         + smooth_factor * (blob->bounding_box ().bottom () -
02459         dest_row->initial_min_y ());  //blend old skew with this blob's offset from its row
02460     }
02461   }
02462   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
02463     if (row_it.data ()->blob_list ()->empty ())
02464       delete row_it.extract ();  //discard empty rows
02465   }
02466 }

int blob_x_order ( const void *  item1,
const void *  item2 
)

Sort function.

Sort function to sort blobs in x from page's left-hand side.

Definition at line 2579 of file makerow.cpp.

Referenced by assign_blobs_to_rows(), cleanup_rows(), and most_overlapping_row().

02581                                     {  //comparator passed to sort(): orders blobs by the left edge of their bounding box
02582                                  //each item points at a BLOBNBOX pointer
02583   BLOBNBOX *blob1 = *(BLOBNBOX **) item1;
02584                                  //each item points at a BLOBNBOX pointer
02585   BLOBNBOX *blob2 = *(BLOBNBOX **) item2;
02586 
02587   if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())
02588     return -1;                   //blob1 starts further left
02589   else if (blob1->bounding_box ().left () > blob2->bounding_box ().left ())
02590     return 1;                    //blob2 starts further left
02591   else
02592     return 0;                    //same left edge
02593 }

void cleanup_rows ( ICOORD  page_tr,
TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
INT32  block_edge,
BOOL8  testing_on 
)

Find lines.

Remove overlapping rows and fit all the blobs to what's left.

Definition at line 507 of file makerow.cpp.

References assign_blobs_to_rows(), blob_x_order(), compute_block_xheight(), CORAL, create_to_win(), delete_non_dropout_rows(), draw_meanlines(), expand_rows(), FALSE, fit_parallel_rows(), MAGENTA, make_spline_rows(), NO_WINDOW, plot_blob_list(), pre_associate_blobs(), restore_underlined_blobs(), rotation, separate_underlines(), to_win, tprintf(), TRUE, vigorous_noise_removal(), WHITE, and YELLOW.

Referenced by make_rows().

00514                    {  //refit the rows, hand all blobs back to the block and reassign them in three passes, then finish the rows
00515                                  //iterators
00516   BLOBNBOX_IT blob_it = &block->blobs;
00517   TO_ROW_IT row_it = block->get_rows ();
00518 
00519 #ifndef GRAPHICS_DISABLED
00520   if (textord_show_parallel_rows && testing_on) {
00521     if (to_win == NO_WINDOW)
00522       create_to_win(page_tr);    //debug window is created on first use
00523   }
00524 #endif
00525                                  //get row coords
00526   fit_parallel_rows(block,
00527                     gradient,
00528                     rotation,
00529                     block_edge,
00530                     textord_show_parallel_rows &&testing_on);
00531   delete_non_dropout_rows(block,
00532                           gradient,
00533                           rotation,
00534                           block_edge,
00535                           textord_show_parallel_rows &&testing_on);
00536   expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
00537   blob_it.set_to_list (&block->blobs);
00538   row_it.set_to_list (block->get_rows ());
00539   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
00540     blob_it.add_list_after (row_it.data ()->blob_list ());
00541   //give blobs back
00542   assign_blobs_to_rows (block, &gradient, 1, TRUE, TRUE, FALSE);  //pass 1: reject_misses and make_new_rows both on
00543   //now new rows must be genuine
00544   blob_it.set_to_list (&block->blobs);
00545   blob_it.add_list_after (&block->large_blobs);
00546   assign_blobs_to_rows (block, &gradient, 2, TRUE, TRUE, FALSE);  //pass 2: same flags, large blobs included
00547   //safe to use big ones now
00548   blob_it.set_to_list (&block->blobs);
00549                                  //throw all blobs in
00550   blob_it.add_list_after (&block->noise_blobs);
00551   blob_it.add_list_after (&block->small_blobs);
00552   assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE);  //pass 3: no rejection of misses, no new rows
00553   //no rows for noise
00554   row_it.set_to_list (block->get_rows ());
00555   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
00556     row_it.data ()->blob_list ()->sort (blob_x_order);  //each row's blobs sorted by left edge
00557   fit_parallel_rows(block, gradient, rotation, block_edge, FALSE);
00558   // if () new in v1.03
00559   if (textord_heavy_nr) {
00560     vigorous_noise_removal(block);
00561   }
00562   separate_underlines(block, gradient, rotation, testing_on);
00563   pre_associate_blobs(page_tr, block, rotation, testing_on);
00564 
00565 #ifndef GRAPHICS_DISABLED
00566   if (textord_show_final_rows && testing_on) {
00567     if (to_win == NO_WINDOW)
00568       create_to_win(page_tr);
00569   }
00570 #endif
00571 
00572   fit_parallel_rows(block, gradient, rotation, block_edge, FALSE);
00573   //              textord_show_final_rows && testing_on);
00574   make_spline_rows(block,
00575                    gradient,
00576                    rotation,
00577                    block_edge,
00578                    textord_show_final_rows &&testing_on);
00579   if (!textord_old_xheight || !textord_old_baselines)
00580     compute_block_xheight(block, gradient);  //skipped only when both old_xheight and old_baselines are set
00581   if (textord_restore_underlines)
00582                                  //fix underlines
00583     restore_underlined_blobs(block);
00584 #ifndef GRAPHICS_DISABLED
00585   if (textord_show_final_rows && testing_on) {
00586     plot_blob_list (to_win, &block->blobs, MAGENTA, WHITE);
00587     //show discarded blobs
00588     plot_blob_list (to_win, &block->underlines, YELLOW, CORAL);
00589   }
00590   if (textord_show_final_rows && testing_on && block->blobs.length () > 0)
00591     tprintf ("%d blobs discarded as noise\n", block->blobs.length ());  //whatever is still in block->blobs was never assigned
00592   if (textord_show_final_rows && testing_on) {
00593     draw_meanlines(block, gradient, block_edge, WHITE, rotation);
00594   }
00595 #endif
00596 }

void compute_block_xheight ( TO_BLOCK *  block,
float  gradient 
)

Find lines xheight.

Compute the xheight of the individual rows, then correlate them and interpret ascenderless lines, correcting xheights.

Definition at line 1267 of file makerow.cpp.

References STATS::add(), TO_ROW::ascrise, compute_row_xheight(), correct_row_xheight(), TO_ROW::descdrop, STATS::ile(), median_block_xheight(), STATS::set_range(), and TO_ROW::xheight.

Referenced by cleanup_rows().

01270                             {  //gather per-row xheight evidence, derive block-level xheight/ascender/descender sizes, then correct every row
01271   TO_ROW *row;                   //current row
01272   int xh_count, desc_count;      //no of samples
01273   float block_median;            //twice the median block xheight, floored at line_size
01274   int asc_count, cap_count;      //rows with ascenders / rows with xheight only
01275   INT32 min_size, max_size;      //limits on xheight
01276   INT32 evidence;                //no of samples on row
01277   float xh_sum, desc_sum;        //for averages
01278   float asc_sum, cap_sum;        //for averages
01279   TO_ROW_IT row_it = block->get_rows ();
01280   STATS row_heights;             //block evidence
01281 
01282   if (row_it.empty ())
01283     return;                      //no rows
01284   block_median = median_block_xheight (block, gradient);
01285   block_median *= 2;
01286   if (block_median < block->line_size)
01287     block_median = block->line_size;
01288   //      tprintf("Block median=%g, linesize=%g\n",
01289   //              block_median,block->line_size);
01290   max_size = (INT32) ceil (block_median);
01291   min_size = (INT32) floor (block_median * textord_minxh);
01292   row_heights.set_range (min_size, max_size + 1);
01293   xh_count = desc_count = asc_count = cap_count = 0;
01294   xh_sum = desc_sum = asc_sum = cap_sum = 0.0f;
01295   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01296     row = row_it.data ();
01297     evidence = compute_row_xheight (row, min_size, max_size, gradient);
01298     if (row->xheight > 0 && row->ascrise > 0) {
01299       row_heights.add ((INT32) row->xheight, evidence);  //row has both xheight and ascenders: counts as xheight evidence
01300       xh_count += evidence;
01301       asc_sum += row->ascrise;
01302       asc_count++;
01303     }
01304     else if (row->xheight > 0) {
01305       cap_sum += row->xheight;   //assume just caps
01306       cap_count++;
01307     }
01308     if (row->descdrop != 0) {
01309       desc_sum += row->descdrop;
01310       desc_count++;
01311     }
01312   }
01313   if (xh_count > 0) {
01314                                  //median
01315     xh_sum = row_heights.ile (0.5);
01316     asc_sum /= asc_count;        //mean ascender rise over rows that had one
01317   }
01318   else if (cap_count > 0) {
01319     cap_sum /= cap_count;        //must assume caps
01320     xh_sum =
01321       cap_sum * textord_merge_x / (textord_merge_x + textord_merge_asc);  //split the cap height into x and ascender shares
01322     asc_sum =
01323       cap_sum * textord_merge_asc / (textord_merge_x + textord_merge_asc);
01324   }
01325   else {
01326                                  //default sizes
01327     xh_sum = block_median * textord_merge_x;
01328     asc_sum = block_median * textord_merge_asc;
01329   }
01330   if (desc_count > 0) {
01331     desc_sum /= desc_count;      //mean descender drop over rows that had one
01332   }
01333   else {
01334     desc_sum = xh_sum * textord_merge_desc / textord_merge_x;  //no descenders seen: scale from the xheight
01335   }
01336   // tprintf("Block average x height=%g, count=%d, asc=%g/%d, desc=%g/%d,cap=%g/%d\n",
01337   //         xh_sum,xh_count,asc_sum,asc_count,desc_sum,desc_count,
01338   //         cap_sum,cap_count);
01339   if (xh_sum < textord_min_xheight)
01340     xh_sum = (float) textord_min_xheight;  //enforce the configured minimum
01341   block->xheight = xh_sum;
01342   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01343     correct_row_xheight (row_it.data (), xh_sum, asc_sum, desc_sum);
01344   }
01345 }

void compute_dropout_distances ( INT32 *  occupation,
INT32 *  thresholds,
INT32  line_count 
)

Project blobs.

Compute the distance from each coordinate to the nearest dropout.

Definition at line 963 of file makerow.cpp.

Referenced by delete_non_dropout_rows().

00967                                 {  //overwrite thresholds[] in place with signed distances to the neighbouring dropouts
00968   INT32 line_index;              //of thresholds line
00969   INT32 distance;                //from prev dropout (held as a negative count)
00970   INT32 next_dist;               //to next dropout
00971   INT32 back_index;              //for back filling
00972   INT32 prev_threshold;          //before overwrite
00973 
00974   distance = -line_count;        //before any dropout is found, start beyond the array length
00975   line_index = 0;
00976   do {
00977     do {
00978       distance--;
00979       prev_threshold = thresholds[line_index];  //saved because the entry is overwritten next
00980                                  //distance from prev
00981       thresholds[line_index] = distance;
00982       line_index++;
00983     }
00984     while (line_index < line_count
00985       && (occupation[line_index] < thresholds[line_index]
00986       || occupation[line_index - 1] >= prev_threshold));  //stop where occupation goes from below threshold to at/above it
00987     if (line_index < line_count) {
00988       back_index = line_index - 1;
00989       next_dist = 1;
00990       while (next_dist < -distance && back_index >= 0) {
00991         thresholds[back_index] = next_dist;  //back-fill positive distances until they meet the negative ones
00992         back_index--;
00993         next_dist++;
00994         distance++;
00995       }
00996       distance = 1;              //restart the count from this dropout
00997     }
00998   }
00999   while (line_index < line_count);
01000 }

INT32 compute_height_modes ( STATS *  heights,
INT32  min_height,
INT32  max_height,
INT32 *  modes,
INT32  maxmodes 
)

Find lines.

Find the top maxmodes values in the input array and put their indices in the output in the order in which they occurred.

Todo:
Assuming that the baseline represents the median height, ascenders would be one "mode" and descenders would be a second "mode". Since different ascenders/descenders "ascend"/vs only so high/vs, there may be a half dozen "modes"... but why is the limit (MAX_HEIGHT_MODES) so high (12)?

Definition at line 1557 of file makerow.cpp.

References MAX_INT32, and STATS::pile_count().

Referenced by compute_row_xheight().

01563                             {  //keep up to maxmodes heights with the largest pile counts, in increasing height order
01564   INT32 pile_count;              //no in source pile
01565   INT32 src_count;               //no of source entries
01566   INT32 src_index;               //current entry
01567   INT32 least_count;             //height of smallest
01568   INT32 least_index;             //index of least
01569   INT32 dest_count;              //index in modes
01570 
01571   src_count = max_height + 1 - min_height;
01572   dest_count = 0;
01573   least_count = MAX_INT32;
01574   least_index = -1;
01575   for (src_index = 0; src_index < src_count; src_index++) {
01576     pile_count = heights->pile_count (min_height + src_index);
01577     if (pile_count > 0) {        //empty piles are never modes
01578       if (dest_count < maxmodes) {
01579         if (pile_count < least_count) {
01580                                  //find smallest in array
01581           least_count = pile_count;
01582           least_index = dest_count;
01583         }
01584         modes[dest_count++] = min_height + src_index;  //still room: just append
01585       }
01586       else if (pile_count >= least_count) {
01587         while (least_index < maxmodes - 1) {
01588           modes[least_index] = modes[least_index + 1];
01589           //shuffle up
01590           least_index++;
01591         }
01592                                  //new one on end
01593         modes[maxmodes - 1] = min_height + src_index;
01594         if (pile_count == least_count) {
01595                                  //new smallest
01596           least_index = maxmodes - 1;
01597         }
01598         else {
01599           least_count = heights->pile_count (modes[0]);  //rescan the kept modes for the new smallest
01600           least_index = 0;
01601           for (dest_count = 1; dest_count < maxmodes; dest_count++) {
01602             pile_count = heights->pile_count (modes[dest_count]);
01603             if (pile_count < least_count) {
01604                                  //find smallest
01605               least_count = pile_count;
01606               least_index = dest_count;
01607             }
01608           }                      //loop leaves dest_count equal to maxmodes again
01609         }
01610       }
01611     }
01612   }
01613   return dest_count;             //number of entries written to modes
01614 }

void compute_line_occupation ( TO_BLOCK block,
float  gradient,
INT32  min_y,
INT32  max_y,
INT32 *  occupation,
INT32 *  deltas 
)

Project blobs.

Compute the pixel projection back on the y axis given the global skew. Also compute the 1st derivative.

Definition at line 813 of file makerow.cpp.

References TO_ROW::blob_list(), BOX::bottom(), INT32FORMAT, BOX::left(), BOX::right(), BOX::rotate(), rotation, and BOX::top().

Referenced by delete_non_dropout_rows().

00820                               {  //fill deltas[] with blob-width changes per de-skewed scan line, then integrate them into occupation[]
00821   INT32 line_count;              //maxy-miny+1
00822   INT32 line_index;              //of scan line
00823   float top, bottom;             //coords of blob
00824   INT32 width;                   //of blob
00825   INT32 idx;                     //for -O3 bug on some platforms
00826   TO_ROW *row;                   //current row
00827   TO_ROW_IT row_it = block->get_rows ();
00828   BLOBNBOX *blob;                //current blob
00829   BLOBNBOX_IT blob_it;           //iterator
00830   float length;                  //of skew vector
00831   BOX blob_box;                  //bounding box
00832   FCOORD rotation;               //inverse of skew
00833 
00834   line_count = max_y - min_y + 1;
00835   length = sqrt (gradient * gradient + 1);
00836   rotation = FCOORD (1 / length, -gradient / length);  //unit vector with the gradient negated
00837   for (line_index = 0; line_index < line_count; line_index++)
00838     deltas[line_index] = 0;
00839   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00840     row = row_it.data ();
00841     blob_it.set_to_list (row->blob_list ());
00842     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
00843     blob_it.forward ()) {
00844       blob = blob_it.data ();
00845       blob_box = blob->bounding_box ();
00846       blob_box.rotate (rotation);//de-skew it
00847       top = blob_box.top ();
00848       bottom = blob_box.bottom ();
00849       width =
00850         (INT32) floor ((FLOAT32) (blob_box.right () - blob_box.left ()));
00851       if ((INT32) floor (bottom) < min_y
00852         || (INT32) floor (bottom) - min_y >= line_count)
00853         fprintf (stderr,
00854           "Bad y coord of bottom, " INT32FORMAT "(" INT32FORMAT ","
00855           INT32FORMAT ")\n", (INT32) floor (bottom), min_y, max_y);
00856                                  //count transitions
00857       //for -O3 bug on some platforms
00858       //deltas[(INT32) floor (bottom) - min_y] += width;
00859       idx = (INT32) floor (bottom) - min_y;  //NOTE(review): still used when the range check above only printed a warning
00860       deltas[idx] += width;      //blob starts contributing at its bottom line
00861       if ((INT32) floor (top) < min_y
00862         || (INT32) floor (top) - min_y >= line_count)
00863         fprintf (stderr,
00864           "Bad y coord of top, " INT32FORMAT "(" INT32FORMAT ","
00865           INT32FORMAT ")\n", (INT32) floor (top), min_y, max_y);
00866       //deltas[(INT32) floor (top) - min_y] -= width;
00867       idx = (INT32) floor (top) - min_y;  //NOTE(review): same unchecked index as for bottom
00868       deltas[idx] -= width;      //and stops contributing at its top line
00869     }
00870   }
00871   occupation[0] = deltas[0];
00872   for (line_index = 1; line_index < line_count; line_index++)
00873     occupation[line_index] = occupation[line_index - 1] + deltas[line_index];  //running sum of the deltas
00874 }

void compute_occupation_threshold ( INT32  low_window,
INT32  high_window,
INT32  line_count,
INT32 *  occupation,
INT32 *  thresholds 
)

Project blobs.

Compute thresholds for textline or not for the occupation array.

Definition at line 882 of file makerow.cpp.

Referenced by delete_non_dropout_rows().

00888                                    {  //set thresholds[i] from a sliding-window sum of occupation[] and the window's minimum
00889   INT32 line_index;              //of thresholds line
00890   INT32 low_index;               //in occupation
00891   INT32 high_index;              //in occupation
00892   INT32 sum;                     //sum of occupation over the current window
00893   INT32 divisor;                 //to get thresholds
00894   INT32 min_index;               //of min occ
00895   INT32 min_occ;                 //min in locality
00896   INT32 test_index;              //for finding min
00897 
00898   divisor =
00899     (INT32) ceil ((low_window + high_window) / textord_occupancy_threshold);  //NOTE(review): 0 if both windows are 0, and it is divided by below - confirm callers
00900   if (low_window + high_window < line_count) {
00901     for (sum = 0, high_index = 0; high_index < low_window; high_index++)
00902       sum += occupation[high_index];
00903     for (low_index = 0; low_index < high_window; low_index++, high_index++)
00904       sum += occupation[high_index];  //sum now covers the first low_window + high_window lines
00905     min_occ = occupation[0];
00906     min_index = 0;
00907     for (test_index = 1; test_index < high_index; test_index++) {
00908       if (occupation[test_index] <= min_occ) {
00909         min_occ = occupation[test_index];
00910         min_index = test_index;  //find min in region
00911       }
00912     }
00913     for (line_index = 0; line_index < low_window; line_index++)
00914       thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
00915     //first low_window lines all share the initial window's value
00916     for (low_index = 0; high_index < line_count; low_index++, high_index++) {
00917       sum -= occupation[low_index];   //slide the window up by one line
00918       sum += occupation[high_index];
00919       if (occupation[high_index] <= min_occ) {
00920                                  //find min in region
00921         min_occ = occupation[high_index];
00922         min_index = high_index;
00923       }
00924                                  //lost min from region
00925       if (min_index <= low_index) {
00926         min_occ = occupation[low_index + 1];
00927         min_index = low_index + 1;
00928         for (test_index = low_index + 2; test_index <= high_index;
00929         test_index++) {
00930           if (occupation[test_index] <= min_occ) {
00931             min_occ = occupation[test_index];
00932                                  //find min in region
00933             min_index = test_index;
00934           }
00935         }
00936       }
00937       thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
00938     }
00939   }
00940   else {
00941     min_occ = occupation[0];     //window is at least as long as the array: use the whole array
00942     min_index = 0;
00943     for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
00944       if (occupation[low_index] < min_occ) {
00945         min_occ = occupation[low_index];
00946         min_index = low_index;
00947       }
00948       sum += occupation[low_index];
00949     }
00950     line_index = 0;
00951   }
00952   for (; line_index < line_count; line_index++)
00953     thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
00954   //same out to end
00955 }

void compute_page_skew ( TO_BLOCK_LIST *  blocks,
float &  page_m,
float &  page_err 
)

Get average gradient.

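Compute the skew over a full page from the gradients of all the lines. The page gradient is not an arithmetic mean: it is the entry that choose_nth_item() picks at index row_count * textord_skew_ile among the collected row gradients. The page error is picked the same way from the collected row errors, independently of the gradient, so it is not necessarily the error of the same row.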

Definition at line 251 of file makerow.cpp.

References ABORT, alloc_mem(), blob_count, TO_ROW::blob_list(), choose_nth_item(), cprintf(), ERRCODE::error(), free_mem(), TO_ROW::line_error(), TO_ROW::line_m(), MEMORY_OUT, and NULL.

Referenced by make_rows().

00255                         {  //sets page_m/page_err from the line gradients/errors of every row in every block
00256   INT32 row_count;               //total rows
00257   INT32 blob_count;              //total blobs on page; reused later as the per-row blob count
00258   INT32 row_err;                 //integer error
00259   float *gradients;              //of rows
00260   float *errors;                 //of rows
00261   INT32 row_index;               //of total
00262   TO_ROW *row;                   //current row
00263   TO_BLOCK_IT block_it = blocks; //iterator
00264   TO_ROW_IT row_it;
00265 
00266   row_count = 0;
00267   blob_count = 0;
00268   //first pass: count the rows and the blobs over all blocks
00269 #ifdef TEXT_VERBOSE
00270   // gets a 'q', see ccmain/tesseractmain.dox
00271   cprintf("q");
00272 #endif
00273   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00274   block_it.forward ()) {
00275     row_count += block_it.data ()->get_rows ()->length ();
00276     //count up rows
00277     row_it.set_to_list (block_it.data ()->get_rows ());
00278     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
00279       blob_count += row_it.data ()->blob_list ()->length ();
00280   }
00281   if (row_count == 0) {          //no rows at all: report zero skew and zero error
00282     page_m = 0.0f;
00283     page_err = 0.0f;
00284     return;
00285   }
00286   gradients = (float *) alloc_mem (blob_count * sizeof (float));
00287   //get mem: both arrays have one slot per blob on the page
00288   errors = (float *) alloc_mem (blob_count * sizeof (float));
00289   if (gradients == NULL || errors == NULL)
00290     MEMORY_OUT.error ("compute_page_skew", ABORT, NULL);
00291   //second pass: collect gradient/error samples from the rows
00292   row_index = 0;
00293   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00294   block_it.forward ()) {
00295     row_it.set_to_list (block_it.data ()->get_rows ());
00296     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00297       row = row_it.data ();
00298       blob_count = row->blob_list ()->length ();
00299       row_err = (INT32) ceil (row->line_error ());
00300       if (row_err <= 0)
00301         row_err = 1;             //keep the divisor below at least 1
00302       if (textord_biased_skewcalc) {
00303         blob_count /= row_err;   //NOTE(review): divided by row_err here and again in the for-init below - confirm the double division is intended
00304         for (blob_count /= row_err; blob_count > 0; blob_count--) {
00305           gradients[row_index] = row->line_m ();  //row is entered once per remaining count, so it is weighted by blobs and error
00306           errors[row_index] = row->line_error ();
00307           row_index++;
00308         }
00309       }
00310       else if (blob_count >= textord_min_blobs_in_row) {
00311                                  //get gradient: one sample per row with enough blobs
00312         gradients[row_index] = row->line_m ();
00313         errors[row_index] = row->line_error ();
00314         row_index++;
00315       }
00316     }
00317   }
00318   if (row_index == 0) {
00319                                  //desperate: no row produced a sample, so take one from every row
00320     for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00321     block_it.forward ()) {
00322       row_it.set_to_list (block_it.data ()->get_rows ());
00323       for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
00324       row_it.forward ()) {
00325         row = row_it.data ();
00326         gradients[row_index] = row->line_m ();  //NOTE(review): arrays were sized by blob count; presumes rows <= blobs here - confirm
00327         errors[row_index] = row->line_error ();
00328         row_index++;
00329       }
00330     }
00331   }
00332   row_count = row_index;         //from here on: number of samples collected
00333   row_index = choose_nth_item ((INT32) (row_count * textord_skew_ile),
00334     gradients, row_count);
00335   page_m = gradients[row_index]; //gradient at the chosen position
00336   row_index = choose_nth_item ((INT32) (row_count * textord_skew_ile),
00337     errors, row_count);
00338   page_err = errors[row_index];  //chosen independently of page_m, so may belong to a different row
00339   free_mem(gradients);
00340   free_mem(errors);
00341 }

INT32 compute_row_descdrop ( TO_ROW row,
float  gradient 
)

Find row descender drop.

Estimate the descdrop of this row.

Definition at line 1511 of file makerow.cpp.

References STATS::add(), blob_count, TO_ROW::blob_list(), f, STATS::mode(), TO_ROW::parallel_c(), STATS::pile_count(), and TO_ROW::xheight.

Referenced by compute_row_xheight().

01514                             {  //returns minus the most common baseline-to-blob-bottom drop, or 0 if no blob qualified
01515   INT32 min_height = (INT32) floor (row->xheight * textord_descx_ratio_min);
01516   INT32 max_height = (INT32) floor (row->xheight * textord_descx_ratio_max);
01517   float xcentre;                 //centre of blob
01518   float height;                  //drop of blob bottom below the row line
01519   BLOBNBOX_IT blob_it = row->blob_list ();
01520   BLOBNBOX *blob;                //current blob
01521   INT32 blob_count;              //blobs in the modal pile
01522   INT32 blob_index;              //modal drop value
01523   STATS heights (min_height, max_height + 1);
01524   //histogram the drops that fall within [min_height, max_height]
01525   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
01526     blob = blob_it.data ();
01527     if (!blob->joined_to_prev ()) {
01528       xcentre =
01529         (blob->bounding_box ().left () +
01530         blob->bounding_box ().right ()) / 2.0f;
01531       height =
01532         gradient * xcentre + row->parallel_c () -
01533         blob->bounding_box ().bottom ();  //row line y at the blob centre minus blob bottom
01534       if (height >= min_height && height <= max_height)
01535         heights.add ((INT32) floor (height + 0.5), 1);  //rounded to nearest integer
01536     }
01537   }
01538   blob_index = heights.mode ();  //find mode
01539                                  //get count of mode
01540   blob_count = heights.pile_count (blob_index);
01541   return blob_count > 0 ? -blob_index : 0;  //negated: the drop is returned as a value <= 0
01542 }

void compute_row_stats ( TO_BLOCK block,
BOOL8  testing_on 
)

Find lines stats.

Compute the linespacing and offset.

Definition at line 1176 of file makerow.cpp.

References ABORT, alloc_mem(), choose_nth_item(), ERRCODE::error(), free_mem(), TO_ROW::intercept(), MEMORY_OUT, NULL, TO_ROW::parallel_c(), row_spacing_order(), TO_ROW::spacing, and tprintf().

Referenced by expand_rows().

01179                         {  //sets each row's spacing, then block->key_row, baseline_offset and possibly line_size/line_spacing/max_blob_size
01180   INT32 row_index;               //of median
01181   TO_ROW *row;                   //current row
01182   TO_ROW *prev_row;              //previous row
01183   float iqr;                     //inter quartile range
01184   TO_ROW_IT row_it = block->get_rows ();
01185                                  //number of rows
01186   INT16 rowcount = row_it.length ();
01187   TO_ROW **rows;                 //for choose nth
01188 
01189   rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
01190   if (rows == NULL)
01191     MEMORY_OUT.error ("compute_row_stats", ABORT, NULL);
01192   rowcount = 0;                  //reused: now counts the rows that were given a spacing
01193   prev_row = NULL;
01194   row_it.move_to_last ();        //start at bottom
01195   do {                           //NOTE(review): data() is read before any emptiness check; callers shown here return early on an empty row list
01196     row = row_it.data ();
01197     if (prev_row != NULL) {
01198       rows[rowcount++] = prev_row;
01199       prev_row->spacing = row->intercept () - prev_row->intercept ();  //gap from the previously visited row to this one
01200       if (testing_on)
01201         tprintf ("Row at %g yields spacing of %g\n",
01202           row->intercept (), prev_row->spacing);
01203     }
01204     prev_row = row;
01205     row_it.backward ();
01206   }
01207   while (!row_it.at_last ());    //stop once the iterator is back at the last row
01208   block->key_row = prev_row;     //default key row: the final row visited (it never gets a spacing here)
01209   block->baseline_offset =
01210     fmod (prev_row->parallel_c (), block->line_spacing);
01211   if (testing_on)
01212     tprintf ("Blob based spacing=(%g,%g), offset=%g",
01213       block->line_size, block->line_spacing, block->baseline_offset);
01214   if (rowcount > 0) {
01215     row_index = choose_nth_item (rowcount * 3 / 4, rows, rowcount,
01216       sizeof (TO_ROW *), row_spacing_order);
01217     iqr = rows[row_index]->spacing;  //spacing at the 3/4 position
01218     row_index = choose_nth_item (rowcount / 4, rows, rowcount,
01219       sizeof (TO_ROW *), row_spacing_order);
01220     iqr -= rows[row_index]->spacing; //minus spacing at the 1/4 position
01221     row_index = choose_nth_item (rowcount / 2, rows, rowcount,
01222       sizeof (TO_ROW *), row_spacing_order);
01223     block->key_row = rows[row_index];  //row at the middle position replaces the default key row
01224     if (testing_on)
01225       tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr);
01226     if (rowcount > 2
01227     && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) {  //only trust the row spacing when the spread is small
01228       if (!textord_new_initial_xheight) {
01229         if (rows[row_index]->spacing < block->line_spacing
01230           && rows[row_index]->spacing > block->line_size)
01231           //within range
01232           block->line_size = rows[row_index]->spacing;
01233         //spacing=size
01234         else if (rows[row_index]->spacing > block->line_spacing)
01235           block->line_size = block->line_spacing;
01236         //too big so use max
01237       }
01238       else {
01239         if (rows[row_index]->spacing < block->line_spacing)
01240           block->line_size = rows[row_index]->spacing;
01241         else
01242           block->line_size = block->line_spacing;
01243         //too big so use max
01244       }
01245       if (block->line_size < textord_min_xheight)
01246         block->line_size = (float) textord_min_xheight;  //never go below the minimum xheight
01247       block->line_spacing = rows[row_index]->spacing;
01248       block->max_blob_size =
01249         block->line_spacing * textord_excess_blobsize;
01250     }
01251     block->baseline_offset = fmod (rows[row_index]->intercept (),
01252       block->line_spacing);  //recomputed from the key row (uses intercept(), the earlier value used parallel_c())
01253   }
01254   if (testing_on)
01255     tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n",
01256       block->line_size, block->line_spacing, block->baseline_offset);
01257   free_mem(rows);
01258 }

INT32 compute_row_xheight ( TO_ROW row,
INT32  min_height,
INT32  max_height,
float  gradient 
)

Find row xheight.

Estimate the xheight of this row. Compute the ascender rise and descender drop at the same time.

Definition at line 1408 of file makerow.cpp.

References STATS::add(), TO_ROW::ascrise, TO_ROW::baseline, blob_count, TO_ROW::blob_list(), compute_height_modes(), compute_row_descdrop(), TO_ROW::descdrop, f, FALSE, STATS::get_total(), MAX_HEIGHT_MODES, MAX_INT32, STATS::mode(), TO_ROW::parallel_c(), STATS::pile_count(), tprintf(), TRUE, TO_ROW::xheight, and QSPLINE::y().

Referenced by compute_block_xheight().

01413                            {  //sets row->xheight, row->ascrise and row->descdrop from a histogram of blob heights; returns best_count
01414   BOOL8 in_best_pile;            //control of mode size
01415   INT32 prev_size;               //previous size
01416   float xcentre;                 //centre of blob
01417   float height;                  //height of blob
01418   BLOBNBOX_IT blob_it = row->blob_list ();
01419   BLOBNBOX *blob;                //current blob
01420   INT32 blob_count;              //blobs in the modal pile
01421   INT32 x;                       //xheight index
01422   INT32 asc;                     //ascender index
01423   INT32 blob_index;              //modal height value
01424   INT32 mode_count;              //no of modes
01425   INT32 best_count;              //count of best x so far
01426   float ratio;                   //size ratio
01427   INT32 modes[MAX_HEIGHT_MODES]; //biggest piles
01428   STATS heights (min_height, max_height + 1);
01429   //histogram the height of each blob top above the row's baseline
01430   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
01431     blob = blob_it.data ();
01432     if (!blob->joined_to_prev ()) {
01433       xcentre =
01434         (blob->bounding_box ().left () +
01435         blob->bounding_box ().right ()) / 2.0f;
01436       height = blob->bounding_box ().top ();
01437       if (textord_fix_xheight_bug)
01438         height -= row->baseline.y (xcentre);  //measure from the baseline spline
01439       else
01440         height -= gradient * xcentre + row->parallel_c ();  //measure from the straight parallel line
01441       if (height >= min_height && height <= max_height
01442         && (!textord_xheight_tweak || height > textord_min_xheight))
01443         heights.add ((INT32) floor (height + 0.5), 1);
01444     }
01445   }
01446   blob_index = heights.mode ();  //find mode
01447                                  //get count of mode
01448   blob_count = heights.pile_count (blob_index);
01449   if (textord_debug_xheights)
01450     tprintf ("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d,%d\n",
01451       min_height, max_height, blob_index, blob_count,
01452       heights.get_total (), row->blob_list ()->length ());
01453   row->ascrise = 0.0f;
01454   row->xheight = 0.0f;
01455   row->descdrop = 0.0f;          //undefined;
01456   in_best_pile = FALSE;
01457   prev_size = -MAX_INT32;
01458   best_count = 0;
01459   if (blob_count > 0) {
01460                                  //get biggest ones
01461     mode_count = compute_height_modes (&heights, min_height, max_height, modes, MAX_HEIGHT_MODES);
01462     for (x = 0; x < mode_count - 1; x++) {  //try each mode but the last as the xheight candidate
01463       if (modes[x] != prev_size + 1)
01464         in_best_pile = FALSE;    //had empty height
01465       if (heights.pile_count (modes[x])
01466         >= blob_count * textord_xheight_mode_fraction
01467       && (in_best_pile || heights.pile_count (modes[x]) > best_count)) {
01468         for (asc = x + 1; asc < mode_count; asc++) {  //pair it with each later mode as the ascender candidate
01469           ratio = (float) modes[asc] / modes[x];  //NOTE(review): presumes modes[x] != 0 (i.e. min_height > 0) - confirm
01470           if (textord_ascx_ratio_min < ratio
01471             && ratio < textord_ascx_ratio_max
01472             && heights.pile_count (modes[asc])
01473           >= blob_count * textord_ascheight_mode_fraction) {
01474             if (heights.pile_count (modes[x]) > best_count) {
01475               in_best_pile = TRUE;
01476               best_count = heights.pile_count (modes[x]);
01477             }
01478             // tprintf("X=%d, asc=%d, count=%d,  ratio=%g\n",
01479             //         modes[x],modes[asc]-modes[x],
01480             //         heights.pile_count(modes[x]),
01481             //         ratio);
01482             prev_size = modes[x];
01483             row->xheight = (float) modes[x];
01484             row->ascrise = (float) (modes[asc] - modes[x]);  //ascender rise is stored relative to the xheight
01485           }
01486         }
01487       }
01488     }
01489     if (row->xheight == 0) {
01490                                  //single mode: no acceptable x/ascender pair, fall back to the overall mode
01491       row->xheight = (float) blob_index;
01492       row->ascrise = 0.0f;
01493       if (textord_debug_xheights)
01494         tprintf ("Single mode xheight set to %g\n", row->xheight);
01495     }
01496     else if (textord_debug_xheights)
01497       tprintf ("Multi-mode xheight set to %g, asc=%g\n",
01498           row->xheight, row->ascrise);
01499     row->descdrop = (float) compute_row_descdrop (row, gradient);
01500     //find descenders
01501   }
01502   return best_count;             //stays 0 when no x/ascender pair was accepted
01503 }

void correct_row_xheight ( TO_ROW row,
float  xheight,
float  ascrise,
float  descdrop 
)

Fix bad row xht values.

Adjust the xheight etc of this row if not within reasonable limits of the average for the block.

Definition at line 1623 of file makerow.cpp.

References TO_ROW::all_caps, TO_ROW::ascrise, TO_ROW::descdrop, FALSE, TRUE, and TO_ROW::xheight.

Referenced by compute_block_xheight().

01627                                          {  //replaces row xheight/ascrise/descdrop values that disagree with the supplied block values
01628   if (textord_row_xheights) {
01629     if (row->xheight <= 0)
01630       row->xheight = xheight;    //no row value: take the supplied one
01631     if (row->ascrise < row->xheight * (textord_ascx_ratio_min - 1)) {  //row ascender rise is too small for its xheight
01632       if (row->xheight >= xheight * (1 - textord_xheight_error_margin)
01633       && row->xheight <= xheight * (1 + textord_xheight_error_margin)) {
01634         row->all_caps = FALSE;   //row xheight agrees with the supplied xheight
01635         row->ascrise = ascrise;
01636       }
01637       else if (row->xheight >=
01638         (xheight + ascrise) * (1 - textord_xheight_error_margin)
01639         && row->xheight <=
01640       (xheight + ascrise) * (1 + textord_xheight_error_margin)) {
01641         row->all_caps = TRUE;
01642                                  //it was caps: row "xheight" matches xheight + ascrise
01643         row->ascrise = row->xheight - xheight;
01644         row->xheight = xheight;
01645       }
01646       else {
01647         row->all_caps = TRUE;    //matches neither: split the row height in the ratio ascrise : xheight
01648         row->ascrise = row->xheight * ascrise / (xheight + ascrise);
01649         row->xheight -= row->ascrise;
01650       }
01651     }
01652     else
01653       row->all_caps = FALSE;
01654     row->ascrise = ascrise;      //NOTE(review): not inside the else above (no braces); runs on every path and overwrites the ascrise set in the branches above - confirm intended
01655     if (row->descdrop >= -row->xheight * (textord_ascx_ratio_min - 1))
01656       row->descdrop = descdrop;  //row descender drop is too shallow: take the supplied one
01657   }
01658   else {
01659     if (row->xheight < xheight * (1 - textord_xheight_error_margin)
01660       || row->xheight > xheight * (1 + textord_xheight_error_margin))
01661       row->xheight = xheight;    //set to average
01662     row->all_caps = row->ascrise <= 0;  //tested before ascrise is possibly replaced below
01663     if (row->ascrise < ascrise * (1 - textord_xheight_error_margin)
01664       || row->ascrise > ascrise * (1 + textord_xheight_error_margin))
01665       row->ascrise = ascrise;    //set to average
01666     if (row->descdrop < descdrop * (1 - textord_xheight_error_margin)
01667       || row->descdrop > descdrop * (1 + textord_xheight_error_margin))
01668       row->descdrop = descdrop;  //set to average
01669   }
01670 }

void delete_non_dropout_rows ( TO_BLOCK block,
float  gradient,
FCOORD  rotation,
INT32  block_edge,
BOOL8  testing_on 
)

Find lines.

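Build the line occupation profile of the deskewed block, compute the distance from each scan line to the nearest dropout, and delete every row that find_best_dropout_row() rejects, moving its blobs back to the block's blob list. The blobs of the rows that are kept are also moved back to the block's blob list.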

Definition at line 604 of file makerow.cpp.

References ABORT, alloc_mem(), BOX::bottom(), compute_dropout_distances(), compute_line_occupation(), compute_occupation_threshold(), cprintf(), deskew_block_coords(), draw_occupation(), ERRCODE::error(), find_best_dropout_row(), free_mem(), TO_ROW::intercept(), MEMORY_OUT, NULL, plot_parallel_row(), rotation, BOX::top(), and WHITE.

Referenced by cleanup_rows().

00610                               {  //deletes rows rejected by find_best_dropout_row and moves all row blobs back to block->blobs
00611   BOX block_box;                 //deskewed block
00612   INT32 *deltas;                 //change in occupation
00613   INT32 *occupation;             //of pixel coords
00614   INT32 max_y;                   //in block
00615   INT32 min_y;
00616   INT32 line_index;              //of scan line
00617   INT32 line_count;              //no of scan lines
00618   INT32 distance;                //to drop-out
00619   INT32 xleft;                   //of block
00620   INT32 ybottom;                 //of block
00621   TO_ROW *row;                   //current row
00622   TO_ROW_IT row_it = block->get_rows ();
00623   BLOBNBOX_IT blob_it = &block->blobs;
00624 
00625 #ifdef TEXT_VERBOSE
00626   // gets a 'n', see ccmain/tesseractmain.dox
00627   cprintf("n");
00628 #endif
00629   if (row_it.length () == 0)
00630     return;                      //empty block
00631   block_box = deskew_block_coords (block, gradient);
00632   xleft = block->block->bounding_box ().left ();
00633   ybottom = block->block->bounding_box ().bottom ();
00634   min_y = block_box.bottom () - 1;  //one scan line of margin below and above the deskewed box
00635   max_y = block_box.top () + 1;
00636   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00637     line_index = (INT32) floor (row_it.data ()->intercept ());
00638     if (line_index <= min_y)
00639       min_y = line_index - 1;    //widen the range so every row intercept lies strictly inside it
00640     if (line_index >= max_y)
00641       max_y = line_index + 1;
00642   }
00643   line_count = max_y - min_y + 1;
00644   if (line_count <= 0)
00645     return;                      //empty block
00646   deltas = (INT32 *) alloc_mem (line_count * sizeof (INT32));
00647   occupation = (INT32 *) alloc_mem (line_count * sizeof (INT32));
00648   if (deltas == NULL || occupation == NULL)
00649     MEMORY_OUT.error ("compute_line_spacing", ABORT, NULL);  //NOTE(review): message names compute_line_spacing, not this function
00650 
00651   compute_line_occupation(block, gradient, min_y, max_y, occupation, deltas);
00652   compute_occupation_threshold ((INT32)
00653     ceil (block->line_spacing *
00654     (textord_merge_desc +
00655     textord_merge_asc)),
00656     (INT32) ceil (block->line_spacing *
00657     (textord_merge_x +
00658     textord_merge_asc)),
00659     max_y - min_y + 1, occupation, deltas);
00660 #ifndef GRAPHICS_DISABLED
00661   if (testing_on) {
00662     draw_occupation(xleft, ybottom, min_y, max_y, occupation, deltas);
00663   }
00664 #endif
00665   compute_dropout_distances(occupation, deltas, line_count);
00666   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00667     row = row_it.data ();
00668     line_index = (INT32) floor (row->intercept ());
00669     distance = deltas[line_index - min_y];  //deltas is indexed from min_y; after the call above it is read as the dropout distance
00670     if (find_best_dropout_row (row, distance, block->line_spacing / 2,
00671     line_index, &row_it, testing_on)) {
00672 #ifndef GRAPHICS_DISABLED
00673       if (testing_on)
00674         plot_parallel_row(row, gradient, block_edge, WHITE, rotation);
00675 #endif
00676       blob_it.add_list_after (row_it.data ()->blob_list ());  //hand the row's blobs back to the block first
00677       delete row_it.extract ();  //too far away
00678     }
00679   }
00680   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00681     blob_it.add_list_after (row_it.data ()->blob_list ());  //surviving rows also return their blobs to the block
00682   }
00683 
00684   free_mem(deltas);
00685   free_mem(occupation);
00686 }

BOX deskew_block_coords ( TO_BLOCK block,
float  gradient 
)

Block box.

Compute the bounding box of all the blobs in the block if they were deskewed without actually doing it.

Definition at line 777 of file makerow.cpp.

References TO_ROW::blob_list(), BOX::rotate(), and rotation.

Referenced by delete_non_dropout_rows().

00780                          {  //returns the union of all row blob boxes after rotating each by the deskew vector
00781   BOX result;                    //block bounds
00782   BOX blob_box;                  //of block
00783   FCOORD rotation;               //deskew vector
00784   float length;                  //of gradient vector
00785   TO_ROW_IT row_it = block->get_rows ();
00786   TO_ROW *row;                   //current row
00787   BLOBNBOX *blob;                //current blob
00788   BLOBNBOX_IT blob_it;           //iterator
00789 
00790   length = sqrt (gradient * gradient + 1);  //length of the vector (1, gradient)
00791   rotation = FCOORD (1 / length, -gradient / length);  //normalised (1, -gradient)
00792   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00793     row = row_it.data ();
00794     blob_it.set_to_list (row->blob_list ());
00795     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
00796     blob_it.forward ()) {
00797       blob = blob_it.data ();
00798       blob_box = blob->bounding_box ();  //work on a copy; the blob itself is not modified here
00799       blob_box.rotate (rotation);//de-skew it
00800       result += blob_box;        //accumulate into the block bounds
00801     }
00802   }
00803   return result;
00804 }

static bool dot_of_i ( BLOBNBOX dot,
BLOBNBOX i,
TO_ROW row 
) [static]

Return true if the dot looks like it is part of 'i'.

Doesn't work for any other diacritical.

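To be considered part of an 'i':

- the 'i' must be more than twice as tall as the dot;
- the dot must overlap the 'i' horizontally by at least half the width of the 'i' or by at least the full width of the dot;
- the 'i' must either be more than twice as tall as it is wide, or its outline must contain, on both the left side and the right side of the dot's centre (within one dot width of the dot), a run whose height exceeds 0.6 of the target height.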

Definition at line 360 of file makerow.cpp.

References BOX::bottom(), BOX::height(), BOX::left(), TO_ROW::line_c(), TO_ROW::line_m(), MAX, MIN, C_BLOB::out_list(), BOX::right(), BOX::top(), BOX::width(), and ICOORD::y().

Referenced by vigorous_noise_removal().

00360                                                               {
00361   const BOX& ibox = i->bounding_box();
00362   const BOX& dotbox = dot->bounding_box();
00363 
00364   // Must overlap horizontally by enough and be high enough.
00365   int overlap = MIN(dotbox.right(), ibox.right()) -
00366                 MAX(dotbox.left(), ibox.left());
00367   if (ibox.height() <= 2 * dotbox.height() ||
00368       (overlap * 2 < ibox.width() && overlap < dotbox.width()))
00369     return false;  // i not over twice the dot height, or overlap under half the i width and under the dot width.
00370 
00371   // The i or ! must be tall and thin to be good.
00372   if (ibox.height() > ibox.width() * 2)
00373     return true;
00374 
00375   // It might still be tall and thin, but it might be joined to something.
00376   // So search the outline for a piece of large height close to the edges
00377   // of the dot.
00378   const double kHeightFraction = 0.6;
00379   double target_height = MIN(dotbox.bottom(), ibox.top());
00380   target_height -= row->line_m()*dotbox.left() + row->line_c();  // Height above the row line at the dot's left edge.
00381   target_height *= kHeightFraction;
00382   int left_min = dotbox.left() - dotbox.width();  // Search window extends one dot width either side of the dot.
00383   int middle = (dotbox.left() + dotbox.right())/2;
00384   int right_max = dotbox.right() + dotbox.width();
00385   int left_miny = 0;
00386   int left_maxy = 0;
00387   int right_miny = 0;
00388   int right_maxy = 0;
00389   bool found_left = false;   // A tall enough run was seen left of middle.
00390   bool found_right = false;  // A tall enough run was seen right of middle.
00391   bool in_left = false;      // The previous outline point was in the left window.
00392   bool in_right = false;     // The previous outline point was in the right window.
00393   C_BLOB* blob = i->cblob();
00394   C_OUTLINE_IT o_it = blob->out_list();
00395   for (o_it.mark_cycle_pt(); !o_it.cycled_list(); o_it.forward()) {
00396     C_OUTLINE* outline = o_it.data();
00397     int length = outline->pathlength();
00398     ICOORD pos = outline->start_pos();
00399     for (int step = 0; step < length; pos += outline->step(step++)) {  // Walk the outline one step at a time.
00400       int x = pos.x();
00401       int y = pos.y();
00402       if (x >= left_min && x < middle && !found_left) {
00403         // We are in the left part so find min and max y.
00404         if (in_left) {
00405           if (y > left_maxy) left_maxy = y;
00406           if (y < left_miny) left_miny = y;
00407         } else {
00408           left_maxy = left_miny = y;
00409           in_left = true;
00410         }
00411       } else if (in_left) {
00412         // We just left the left so look for size.
00413         if (left_maxy - left_miny > target_height) {
00414           if (found_right)
00415             return true;  // Tall runs found on both sides.
00416           found_left = true;
00417         }
00418         in_left = false;
00419       }
00420       if (x <= right_max && x > middle && !found_right) {
00421         // We are in the right part so find min and max y.
00422         if (in_right) {
00423           if (y > right_maxy) right_maxy = y;
00424           if (y < right_miny) right_miny = y;
00425         } else {
00426           right_maxy = right_miny = y;
00427           in_right = true;
00428         }
00429       } else if (in_right) {
00430         // We just left the right so look for size.
00431         if (right_maxy - right_miny > target_height) {
00432           if (found_left)
00433             return true;  // Tall runs found on both sides.
00434           found_right = true;
00435         }
00436         in_right = false;
00437       }
00438     }
00439   }
00440   return false;  // NOTE(review): a run still open when an outline ends is never measured - confirm intended.
00441 }

void expand_rows ( ICOORD  page_tr,
TO_BLOCK block,
float  gradient,
FCOORD  rotation,
INT32  block_edge,
BOOL8  testing_on 
)

Find lines.

Expand each row to the least of its allowed size and touching its neighbours. If the expansion would entirely swallow a neighbouring row then do so.

It seems to handle testing/random/falling_y_position.tif ok but not testing/random/angled_test4.tif - is this function responsible for the former working?

Definition at line 1015 of file makerow.cpp.

References adjust_row_limits(), assign_blobs_to_rows(), TO_ROW::blob_list(), compute_row_stats(), cprintf(), create_to_win(), FALSE, fit_parallel_rows(), TO_ROW::intercept(), TO_ROW::max_y(), TO_ROW::min_y(), NO_WINDOW, plot_parallel_row(), rotation, TO_ROW::set_limits(), to_win, TRUE, and WHITE.

Referenced by cleanup_rows().

01022                   {  //widens each row's y limits towards its allowed band, merging neighbour rows that fall wholly inside it
01023   BOOL8 swallowed_row;           //eaten a neighbour
01024   float y_max, y_min;            //new row limits
01025   float y_bottom, y_top;         //allowed limits
01026   TO_ROW *test_row;              //next row
01027   TO_ROW *row;                   //current row
01028                                  //iterators
01029   BLOBNBOX_IT blob_it = &block->blobs;
01030   TO_ROW_IT row_it = block->get_rows ();
01031 
01032 #ifdef TEXT_VERBOSE
01033   // gets a 'x', see ccmain/tesseractmain.dox
01034   cprintf("x");
01035 #endif
01036 #ifndef GRAPHICS_DISABLED
01037   if (textord_show_expanded_rows && testing_on) {
01038     if (to_win == NO_WINDOW)
01039       create_to_win(page_tr);
01040   }
01041 #endif
01042 
01043   adjust_row_limits(block);  //shift min,max.
01044   if (textord_new_initial_xheight) {
01045     if (block->get_rows ()->length () == 0)
01046       return;
01047     compute_row_stats(block, textord_show_expanded_rows &&testing_on);  //new mode: stats before blobs are reassigned
01048   }
01049   assign_blobs_to_rows (block, &gradient, 4, TRUE, FALSE, FALSE);
01050   //get real membership
01051   if (block->get_rows ()->length () == 0)
01052     return;
01053   fit_parallel_rows(block,
01054                     gradient,
01055                     rotation,
01056                     block_edge,
01057                     textord_show_expanded_rows &&testing_on);
01058   if (!textord_new_initial_xheight)
01059     compute_row_stats(block, textord_show_expanded_rows &&testing_on);  //old mode: stats after the rows are refitted
01060   row_it.move_to_last ();
01061   do {
01062     row = row_it.data ();
01063     y_max = row->max_y ();       //get current limits
01064     y_min = row->min_y ();
01065     y_bottom = row->intercept () - block->line_size * textord_merge_desc;  //lowest y the row may expand to
01066     y_top = row->intercept () + block->line_size
01067       * (textord_merge_x + textord_merge_asc);  //highest y the row may expand to
01068     if (y_min > y_bottom) {      //expansion allowed
01069                                  //expandable
01070       swallowed_row = TRUE;
01071       while (swallowed_row && !row_it.at_last ()) {
01072         swallowed_row = FALSE;
01073                                  //get next one
01074         test_row = row_it.data_relative (1);
01075                                  //overlaps space
01076         if (test_row->max_y () > y_bottom) {
01077           if (test_row->min_y () > y_bottom) {  //neighbour lies wholly above the allowed bottom
01078             row_it.forward ();
01079 #ifndef GRAPHICS_DISABLED
01080             if (textord_show_expanded_rows && testing_on)
01081               plot_parallel_row(test_row,
01082                                 gradient,
01083                                 block_edge,
01084                                 WHITE,
01085                                 rotation);
01086 #endif
01087             blob_it.set_to_list (row->blob_list ());
01088             blob_it.add_list_after (test_row->blob_list ());
01089                                  //swallow complete row
01090             delete row_it.extract ();
01091             row_it.backward ();  //return to the current row and test its new neighbour
01092             swallowed_row = TRUE;
01093           }
01094           else if (test_row->max_y () < y_min)
01095                                  //shorter limit
01096             y_bottom = test_row->max_y ();
01097           else
01098             y_bottom = y_min;    //can't expand it
01099         }
01100       }
01101       y_min = y_bottom;          //expand it
01102     }
01103     if (y_max < y_top) {         //expansion allowed
01104       swallowed_row = TRUE;
01105       while (swallowed_row && !row_it.at_first ()) {
01106         swallowed_row = FALSE;
01107                                  //get one above
01108         test_row = row_it.data_relative (-1);
01109         if (test_row->min_y () < y_top) {
01110           if (test_row->max_y () < y_top) {  //neighbour lies wholly below the allowed top
01111             row_it.backward ();
01112             blob_it.set_to_list (row->blob_list ());
01113 #ifndef GRAPHICS_DISABLED
01114             if (textord_show_expanded_rows && testing_on)
01115               plot_parallel_row(test_row,
01116                                 gradient,
01117                                 block_edge,
01118                                 WHITE,
01119                                 rotation);
01120 #endif
01121             blob_it.add_list_after (test_row->blob_list ());
01122                                  //swallow complete row
01123             delete row_it.extract ();
01124             row_it.forward ();   //return to the current row and test its new neighbour
01125             swallowed_row = TRUE;
01126           }
01127           else if (test_row->min_y () < y_max)
01128                                  //shorter limit
01129             y_top = test_row->min_y ();
01130           else
01131             y_top = y_max;       //can't expand it
01132 
01133         }
01134       }
01135       y_max = y_top;             //expand it
01136     }
01137                                  //new limits
01138     row->set_limits (y_min, y_max);
01139     row_it.backward ();
01140   }
01141   while (!row_it.at_last ());    //stop once the iterator is back at the last row
01142 }

BOOL8 find_best_dropout_row ( TO_ROW row,
INT32  distance,
float  dist_limit,
INT32  line_index,
TO_ROW_IT *  row_it,
BOOL8  testing_on 
)

Find neighbours.

Returns:
TRUE if the row should be deleted.
Delete this row if it has a neighbour with better dropout characteristics.

Definition at line 697 of file makerow.cpp.

References TO_ROW::believability(), FALSE, TO_ROW::intercept(), TO_ROW::parallel_c(), tprintf(), and TRUE.

Referenced by delete_non_dropout_rows().

00704                              {  //returns TRUE when this row should be deleted: too far from a dropout, or a neighbour fits it better
00705   INT32 next_index;              //of neighbouring row
00706   INT32 row_offset;              //from current row
00707   INT32 abs_dist;                //absolute distance
00708   INT8 row_inc;                  //increment to row_index
00709   TO_ROW *next_row;              //neighbouring row being tested
00710 
00711   if (testing_on)
00712     tprintf ("Row at %g(%g), dropout dist=%d,",
00713       row->intercept (), row->parallel_c (), distance);
00714   if (distance < 0) {
00715     row_inc = 1;                 //negative distance: search forwards through the row list
00716     abs_dist = -distance;
00717   }
00718   else {
00719     row_inc = -1;                //otherwise search backwards
00720     abs_dist = distance;
00721   }
00722   if (abs_dist > dist_limit) {
00723     if (testing_on) {
00724       tprintf (" too far - deleting\n");
00725     }
00726     return TRUE;
00727   }
00728   if (distance < 0 && !row_it->at_last ()
00729   || distance >= 0 && !row_it->at_first ()) {  //only if a neighbour exists in the search direction
00730     row_offset = row_inc;
00731     do {
00732       next_row = row_it->data_relative (row_offset);
00733       next_index = (INT32) floor (next_row->intercept ());
00734       if (distance < 0
00735         && next_index < line_index
00736         && next_index > line_index + distance + distance
00737         || distance >= 0
00738         && next_index > line_index
00739       && next_index < line_index + distance + distance) {  //neighbour is strictly between this row and twice the dropout distance
00740         if (testing_on) {
00741           tprintf (" nearer neighbour (%d) at %g\n",
00742             line_index + distance - next_index,
00743             next_row->intercept ());
00744         }
00745         return TRUE;             //other is nearer
00746       }
00747       else if (next_index == line_index
00748       || next_index == line_index + distance + distance) {  //tie on distance: compare believability instead
00749         if (row->believability () <= next_row->believability ()) {
00750           if (testing_on) {
00751             tprintf (" equal but more believable at %g (%g/%g)\n",
00752               next_row->intercept (),
00753               row->believability (),
00754               next_row->believability ());
00755           }
00756           return TRUE;           //other is more believable
00757         }
00758       }
00759       row_offset += row_inc;
00760     }
00761     while ((next_index == line_index
00762       || next_index == line_index + distance + distance)
00763       && row_offset < row_it->length ());  //NOTE(review): row_offset is negative when row_inc is -1, so this bound only limits the forward search - confirm
00764     if (testing_on)
00765       tprintf (" keeping\n");
00766   }
00767   return FALSE;
00768 }

void fit_lms_line ( TO_ROW * row  ) 

Fit a line to a row.

Fit an LMS line to a row.

Definition at line 227 of file makerow.cpp.

References TO_ROW::blob_list(), BOX::bottom(), BOX::left(), BOX::right(), and TO_ROW::set_line().

Referenced by make_initial_textrows().

00229                    {               //fit a line through the blobs of row and store it in the row
00230   float m, c;                    //fitted line: gradient and intercept
00231   BOX box;                       //blob box
00232   LMS lms (row->blob_list ()->length ());  //constructed with the number of blobs in the row
00233                                  //blobs
00234   BLOBNBOX_IT blob_it = row->blob_list ();
00235 
00236   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
00237     box = blob_it.data ()->bounding_box ();
00238     lms.add (FCOORD ((box.left () + box.right ()) / 2.0, box.bottom ()));  //point = (x centre, bottom edge)
00239   }
00240   lms.fit (m, c);                //m and c are outputs of the fit
00241   row->set_line (m, c, lms.error ());  //record line and fit error in the row
00242 }

void fit_parallel_lms ( float  gradient,
TO_ROW * row 
)

Fit a parallel line to a row.

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

Definition at line 1920 of file makerow.cpp.

References TO_ROW::blob_list(), BOX::bottom(), cprintf(), BOX::left(), BOX::right(), TO_ROW::set_line(), and TO_ROW::set_parallel_line().

Referenced by fit_parallel_rows().

01923                        {           //fit a line of the given gradient to row and store it in the row
01924   float c;                       //fitted line intercept
01925   int blobcount;                 //no of blobs added to the fit
01926   BOX box;                       //blob box
01927   LMS lms (row->blob_list ()->length ());
01928                                  //blobs
01929   BLOBNBOX_IT blob_it = row->blob_list ();
01930 
01931 #ifdef TEXT_VERBOSE
01932   // gets a 'm', see ccmain/tesseractmain.dox
01933   cprintf("m");
01934 #endif
01935   blobcount = 0;
01936   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
01937     if (!blob_it.data ()->joined_to_prev ()) {  //blobs flagged joined_to_prev are left out
01938       box = blob_it.data ()->bounding_box ();
01939       lms.
01940         add (FCOORD ((box.left () + box.right ()) / 2.0, box.bottom ()));  //point = (x centre, bottom edge)
01941       blobcount++;
01942     }
01943   }
01944   lms.constrained_fit (gradient, c);  //presumably fits c only, for the given gradient - confirm in lmedsq.h
01945   row->set_parallel_line (gradient, c, lms.error ());
01946   if (textord_straight_baselines && blobcount > lms_line_trials) {
01947     lms.fit (gradient, c);       //unconstrained fit: overwrites the local gradient and c
01948   }
01949                                  //set the other too (uses the re-fitted values if the branch above ran)
01950   row->set_line (gradient, c, lms.error ());
01951 }

void fit_parallel_rows ( TO_BLOCK * block,
float  gradient,
FCOORD  rotation,
INT32  block_edge,
BOOL8  testing_on 
)

Find lines.

Re-fit the rows in the block to the given gradient.

Definition at line 1879 of file makerow.cpp.

References fit_parallel_lms(), MAGENTA, plot_parallel_row(), RED, rotation, and row_y_order().

Referenced by cleanup_rows(), and expand_rows().

01885                         {          //re-fit each row of block to gradient, drop empty rows, then re-sort
01886 #ifndef GRAPHICS_DISABLED
01887   COLOUR colour;                 //of row
01888 #endif
01889   TO_ROW_IT row_it = block->get_rows ();
01890 
01891   row_it.move_to_first ();
01892   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01893     if (row_it.data ()->blob_list ()->empty ())
01894       delete row_it.extract ();  //nothing in it
01895     else
01896       fit_parallel_lms (gradient, row_it.data ());  //sets both the parallel line and the line of the row
01897   }
01898 #ifndef GRAPHICS_DISABLED
01899   if (testing_on) {              //debug display only
01900     colour = RED;
01901     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01902       plot_parallel_row (row_it.data (), gradient,
01903         block_edge, colour, rotation);
01904       colour = (COLOUR) (colour + 1);  //next colour, wrapping from MAGENTA back to RED
01905       if (colour > MAGENTA)
01906         colour = RED;
01907     }
01908   }
01909 #endif
01910   row_it.sort (row_y_order);     //may have gone out of order
01911 }

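double* linear_spline_baseline ( TO_ROW * row,
TO_BLOCK * block,
INT32 & segments,
INT32  xstarts[] 
)

Linear spline baseline.

Returns:
An array of 3 * segments coefficients allocated with alloc_mem(), three per segment with the quadratic term set to 0. The caller (make_baseline_spline()) releases it with free_mem().
Divide the blobs of the row into segments (blob count / textord_spline_medianwin of them, at least one), fit a straight LMS line to each segment, and write the segment boundaries to xstarts and the segment count to segments.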

Definition at line 2183 of file makerow.cpp.

References alloc_mem(), TO_ROW::blob_list(), BOX::bottom(), box_next_pre_chopped(), BOX::left(), BOX::right(), and tprintf().

Referenced by make_baseline_spline().

02188   {                              //fit one straight line per segment; returns the alloc_mem'd coeffs
02189   int blobcount;                 //no of blobs
02190   int blobindex;                 //current blob
02191   int index1, index2;            //blob numbers reached by blob_it and new_it
02192   int blobs_per_segment;         //blobs in each
02193   BOX box;                       //blob box
02194   BOX new_box;                   //new_it box
02195   float middle;                  //xcentre of blob
02196                                  //blobs
02197   BLOBNBOX_IT blob_it = row->blob_list ();
02198   BLOBNBOX_IT new_it = blob_it;  //front end
02199   float b, c;                    //fitted line: gradient and intercept
02200   LMS lms (row->blob_list ()->length ());
02201   double *coeffs;                //quadratic coeffs, 3 per segment
02202   INT32 segment;                 //current segment (1-based)
02203                                  //NOTE(review): block is not referenced anywhere in this body
02204   box = box_next_pre_chopped (&blob_it);
02205   xstarts[0] = box.left ();      //row starts at the left of the first box
02206   blobcount = 1;
02207   while (!blob_it.at_first ()) { //count boxes until the iterator is back at the start
02208     blobcount++;
02209     box = box_next_pre_chopped (&blob_it);
02210   }
02211   segments = blobcount / textord_spline_medianwin;
02212   if (segments < 1)
02213     segments = 1;                //always at least one segment
02214   blobs_per_segment = blobcount / segments;
02215   coeffs = (double *) alloc_mem (segments * 3 * sizeof (double));
02216   if (textord_oldbl_debug)
02217     tprintf
02218       ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
02219       blobcount, box.left (), box.bottom (), segments, blobs_per_segment);
02220   segment = 1;
02221   for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
02222     box_next_pre_chopped(&new_it);  //start new_it half a segment ahead of blob_it
02223   index1 = 0;
02224   blobindex = index2;
02225   do {                           //each pass fits one segment from blob_it, then one from new_it
02226     blobindex += blobs_per_segment;
02227     lms.clear ();
02228     while (index1 < blobindex || segment == segments && index1 < blobcount) {  //&& binds tighter than ||: the last segment runs on to blobcount
02229       box = box_next_pre_chopped (&blob_it);
02230       middle = (box.left () + box.right ()) / 2.0;
02231       lms.add (FCOORD (middle, box.bottom ()));
02232       index1++;
02233       if (index1 == blobindex - blobs_per_segment / 2
02234       || index1 == blobcount - 1) {
02235         xstarts[segment] = box.left ();  //boundary at the end of this segment
02236       }
02237     }
02238     lms.fit (b, c);
02239     coeffs[segment * 3 - 3] = 0; //no quadratic term
02240     coeffs[segment * 3 - 2] = b;
02241     coeffs[segment * 3 - 1] = c;
02242     segment++;
02243     if (segment > segments)
02244       break;                     //odd number of segments: finished after the blob_it half
02245 
02246     blobindex += blobs_per_segment;
02247     lms.clear ();
02248     while (index2 < blobindex || segment == segments && index2 < blobcount) {  //same test as above, for new_it
02249       new_box = box_next_pre_chopped (&new_it);
02250       middle = (new_box.left () + new_box.right ()) / 2.0;
02251       lms.add (FCOORD (middle, new_box.bottom ()));
02252       index2++;
02253       if (index2 == blobindex - blobs_per_segment / 2
02254       || index2 == blobcount - 1) {
02255         xstarts[segment] = new_box.left ();
02256       }
02257     }
02258     lms.fit (b, c);
02259     coeffs[segment * 3 - 3] = 0; //no quadratic term
02260     coeffs[segment * 3 - 2] = b;
02261     coeffs[segment * 3 - 1] = c;
02262     segment++;
02263   }
02264   while (segment <= segments);
02265   return coeffs;                 //not freed here; make_baseline_spline calls free_mem on it
02266 }

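void make_baseline_spline ( TO_ROW * row,
TO_BLOCK * block 
)

Fit a spline baseline to a row.

Segment the row with segment_baseline() and fit a quadratic or linear spline to the segments; if no curve is needed, or straight or parallel baselines are selected, use the row's already fitted line as a single segment. The result is stored in the row's baseline.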

Definition at line 2011 of file makerow.cpp.

References alloc_mem(), TO_ROW::blob_list(), BOX::bottom(), free_mem(), BOX::left(), linear_spline_baseline(), BOX::right(), and segment_baseline().

Referenced by make_spline_rows().

02014                            {       //build row->baseline as a QSPLINE of one or more segments
02015   float b, c;                    //fitted curve
02016   float middle;                  //x middle of blob
02017   BOX box;                       //blob box
02018   LMS lms (row->blob_list ()->length ());
02019                                  //blobs
02020   BLOBNBOX_IT blob_it = row->blob_list ();
02021   INT32 *xstarts;                //spline boundaries
02022   double *coeffs;                //quadratic coeffs, 3 per segment
02023   INT32 segments;                //no of segments, set by segment_baseline
02024   INT32 segment;                 //current segment
02025 
02026   xstarts =
02027     (INT32 *) alloc_mem ((row->blob_list ()->length () + 1) * sizeof (INT32));  //one more entry than there are blobs
02028   if (segment_baseline (row, block, segments, xstarts)
02029   && !textord_straight_baselines && !textord_parallel_baselines) {  //curve needed and curves allowed
02030     if (textord_quadratic_baselines) {
02031       coeffs = (double *) alloc_mem (segments * 3 * sizeof (double));
02032       for (segment = 0; segment < segments; segment++) {
02033         lms.clear ();
02034         for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
02035         blob_it.forward ()) {
02036           if (!blob_it.data ()->joined_to_prev ()) {
02037             box = blob_it.data ()->bounding_box ();
02038             middle = (box.left () + box.right ()) / 2.0;
02039             if (middle >= xstarts[segment]
02040             && middle < xstarts[segment + 1]) {  //blob centre lies in this segment
02041               lms.add (FCOORD (middle, box.bottom ()));
02042             }
02043           }
02044         }
02045         if (textord_quadratic_baselines)  //NOTE(review): same test as the enclosing if at 02030, so the else arm looks unreachable
02046           lms.fit_quadratic (block->line_size *
02047             textord_spline_outlier_fraction,
02048             coeffs[segment * 3], b, c);
02049         else {
02050           lms.fit (b, c);
02051           coeffs[segment * 3] = 0;
02052         }
02053         coeffs[segment * 3 + 1] = b;
02054         coeffs[segment * 3 + 2] = c;
02055       }
02056     }
02057     else
02058       coeffs = linear_spline_baseline (row, block, segments, xstarts);  //allocates and returns coeffs
02059   }
02060   else {                         //single straight segment from the row's existing line
02061     xstarts[1] = xstarts[segments];  //keep only the first and last boundaries
02062     segments = 1;
02063     coeffs = (double *) alloc_mem (3 * sizeof (double));
02064     coeffs[0] = 0;               //no quadratic term
02065     coeffs[1] = row->line_m ();
02066     coeffs[2] = row->line_c ();
02067   }
02068   row->baseline = QSPLINE (segments, xstarts, coeffs);
02069   free_mem(coeffs);              //both arrays are released once the QSPLINE has been built
02070   free_mem(xstarts);
02071 }

void make_initial_textrows ( ICOORD  page_tr,
TO_BLOCK * block,
FCOORD  rotation,
BOOL8  testing_on 
)

Find lines.

Arrange the good blobs into rows of text.

Definition at line 187 of file makerow.cpp.

References assign_blobs_to_rows(), create_to_win(), fit_lms_line(), MAGENTA, NO_WINDOW, NULL, plot_to_row(), RED, rotation, to_win, and TRUE.

Referenced by make_rows().

00192                             {      //assign the blobs of block to rows and fit a line to each row
00193   TO_ROW_IT row_it = block->get_rows ();
00194 
00195 #ifndef GRAPHICS_DISABLED
00196   COLOUR colour;                 //of row
00197 
00198   if (textord_show_initial_rows && testing_on) {
00199     if (to_win == NO_WINDOW)
00200       create_to_win(page_tr);    //open the debug window on first use
00201   }
00202 #endif
00203                                  //guess skew
00204   assign_blobs_to_rows (block, NULL, 0, TRUE, TRUE, textord_show_initial_rows && testing_on);
00205   row_it.move_to_first ();
00206   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
00207     fit_lms_line (row_it.data ());  //unconstrained line fit for each row
00208 #ifndef GRAPHICS_DISABLED
00209   if (textord_show_initial_rows && testing_on) {  //debug display only
00210     colour = RED;
00211     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00212       plot_to_row (row_it.data (), colour, rotation);
00213       colour = (COLOUR) (colour + 1);  //next colour, wrapping from MAGENTA back to RED
00214       if (colour > MAGENTA)
00215         colour = RED;
00216     }
00217   }
00218 #endif
00219 }

float make_rows ( ICOORD  page_tr,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  land_blocks,
TO_BLOCK_LIST *  port_blocks 
)

Make rows.

Arrange the blobs into rows.

Definition at line 130 of file makerow.cpp.

References cleanup_rows(), compute_page_skew(), cprintf(), f, and make_initial_textrows().

Referenced by textord_page().

00135                  {               //make rows in every portrait block; returns the page skew gradient
00136   float port_m;                  //global skew
00137   float port_err;                //global noise
00138   //float land_m;                  //global skew
00139   //float land_err;                //global noise
00140   TO_BLOCK_IT block_it;          //iterator
00141 
00142   //don't do landscape for now
00143   //  block_it.set_to_list(land_blocks);
00144   //  for (block_it.mark_cycle_pt();!block_it.cycled_list();block_it.forward())
00145   //     make_initial_textrows(page_tr,block_it.data(),FCOORD(0,-1),
00146   //        (BOOL8)textord_test_landscape);
00147 
00148 #ifdef TEXT_VERBOSE
00149   // gets a 'r', see ccmain/tesseractmain.dox
00150   cprintf("r\n");
00151 #endif
00152   block_it.set_to_list (port_blocks);
00153   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00154     block_it.forward ())
00155   make_initial_textrows (page_tr, block_it.data (), FCOORD (1.0f, 0.0f),
00156       !(BOOL8) textord_test_landscape);  //testing_on is the inverse of textord_test_landscape
00157                                  //compute globally
00158   compute_page_skew(port_blocks, port_m, port_err); 
00159   //  compute_page_skew(land_blocks,land_m,land_err);  //compute globally
00160   //  tprintf("Portrait skew gradient=%g, error=%g.\n",
00161   //          port_m,port_err);
00162   //  tprintf("Landscape skew gradient=%g, error=%g.\n",
00163   //          land_m,land_err);
00164   block_it.set_to_list (port_blocks);
00165   for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
00166   block_it.forward ()) {
00167     cleanup_rows (page_tr, block_it.data (), port_m, FCOORD (1.0f, 0.0f),
00168       block_it.data ()->block->bounding_box ().left (),
00169       !(BOOL8) textord_test_landscape);  //block edge argument is the left of the block's bounding box
00170   }
00171   block_it.set_to_list (land_blocks);  //NOTE(review): block_it is not used again while the loop below stays commented out
00172   //  for (block_it.mark_cycle_pt();!block_it.cycled_list();block_it.forward())
00173   //  {
00174   //     cleanup_rows(page_tr,block_it.data(),land_m,FCOORD(0,-1),
00175   //                  -block_it.data()->block->bounding_box().top(),
00176   //                  (BOOL8)textord_test_landscape);
00177   //  }
00178   return port_m;                 //global skew
00179 }

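void make_spline_rows ( TO_BLOCK * block,
float  gradient,
FCOORD  rotation,
INT32  block_edge,
BOOL8  testing_on 
)

Make spline baselines.

Delete the empty rows in the block and fit a spline baseline to each remaining row with make_baseline_spline(). If textord_old_baselines is set, make_old_baselines() is run afterwards.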

Definition at line 1959 of file makerow.cpp.

References MAGENTA, make_baseline_spline(), make_old_baselines(), RED, and to_win.

Referenced by cleanup_rows().

01965                        {           //fit a spline baseline to every non-empty row of block
01966   COLOUR colour;                 //of row; NOTE(review): declared outside the GRAPHICS_DISABLED guard, unlike fit_parallel_rows
01967   TO_ROW_IT row_it = block->get_rows ();
01968                                  //NOTE(review): gradient, rotation and block_edge are not referenced in this body
01969   row_it.move_to_first ();
01970   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01971     if (row_it.data ()->blob_list ()->empty ())
01972       delete row_it.extract ();  //nothing in it
01973     else
01974       make_baseline_spline (row_it.data (), block);
01975   }
01976   if (textord_old_baselines) {
01977 #ifndef GRAPHICS_DISABLED
01978     if (testing_on) {            //plot the baselines as they are before make_old_baselines runs
01979       colour = RED;
01980       for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
01981       row_it.forward ()) {
01982         row_it.data ()->baseline.plot (to_win, colour);
01983         colour = (COLOUR) (colour + 1);
01984         if (colour > MAGENTA)
01985           colour = RED;
01986       }
01987     }
01988 #endif
01989     make_old_baselines(block, testing_on);
01990   }
01991 #ifndef GRAPHICS_DISABLED
01992   if (testing_on) {              //plot the final baselines (a second plot when textord_old_baselines is set)
01993     colour = RED;
01994     for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01995       row_it.data ()->baseline.plot (to_win, colour);
01996       colour = (COLOUR) (colour + 1);  //next colour, wrapping from MAGENTA back to RED
01997       if (colour > MAGENTA)
01998         colour = RED;
01999     }
02000   }
02001 #endif
02002 }

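float median_block_xheight ( TO_BLOCK * block,
float  gradient 
)

Median x-height of a block.

Compute, for every blob not joined to its predecessor, the height of the blob top above its row's parallel baseline (top - gradient * xcentre - parallel_c), and return the median of the positive heights.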

Definition at line 1353 of file makerow.cpp.

References ABORT, alloc_mem(), ASSERT_HOST, blob_count, TO_ROW::blob_list(), choose_nth_item(), ERRCODE::error(), f, free_mem(), MEMORY_OUT, NULL, and TO_ROW::parallel_c().

Referenced by compute_block_xheight().

01356                             {      //median height of blob tops above their row's parallel baseline
01357   TO_ROW *row;                   //current row
01358   float result;                  //output size
01359   float xcentre;                 //centre of blob
01360   TO_ROW_IT row_it = block->get_rows ();
01361   BLOBNBOX_IT blob_it;
01362   BLOBNBOX *blob;                //current blob
01363   float *heights;                //for choose nth
01364   INT32 blob_count;              //blobs in block
01365   INT32 blob_index;              //current blob
01366 
01367   blob_count = 0;
01368   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
01369     blob_count += row_it.data ()->blob_list ()->length ();  //upper bound on the heights needed
01370   heights = (float *) alloc_mem (blob_count * sizeof (float));
01371   if (heights == NULL)
01372     MEMORY_OUT.error ("median_block_xheight", ABORT, NULL);  //report this function as the caller
01373 
01374   blob_index = 0;
01375   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01376     row = row_it.data ();
01377     blob_it.set_to_list (row->blob_list ());
01378     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
01379     blob_it.forward ()) {
01380       blob = blob_it.data ();
01381       if (!blob->joined_to_prev ()) {
01382         xcentre =
01383           (blob->bounding_box ().left () +
01384           blob->bounding_box ().right ()) / 2.0f;
01385         heights[blob_index] =
01386           blob->bounding_box ().top () - gradient * xcentre -
01387           row->parallel_c ();    //height of the blob top above the line y = gradient * x + parallel_c
01388         if (heights[blob_index] > 0)
01389           blob_index++;          //keep positive heights only; others are overwritten by the next blob
01390       }
01391     }
01392   }
01393   ASSERT_HOST (blob_index > 0);  //dont expect 0
01394   blob_count = blob_index;       //number of heights actually stored
01395   blob_index = choose_nth_item (blob_count / 2, heights, blob_count);  //index of the median entry
01396   result = heights[blob_index];
01397   free_mem(heights);
01398   return result;
01399 }

OVERLAP_STATE most_overlapping_row ( TO_ROW_IT *  row_it,
TO_ROW *&  best_row,
float  top,
float  bottom,
float  rowsize,
BOOL8  testing_blob 
)

Find best row.

Parameters:
row_it Iterator
best_row Output row (results returned are for this row)
top Top of blob
bottom Bottom of blob
rowsize Max row size
testing_blob TRUE or FALSE, Test stuff
Returns:
OVERLAP_STATE code for best_row
Return the row which most overlaps the top and bottom of blob.

Definition at line 2482 of file makerow.cpp.

Referenced by assign_blobs_to_rows(), and restore_underlined_blobs().

02489                                     {  //pick the row that best overlaps (bottom,top), merging rows where allowed
02490   OVERLAP_STATE result;          //result of tests
02491   float overlap;                 //of blob & row
02492   float bestover;                //overlap with the best row so far
02493   float merge_top, merge_bottom; //size of merged row
02494   ICOORD testpt;                 //testing only
02495   TO_ROW *row;                   //best row so far
02496   TO_ROW *test_row;              //for multiple overlaps
02497   BLOBNBOX_IT blob_it;           //for merging rows
02498 
02499   result = ASSIGN;
02500   row = row_it->data ();
02501   bestover = top - bottom;       //start from the blob height and subtract what sticks out of the row
02502   if (top > row->max_y ())
02503     bestover -= top - row->max_y ();
02504   if (bottom < row->min_y ())
02505                                  //compute overlap
02506     bestover -= row->min_y () - bottom;
02507   if (testing_blob) {
02508     tprintf ("Test blob y=(%g,%g), row=(%f,%f), overlap=%f\n",
02509       bottom, top, row->min_y (), row->max_y (), bestover);
02510   }
02511   test_row = row;
02512   do {
02513     if (!row_it->at_last ()) {
02514       row_it->forward ();
02515       test_row = row_it->data ();
02516       if (test_row->min_y () <= top && test_row->max_y () >= bottom) {  //test_row overlaps the blob's y range
02517         merge_top =
02518           test_row->max_y () >
02519           row->max_y ()? test_row->max_y () : row->max_y ();
02520         merge_bottom =
02521           test_row->min_y () <
02522           row->min_y ()? test_row->min_y () : row->min_y ();
02523         if (merge_top - merge_bottom <= rowsize) {  //the two rows together still fit in one row height
02524           if (testing_blob) {
02525             tprintf ("Merging rows at (%g,%g), (%g,%g)\n",
02526               row->min_y (), row->max_y (),
02527               test_row->min_y (), test_row->max_y ());
02528           }
02529           test_row->set_limits (merge_bottom, merge_top);
02530           blob_it.set_to_list (test_row->blob_list ());
02531           blob_it.add_list_after (row->blob_list ());  //move row's blobs into test_row
02532           blob_it.sort (blob_x_order);
02533           row_it->backward ();   //NOTE(review): assumes the row just before test_row is row - confirm
02534           delete row_it->extract ();
02535           row_it->forward ();
02536           bestover = -1.0f;      //force replacement: row is reassigned to test_row by the overlap test below
02537         }
02538         overlap = top - bottom;
02539         if (top > test_row->max_y ())
02540           overlap -= top - test_row->max_y ();
02541         if (bottom < test_row->min_y ())
02542           overlap -= test_row->min_y () - bottom;
02543         if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {  //blob overlaps two rows almost completely
02544           result = REJECT;
02545         }
02546         if (overlap > bestover) {
02547           bestover = overlap;    //find biggest overlap
02548           row = test_row;
02549         }
02550         if (testing_blob) {
02551           tprintf
02552             ("Test blob y=(%g,%g), row=(%f,%f), overlap=%f->%f\n",
02553             bottom, top, test_row->min_y (), test_row->max_y (),
02554             overlap, bestover);
02555         }
02556       }
02557     }
02558   }
02559   while (!row_it->at_last ()
02560     && test_row->min_y () <= top && test_row->max_y () >= bottom);  //stop at the list end or the first non-overlapping row
02561   while (row_it->data () != row)
02562     row_it->backward ();         //make it point to row
02563 
02564   //doesn't overlap much
02565   if (top - bottom - bestover > rowsize * textord_merge_x
02566      && (!textord_fix_makerow_bug || bestover < rowsize * textord_merge_x)
02567      && result == ASSIGN)
02568     result = NEW_ROW;            //doesn't overlap enough
02569   best_row = row;                //output: the row the result code refers to
02570   return result;
02571 }

void pre_associate_blobs ( ICOORD  page_tr,
TO_BLOCK * block,
FCOORD  rotation,
BOOL8  testing_on 
)

Make rough chars.

Associate overlapping blobs and fake chop wide blobs.

Definition at line 1780 of file makerow.cpp.

References BOX::bottom(), create_to_win(), INT_HOLLOW, interior_style, BOX::left(), MAGENTA, NO_WINDOW, perimeter_color_index, rectangle, RED, BOX::right(), BOX::rotate(), rotation, to_win, BOX::top(), TRUE, and BOX::width().

Referenced by cleanup_rows().

01785                           {        //merge x-overlapping neighbouring blobs in each row, then try to chop them
01786 #ifndef GRAPHICS_DISABLED
01787   COLOUR colour;                 //of boxes
01788 #endif
01789   INT16 overlap;                 //of adjacent boxes
01790   BLOBNBOX *blob;                //current blob
01791   BLOBNBOX *nextblob;            //next in list
01792   BOX blob_box;
01793   BOX next_box;                  //next blob
01794   FCOORD blob_rotation;          //inverse of rotation
01795   BLOBNBOX_IT blob_it;           //iterator
01796   BLOBNBOX_IT start_it;          //iterator
01797   TO_ROW_IT row_it = block->get_rows ();
01798 
01799 #ifndef GRAPHICS_DISABLED
01800   colour = RED;
01801 #endif
01802 
01803   blob_rotation = FCOORD (rotation.x (), -rotation.y ());  //same x, negated y
01804   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01805                                  //get blobs
01806     blob_it.set_to_list (row_it.data ()->blob_list ());
01807     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
01808     blob_it.forward ()) {
01809       blob = blob_it.data ();
01810       blob_box = blob->bounding_box ();
01811       start_it = blob_it;        //save start point
01812       //   if (testing_on && textord_show_final_blobs)
01813       //   {
01814       //      tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n",
01815       //              blob_box.left(),blob_box.bottom(),
01816       //              blob_box.right(),blob_box.top(),
01817       //              (void*)blob,blob_it.length());
01818       //   }
01819       do {                       //keep absorbing the next blob while it overlaps enough
01820         if (!blob_it.at_last ()) {
01821           nextblob = blob_it.data_relative (1);
01822           next_box = nextblob->bounding_box ();
01823           overlap = next_box.width ();  //start from the full width of the next box
01824           if (blob_box.left () > next_box.left ())
01825             overlap -= blob_box.left () - next_box.left ();  //part of next_box left of blob_box
01826           if (blob_box.right () < next_box.right ())
01827             overlap -= next_box.right () - blob_box.right ();  //part of next_box right of blob_box
01828           if (overlap >= next_box.width () / 2
01829           || overlap >= blob_box.width () / 2) {  //at least half of either box is shared
01830                                  //merge new blob
01831             blob->merge (nextblob);
01832                                  //get bigger box
01833             blob_box = blob->bounding_box ();
01834             blob_it.forward ();
01835           }
01836           else
01837             overlap = -1;        //no overlap
01838         }
01839         else
01840           overlap = -1;          //no overlap
01841       }
01842       while (overlap >= 0);
01843       blob->chop (&start_it, &blob_it,
01844         blob_rotation,
01845         block->line_size * textord_merge_x *
01846         textord_chop_width);
01847       //attempt chop
01848     }
01849 #ifndef GRAPHICS_DISABLED
01850     if (testing_on && textord_show_final_blobs) {  //debug display only
01851       if (to_win == NO_WINDOW)
01852         create_to_win(page_tr);
01853       perimeter_color_index(to_win, colour);
01854       interior_style(to_win, INT_HOLLOW, TRUE);
01855       for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
01856       blob_it.forward ()) {
01857         blob = blob_it.data ();
01858         blob_box = blob->bounding_box ();
01859         blob_box.rotate (rotation);
01860         if (!blob->joined_to_prev ()) {  //draw only blobs not flagged joined_to_prev
01861           rectangle (to_win, blob_box.left (), blob_box.bottom (),
01862             blob_box.right (), blob_box.top ());
01863         }
01864       }
01865       colour = (COLOUR) (colour + 1);  //one colour per row, wrapping from MAGENTA back to RED
01866       if (colour > MAGENTA)
01867         colour = RED;
01868     }
01869 #endif
01870   }
01871 }

int row_spacing_order ( const void *  item1,
const void *  item2 
)

Sort function.

Qsort style function to compare 2 TO_ROWS based on their spacing value.

Definition at line 2623 of file makerow.cpp.

References TO_ROW::spacing.

Referenced by compute_row_stats().

02625                                          {  //qsort comparator: ascending order of spacing
02626                                  //items are pointers to TO_ROW pointers
02627   TO_ROW *row1 = *(TO_ROW **) item1;
02628                                  //converted ptr
02629   TO_ROW *row2 = *(TO_ROW **) item2;
02630 
02631   if (row1->spacing < row2->spacing)
02632     return -1;                   //row1 sorts first
02633   else if (row1->spacing > row2->spacing)
02634     return 1;                    //row2 sorts first
02635   else
02636     return 0;                    //equal spacing
02637 }

int row_y_order ( const void *  item1,
const void *  item2 
)

Sort function.

Sort function to sort rows in y from page's top.

Definition at line 2601 of file makerow.cpp.

References TO_ROW::parallel_c().

Referenced by fit_parallel_rows().

02603                                    {  //sort comparator: descending order of parallel_c
02604                                  //items are pointers to TO_ROW pointers
02605   TO_ROW *row1 = *(TO_ROW **) item1;
02606                                  //converted ptr
02607   TO_ROW *row2 = *(TO_ROW **) item2;
02608 
02609   if (row1->parallel_c () > row2->parallel_c ())
02610     return -1;                   //larger intercept sorts first
02611   else if (row1->parallel_c () < row2->parallel_c ())
02612     return 1;
02613   else
02614     return 0;                    //equal intercepts
02615 }

BOOL8 segment_baseline ( TO_ROW * row,
TO_BLOCK * block,
INT32 & segments,
INT32  xstarts[] 
)

Split baseline.

Returns:
TRUE if enough blobs were far enough away to need a quadratic.
Divide the baseline up into segments which require a different quadratic fitted to them.

Definition at line 2083 of file makerow.cpp.

References TO_ROW::blob_list(), BOX::bottom(), box_next_pre_chopped(), FALSE, BOX::left(), TO_ROW::line_c(), TO_ROW::line_m(), BOX::right(), tprintf(), and TRUE.

Referenced by make_baseline_spline().

02088   {                              //split the row into segments; fills segments and xstarts, returns needs_curve
02089   BOOL8 needs_curve;             //needs curved line
02090   int blobcount;                 //no of blobs
02091   int blobindex;                 //current blob
02092   int last_state;                //above, on , below
02093   int state;                     //of current blob
02094   float yshift;                  //from baseline
02095   BOX box;                       //blob box
02096   BOX new_box;                   //new_it box
02097   float middle;                  //xcentre of blob
02098                                  //blobs
02099   BLOBNBOX_IT blob_it = row->blob_list ();
02100   BLOBNBOX_IT new_it = blob_it;  //front end
02101   SORTED_FLOATS yshifts;         //shifts from baseline
02102 
02103   needs_curve = FALSE;
02104   box = box_next_pre_chopped (&blob_it);
02105   xstarts[0] = box.left ();      //first boundary is the left of the first box
02106   segments = 1;
02107   blobcount = row->blob_list ()->length ();
02108   if (textord_oldbl_debug)
02109     tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n",
02110       blobcount, box.left (), box.bottom ());
02111   if (blobcount <= textord_spline_medianwin
02112   || blobcount < textord_spline_minblobs) {  //too few blobs: one segment, no curve
02113     blob_it.move_to_last ();
02114     box = blob_it.data ()->bounding_box ();
02115     xstarts[1] = box.right ();
02116     return FALSE;
02117   }
02118   last_state = 0;
02119   new_it.mark_cycle_pt ();
02120   for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {  //fill the window with the first shifts
02121     new_box = box_next_pre_chopped (&new_it);
02122     middle = (new_box.left () + new_box.right ()) / 2.0;
02123     yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();  //box bottom relative to the row's fitted line
02124                                  //record shift
02125     yshifts.add (yshift, blobindex);
02126     if (new_it.cycled_list ()) { //ran out of boxes while filling the window
02127       xstarts[1] = new_box.right ();
02128       return FALSE;
02129     }
02130   }
02131   for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++)  //blobcount is reused as a per-segment counter from here on
02132     box = box_next_pre_chopped (&blob_it);
02133   do {
02134     new_box = box_next_pre_chopped (&new_it);
02135                                  //get middle one
02136     yshift = yshifts[textord_spline_medianwin / 2];
02137     if (yshift > textord_spline_shift_fraction * block->line_size)
02138       state = 1;                 //above the line by more than the threshold
02139     else if (-yshift > textord_spline_shift_fraction * block->line_size)
02140       state = -1;                //below the line by more than the threshold
02141     else
02142       state = 0;
02143     if (state != 0)
02144       needs_curve = TRUE;
02145     //              tprintf("State=%d, prev=%d, shift=%g\n",
02146     //                      state,last_state,yshift);
02147     if (state != last_state && blobcount > textord_spline_minblobs) {  //state change with enough blobs: start a new segment
02148       xstarts[segments++] = box.left ();
02149       blobcount = 0;
02150     }
02151     last_state = state;
02152     yshifts.remove (blobindex - textord_spline_medianwin);  //drop the entry added medianwin blobs ago
02153     box = box_next_pre_chopped (&blob_it);
02154     middle = (new_box.left () + new_box.right ()) / 2.0;
02155     yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
02156     yshifts.add (yshift, blobindex);
02157     blobindex++;
02158     blobcount++;
02159   }
02160   while (!new_it.cycled_list ());
02161   if (blobcount > textord_spline_minblobs || segments == 1) {
02162     xstarts[segments] = new_box.right ();  //close the last segment
02163   }
02164   else {
02165     xstarts[--segments] = new_box.right ();  //last segment too short: extend the previous one instead
02166   }
02167   if (textord_oldbl_debug)
02168     tprintf ("Made %d segments on row at (%d,%d)\n",
02169       segments, box.right (), box.bottom ());
02170   return needs_curve;
02171 }

void separate_underlines ( TO_BLOCK block,
float  gradient,
FCOORD  rotation,
BOOL8  testing_on 
)

Make rough characters.

Test wide objects for being potential underlines; if so, put them in a separate list in the block.

Definition at line 1679 of file makerow.cpp.

References TO_ROW::blob_list(), BOX::bottom(), C_BLOB::bounding_box(), PBLOB::bounding_box(), cprintf(), crotate_cblob(), TO_ROW::intercept(), BOX::left(), NULL, BOX::right(), rotate_blob(), rotate_cblob(), rotation, test_underline(), BOX::top(), tprintf(), and BOX::width().

Referenced by cleanup_rows().

01684                           {  // Moves wide blobs accepted by test_underline() from their rows into block->underlines.
01685   BLOBNBOX *blob;                //current blob
01686   PBLOB *poly_blob;              //rotated PBLOB copy of the current blob
01687   C_BLOB *rotated_blob;          //rotated C_BLOB copy of the current blob
01688   TO_ROW *row;                   //current row
01689   float length;                  //of g_vec
01690   BOX blob_box;                  //bounding box of the current (unrotated) blob
01691   FCOORD blob_rotation;          //inverse of rotation
01692   FCOORD g_vec;                  //skew rotation
01693   BLOBNBOX_IT blob_it;           //iterator over the current row's blobs
01694                                  //iterator over the block's underline list
01695   BLOBNBOX_IT under_it = &block->underlines;
01696   TO_ROW_IT row_it = block->get_rows ();
01697 
01698 #ifdef TEXT_VERBOSE
01699   // gets a 'u', see ccmain/tesseractmain.dox
01700   cprintf("u");
01701 #endif
01702                                  //length of the vector (1, gradient)
01703   length = sqrt (1 + gradient * gradient);
01704   g_vec = FCOORD (1 / length, -gradient / length);  //(1, -gradient) scaled to unit length
01705   blob_rotation = FCOORD (rotation.x (), -rotation.y ());  //rotation with its y component negated
01706   blob_rotation.rotate (g_vec);  //undoing everything
01707   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
01708     row = row_it.data ();
01709                                  //get blobs
01710     blob_it.set_to_list (row->blob_list ());
01711     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
01712     blob_it.forward ()) {
01713       blob = blob_it.data ();
01714       blob_box = blob->bounding_box ();
01715       if (blob_box.width () > block->line_size * textord_underline_width) {  //only wide blobs are candidates
01716         if (textord_cblob_blockocc && blob->cblob () != NULL) {  //C_BLOB path: test a rotated C_BLOB copy
01717           rotated_blob = crotate_cblob (blob->cblob (),
01718             blob_rotation);
01719           if (test_underline (testing_on && textord_show_final_rows,
01720             rotated_blob, (INT16) row->intercept (),
01721             (INT16) (block->line_size *
01722             (textord_merge_x +
01723           textord_merge_asc / 2.0f)))) {
01724             under_it.add_after_then_move (blob_it.extract ());  //take the blob out of the row, append to underlines
01725             if (testing_on && textord_show_final_rows) {
01726               tprintf ("Underlined blob at (%d,%d)->(%d,%d) ",
01727                 rotated_blob->bounding_box ().left (),
01728                 rotated_blob->bounding_box ().bottom (),
01729                 rotated_blob->bounding_box ().right (),
01730                 rotated_blob->bounding_box ().top ());
01731               tprintf ("(Was (%d,%d)->(%d,%d))\n",
01732                 blob_box.left (), blob_box.bottom (),
01733                 blob_box.right (), blob_box.top ());
01734             }
01735           }
01736           delete rotated_blob;  //the rotated copy was only needed for the test
01737         }
01738         else {  //PBLOB path: test a rotated PBLOB copy
01739           if (blob->blob () != NULL) {
01740             //  if (testing_on && textord_show_final_rows)
01741             //      tprintf("Rotating by (%g,%g)\n",
01742             //              blob_rotation.x(),blob_rotation.y());
01743             poly_blob = rotate_blob (blob->blob (), blob_rotation);
01744           }
01745           else  //no PBLOB held by this blob: obtain the rotated PBLOB from its C_BLOB
01746             poly_blob = rotate_cblob (blob->cblob (),
01747               block->line_size,
01748               blob_rotation);
01749           if (test_underline
01750             (testing_on
01751             && textord_show_final_rows, poly_blob,
01752             row->intercept (),
01753             block->line_size * (textord_merge_x +
01754           textord_merge_asc / 2))) {
01755             if (testing_on && textord_show_final_rows) {
01756               tprintf ("Underlined blob at (%d,%d)->(%d,%d) ",
01757                 poly_blob->bounding_box ().left (),
01758                 poly_blob->bounding_box ().bottom (),
01759                 poly_blob->bounding_box ().right (),
01760                 poly_blob->bounding_box ().top ());
01761               tprintf ("(Was (%d,%d)->(%d,%d))\n",
01762                 blob_box.left (), blob_box.bottom (),
01763                 blob_box.right (), blob_box.top ());
01764             }
01765             under_it.add_after_then_move (blob_it.extract ());  //take the blob out of the row, append to underlines
01766           }
01767           delete poly_blob;  //the rotated copy was only needed for the test
01768         }
01769       }
01770     }
01771   }
01772 }

static void vigorous_noise_removal ( TO_BLOCK block  )  [static]

Wipe out tiny blobs (but check for dot above 'i').

Iterate over the rows, estimating the xheight of each row and looking for blobs shorter than kNoiseSize * that xheight. When such a blob is found, first check whether it could be the dot above a letter 'i' (relative to its neighbouring blobs); if it is not, nuke it.

Todo:
Is it ONLY done in preparation for restore_underlined_blobs()?

Definition at line 453 of file makerow.cpp.

References STATS::add(), dot_of_i(), BOX::height(), kMinSize, kNoiseSize, STATS::median(), and NULL.

Referenced by cleanup_rows().

00453                                                     {  // For each row: estimate xheight, then delete small blobs that are not i-dots.
00454   TO_ROW_IT row_it = block->get_rows ();
00455   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00456     TO_ROW* row = row_it.data();
00457     BLOBNBOX_IT b_it = row->blob_list();
00458     // Estimate the xheight on the row.
00459     int max_height = 0;  // Tallest blob bounding box in this row.
00460     for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
00461       BLOBNBOX* blob = b_it.data();
00462       if (blob->bounding_box().height() > max_height)
00463         max_height = blob->bounding_box().height();
00464     }
00465     STATS hstats(0, max_height + 1);  // Presumably a histogram over heights 0..max_height; confirm against STATS.
00466     for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
00467       BLOBNBOX* blob = b_it.data();
00468       int height = blob->bounding_box().height();
00469       if (height >= kMinSize)  // Blobs shorter than kMinSize are left out of the estimate.
00470         hstats.add(blob->bounding_box().height(), 1);
00471     }
00472     float xheight = hstats.median();  // Median of the counted heights is used as the xheight estimate.
00473     // Delete small objects.
00474     BLOBNBOX* prev = NULL;  // Most recent blob that was not below the noise threshold.
00475     for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
00476       BLOBNBOX* blob = b_it.data();
00477       const BOX& box = blob->bounding_box();
00478       if (box.height() < kNoiseSize * xheight) {
00479         // Small so delete unless it looks like an i dot.
00480         if (prev != NULL) {
00481           if (dot_of_i(blob, prev, row))
00482             continue;  // Looks OK.
00483         }
00484         if (!b_it.at_last()) {  // Only look at the following blob when one exists.
00485           BLOBNBOX* next = b_it.data_relative(1);
00486           if (dot_of_i(blob, next, row))
00487             continue;  // Looks OK.
00488         }
00489         // It might be noise so get rid of it.
00490         if (blob->blob() != NULL)
00491           delete blob->blob();
00492         if (blob->cblob() != NULL)
00493           delete blob->cblob();
00494         delete b_it.extract();  // Remove the BLOBNBOX from the row's list and free it.
00495       } else {
00496         prev = blob;  // Only blobs at or above the threshold become the "previous" reference.
00497       }
00498     }
00499   }
00500 }


Variable Documentation

const int kMinSize = 8

Minimum blob height, in pixels, for a blob to be counted when estimating the xheight.

Definition at line 346 of file makerow.cpp.

Referenced by vigorous_noise_removal().

const double kNoiseSize = 0.5

Fraction of the estimated xheight below which a blob is treated as possible noise.

Definition at line 344 of file makerow.cpp.

Referenced by vigorous_noise_removal().


Generated on Wed Feb 28 19:49:25 2007 for Tesseract by  doxygen 1.5.1