textord/oldbasel.cpp File Reference

#include "mfcpch.h"
#include "statistc.h"
#include "quadlsq.h"
#include "lmedsq.h"
#include "makerow.h"
#include "drawtord.h"
#include "oldbasel.h"
#include "tprintf.h"

Go to the source code of this file.

Defines

Functions

Variables


Define Documentation

#define ABS (  )     ((x)<0 ? (-(x)) : (x))

Definition at line 82 of file oldbasel.cpp.

Referenced by choose_partition(), get_ydiffs(), and make_first_baseline().

#define DELTAHEIGHT   5.0

Small amount of diff.

Definition at line 74 of file oldbasel.cpp.

#define DESCENDER_FRACTION   0.5

Descender/x-height.

Definition at line 56 of file oldbasel.cpp.

#define EXTERN

Note:
File: oldbasel.cpp (Formerly oldbl.c)
A reimplementation of the old baseline algorithm.
Author:
Ray Smith
Date:
Wed Oct 6 09:41:48 BST 1993
 * (C) Copyright 1993, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.

Definition at line 29 of file oldbasel.cpp.

#define GOODHEIGHT   5

Definition at line 76 of file oldbasel.cpp.

#define HEIGHTBUCKETS   200

Num of buckets.

Definition at line 72 of file oldbasel.cpp.

Referenced by make_first_xheight().

#define MAXBADRUN   2

Max non best for failed.

Definition at line 70 of file oldbasel.cpp.

Referenced by find_lesser_parts(), and merge_oldbl_parts().

#define MAXHEIGHT   300

Max blob height.

Definition at line 66 of file oldbasel.cpp.

Referenced by get_blob_coords(), and old_first_xheight().

#define MAXHEIGHTVARIANCE   0.15

Accepted variation in x-height.

Definition at line 64 of file oldbasel.cpp.

#define MAXLOOPS   10

Definition at line 77 of file oldbasel.cpp.

#define MAXOVERLAP   0.1

max 10% missed overlap

Definition at line 68 of file oldbasel.cpp.

Referenced by correlate_neighbours(), make_first_baseline(), and make_holed_baseline().

#define MAXPARTS   6

Definition at line 79 of file oldbasel.cpp.

Referenced by choose_partition(), find_lesser_parts(), find_textlines(), and partition_line().

#define MIN_ASC_FRACTION   0.20

Min size of ascenders.

Definition at line 58 of file oldbasel.cpp.

#define MIN_DESC_FRACTION   0.25

Min size of descenders.

Definition at line 60 of file oldbasel.cpp.

#define MINASCRISE   2.0

Min ascender/desc step.

Definition at line 62 of file oldbasel.cpp.

Referenced by find_lesser_parts(), and find_textlines().

#define MODENUM   10

Definition at line 78 of file oldbasel.cpp.

Referenced by make_first_xheight(), and pick_x_height().

#define SPLINESIZE   23

Definition at line 80 of file oldbasel.cpp.

#define TURNLIMIT   1

Min size for turning point.

Definition at line 52 of file oldbasel.cpp.

Referenced by segment_spline().

#define X_HEIGHT_FRACTION   0.7

x-height/caps height

Definition at line 54 of file oldbasel.cpp.


Function Documentation

int choose_partition ( register float  diff,
float  partdiffs[],
int  lastpart,
float  jumplimit,
int *  partcount 
)

Select partition.

Choose a partition for the point and return the index.

Definition at line 956 of file oldbasel.cpp.

References ABS, MAXPARTS, and tprintf().

Referenced by partition_line().

00962   {
00963   register int partition;        /*partition no */
00964   int bestpart;                  /*best new partition */
00965   float bestdelta;               /*best gap from a part */
00966   static float drift;            /*drift from spline */
00967   float delta;                   /*diff from part */
00968   static float lastdelta;        /*previous delta */
00969 
00970   if (lastpart < 0) {
00971     partdiffs[0] = diff;
00972     lastpart = 0;                /*first point */
00973     drift = 0.0f;
00974     lastdelta = 0.0f;
00975   }
00976                                  /*adjusted diff from part */
00977   delta = diff - partdiffs[lastpart] - drift;
00978   if (textord_oldbl_debug) {
00979     tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, drift);
00980   }
00981   if (ABS (delta) > jumplimit / 2) {
00982                                  /*delta on part 0 */
00983     bestdelta = diff - partdiffs[0] - drift;
00984     bestpart = 0;                /*0 best so far */
00985     for (partition = 1; partition < *partcount; partition++) {
00986       delta = diff - partdiffs[partition] - drift;
00987       if (ABS (delta) < ABS (bestdelta)) {
00988         bestdelta = delta;
00989         bestpart = partition;    /*part with nearest jump */
00990       }
00991     }
00992     delta = bestdelta;
00993                                  /*too far away */
00994     if (ABS (bestdelta) > jumplimit
00995     && *partcount < MAXPARTS) {  /*and spare part left */
00996       bestpart = (*partcount)++; /*best was new one */
00997                                  /*start new one */
00998       partdiffs[bestpart] = diff - drift;
00999       delta = 0.0f;
01000     }
01001   }
01002   else {
01003     bestpart = lastpart;         /*best was last one */
01004   }
01005 
01006   if (bestpart == lastpart
01007     && (ABS (delta - lastdelta) < jumplimit / 2
01008     || ABS (delta) < jumplimit / 2))
01009                                  /*smooth the drift */
01010     drift = (3 * drift + delta) / 3;
01011   lastdelta = delta;
01012 
01013   if (textord_oldbl_debug) {
01014     tprintf ("P=%d\n", bestpart);
01015   }
01016 
01017   return bestpart;
01018 }

void correlate_lines ( TO_BLOCK block  ) 

Cleanup lines.

Correlate the x-heights and ascender heights of a block to fill-in the ascender height and descender height for rows without one. Also fix baselines of rows without a decent fit.

Definition at line 127 of file oldbasel.cpp.

References alloc_mem(), correlate_neighbours(), correlate_with_stats(), and free_mem().

Referenced by make_old_baselines().

00129                       {
00130   TO_ROW **rows;                 //array of ptrs
00131   int rowcount;                  /*no of rows to do */
00132   register int rowindex;         /*no of row */
00133                                  //iterator
00134   TO_ROW_IT row_it = block->get_rows ();
00135 
00136   rowcount = row_it.length ();
00137   if (rowcount == 0) {
00138                                  //default value
00139     block->xheight = block->line_size;
00140     return;                      /*none to do */
00141   }
00142   rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
00143   rowindex = 0;
00144   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
00145                                  //make array
00146     rows[rowindex++] = row_it.data ();
00147 
00148                                  /*try to fix bad lines */
00149   correlate_neighbours(block, rows, rowcount);
00150 
00151   block->xheight = (float) correlate_with_stats (rows, rowcount);
00152   /*use stats */
00153   if (block->xheight <= 0)
00154                                  //desperate
00155     block->xheight = block->line_size * textord_merge_x;
00156   if (block->xheight < textord_min_xheight)
00157     block->xheight = (float) textord_min_xheight;
00158 
00159   free_mem(rows);
00160 }

void correlate_neighbours ( TO_BLOCK block,
TO_ROW **  rows,
int  rowcount 
)

Fix bad rows.

Try to fix rows that had a bad spline fit by using neighbours.

Definition at line 168 of file oldbasel.cpp.

References TO_ROW::baseline, baseline, find_textlines(), MAXOVERLAP, QSPLINE::overlap(), and TO_ROW::xheight.

Referenced by correlate_lines().

00172                            {
00173   TO_ROW *row;                   /*current row */
00174   register int rowindex;         /*no of row */
00175   register int otherrow;         /*second row */
00176   int upperrow;                  /*row above to use */
00177   int lowerrow;                  /*row below to use */
00178   float biggest;
00179 
00180   for (rowindex = 0; rowindex < rowcount; rowindex++) {
00181     row = rows[rowindex];        /*current row */
00182     if (row->xheight < 0) {
00183                                  /*quadratic failed */
00184       for (otherrow = rowindex - 2;
00185         otherrow >= 0
00186         && (rows[otherrow]->xheight < 0.0
00187         || !row->baseline.overlap (&rows[otherrow]->baseline,
00188         MAXOVERLAP)); otherrow--);
00189       upperrow = otherrow;       /*decent row above */
00190       for (otherrow = rowindex + 1;
00191         otherrow < rowcount
00192         && (rows[otherrow]->xheight < 0.0
00193         || !row->baseline.overlap (&rows[otherrow]->baseline,
00194         MAXOVERLAP)); otherrow++);
00195       lowerrow = otherrow;       /*decent row below */
00196       if (upperrow >= 0)
00197         find_textlines (block, row, 2, &rows[upperrow]->baseline);
00198       if (row->xheight < 0 && lowerrow < rowcount)
00199         find_textlines (block, row, 2, &rows[lowerrow]->baseline);
00200       if (row->xheight < 0) {
00201         if (upperrow >= 0)
00202           find_textlines (block, row, 1, &rows[upperrow]->baseline);
00203         else if (lowerrow < rowcount)
00204           find_textlines (block, row, 1, &rows[lowerrow]->baseline);
00205       }
00206     }
00207   }
00208 
00209   for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) {
00210     row = rows[rowindex];        /*current row */
00211     if (row->xheight < 0)        /*linear failed */
00212                                  /*make do */
00213         row->xheight = -row->xheight;
00214     biggest = MAX (biggest, row->xheight);
00215   }
00216 }

int correlate_with_stats ( TO_ROW **  rows,
int  rowcount 
)

Fix xheights.

Correlate the x-heights and ascender heights of a block to fill-in the ascender height and descender height for rows without one.

Definition at line 225 of file oldbasel.cpp.

References TO_ROW::ascrise, TO_ROW::descdrop, and TO_ROW::xheight.

Referenced by correlate_lines().

00228                           {
00229   TO_ROW *row;                   /*current row */
00230   register int rowindex;         /*no of row */
00231   float lineheight;              /*mean x-height */
00232   float ascheight;               /*average ascenders */
00233   float minascheight;            /*min allowed ascheight */
00234   int xcount;                    /*no of samples for xheight */
00235   float fullheight;              /*mean top height */
00236   int fullcount;                 /*no of samples */
00237   float descheight;              /*mean descender drop */
00238   float mindescheight;           /*min allowed descheight */
00239   int desccount;                 /*no of samples */
00240   float xshift;                  /*shift in xheight */
00241 
00242                                  /*no samples */
00243   xcount = fullcount = desccount = 0;
00244   lineheight = ascheight = fullheight = descheight = 0.0;
00245   for (rowindex = 0; rowindex < rowcount; rowindex++) {
00246     row = rows[rowindex];        /*current row */
00247     if (row->ascrise > 0.0) {    /*got ascenders? */
00248       lineheight += row->xheight;/*average x-heights */
00249       ascheight += row->ascrise; /*average ascenders */
00250       xcount++;
00251     }
00252     else {
00253       fullheight += row->xheight;/*assume full height */
00254       fullcount++;
00255     }
00256     if (row->descdrop < 0.0) {   /*got descenders? */
00257                                  /*average descenders */
00258       descheight += row->descdrop;
00259       desccount++;
00260     }
00261   }
00262 
00263   if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) {
00264     lineheight /= xcount;        /*average x-height */
00265                                  /*average caps height */
00266     fullheight = lineheight + ascheight / xcount;
00267                                  /*must be decent size */
00268     if (fullheight < lineheight * (1 + MIN_ASC_FRACTION))
00269       fullheight = lineheight * (1 + MIN_ASC_FRACTION);
00270   }
00271   else {
00272     fullheight /= fullcount;     /*average max height */
00273                                  /*guess x-height */
00274     lineheight = fullheight * X_HEIGHT_FRACTION;
00275   }
00276   if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2))
00277     descheight /= desccount;     /*average descenders */
00278   else
00279                                  /*guess descenders */
00280     descheight = -lineheight * DESCENDER_FRACTION;
00281 
00282   minascheight = lineheight * MIN_ASC_FRACTION;
00283   mindescheight = -lineheight * MIN_DESC_FRACTION;
00284   for (rowindex = 0; rowindex < rowcount; rowindex++) {
00285     row = rows[rowindex];        /*do each row */
00286     row->all_caps = FALSE;
00287     if (row->ascrise / row->xheight < MIN_ASC_FRACTION) {
00288     /*no ascenders */
00289       if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
00290       && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) {
00291         row->ascrise = fullheight - lineheight;
00292                                  /*shift in x */
00293         xshift = lineheight - row->xheight;
00294                                  /*set to average */
00295         row->xheight = lineheight;
00296 
00297       }
00298       else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE)
00299       && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) {
00300         row->ascrise = row->xheight - lineheight;
00301         xshift = -row->ascrise;  /*shift in x */
00302                                  /*set to average */
00303         row->xheight = lineheight;
00304         row->all_caps = TRUE;
00305       }
00306       else {
00307         row->ascrise = (fullheight - lineheight) * row->xheight
00308           / fullheight;
00309         xshift = -row->ascrise;  /*shift in x */
00310                                  /*scale it */
00311         row->xheight -= row->ascrise;
00312         row->all_caps = TRUE;
00313       }
00314       if (row->ascrise < minascheight)
00315         row->ascrise =
00316           row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION);
00317     }
00318     if (row->descdrop > mindescheight) {
00319       if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
00320         && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE))
00321                                  /*set to average */
00322           row->descdrop = descheight;
00323       else
00324         row->descdrop = -row->xheight * DESCENDER_FRACTION;
00325     }
00326   }
00327   return (int) lineheight;       //block xheight
00328 }

void find_lesser_parts ( TO_ROW row,
BOX  blobcoords[],
int  blobcount,
char  partids[],
int  partsizes[],
int  partcount,
int  bestpart 
)

Get descenders.

Average the step from the spline for the other partitions and find the commonest partition which has a descender.

Definition at line 1375 of file oldbasel.cpp.

References TO_ROW::baseline, BOX::bottom(), TO_ROW::descdrop, MAXBADRUN, MAXPARTS, MINASCRISE, TO_ROW::xheight, and QSPLINE::y().

Referenced by find_textlines().

01383   {
01384   register int blobindex;        /*index of blob */
01385   register int partition;        /*current partition */
01386   int xcentre;                   /*centre of blob */
01387   int poscount;                  /*count of best up step */
01388   int negcount;                  /*count of best down step */
01389   float partsteps[MAXPARTS];     /*average step to part */
01390   float bestpos;                 /*best up step */
01391   float bestneg;                 /*best down step */
01392   int runlength;                 /*length of bad run */
01393   int biggestrun;                /*biggest bad run */
01394 
01395   biggestrun = 0;
01396   for (partition = 0; partition < partcount; partition++)
01397     partsteps[partition] = 0.0;  /*zero accumulators */
01398   for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
01399     xcentre = (blobcoords[blobindex].left ()
01400       + blobcoords[blobindex].right ()) >> 1;
01401                                  /*in other parts */
01402     if (partids[blobindex] != bestpart) {
01403       runlength++;               /*run of non bests */
01404       if (runlength > biggestrun)
01405         biggestrun = runlength;
01406       partsteps[partids[blobindex]] += blobcoords[blobindex].bottom ()
01407         - row->baseline.y (xcentre);
01408     }
01409     else
01410       runlength = 0;
01411   }
01412   if (biggestrun > MAXBADRUN)
01413     row->xheight = -1.0f;        /*failed */
01414   else
01415     row->xheight = 1.0f;         /*success */
01416   poscount = negcount = 0;
01417   bestpos = bestneg = 0.0;       /*no step yet */
01418   for (partition = 0; partition < partcount; partition++) {
01419     if (partition != bestpart) {
01420       partsteps[partition] /= partsizes[partition];
01421       if (partsteps[partition] >= MINASCRISE
01422       && partsizes[partition] > poscount) {
01423                                  /*ascender rise */
01424         bestpos = partsteps[partition];
01425                                  /*2nd most popular */
01426         poscount = partsizes[partition];
01427       }
01428       if (partsteps[partition] <= -MINASCRISE
01429       && partsizes[partition] > negcount) {
01430                                  /*ascender rise */
01431         bestneg = partsteps[partition];
01432                                  /*2nd most popular */
01433         negcount = partsizes[partition];
01434       }
01435     }
01436   }
01437                                  /*average x-height */
01438   partsteps[bestpart] /= blobcount;
01439   row->descdrop = bestneg;
01440 }

void find_textlines ( TO_BLOCK block,
TO_ROW row,
int  degree,
QSPLINE spline 
)

Get baseline.

Compute the baseline for the given row.

Definition at line 336 of file oldbasel.cpp.

References alloc_mem(), TO_ROW::ascrise, TO_ROW::baseline, TO_ROW::blob_list(), TO_ROW::descdrop, QSPLINE::extrapolate(), find_lesser_parts(), free_mem(), get_blob_coords(), GOLDENROD, TO_ROW::line_m(), make_first_baseline(), make_first_xheight(), make_holed_baseline(), MAXPARTS, MINASCRISE, old_first_xheight(), partition_coords(), partition_line(), QSPLINE::plot(), segment_spline(), SPLINESIZE, split_stepped_spline(), to_win, tprintf(), and TO_ROW::xheight.

Referenced by correlate_neighbours(), and make_old_baselines().

00341                      {
00342   int partcount;                 /*no of partitions of */
00343   BOOL8 holed_line;              //lost too many blobs
00344   int bestpart;                  /*biggest partition */
00345   char *partids;                 /*partition no of each blob */
00346   int partsizes[MAXPARTS];       /*no in each partition */
00347   int lineheight;                /*guessed x-height */
00348   float jumplimit;               /*allowed delta change */
00349   int *xcoords;                  /*useful sample points */
00350   int *ycoords;                  /*useful sample points */
00351   BOX *blobcoords;               /*edges of blob rectangles */
00352   int blobcount;                 /*no of blobs on line */
00353   float *ydiffs;                 /*diffs from 1st approx */
00354   int pointcount;                /*no of coords */
00355   int xstarts[SPLINESIZE + 1];   //segment boundaries
00356   int segments;                  //no of segments
00357 
00358                                  //no of blobs in row
00359   blobcount = row->blob_list ()->length ();
00360   partids = (char *) alloc_mem (blobcount * sizeof (char));
00361   xcoords = (int *) alloc_mem (blobcount * sizeof (int));
00362   ycoords = (int *) alloc_mem (blobcount * sizeof (int));
00363   blobcoords = (BOX *) alloc_mem (blobcount * sizeof (BOX));
00364   ydiffs = (float *) alloc_mem (blobcount * sizeof (float));
00365 
00366   lineheight = get_blob_coords (row, (int) block->line_size, blobcoords,
00367     holed_line, blobcount);
00368                                  /*limit for line change */
00369   jumplimit = lineheight * textord_oldbl_jumplimit;
00370   if (jumplimit < MINASCRISE)
00371     jumplimit = MINASCRISE;
00372 
00373   if (textord_oldbl_debug) {
00374     tprintf
00375       ("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n",
00376       block->line_size, lineheight, jumplimit);
00377   }
00378   if (holed_line)
00379     make_holed_baseline (blobcoords, blobcount, spline, &row->baseline,
00380       row->line_m ());
00381   else
00382     make_first_baseline (blobcoords, blobcount,
00383       xcoords, ycoords, spline, &row->baseline, jumplimit);
00384 #ifndef GRAPHICS_DISABLED
00385   if (textord_show_final_rows)
00386     row->baseline.plot (to_win, GOLDENROD);
00387 #endif
00388   if (blobcount > 1) {
00389     bestpart = partition_line (blobcoords, blobcount,
00390       &partcount, partids, partsizes,
00391       &row->baseline, jumplimit, ydiffs);
00392     pointcount = partition_coords (blobcoords, blobcount,
00393       partids, bestpart, xcoords, ycoords);
00394     segments = segment_spline (blobcoords, blobcount,
00395       xcoords, ycoords,
00396       degree, pointcount, xstarts);
00397     if (!holed_line) {
00398       do {
00399         row->baseline = QSPLINE (xstarts, segments,
00400           xcoords, ycoords, pointcount, degree);
00401       }
00402       while (textord_oldbl_split_splines
00403         && split_stepped_spline (&row->baseline, jumplimit / 2,
00404         xcoords, xstarts, segments));
00405     }
00406     find_lesser_parts(row,
00407                       blobcoords,
00408                       blobcount,
00409                       partids,
00410                       partsizes,
00411                       partcount,
00412                       bestpart);
00413 
00414   }
00415   else {
00416     row->xheight = -1.0f;        /*failed */
00417     row->descdrop = 0.0f;
00418     row->ascrise = 0.0f;
00419   }
00420   row->baseline.extrapolate (row->line_m (),
00421     block->block->bounding_box ().left (),
00422     block->block->bounding_box ().right ());
00423   if (textord_really_old_xheight)
00424     old_first_xheight (row, blobcoords, lineheight,
00425       blobcount, &row->baseline, jumplimit);
00426   else
00427     make_first_xheight (row, blobcoords, lineheight, (int) block->line_size,
00428       blobcount, &row->baseline, jumplimit);
00429   free_mem(partids);
00430   free_mem(xcoords);
00431   free_mem(ycoords);
00432   free_mem(blobcoords);
00433   free_mem(ydiffs);
00434 }

void find_top_modes ( STATS stats,
int  statnum,
int  modelist[],
int  modenum 
)

Get modes.

Fill the input array with the indices of the top ten modes of the input distribution.

Definition at line 1631 of file oldbasel.cpp.

References MAX_INT32, mode, and STATS::pile_count().

Referenced by make_first_xheight().

01635   {
01636   int mode_count;
01637   int last_i = 0;
01638   int last_max = MAX_INT32;
01639   int i;
01640   int mode;
01641   int total_max = 0;
01642 
01643   for (mode_count = 0; mode_count < modenum; mode_count++) {
01644     mode = 0;
01645     for (i = 0; i < statnum; i++) {
01646       if (stats->pile_count (i) > stats->pile_count (mode)) {
01647         if ((stats->pile_count (i) < last_max) ||
01648         ((stats->pile_count (i) == last_max) && (i > last_i))) {
01649           mode = i;
01650         }
01651       }
01652     }
01653     last_i = mode;
01654     last_max = stats->pile_count (last_i);
01655     total_max += last_max;
01656     if (last_max <= total_max / kMinModeFactor)
01657       mode = 0;
01658     modelist[mode_count] = mode;
01659   }
01660 }

int get_blob_coords ( TO_ROW row,
INT32  lineheight,
BOX blobcoords,
BOOL8 holed_line,
int &  outcount 
)

Get boxes.

Returns:
The first guess at the line height.
Fill the blobcoords array with the coordinates of the blobs in the row.

Definition at line 447 of file oldbasel.cpp.

References STATS::add(), TO_ROW::blob_list(), box_next_pre_chopped(), STATS::get_total(), BOX::height(), STATS::ile(), and MAXHEIGHT.

Referenced by find_textlines().

00453                      {
00454                                  //blobs
00455   BLOBNBOX_IT blob_it = row->blob_list ();
00456   register int blobindex;        /*no along text line */
00457   int losscount;                 //lost blobs
00458   int maxlosscount;              //greatest lost blobs
00459                                  /*height stat collection */
00460   STATS heightstat (0, MAXHEIGHT);
00461 
00462   if (blob_it.empty ())
00463     return 0;                    //none
00464   maxlosscount = 0;
00465   losscount = 0;
00466   blob_it.mark_cycle_pt ();
00467   blobindex = 0;
00468   do {
00469     blobcoords[blobindex] = box_next_pre_chopped (&blob_it);
00470     if (blobcoords[blobindex].height () > lineheight * 0.25)
00471       heightstat.add (blobcoords[blobindex].height (), 1);
00472     if (blobindex == 0
00473       || blobcoords[blobindex].height () > lineheight * 0.25
00474     || blob_it.cycled_list ()) {
00475       blobindex++;               /*no of merged blobs */
00476       losscount = 0;
00477     }
00478     else {
00479       if (blobcoords[blobindex].height ()
00480         < blobcoords[blobindex].width () * oldbl_dot_error_size
00481         && blobcoords[blobindex].width ()
00482       < blobcoords[blobindex].height () * oldbl_dot_error_size) {
00483                                  //counts as dot
00484         blobindex++;
00485         losscount = 0;
00486       }
00487       else {
00488         losscount++;             //lost it
00489         if (losscount > maxlosscount)
00490                                  //remember max
00491             maxlosscount = losscount;
00492       }
00493     }
00494   }
00495   while (!blob_it.cycled_list ());
00496 
00497   holed_line = maxlosscount > oldbl_holed_losscount;
00498   outcount = blobindex;          /*total blobs */
00499 
00500   if (heightstat.get_total () > 1)
00501                                  /*guess x-height */
00502     return (int) heightstat.ile (0.25);
00503   else
00504     return blobcoords[0].height ();
00505 }

int get_ydiffs ( BOX  blobcoords[],
int  blobcount,
QSPLINE spline,
float  ydiffs[] 
)

Evaluate differences.

Returns:
The index of the blob in the middle of the "best behaved" region
Get the differences between the blobs and the spline, putting them in ydiffs.

Definition at line 906 of file oldbasel.cpp.

References ABS, BOX::bottom(), BOX::left(), MAX_INT32, QSPLINE::step(), and QSPLINE::y().

Referenced by partition_line().

00911   {
00912   register int blobindex;        /*current blob */
00913   int xcentre;                   /*xcoord */
00914   int lastx;                     /*last xcentre */
00915   float diffsum;                 /*sum of diffs */
00916   float diff;                    /*current difference */
00917   float drift;                   /*sum of spline steps */
00918   float bestsum;                 /*smallest diffsum */
00919   int bestindex;                 /*index of bestsum */
00920 
00921   diffsum = 0.0f;
00922   bestindex = 0;
00923   bestsum = (float) MAX_INT32;
00924   drift = 0.0f;
00925   lastx = blobcoords[0].left ();
00926                                  /*do each blob in row */
00927   for (blobindex = 0; blobindex < blobcount; blobindex++) {
00928                                  /*centre of blob */
00929     xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1;
00930                                  //step functions in spline
00931     drift += spline->step (lastx, xcentre);
00932     lastx = xcentre;
00933     diff = blobcoords[blobindex].bottom ();
00934     diff -= spline->y (xcentre);
00935     diff += drift;
00936     ydiffs[blobindex] = diff;    /*store difference */
00937     if (blobindex > 2)
00938                                  /*remove old one */
00939       diffsum -= ABS (ydiffs[blobindex - 3]);
00940     diffsum += ABS (diff);       /*add new one */
00941     if (blobindex >= 2 && diffsum < bestsum) {
00942       bestsum = diffsum;         /*find min sum */
00943       bestindex = blobindex - 1; /*middle of set */
00944     }
00945   }
00946   return bestindex;
00947 }

void insert_spline_point ( int  xstarts[],
int  segment,
int  coord1,
int  coord2,
int &  segments 
)

Get descenders.

Insert a new spline point and shuffle up the others.

Definition at line 1352 of file oldbasel.cpp.

Referenced by split_stepped_spline().

01357   {
01358   int index;                     //for shuffling
01359 
01360   for (index = segments; index > segment; index--)
01361     xstarts[index + 1] = xstarts[index];
01362   segments++;
01363   xstarts[segment] = coord1;
01364   xstarts[segment + 1] = coord2;
01365 }

void make_first_baseline ( BOX  blobcoords[],
int  blobcount,
int  xcoords[],
int  ycoords[],
QSPLINE spline,
QSPLINE baseline,
float  jumplimit 
)

Initial approximation.

Make the first estimate at a baseline, either by shifting a supplied previous spline, or by doing a piecewise linear approximation using all the blobs.

Definition at line 516 of file oldbasel.cpp.

References ABS, baseline, BOX::bottom(), BOX::left(), MAXOVERLAP, NULL, BOX::right(), QSPLINE::segments, SPLINESIZE, QSPLINE::xcoords, and QSPLINE::y().

Referenced by find_textlines().

00524   {
00525   int leftedge;                  /*left edge of line */
00526   int rightedge;                 /*right edge of line */
00527   int blobindex;                 /*current blob */
00528   int segment;                   /*current segment */
00529   float prevy, thisy, nexty;     /*3 y coords */
00530   float y1, y2, y3;              /*3 smooth blobs */
00531   float maxmax, minmin;          /*absolute limits */
00532   int x2 = 0;                    /*right edge of old y3 */
00533   int ycount;                    /*no of ycoords in use */
00534   float yturns[SPLINESIZE];      /*y coords of turn pts */
00535   int xturns[SPLINESIZE];        /*xcoords of turn pts */
00536   int xstarts[SPLINESIZE + 1];
00537   int segments;                  //no of segments
00538   ICOORD shift;                  //shift of spline
00539 
00540   prevy = 0;
00541                                  /*left edge of row */
00542   leftedge = blobcoords[0].left ();
00543                                  /*right edge of line */
00544   rightedge = blobcoords[blobcount - 1].right ();
00545   if (spline == NULL             /*no given spline */
00546     || spline->segments < 3      /*or trivial */
00547                                  /*or too non-overlap */
00548     || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
00549     || spline->xcoords[spline->segments - 1] < rightedge
00550   - MAXOVERLAP * (rightedge - leftedge)) {
00551     if (textord_oldbl_paradef)
00552       return;                    //use default
00553     xstarts[0] = blobcoords[0].left () - 1;
00554     for (blobindex = 0; blobindex < blobcount; blobindex++) {
00555       xcoords[blobindex] = (blobcoords[blobindex].left ()
00556         + blobcoords[blobindex].right ()) / 2;
00557       ycoords[blobindex] = blobcoords[blobindex].bottom ();
00558     }
00559     xstarts[1] = blobcoords[blobcount - 1].right () + 1;
00560     segments = 1;                /*no of segments */
00561 
00562                                  /*linear */
00563     *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
00564 
00565     if (blobcount >= 3) {
00566       y1 = y2 = y3 = 0.0f;
00567       ycount = 0;
00568       segment = 0;               /*no of segments */
00569       maxmax = minmin = 0.0f;
00570       thisy = ycoords[0] - baseline->y (xcoords[0]);
00571       nexty = ycoords[1] - baseline->y (xcoords[1]);
00572       for (blobindex = 2; blobindex < blobcount; blobindex++) {
00573         prevy = thisy;           /*shift ycoords */
00574         thisy = nexty;
00575         nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]);
00576                                  /*middle of smooth y */
00577         if (ABS (thisy - prevy) < jumplimit
00578          && ABS (thisy - nexty) < jumplimit) {
00579           y1 = y2;               /*shift window */
00580           y2 = y3;
00581           y3 = thisy;            /*middle point */
00582           ycount++;
00583                                  /*local max */
00584           if (ycount >= 3 && (y1 < y2 && y2 >= y3
00585                                  /*local min */
00586           || y1 > y2 && y2 <= y3)) {
00587             if (segment < SPLINESIZE - 2) {
00588                                  /*turning pt */
00589               xturns[segment] = x2;
00590               yturns[segment] = y2;
00591               segment++;         /*no of spline segs */
00592             }
00593           }
00594           if (ycount == 1) {
00595             maxmax = minmin = y3;/*initialise limits */
00596           }
00597           else {
00598             if (y3 > maxmax)
00599               maxmax = y3;       /*biggest max */
00600             if (y3 < minmin)
00601               minmin = y3;       /*smallest min */
00602           }
00603                                  /*possible turning pt */
00604           x2 = blobcoords[blobindex - 1].right ();
00605         }
00606       }
00607 
00608       jumplimit *= 1.2;
00609                                  /*must be wavy */
00610       if (maxmax - minmin > jumplimit) {
00611         ycount = segment;        /*no of segments */
00612         for (blobindex = 0, segment = 1; blobindex < ycount;
00613         blobindex++) {
00614           if (yturns[blobindex] > minmin + jumplimit
00615           || yturns[blobindex] < maxmax - jumplimit) {
00616                                  /*significant peak */
00617             if (segment == 1
00618               || yturns[blobindex] > prevy + jumplimit
00619             || yturns[blobindex] < prevy - jumplimit) {
00620                                  /*different to previous */
00621               xstarts[segment] = xturns[blobindex];
00622               segment++;
00623               prevy = yturns[blobindex];
00624             }
00625                                  /*bigger max */
00626             else if (prevy > minmin + jumplimit && yturns[blobindex] > prevy
00627                                  /*smaller min */
00628             || prevy < maxmax - jumplimit && yturns[blobindex] < prevy) {
00629               xstarts[segment - 1] = xturns[blobindex];
00630                                  /*improved previous */
00631               prevy = yturns[blobindex];
00632             }
00633           }
00634         }
00635         xstarts[segment] = blobcoords[blobcount - 1].right () + 1;
00636         segments = segment;      /*no of segments */
00637                                  /*linear */
00638         *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
00639       }
00640     }
00641   }
00642   else {
00643     *baseline = *spline;         /*copy it */
00644     shift = ICOORD (0, (INT16) (blobcoords[0].bottom ()
00645       - spline->y (blobcoords[0].right ())));
00646     baseline->move (shift);
00647   }
00648 }

void make_first_xheight ( TO_ROW row,
BOX  blobcoords[],
int  lineheight,
int  init_lineheight,
int  blobcount,
QSPLINE baseline,
float  jumplimit 
)

Find xheight.

Makes an x-height spline by copying the baseline and shifting it. It estimates the x-height across the line to use as the shift. It also finds the ascender height if it can.

Definition at line 1536 of file oldbasel.cpp.

References STATS::add(), baseline, find_top_modes(), free_mem(), HEIGHTBUCKETS, make_height_array(), MODENUM, pick_x_height(), tprintf(), and TO_ROW::xheight.

Referenced by find_textlines().

01544   {
01545   int *heights;
01546   STATS heightstat (0, HEIGHTBUCKETS);
01547   int modelist[MODENUM];
01548   int blobindex;
01549   int mode_count;                //blobs to count in thr
01550   int sign_bit;
01551   int mode_threshold;
01552 
01553   sign_bit = row->xheight > 0 ? 1 : -1;
01554   heights = make_height_array (blobcoords, blobcount, baseline);
01555 
01556   mode_count = 0;
01557   for (blobindex = 0; blobindex < blobcount; blobindex++) {
01558     if (heights[blobindex] > lineheight * oldbl_xhfract
01559       && blobcoords[blobindex].height () > init_lineheight * 0.25
01560       && heights[blobindex] > textord_min_xheight)
01561       heightstat.add (heights[blobindex], 1);
01562     if (blobcoords[blobindex].height () > init_lineheight * 0.25)
01563       mode_count++;
01564   }
01565 
01566   mode_threshold = (int) (blobcount * 0.1);
01567   if (oldbl_dot_error_size > 1 || oldbl_xhfix)
01568     mode_threshold = (int) (mode_count * 0.1);
01569 
01570   if (textord_oldbl_debug) {
01571     tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n",
01572       blobcount, mode_count, mode_threshold);
01573   }
01574   find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);
01575   if (textord_oldbl_debug) {
01576     for (blobindex = 0; blobindex < MODENUM; blobindex++)
01577       tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]);
01578     tprintf ("\n");
01579   }
01580   pick_x_height(row, modelist, &heightstat, mode_threshold);
01581 
01582   if (textord_oldbl_debug)
01583     tprintf ("Output xheight=%g\n", row->xheight);
01584   if (row->xheight < 0 && textord_oldbl_debug)
01585     tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight);
01586 
01587   free_mem(heights);
01588 
01589   if (sign_bit < 0)
01590     row->xheight = -row->xheight;
01591 }

int* make_height_array ( BOX  blobcoords[],
int  blobcount,
QSPLINE baseline 
)

Get array of heights.

Create an array of the number of blobs and each of their heights.

Definition at line 1600 of file oldbasel.cpp.

References alloc_mem(), and baseline.

Referenced by make_first_xheight().

01604   {
01605   int blobindex;
01606   int xcenter;
01607   int *heights;
01608 
01609   heights = (int *) alloc_mem (sizeof (int) * blobcount);
01610 
01611   for (blobindex = 0; blobindex < blobcount; blobindex++) {
01612     xcenter = (blobcoords[blobindex].left () +
01613       blobcoords[blobindex].right ()) / 2;
01614     heights[blobindex] = (int) (blobcoords[blobindex].top () -
01615       baseline->y (xcenter) + 0.5);
01616   }
01617 
01618   return (heights);
01619 }

void make_holed_baseline ( BOX  blobcoords[],
int  blobcount,
QSPLINE spline,
QSPLINE baseline,
float  gradient 
)

Initial approximation.

Make the first estimate at a baseline, either by shifting a supplied previous spline, or by doing a piecewise linear approximation using all the blobs.

Definition at line 659 of file oldbasel.cpp.

References LMS::add(), baseline, LMS::constrained_fit(), BOX::left(), MAXOVERLAP, NULL, BOX::right(), QSPLINE::segments, QSPLINE::xcoords, and QSPLINE::y().

Referenced by find_textlines().

00665   {
00666   int leftedge;                  /*left edge of line */
00667   int rightedge;                 /*right edge of line */
00668   int blobindex;                 /*current blob */
00669   float x;                       //centre of row
00670   ICOORD shift;                  //shift of spline
00671 
00672   LMS lms(blobcount);  //straight baseline
00673   INT32 xstarts[2];              //straight line
00674   double coeffs[3];
00675   float c;                       //line parameter
00676 
00677                                  /*left edge of row */
00678   leftedge = blobcoords[0].left ();
00679                                  /*right edge of line */
00680   rightedge = blobcoords[blobcount - 1].right ();
00681   for (blobindex = 0; blobindex < blobcount; blobindex++) {
00682     lms.add (FCOORD ((blobcoords[blobindex].left () +
00683       blobcoords[blobindex].right ()) / 2.0,
00684       blobcoords[blobindex].bottom ()));
00685   }
00686   lms.constrained_fit (gradient, c);
00687   xstarts[0] = leftedge;
00688   xstarts[1] = rightedge;
00689   coeffs[0] = 0;
00690   coeffs[1] = gradient;
00691   coeffs[2] = c;
00692   *baseline = QSPLINE (1, xstarts, coeffs);
00693   if (spline != NULL             /*no given spline */
00694     && spline->segments >= 3     /*or trivial */
00695                                  /*or too non-overlap */
00696     && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge)
00697     && spline->xcoords[spline->segments - 1] >= rightedge
00698   - MAXOVERLAP * (rightedge - leftedge)) {
00699     *baseline = *spline;         /*copy it */
00700     x = (leftedge + rightedge) / 2.0;
00701     shift = ICOORD (0, (INT16) (gradient * x + c - spline->y (x)));
00702     baseline->move (shift);
00703   }
00704 }

void make_old_baselines ( TO_BLOCK block,
BOOL8  testing_on 
)

Make splines.

Top level function to make baselines the old way.

Definition at line 89 of file oldbasel.cpp.

References TO_ROW::baseline, TO_ROW::blob_list(), correlate_lines(), find_textlines(), NULL, tprintf(), and TO_ROW::xheight.

Referenced by make_spline_rows().

00092                          {
00093   QSPLINE *prev_baseline;        //baseline of previous row
00094   TO_ROW *row;                   //current row
00095   TO_ROW_IT row_it = block->get_rows ();
00096   BLOBNBOX_IT blob_it;
00097 
00098   prev_baseline = NULL;          //nothing yet
00099   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00100     row = row_it.data ();
00101     find_textlines (block, row, 2, NULL);
00102     if (row->xheight <= 0 && prev_baseline != NULL)
00103       find_textlines (block, row, 2, prev_baseline);
00104     if (row->xheight > 0)
00105                                  //was a good one
00106       prev_baseline = &row->baseline;
00107     else {
00108       prev_baseline = NULL;
00109       blob_it.set_to_list (row->blob_list ());
00110       if (textord_debug_baselines)
00111         tprintf ("Row baseline generation failed on row at (%d,%d)\n",
00112           blob_it.data ()->bounding_box ().left (),
00113           blob_it.data ()->bounding_box ().bottom ());
00114     }
00115   }
00116   correlate_lines(block);
00117 }

void merge_oldbl_parts ( BOX  blobcoords[],
int  blobcount,
char  partids[],
int  partsizes[],
int  biggestpart,
float  jumplimit 
)

Partition blobs.

For any adjacent group of blobs in a different part, put them in the main part if they fit closely to neighbours in the main part.

Definition at line 793 of file oldbasel.cpp.

References QLSQ::add(), QLSQ::clear(), FALSE, QLSQ::fit(), QLSQ::get_b(), QLSQ::get_c(), MAXBADRUN, tprintf(), and TRUE.

Referenced by partition_line().

00800   {
00801   BOOL8 found_one;               //found a bestpart blob
00802   BOOL8 close_one;               //found was close enough
00803   register int blobindex;        /*no along text line */
00804   int prevpart;                  //previous iteration
00805   int runlength;                 //no in this part
00806   float diff;                    /*difference from line */
00807   int startx;                    /*index of start blob */
00808   int test_blob;                 //another index
00809   FCOORD coord;                  //blob coordinate
00810   float m, c;                    //fitted line
00811   QLSQ stats;                    //line stuff
00812 
00813   prevpart = biggestpart;
00814   runlength = 0;
00815   startx = 0;
00816   for (blobindex = 0; blobindex < blobcount; blobindex++) {
00817     if (partids[blobindex] != prevpart) {
00818       //tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n",
00819       //        blobcoords[blobindex].left(),blobcoords[blobindex].bottom(),
00820       //        prevpart,partids[blobindex],runlength);
00821       if (prevpart != biggestpart && runlength > MAXBADRUN) {
00822         stats.clear ();
00823         for (test_blob = startx; test_blob < blobindex; test_blob++) {
00824           coord = FCOORD ((blobcoords[test_blob].left ()
00825             + blobcoords[test_blob].right ()) / 2.0,
00826             blobcoords[test_blob].bottom ());
00827           stats.add (coord.x (), coord.y ());
00828         }
00829         stats.fit (1);
00830         m = stats.get_b ();
00831         c = stats.get_c ();
00832         if (textord_oldbl_debug)
00833           tprintf ("Fitted line y=%g x + %g\n", m, c);
00834         found_one = FALSE;
00835         close_one = FALSE;
00836         for (test_blob = 1; !found_one
00837           && (startx - test_blob >= 0
00838         || blobindex + test_blob <= blobcount); test_blob++) {
00839           if (startx - test_blob >= 0
00840           && partids[startx - test_blob] == biggestpart) {
00841             found_one = TRUE;
00842             coord = FCOORD ((blobcoords[startx - test_blob].left ()
00843               + blobcoords[startx -
00844               test_blob].right ()) /
00845               2.0,
00846               blobcoords[startx -
00847               test_blob].bottom ());
00848             diff = m * coord.x () + c - coord.y ();
00849             if (textord_oldbl_debug)
00850               tprintf
00851                 ("Diff of common blob to suspect part=%g at (%g,%g)\n",
00852                 diff, coord.x (), coord.y ());
00853             if (diff < jumplimit && -diff < jumplimit)
00854               close_one = TRUE;
00855           }
00856           if (blobindex + test_blob <= blobcount
00857           && partids[blobindex + test_blob - 1] == biggestpart) {
00858             found_one = TRUE;
00859             coord =
00860               FCOORD ((blobcoords[blobindex + test_blob - 1].
00861               left () + blobcoords[blobindex + test_blob -
00862               1].right ()) / 2.0,
00863               blobcoords[blobindex + test_blob -
00864               1].bottom ());
00865             diff = m * coord.x () + c - coord.y ();
00866             if (textord_oldbl_debug)
00867               tprintf
00868                 ("Diff of common blob to suspect part=%g at (%g,%g)\n",
00869                 diff, coord.x (), coord.y ());
00870             if (diff < jumplimit && -diff < jumplimit)
00871               close_one = TRUE;
00872           }
00873         }
00874         if (close_one) {
00875           if (textord_oldbl_debug)
00876             tprintf
00877               ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n",
00878               runlength, biggestpart, prevpart,
00879               blobcoords[startx].left (),
00880               blobcoords[startx].bottom ());
00881                                  //switch sides
00882           partsizes[prevpart] -= runlength;
00883           for (test_blob = startx; test_blob < blobindex; test_blob++)
00884             partids[test_blob] = biggestpart;
00885         }
00886       }
00887       prevpart = partids[blobindex];
00888       runlength = 1;
00889       startx = blobindex;
00890     }
00891     else
00892       runlength++;
00893   }
00894 }

void old_first_xheight ( TO_ROW row,
BOX  blobcoords[],
int  initialheight,
int  blobcount,
QSPLINE baseline,
float  jumplimit 
)

The Wise Owl way.

Makes an x-height spline by copying the baseline and shifting it. It estimates the x-height across the line to use as the shift. It also finds the ascender height if it can.

Definition at line 1451 of file oldbasel.cpp.

References STATS::add(), TO_ROW::ascrise, baseline, f, STATS::get_total(), STATS::ile(), BOX::left(), MAXHEIGHT, BOX::right(), BOX::top(), and TO_ROW::xheight.

Referenced by find_textlines().

01458   {
01459   register int blobindex;        /*current blob */
01460                                  /*height statistics */
01461   STATS heightstat (0, MAXHEIGHT);
01462   int height;                    /*height of blob */
01463   int xcentre;                   /*centre of blob */
01464   int lineheight;                /*approx xheight */
01465   float ascenders;               /*ascender sum */
01466   int asccount;                  /*no of ascenders */
01467   float xsum;                    /*xheight sum */
01468   int xcount;                    /*xheight count */
01469   register float diff;           /*height difference */
01470 
01471   if (blobcount > 1) {
01472     for (blobindex = 0; blobindex < blobcount; blobindex++) {
01473       xcentre = (blobcoords[blobindex].left ()
01474         + blobcoords[blobindex].right ()) / 2;
01475                                  /*height of blob */
01476       height = (int) (blobcoords[blobindex].top ()
01477        - baseline->y (xcentre) + 0.5);
01478       if (height > initialheight * oldbl_xhfract
01479         && height > textord_min_xheight)
01480         heightstat.add (height, 1);
01481     }
01482     if (heightstat.get_total () > 3) {
01483       lineheight = (int) heightstat.ile (0.25);
01484       if (lineheight <= 0)
01485         lineheight = (int) heightstat.ile (0.5);
01486     }
01487     else
01488       lineheight = initialheight;
01489   }
01490   else {
01491     lineheight = (int) (blobcoords[0].top ()
01492       - baseline->y ((blobcoords[0].left ()
01493       + blobcoords[0].right ()) / 2) +
01494       0.5);
01495   }
01496 
01497   xsum = 0.0f;
01498   xcount = 0;
01499   for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount;
01500   blobindex++) {
01501     xcentre = (blobcoords[blobindex].left ()
01502       + blobcoords[blobindex].right ()) / 2;
01503     diff = blobcoords[blobindex].top () - baseline->y (xcentre);
01504                                  /*is it ascender */
01505     if (diff > lineheight + jumplimit) {
01506       ascenders += diff;
01507       asccount++;                /*count ascenders */
01508     }
01509     else if (diff > lineheight - jumplimit) {
01510       xsum += diff;              /*mean xheight */
01511       xcount++;
01512     }
01513   }
01514   if (xcount > 0)
01515     xsum /= xcount;              /*average xheight */
01516   else
01517     xsum = (float) lineheight;   /*guess it */
01518   row->xheight *= xsum;
01519   if (asccount > 0)
01520     row->ascrise = ascenders / asccount - xsum;
01521   else
01522     row->ascrise = 0.0f;         /*had none */
01523   if (row->xheight == 0)
01524     row->xheight = -1.0f;
01525 }

int partition_coords ( BOX  blobcoords[],
int  blobcount,
char  partids[],
int  bestpart,
int  xcoords[],
int  ycoords[] 
)

*merge_partitions(partids,partcount,blobcount,bestpart) Find relevant coords

Returns:
The number of points found.
Get the x,y coordinates of all points in the bestpart and put them in xcoords,ycoords. Return the number of points found.

Definition at line 1084 of file oldbasel.cpp.

References BOX::bottom().

Referenced by find_textlines().

01091   {
01092   register int blobindex;        /*no along text line */
01093   int pointcount;                /*no of points */
01094 
01095   pointcount = 0;
01096   for (blobindex = 0; blobindex < blobcount; blobindex++) {
01097     if (partids[blobindex] == bestpart) {
01098                                  /*centre of blob */
01099       xcoords[pointcount] = (blobcoords[blobindex].left () 
01100       + blobcoords[blobindex].right ()) >> 1;
01101       ycoords[pointcount++] = blobcoords[blobindex].bottom ();
01102     }
01103   }
01104   return pointcount;             /*no of points found */
01105 }

int partition_line ( BOX  blobcoords[],
int  blobcount,
int *  numparts,
char  partids[],
int  partsizes[],
QSPLINE spline,
float  jumplimit,
float  ydiffs[] 
)

Partition blobs.

Returns:
The biggest partition
Partition a row of blobs into different groups of continuous y position. jumplimit specifies the max allowable limit on a jump before a new partition is started.

Definition at line 717 of file oldbasel.cpp.

References choose_partition(), get_ydiffs(), MAXPARTS, merge_oldbl_parts(), and tprintf().

Referenced by find_textlines().

00726   {
00727   register int blobindex;        /*no along text line */
00728   int bestpart;                  /*best new partition */
00729   int biggestpart;               /*part with most members */
00730   float diff;                    /*difference from line */
00731   int startx;                    /*index of start blob */
00732   float partdiffs[MAXPARTS];     /*step between parts */
00733 
00734   for (bestpart = 0; bestpart < MAXPARTS; bestpart++)
00735     partsizes[bestpart] = 0;     /*zero them all */
00736 
00737   startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs);
00738   *numparts = 1;                 /*1 partition */
00739   bestpart = -1;                 /*first point */
00740   for (blobindex = startx; blobindex < blobcount; blobindex++) {
00741   /*do each blob in row */
00742     diff = ydiffs[blobindex];    /*diff from line */
00743     if (textord_oldbl_debug) {
00744       tprintf ("%d(%d,%d), ", blobindex,
00745         blobcoords[blobindex].left (),
00746         blobcoords[blobindex].bottom ());
00747     }
00748     bestpart =
00749       choose_partition(diff, partdiffs, bestpart, jumplimit, numparts);
00750                                  /*record partition */
00751     partids[blobindex] = bestpart;
00752     partsizes[bestpart]++;       /*another in it */
00753   }
00754 
00755   bestpart = -1;                 /*first point */
00756   partsizes[0]--;                /*doing 1st pt again */
00757                                  /*do each blob in row */
00758   for (blobindex = startx; blobindex >= 0; blobindex--) {
00759     diff = ydiffs[blobindex];    /*diff from line */
00760     if (textord_oldbl_debug) {
00761       tprintf ("%d(%d,%d), ", blobindex,
00762         blobcoords[blobindex].left (),
00763         blobcoords[blobindex].bottom ());
00764     }
00765     bestpart =
00766       choose_partition(diff, partdiffs, bestpart, jumplimit, numparts);
00767                                  /*record partition */
00768     partids[blobindex] = bestpart;
00769     partsizes[bestpart]++;       /*another in it */
00770   }
00771 
00772   for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++)
00773     if (partsizes[bestpart] >= partsizes[biggestpart])
00774       biggestpart = bestpart;    /*new biggest */
00775   if (textord_oldbl_merge_parts)
00776     merge_oldbl_parts(blobcoords,
00777                       blobcount,
00778                       partids,
00779                       partsizes,
00780                       biggestpart,
00781                       jumplimit);
00782   return biggestpart;            /*biggest partition */
00783 }

void pick_x_height ( TO_ROW row,
int  modelist[],
STATS heightstat,
int  mode_threshold 
)

Find xheight.

Choose based on the height modes the best x height value.

Definition at line 1669 of file oldbasel.cpp.

References TO_ROW::ascrise, FALSE, MODENUM, STATS::pile_count(), TRUE, and TO_ROW::xheight.

Referenced by make_first_xheight().

01673                     {
01674   int x;
01675   int y;
01676   int z;
01677   float ratio;
01678   int found_one_bigger = FALSE;
01679   int best_x_height = 0;
01680   int best_asc = 0;
01681   int num_in_best;
01682 
01683   for (x = 0; x < MODENUM; x++) {
01684     for (y = 0; y < MODENUM; y++) {
01685       /* Check for two modes */
01686       if (modelist[x] && modelist[y] &&
01687       heightstat->pile_count (modelist[x]) > mode_threshold) {
01688         ratio = (float) modelist[y] / (float) modelist[x];
01689         if (1.2 < ratio && ratio < 1.8) {
01690           if (modelist[y] && modelist[x]) {
01691             /* Two modes found */
01692             best_x_height = modelist[x];
01693             num_in_best = heightstat->pile_count (modelist[x]);
01694 
01695             /* Try to get one higher */
01696             do {
01697               found_one_bigger = FALSE;
01698               for (z = 0; z < MODENUM; z++) {
01699                 if (modelist[z] == best_x_height + 1) {
01700                   ratio =
01701                     (float) modelist[y] / (float) modelist[z];
01702                   if ((1.2 < ratio && ratio < 1.8) &&
01703                                  /* Should be half of best */
01704                     heightstat->pile_count (modelist[z]) >
01705                   num_in_best * 0.5) {
01706                     best_x_height++;
01707                     found_one_bigger = TRUE;
01708                     break;
01709                   }
01710                 }
01711               }
01712             }
01713             while (found_one_bigger);
01714 
01715             /* try to get a higher ascender */
01716 
01717             best_asc = modelist[y];
01718             num_in_best = heightstat->pile_count (modelist[y]);
01719 
01720             /* Try to get one higher */
01721             do {
01722               found_one_bigger = FALSE;
01723               for (z = 0; z < MODENUM; z++) {
01724                 if (modelist[z] > best_asc) {
01725                   ratio =
01726                     (float) modelist[z] /
01727                     (float) best_x_height;
01728                   if ((1.2 < ratio && ratio < 1.8) &&
01729                                  /* Should be half of best */
01730                     heightstat->pile_count (modelist[z]) >
01731                   num_in_best * 0.5) {
01732                     best_asc = modelist[z];
01733                     found_one_bigger = TRUE;
01734                     break;
01735                   }
01736                 }
01737               }
01738             }
01739             while (found_one_bigger);
01740 
01741             row->xheight = (float) best_x_height;
01742             row->ascrise = (float) best_asc - best_x_height;
01743             return;
01744           }
01745         }
01746       }
01747     }
01748   }
01749 
01750   best_x_height = modelist[0];   /* Single Mode found */
01751   num_in_best = heightstat->pile_count (best_x_height);
01752   do {
01753                                  /* Try to get one higher */
01754     found_one_bigger = FALSE;
01755     for (z = 1; z < MODENUM; z++) {
01756       /* Should be half of best */
01757       if ((modelist[z] == best_x_height + 1) &&
01758       (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) {
01759         best_x_height++;
01760         found_one_bigger = TRUE;
01761         break;
01762       }
01763     }
01764   }
01765   while (found_one_bigger);
01766 
01767   row->ascrise = 0.0f;
01768   row->xheight = (float) best_x_height;
01769   if (row->xheight == 0)
01770     row->xheight = -1.0f;
01771 }

int segment_spline ( BOX  blobcoords[],
int  blobcount,
int  xcoords[],
int  ycoords[],
int  degree,
int  pointcount,
int  xstarts[] 
)

Make xstarts.

Segment the row at midpoints between maxima and minima of the x,y pairs. The xstarts of the segments are returned and the number found.

Definition at line 1115 of file oldbasel.cpp.

References SPLINESIZE, tprintf(), and TURNLIMIT.

Referenced by find_textlines().

01122   {
01123   register int ptindex;          /*no along text line */
01124   register int segment;          /*partition no */
01125   int lastmin, lastmax;          /*possible turn points */
01126   int turnpoints[SPLINESIZE];    /*good turning points */
01127   int turncount;                 /*no of turning points */
01128   int max_x;                     //max specified coord
01129 
01130   xstarts[0] = xcoords[0] - 1;   //leftmost defined pt
01131   max_x = xcoords[pointcount - 1] + 1;
01132   if (degree < 2)
01133     pointcount = 0;
01134   turncount = 0;                 /*no turning points yet */
01135   if (pointcount > 3) {
01136     ptindex = 1;
01137     lastmax = lastmin = 0;       /*start with first one */
01138     while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
01139                                  /*minimum */
01140       if (ycoords[ptindex - 1] > ycoords[ptindex] 
01141       && ycoords[ptindex] <= ycoords[ptindex + 1]) {
01142         if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
01143           if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
01144                                  /*new max point */
01145             turnpoints[turncount++] = lastmax;
01146           lastmin = ptindex;     /*latest minimum */
01147         }
01148         else if (ycoords[ptindex] < ycoords[lastmin]) {
01149           lastmin = ptindex;     /*lower minimum */
01150         }
01151       }
01152 
01153                                  /*maximum */
01154       if (ycoords[ptindex - 1] < ycoords[ptindex] 
01155       && ycoords[ptindex] >= ycoords[ptindex + 1]) {
01156         if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
01157           if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
01158                                  /*new min point */
01159             turnpoints[turncount++] = lastmin;
01160           lastmax = ptindex;     /*latest maximum */
01161         }
01162         else if (ycoords[ptindex] > ycoords[lastmax]) {
01163           lastmax = ptindex;     /*higher maximum */
01164         }
01165       }
01166       ptindex++;
01167     }
01168                                  /*possible global min */
01169     if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT
01170     && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
01171       if (turncount < SPLINESIZE - 1)
01172                                  /*2 more turns */
01173         turnpoints[turncount++] = lastmax;
01174       if (turncount < SPLINESIZE - 1)
01175         turnpoints[turncount++] = ptindex;
01176     }
01177     else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
01178       /*possible global max */
01179     && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
01180       if (turncount < SPLINESIZE - 1)
01181                                  /*2 more turns */
01182         turnpoints[turncount++] = lastmin;
01183       if (turncount < SPLINESIZE - 1)
01184         turnpoints[turncount++] = ptindex;
01185     }
01186     else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
01187     && turncount < SPLINESIZE - 1) {
01188       if (ycoords[ptindex] > ycoords[lastmax])
01189         turnpoints[turncount++] = ptindex;
01190       else
01191         turnpoints[turncount++] = lastmax;
01192     }
01193     else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
01194     && turncount < SPLINESIZE - 1) {
01195       if (ycoords[ptindex] < ycoords[lastmin])
01196         turnpoints[turncount++] = ptindex;
01197       else
01198         turnpoints[turncount++] = lastmin;
01199     }
01200   }
01201 
01202   if (textord_oldbl_debug && turncount > 0)
01203     tprintf ("First turn is %d at (%d,%d)\n",
01204       turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
01205   for (segment = 1; segment < turncount; segment++) {
01206                                  /*centre y coord */
01207     lastmax = (ycoords[turnpoints[segment - 1]]
01208     + ycoords[turnpoints[segment]]) / 2;
01209 
01210     /* fix alg so that it works with both rising and falling sections */
01211     if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
01212                                  /*find rising y centre */
01213       for (ptindex = turnpoints[segment - 1] + 1;
01214        ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax;
01215        ptindex++);
01216     else
01217                                  /*find falling y centre */
01218       for (ptindex = turnpoints[segment - 1] + 1;
01219        ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax;
01220        ptindex++);
01221                                  /*centre x */
01222     xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
01223       + xcoords[turnpoints[segment - 1]]
01224       + xcoords[turnpoints[segment]] + 2) / 4;
01225     /*halfway between turns */
01226     if (textord_oldbl_debug)
01227       tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n",
01228         segment, turnpoints[segment],
01229         xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
01230         ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
01231   }
01232 
01233   xstarts[segment] = max_x;
01234   return segment;                /*no of splines */
01235 }

BOOL8 split_stepped_spline ( QSPLINE baseline,
float  jumplimit,
int  xcoords[],
int  xstarts[],
int &  segments 
)

Make xstarts.

Returns:
TRUE if any re-segments were done.
Re-segment the spline in cases where there is a big step function.

Definition at line 1246 of file oldbasel.cpp.

References baseline, FALSE, insert_spline_point(), SPLINESIZE, tprintf(), and TRUE.

Referenced by find_textlines().

01252   {
01253   BOOL8 doneany;                 //return value
01254   register int segment;          /*partition no */
01255   int startindex, centreindex, endindex;
01256   float leftcoord, rightcoord;
01257   int leftindex, rightindex;
01258   float step;                    //spline step
01259 
01260   doneany = FALSE;
01261   startindex = 0;
01262   for (segment = 1; segment < segments - 1; segment++) {
01263     step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0,
01264       (xstarts[segment] + xstarts[segment + 1]) / 2.0);
01265     if (step < 0)
01266       step = -step;
01267     if (step > jumplimit) {
01268       while (xcoords[startindex] < xstarts[segment - 1])
01269         startindex++;
01270       centreindex = startindex;
01271       while (xcoords[centreindex] < xstarts[segment])
01272         centreindex++;
01273       endindex = centreindex;
01274       while (xcoords[endindex] < xstarts[segment + 1])
01275         endindex++;
01276       if (segments >= SPLINESIZE) {
01277         if (textord_debug_baselines)
01278           tprintf ("Too many segments to resegment spline!!\n");
01279       }
01280       else if (endindex - startindex >= textord_spline_medianwin * 3) {
01281         while (centreindex - startindex <
01282           textord_spline_medianwin * 3 / 2)
01283           centreindex++;
01284         while (endindex - centreindex <
01285           textord_spline_medianwin * 3 / 2)
01286           centreindex--;
01287         leftindex = (startindex + startindex + centreindex) / 3;
01288         rightindex = (centreindex + endindex + endindex) / 3;
01289         leftcoord =
01290           (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
01291         rightcoord =
01292           (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
01293         while (xcoords[leftindex] > leftcoord
01294           && leftindex - startindex > textord_spline_medianwin)
01295           leftindex--;
01296         while (xcoords[leftindex] < leftcoord
01297           && centreindex - leftindex >
01298           textord_spline_medianwin / 2)
01299           leftindex++;
01300         if (xcoords[leftindex] - leftcoord >
01301           leftcoord - xcoords[leftindex - 1])
01302           leftindex--;
01303         while (xcoords[rightindex] > rightcoord
01304           && rightindex - centreindex >
01305           textord_spline_medianwin / 2)
01306           rightindex--;
01307         while (xcoords[rightindex] < rightcoord
01308           && endindex - rightindex > textord_spline_medianwin)
01309           rightindex++;
01310         if (xcoords[rightindex] - rightcoord >
01311           rightcoord - xcoords[rightindex - 1])
01312           rightindex--;
01313         if (textord_debug_baselines)
01314           tprintf ("Splitting spline at %d with step %g at (%d,%d)\n",
01315             xstarts[segment],
01316             baseline->
01317             step ((xstarts[segment - 1] +
01318             xstarts[segment]) / 2.0,
01319             (xstarts[segment] +
01320             xstarts[segment + 1]) / 2.0),
01321             (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
01322             (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
01323         insert_spline_point (xstarts, segment,
01324           (xcoords[leftindex - 1] +
01325           xcoords[leftindex]) / 2,
01326           (xcoords[rightindex - 1] +
01327           xcoords[rightindex]) / 2, segments);
01328         doneany = TRUE;
01329       }
01330       else if (textord_debug_baselines) {
01331         tprintf
01332           ("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",
01333           startindex, centreindex, endindex,
01334           (INT32) textord_spline_medianwin);
01335       }
01336     }
01337     //  else tprintf("Spline step at %d is %g\n",
01338     //               xstarts[segment],
01339     //               baseline->step((xstarts[segment-1]+xstarts[segment])/2.0,
01340     //               (xstarts[segment]+xstarts[segment+1])/2.0));
01341   }
01342   return doneany;
01343 }


Variable Documentation

const int kMinModeFactor = 12

Definition at line 1622 of file oldbasel.cpp.


Generated on Wed Feb 28 19:49:25 2007 for Tesseract by  doxygen 1.5.1