textord/oldbasel.cpp

Go to the documentation of this file.
00001 
00020 #include          "mfcpch.h"
00021 #include          "statistc.h"
00022 #include          "quadlsq.h"
00023 #include          "lmedsq.h"
00024 #include          "makerow.h"
00025 #include          "drawtord.h"
00026 #include          "oldbasel.h"
00027 #include          "tprintf.h"
00028 
00029 #define EXTERN
00030 
00033 EXTERN BOOL_VAR (textord_really_old_xheight, FALSE,
00034 "Use original wiseowl xheight");
00035 EXTERN BOOL_VAR (textord_oldbl_debug, FALSE, "Debug old baseline generation");
00036 EXTERN BOOL_VAR (textord_debug_baselines, FALSE, "Debug baseline generation");
00037 EXTERN BOOL_VAR (textord_oldbl_paradef, TRUE, "Use para default mechanism");
00038 EXTERN BOOL_VAR (textord_oldbl_split_splines, TRUE, "Split stepped splines");
00039 EXTERN BOOL_VAR (textord_oldbl_merge_parts, TRUE, "Merge suspect partitions");
00040 EXTERN BOOL_VAR (oldbl_corrfix, TRUE, "Improve correlation of heights");
00041 EXTERN BOOL_VAR (oldbl_xhfix, FALSE,
00042 "Fix bug in modes threshold for xheights");
00043 EXTERN double_VAR (oldbl_xhfract, 0.4, "Fraction of est allowed in calc");
00044 EXTERN INT_VAR (oldbl_holed_losscount, 10,
00045 "Max lost before fallback line used");
00046 EXTERN double_VAR (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot");
00047 EXTERN double_VAR (textord_oldbl_jumplimit, 0.15,
00048 "X fraction for new partition");
00051 
00052 #define TURNLIMIT          1
00054 #define X_HEIGHT_FRACTION  0.7
00056 #define DESCENDER_FRACTION 0.5
00058 #define MIN_ASC_FRACTION   0.20
00060 #define MIN_DESC_FRACTION  0.25
00062 #define MINASCRISE         2.0
00064 #define MAXHEIGHTVARIANCE  0.15  
00066 #define MAXHEIGHT          300
00068 #define MAXOVERLAP         0.1
00070 #define MAXBADRUN          2
00072 #define HEIGHTBUCKETS      200
00074 #define DELTAHEIGHT        5.0
00075 
00076 #define GOODHEIGHT         5
00077 #define MAXLOOPS           10
00078 #define MODENUM            10
00079 #define MAXPARTS           6
00080 #define SPLINESIZE         23
00081 
00082 #define ABS(x) ((x)<0 ? (-(x)) : (x))
00083 
00089 void make_old_baselines(                  //make splines
00090                         TO_BLOCK *block,  //block to do
00091                         BOOL8 testing_on  //correct orientation
00092                        ) {
00093   QSPLINE *prev_baseline;        //baseline of previous row
00094   TO_ROW *row;                   //current row
00095   TO_ROW_IT row_it = block->get_rows ();
00096   BLOBNBOX_IT blob_it;
00097 
00098   prev_baseline = NULL;          //nothing yet
00099   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00100     row = row_it.data ();
00101     find_textlines (block, row, 2, NULL);
00102     if (row->xheight <= 0 && prev_baseline != NULL)
00103       find_textlines (block, row, 2, prev_baseline);
00104     if (row->xheight > 0)
00105                                  //was a good one
00106       prev_baseline = &row->baseline;
00107     else {
00108       prev_baseline = NULL;
00109       blob_it.set_to_list (row->blob_list ());
00110       if (textord_debug_baselines)
00111         tprintf ("Row baseline generation failed on row at (%d,%d)\n",
00112           blob_it.data ()->bounding_box ().left (),
00113           blob_it.data ()->bounding_box ().bottom ());
00114     }
00115   }
00116   correlate_lines(block);
00117 }
00118 
00119 
00127 void correlate_lines(                 //cleanup lines
00128                      TO_BLOCK *block  //block to do
00129                     ) {
00130   TO_ROW **rows;                 //array of ptrs
00131   int rowcount;                  /*no of rows to do */
00132   register int rowindex;         /*no of row */
00133                                  //iterator
00134   TO_ROW_IT row_it = block->get_rows ();
00135 
00136   rowcount = row_it.length ();
00137   if (rowcount == 0) {
00138                                  //default value
00139     block->xheight = block->line_size;
00140     return;                      /*none to do */
00141   }
00142   rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
00143   rowindex = 0;
00144   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
00145                                  //make array
00146     rows[rowindex++] = row_it.data ();
00147 
00148                                  /*try to fix bad lines */
00149   correlate_neighbours(block, rows, rowcount);
00150 
00151   block->xheight = (float) correlate_with_stats (rows, rowcount);
00152   /*use stats */
00153   if (block->xheight <= 0)
00154                                  //desperate
00155     block->xheight = block->line_size * textord_merge_x;
00156   if (block->xheight < textord_min_xheight)
00157     block->xheight = (float) textord_min_xheight;
00158 
00159   free_mem(rows);
00160 }
00161 
00162 
00168 void correlate_neighbours(                  //fix bad rows
00169                           TO_BLOCK *block,  /*block rows are in */
00170                           TO_ROW **rows,    /*rows of block */
00171                           int rowcount      /*no of rows to do */
00172                          ) {
00173   TO_ROW *row;                   /*current row */
00174   register int rowindex;         /*no of row */
00175   register int otherrow;         /*second row */
00176   int upperrow;                  /*row above to use */
00177   int lowerrow;                  /*row below to use */
00178   float biggest;
00179 
00180   for (rowindex = 0; rowindex < rowcount; rowindex++) {
00181     row = rows[rowindex];        /*current row */
00182     if (row->xheight < 0) {
00183                                  /*quadratic failed */
00184       for (otherrow = rowindex - 2;
00185         otherrow >= 0
00186         && (rows[otherrow]->xheight < 0.0
00187         || !row->baseline.overlap (&rows[otherrow]->baseline,
00188         MAXOVERLAP)); otherrow--);
00189       upperrow = otherrow;       /*decent row above */
00190       for (otherrow = rowindex + 1;
00191         otherrow < rowcount
00192         && (rows[otherrow]->xheight < 0.0
00193         || !row->baseline.overlap (&rows[otherrow]->baseline,
00194         MAXOVERLAP)); otherrow++);
00195       lowerrow = otherrow;       /*decent row below */
00196       if (upperrow >= 0)
00197         find_textlines (block, row, 2, &rows[upperrow]->baseline);
00198       if (row->xheight < 0 && lowerrow < rowcount)
00199         find_textlines (block, row, 2, &rows[lowerrow]->baseline);
00200       if (row->xheight < 0) {
00201         if (upperrow >= 0)
00202           find_textlines (block, row, 1, &rows[upperrow]->baseline);
00203         else if (lowerrow < rowcount)
00204           find_textlines (block, row, 1, &rows[lowerrow]->baseline);
00205       }
00206     }
00207   }
00208 
00209   for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) {
00210     row = rows[rowindex];        /*current row */
00211     if (row->xheight < 0)        /*linear failed */
00212                                  /*make do */
00213         row->xheight = -row->xheight;
00214     biggest = MAX (biggest, row->xheight);
00215   }
00216 }
00217 
00218 
00225 int correlate_with_stats(                //fix xheights
00226                          TO_ROW **rows,  /*rows of block */
00227                          int rowcount    /*no of rows to do */
00228                         ) {
00229   TO_ROW *row;                   /*current row */
00230   register int rowindex;         /*no of row */
00231   float lineheight;              /*mean x-height */
00232   float ascheight;               /*average ascenders */
00233   float minascheight;            /*min allowed ascheight */
00234   int xcount;                    /*no of samples for xheight */
00235   float fullheight;              /*mean top height */
00236   int fullcount;                 /*no of samples */
00237   float descheight;              /*mean descender drop */
00238   float mindescheight;           /*min allowed descheight */
00239   int desccount;                 /*no of samples */
00240   float xshift;                  /*shift in xheight */
00241 
00242                                  /*no samples */
00243   xcount = fullcount = desccount = 0;
00244   lineheight = ascheight = fullheight = descheight = 0.0;
00245   for (rowindex = 0; rowindex < rowcount; rowindex++) {
00246     row = rows[rowindex];        /*current row */
00247     if (row->ascrise > 0.0) {    /*got ascenders? */
00248       lineheight += row->xheight;/*average x-heights */
00249       ascheight += row->ascrise; /*average ascenders */
00250       xcount++;
00251     }
00252     else {
00253       fullheight += row->xheight;/*assume full height */
00254       fullcount++;
00255     }
00256     if (row->descdrop < 0.0) {   /*got descenders? */
00257                                  /*average descenders */
00258       descheight += row->descdrop;
00259       desccount++;
00260     }
00261   }
00262 
00263   if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) {
00264     lineheight /= xcount;        /*average x-height */
00265                                  /*average caps height */
00266     fullheight = lineheight + ascheight / xcount;
00267                                  /*must be decent size */
00268     if (fullheight < lineheight * (1 + MIN_ASC_FRACTION))
00269       fullheight = lineheight * (1 + MIN_ASC_FRACTION);
00270   }
00271   else {
00272     fullheight /= fullcount;     /*average max height */
00273                                  /*guess x-height */
00274     lineheight = fullheight * X_HEIGHT_FRACTION;
00275   }
00276   if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2))
00277     descheight /= desccount;     /*average descenders */
00278   else
00279                                  /*guess descenders */
00280     descheight = -lineheight * DESCENDER_FRACTION;
00281 
00282   minascheight = lineheight * MIN_ASC_FRACTION;
00283   mindescheight = -lineheight * MIN_DESC_FRACTION;
00284   for (rowindex = 0; rowindex < rowcount; rowindex++) {
00285     row = rows[rowindex];        /*do each row */
00286     row->all_caps = FALSE;
00287     if (row->ascrise / row->xheight < MIN_ASC_FRACTION) {
00288     /*no ascenders */
00289       if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
00290       && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) {
00291         row->ascrise = fullheight - lineheight;
00292                                  /*shift in x */
00293         xshift = lineheight - row->xheight;
00294                                  /*set to average */
00295         row->xheight = lineheight;
00296 
00297       }
00298       else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE)
00299       && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) {
00300         row->ascrise = row->xheight - lineheight;
00301         xshift = -row->ascrise;  /*shift in x */
00302                                  /*set to average */
00303         row->xheight = lineheight;
00304         row->all_caps = TRUE;
00305       }
00306       else {
00307         row->ascrise = (fullheight - lineheight) * row->xheight
00308           / fullheight;
00309         xshift = -row->ascrise;  /*shift in x */
00310                                  /*scale it */
00311         row->xheight -= row->ascrise;
00312         row->all_caps = TRUE;
00313       }
00314       if (row->ascrise < minascheight)
00315         row->ascrise =
00316           row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION);
00317     }
00318     if (row->descdrop > mindescheight) {
00319       if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
00320         && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE))
00321                                  /*set to average */
00322           row->descdrop = descheight;
00323       else
00324         row->descdrop = -row->xheight * DESCENDER_FRACTION;
00325     }
00326   }
00327   return (int) lineheight;       //block xheight
00328 }
00329 
00330 
00336 void find_textlines(                  //get baseline
00337                     TO_BLOCK *block,  //block row is in
00338                     TO_ROW *row,      //row to do
00339                     int degree,       //required approximation
00340                     QSPLINE *spline   //starting spline
00341                    ) {
00342   int partcount;                 /*no of partitions of */
00343   BOOL8 holed_line;              //lost too many blobs
00344   int bestpart;                  /*biggest partition */
00345   char *partids;                 /*partition no of each blob */
00346   int partsizes[MAXPARTS];       /*no in each partition */
00347   int lineheight;                /*guessed x-height */
00348   float jumplimit;               /*allowed delta change */
00349   int *xcoords;                  /*useful sample points */
00350   int *ycoords;                  /*useful sample points */
00351   BOX *blobcoords;               /*edges of blob rectangles */
00352   int blobcount;                 /*no of blobs on line */
00353   float *ydiffs;                 /*diffs from 1st approx */
00354   int pointcount;                /*no of coords */
00355   int xstarts[SPLINESIZE + 1];   //segment boundaries
00356   int segments;                  //no of segments
00357 
00358                                  //no of blobs in row
00359   blobcount = row->blob_list ()->length ();
00360   partids = (char *) alloc_mem (blobcount * sizeof (char));
00361   xcoords = (int *) alloc_mem (blobcount * sizeof (int));
00362   ycoords = (int *) alloc_mem (blobcount * sizeof (int));
00363   blobcoords = (BOX *) alloc_mem (blobcount * sizeof (BOX));
00364   ydiffs = (float *) alloc_mem (blobcount * sizeof (float));
00365 
00366   lineheight = get_blob_coords (row, (int) block->line_size, blobcoords,
00367     holed_line, blobcount);
00368                                  /*limit for line change */
00369   jumplimit = lineheight * textord_oldbl_jumplimit;
00370   if (jumplimit < MINASCRISE)
00371     jumplimit = MINASCRISE;
00372 
00373   if (textord_oldbl_debug) {
00374     tprintf
00375       ("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n",
00376       block->line_size, lineheight, jumplimit);
00377   }
00378   if (holed_line)
00379     make_holed_baseline (blobcoords, blobcount, spline, &row->baseline,
00380       row->line_m ());
00381   else
00382     make_first_baseline (blobcoords, blobcount,
00383       xcoords, ycoords, spline, &row->baseline, jumplimit);
00384 #ifndef GRAPHICS_DISABLED
00385   if (textord_show_final_rows)
00386     row->baseline.plot (to_win, GOLDENROD);
00387 #endif
00388   if (blobcount > 1) {
00389     bestpart = partition_line (blobcoords, blobcount,
00390       &partcount, partids, partsizes,
00391       &row->baseline, jumplimit, ydiffs);
00392     pointcount = partition_coords (blobcoords, blobcount,
00393       partids, bestpart, xcoords, ycoords);
00394     segments = segment_spline (blobcoords, blobcount,
00395       xcoords, ycoords,
00396       degree, pointcount, xstarts);
00397     if (!holed_line) {
00398       do {
00399         row->baseline = QSPLINE (xstarts, segments,
00400           xcoords, ycoords, pointcount, degree);
00401       }
00402       while (textord_oldbl_split_splines
00403         && split_stepped_spline (&row->baseline, jumplimit / 2,
00404         xcoords, xstarts, segments));
00405     }
00406     find_lesser_parts(row,
00407                       blobcoords,
00408                       blobcount,
00409                       partids,
00410                       partsizes,
00411                       partcount,
00412                       bestpart);
00413 
00414   }
00415   else {
00416     row->xheight = -1.0f;        /*failed */
00417     row->descdrop = 0.0f;
00418     row->ascrise = 0.0f;
00419   }
00420   row->baseline.extrapolate (row->line_m (),
00421     block->block->bounding_box ().left (),
00422     block->block->bounding_box ().right ());
00423   if (textord_really_old_xheight)
00424     old_first_xheight (row, blobcoords, lineheight,
00425       blobcount, &row->baseline, jumplimit);
00426   else
00427     make_first_xheight (row, blobcoords, lineheight, (int) block->line_size,
00428       blobcount, &row->baseline, jumplimit);
00429   free_mem(partids);
00430   free_mem(xcoords);
00431   free_mem(ycoords);
00432   free_mem(blobcoords);
00433   free_mem(ydiffs);
00434 }
00435 
00436 
00447 int get_blob_coords(                    //get boxes
00448                     TO_ROW *row,        //row to use
00449                     INT32 lineheight,   //block level
00450                     BOX *blobcoords,    //ouput boxes
00451                     BOOL8 &holed_line,  //lost a lot of blobs
00452                     int &outcount       //no of real blobs
00453                    ) {
00454                                  //blobs
00455   BLOBNBOX_IT blob_it = row->blob_list ();
00456   register int blobindex;        /*no along text line */
00457   int losscount;                 //lost blobs
00458   int maxlosscount;              //greatest lost blobs
00459                                  /*height stat collection */
00460   STATS heightstat (0, MAXHEIGHT);
00461 
00462   if (blob_it.empty ())
00463     return 0;                    //none
00464   maxlosscount = 0;
00465   losscount = 0;
00466   blob_it.mark_cycle_pt ();
00467   blobindex = 0;
00468   do {
00469     blobcoords[blobindex] = box_next_pre_chopped (&blob_it);
00470     if (blobcoords[blobindex].height () > lineheight * 0.25)
00471       heightstat.add (blobcoords[blobindex].height (), 1);
00472     if (blobindex == 0
00473       || blobcoords[blobindex].height () > lineheight * 0.25
00474     || blob_it.cycled_list ()) {
00475       blobindex++;               /*no of merged blobs */
00476       losscount = 0;
00477     }
00478     else {
00479       if (blobcoords[blobindex].height ()
00480         < blobcoords[blobindex].width () * oldbl_dot_error_size
00481         && blobcoords[blobindex].width ()
00482       < blobcoords[blobindex].height () * oldbl_dot_error_size) {
00483                                  //counts as dot
00484         blobindex++;
00485         losscount = 0;
00486       }
00487       else {
00488         losscount++;             //lost it
00489         if (losscount > maxlosscount)
00490                                  //remember max
00491             maxlosscount = losscount;
00492       }
00493     }
00494   }
00495   while (!blob_it.cycled_list ());
00496 
00497   holed_line = maxlosscount > oldbl_holed_losscount;
00498   outcount = blobindex;          /*total blobs */
00499 
00500   if (heightstat.get_total () > 1)
00501                                  /*guess x-height */
00502     return (int) heightstat.ile (0.25);
00503   else
00504     return blobcoords[0].height ();
00505 }
00506 
00507 
00515 void
00516 make_first_baseline (            //initial approximation
00517 BOX blobcoords[],                /*blob bounding boxes */
00518 int blobcount,                   /*no of blobcoords */
00519 int xcoords[],                   /*coords for spline */
00520 int ycoords[],                   /*approximator */
00521 QSPLINE * spline,                /*initial spline */
00522 QSPLINE * baseline,              /*output spline */
00523 float jumplimit                  /*guess half descenders */
00524 ) {
00525   int leftedge;                  /*left edge of line */
00526   int rightedge;                 /*right edge of line */
00527   int blobindex;                 /*current blob */
00528   int segment;                   /*current segment */
00529   float prevy, thisy, nexty;     /*3 y coords */
00530   float y1, y2, y3;              /*3 smooth blobs */
00531   float maxmax, minmin;          /*absolute limits */
00532   int x2 = 0;                    /*right edge of old y3 */
00533   int ycount;                    /*no of ycoords in use */
00534   float yturns[SPLINESIZE];      /*y coords of turn pts */
00535   int xturns[SPLINESIZE];        /*xcoords of turn pts */
00536   int xstarts[SPLINESIZE + 1];
00537   int segments;                  //no of segments
00538   ICOORD shift;                  //shift of spline
00539 
00540   prevy = 0;
00541                                  /*left edge of row */
00542   leftedge = blobcoords[0].left ();
00543                                  /*right edge of line */
00544   rightedge = blobcoords[blobcount - 1].right ();
00545   if (spline == NULL             /*no given spline */
00546     || spline->segments < 3      /*or trivial */
00547                                  /*or too non-overlap */
00548     || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
00549     || spline->xcoords[spline->segments - 1] < rightedge
00550   - MAXOVERLAP * (rightedge - leftedge)) {
00551     if (textord_oldbl_paradef)
00552       return;                    //use default
00553     xstarts[0] = blobcoords[0].left () - 1;
00554     for (blobindex = 0; blobindex < blobcount; blobindex++) {
00555       xcoords[blobindex] = (blobcoords[blobindex].left ()
00556         + blobcoords[blobindex].right ()) / 2;
00557       ycoords[blobindex] = blobcoords[blobindex].bottom ();
00558     }
00559     xstarts[1] = blobcoords[blobcount - 1].right () + 1;
00560     segments = 1;                /*no of segments */
00561 
00562                                  /*linear */
00563     *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
00564 
00565     if (blobcount >= 3) {
00566       y1 = y2 = y3 = 0.0f;
00567       ycount = 0;
00568       segment = 0;               /*no of segments */
00569       maxmax = minmin = 0.0f;
00570       thisy = ycoords[0] - baseline->y (xcoords[0]);
00571       nexty = ycoords[1] - baseline->y (xcoords[1]);
00572       for (blobindex = 2; blobindex < blobcount; blobindex++) {
00573         prevy = thisy;           /*shift ycoords */
00574         thisy = nexty;
00575         nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]);
00576                                  /*middle of smooth y */
00577         if (ABS (thisy - prevy) < jumplimit
00578          && ABS (thisy - nexty) < jumplimit) {
00579           y1 = y2;               /*shift window */
00580           y2 = y3;
00581           y3 = thisy;            /*middle point */
00582           ycount++;
00583                                  /*local max */
00584           if (ycount >= 3 && (y1 < y2 && y2 >= y3
00585                                  /*local min */
00586           || y1 > y2 && y2 <= y3)) {
00587             if (segment < SPLINESIZE - 2) {
00588                                  /*turning pt */
00589               xturns[segment] = x2;
00590               yturns[segment] = y2;
00591               segment++;         /*no of spline segs */
00592             }
00593           }
00594           if (ycount == 1) {
00595             maxmax = minmin = y3;/*initialise limits */
00596           }
00597           else {
00598             if (y3 > maxmax)
00599               maxmax = y3;       /*biggest max */
00600             if (y3 < minmin)
00601               minmin = y3;       /*smallest min */
00602           }
00603                                  /*possible turning pt */
00604           x2 = blobcoords[blobindex - 1].right ();
00605         }
00606       }
00607 
00608       jumplimit *= 1.2;
00609                                  /*must be wavy */
00610       if (maxmax - minmin > jumplimit) {
00611         ycount = segment;        /*no of segments */
00612         for (blobindex = 0, segment = 1; blobindex < ycount;
00613         blobindex++) {
00614           if (yturns[blobindex] > minmin + jumplimit
00615           || yturns[blobindex] < maxmax - jumplimit) {
00616                                  /*significant peak */
00617             if (segment == 1
00618               || yturns[blobindex] > prevy + jumplimit
00619             || yturns[blobindex] < prevy - jumplimit) {
00620                                  /*different to previous */
00621               xstarts[segment] = xturns[blobindex];
00622               segment++;
00623               prevy = yturns[blobindex];
00624             }
00625                                  /*bigger max */
00626             else if (prevy > minmin + jumplimit && yturns[blobindex] > prevy
00627                                  /*smaller min */
00628             || prevy < maxmax - jumplimit && yturns[blobindex] < prevy) {
00629               xstarts[segment - 1] = xturns[blobindex];
00630                                  /*improved previous */
00631               prevy = yturns[blobindex];
00632             }
00633           }
00634         }
00635         xstarts[segment] = blobcoords[blobcount - 1].right () + 1;
00636         segments = segment;      /*no of segments */
00637                                  /*linear */
00638         *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
00639       }
00640     }
00641   }
00642   else {
00643     *baseline = *spline;         /*copy it */
00644     shift = ICOORD (0, (INT16) (blobcoords[0].bottom ()
00645       - spline->y (blobcoords[0].right ())));
00646     baseline->move (shift);
00647   }
00648 }
00649 
00650 
00658 void
00659 make_holed_baseline (            //initial approximation
00660 BOX blobcoords[],                /*blob bounding boxes */
00661 int blobcount,                   /*no of blobcoords */
00662 QSPLINE * spline,                /*initial spline */
00663 QSPLINE * baseline,              /*output spline */
00664 float gradient                   //of line
00665 ) {
00666   int leftedge;                  /*left edge of line */
00667   int rightedge;                 /*right edge of line */
00668   int blobindex;                 /*current blob */
00669   float x;                       //centre of row
00670   ICOORD shift;                  //shift of spline
00671 
00672   LMS lms(blobcount);  //straight baseline
00673   INT32 xstarts[2];              //straight line
00674   double coeffs[3];
00675   float c;                       //line parameter
00676 
00677                                  /*left edge of row */
00678   leftedge = blobcoords[0].left ();
00679                                  /*right edge of line */
00680   rightedge = blobcoords[blobcount - 1].right ();
00681   for (blobindex = 0; blobindex < blobcount; blobindex++) {
00682     lms.add (FCOORD ((blobcoords[blobindex].left () +
00683       blobcoords[blobindex].right ()) / 2.0,
00684       blobcoords[blobindex].bottom ()));
00685   }
00686   lms.constrained_fit (gradient, c);
00687   xstarts[0] = leftedge;
00688   xstarts[1] = rightedge;
00689   coeffs[0] = 0;
00690   coeffs[1] = gradient;
00691   coeffs[2] = c;
00692   *baseline = QSPLINE (1, xstarts, coeffs);
00693   if (spline != NULL             /*no given spline */
00694     && spline->segments >= 3     /*or trivial */
00695                                  /*or too non-overlap */
00696     && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge)
00697     && spline->xcoords[spline->segments - 1] >= rightedge
00698   - MAXOVERLAP * (rightedge - leftedge)) {
00699     *baseline = *spline;         /*copy it */
00700     x = (leftedge + rightedge) / 2.0;
00701     shift = ICOORD (0, (INT16) (gradient * x + c - spline->y (x)));
00702     baseline->move (shift);
00703   }
00704 }
00705 
00706 
00716 int
00717 partition_line (                 //partition blobs
00718 BOX blobcoords[],                //bounding boxes
00719 int blobcount,                   /*no of blobs on row */
00720 int *numparts,                   /*number of partitions */
00721 char partids[],                  /*partition no of each blob */
00722 int partsizes[],                 /*no in each partition */
00723 QSPLINE * spline,                /*curve to fit to */
00724 float jumplimit,                 /*allowed delta change */
00725 float ydiffs[]                   /*diff from spline */
00726 ) {
00727   register int blobindex;        /*no along text line */
00728   int bestpart;                  /*best new partition */
00729   int biggestpart;               /*part with most members */
00730   float diff;                    /*difference from line */
00731   int startx;                    /*index of start blob */
00732   float partdiffs[MAXPARTS];     /*step between parts */
00733 
00734   for (bestpart = 0; bestpart < MAXPARTS; bestpart++)
00735     partsizes[bestpart] = 0;     /*zero them all */
00736 
00737   startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs);
00738   *numparts = 1;                 /*1 partition */
00739   bestpart = -1;                 /*first point */
00740   for (blobindex = startx; blobindex < blobcount; blobindex++) {
00741   /*do each blob in row */
00742     diff = ydiffs[blobindex];    /*diff from line */
00743     if (textord_oldbl_debug) {
00744       tprintf ("%d(%d,%d), ", blobindex,
00745         blobcoords[blobindex].left (),
00746         blobcoords[blobindex].bottom ());
00747     }
00748     bestpart =
00749       choose_partition(diff, partdiffs, bestpart, jumplimit, numparts);
00750                                  /*record partition */
00751     partids[blobindex] = bestpart;
00752     partsizes[bestpart]++;       /*another in it */
00753   }
00754 
00755   bestpart = -1;                 /*first point */
00756   partsizes[0]--;                /*doing 1st pt again */
00757                                  /*do each blob in row */
00758   for (blobindex = startx; blobindex >= 0; blobindex--) {
00759     diff = ydiffs[blobindex];    /*diff from line */
00760     if (textord_oldbl_debug) {
00761       tprintf ("%d(%d,%d), ", blobindex,
00762         blobcoords[blobindex].left (),
00763         blobcoords[blobindex].bottom ());
00764     }
00765     bestpart =
00766       choose_partition(diff, partdiffs, bestpart, jumplimit, numparts);
00767                                  /*record partition */
00768     partids[blobindex] = bestpart;
00769     partsizes[bestpart]++;       /*another in it */
00770   }
00771 
00772   for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++)
00773     if (partsizes[bestpart] >= partsizes[biggestpart])
00774       biggestpart = bestpart;    /*new biggest */
00775   if (textord_oldbl_merge_parts)
00776     merge_oldbl_parts(blobcoords,
00777                       blobcount,
00778                       partids,
00779                       partsizes,
00780                       biggestpart,
00781                       jumplimit);
00782   return biggestpart;            /*biggest partition */
00783 }
00784 
00785 
00792 void
00793 merge_oldbl_parts (              //partition blobs
00794 BOX blobcoords[],                //bounding boxes
00795 int blobcount,                   /*no of blobs on row */
00796 char partids[],                  /*partition no of each blob */
00797 int partsizes[],                 /*no in each partition */
00798 int biggestpart,                 //major partition
00799 float jumplimit                  /*allowed delta change */
00800 ) {
00801   BOOL8 found_one;               //found a bestpart blob
00802   BOOL8 close_one;               //found was close enough
00803   register int blobindex;        /*no along text line */
00804   int prevpart;                  //previous iteration
00805   int runlength;                 //no in this part
00806   float diff;                    /*difference from line */
00807   int startx;                    /*index of start blob */
00808   int test_blob;                 //another index
00809   FCOORD coord;                  //blob coordinate
00810   float m, c;                    //fitted line
00811   QLSQ stats;                    //line stuff
00812 
00813   prevpart = biggestpart;
00814   runlength = 0;
00815   startx = 0;
00816   for (blobindex = 0; blobindex < blobcount; blobindex++) {
00817     if (partids[blobindex] != prevpart) {
00818       //tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n",
00819       //        blobcoords[blobindex].left(),blobcoords[blobindex].bottom(),
00820       //        prevpart,partids[blobindex],runlength);
00821       if (prevpart != biggestpart && runlength > MAXBADRUN) {
00822         stats.clear ();
00823         for (test_blob = startx; test_blob < blobindex; test_blob++) {
00824           coord = FCOORD ((blobcoords[test_blob].left ()
00825             + blobcoords[test_blob].right ()) / 2.0,
00826             blobcoords[test_blob].bottom ());
00827           stats.add (coord.x (), coord.y ());
00828         }
00829         stats.fit (1);
00830         m = stats.get_b ();
00831         c = stats.get_c ();
00832         if (textord_oldbl_debug)
00833           tprintf ("Fitted line y=%g x + %g\n", m, c);
00834         found_one = FALSE;
00835         close_one = FALSE;
00836         for (test_blob = 1; !found_one
00837           && (startx - test_blob >= 0
00838         || blobindex + test_blob <= blobcount); test_blob++) {
00839           if (startx - test_blob >= 0
00840           && partids[startx - test_blob] == biggestpart) {
00841             found_one = TRUE;
00842             coord = FCOORD ((blobcoords[startx - test_blob].left ()
00843               + blobcoords[startx -
00844               test_blob].right ()) /
00845               2.0,
00846               blobcoords[startx -
00847               test_blob].bottom ());
00848             diff = m * coord.x () + c - coord.y ();
00849             if (textord_oldbl_debug)
00850               tprintf
00851                 ("Diff of common blob to suspect part=%g at (%g,%g)\n",
00852                 diff, coord.x (), coord.y ());
00853             if (diff < jumplimit && -diff < jumplimit)
00854               close_one = TRUE;
00855           }
00856           if (blobindex + test_blob <= blobcount
00857           && partids[blobindex + test_blob - 1] == biggestpart) {
00858             found_one = TRUE;
00859             coord =
00860               FCOORD ((blobcoords[blobindex + test_blob - 1].
00861               left () + blobcoords[blobindex + test_blob -
00862               1].right ()) / 2.0,
00863               blobcoords[blobindex + test_blob -
00864               1].bottom ());
00865             diff = m * coord.x () + c - coord.y ();
00866             if (textord_oldbl_debug)
00867               tprintf
00868                 ("Diff of common blob to suspect part=%g at (%g,%g)\n",
00869                 diff, coord.x (), coord.y ());
00870             if (diff < jumplimit && -diff < jumplimit)
00871               close_one = TRUE;
00872           }
00873         }
00874         if (close_one) {
00875           if (textord_oldbl_debug)
00876             tprintf
00877               ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n",
00878               runlength, biggestpart, prevpart,
00879               blobcoords[startx].left (),
00880               blobcoords[startx].bottom ());
00881                                  //switch sides
00882           partsizes[prevpart] -= runlength;
00883           for (test_blob = startx; test_blob < blobindex; test_blob++)
00884             partids[test_blob] = biggestpart;
00885         }
00886       }
00887       prevpart = partids[blobindex];
00888       runlength = 1;
00889       startx = blobindex;
00890     }
00891     else
00892       runlength++;
00893   }
00894 }
00895 
00896 
00905 int
00906 get_ydiffs (                     //evaluate differences
00907 BOX blobcoords[],                //bounding boxes
00908 int blobcount,                   /*no of blobs */
00909 QSPLINE * spline,                /*approximating spline */
00910 float ydiffs[]                   /*output */
00911 ) {
00912   register int blobindex;        /*current blob */
00913   int xcentre;                   /*xcoord */
00914   int lastx;                     /*last xcentre */
00915   float diffsum;                 /*sum of diffs */
00916   float diff;                    /*current difference */
00917   float drift;                   /*sum of spline steps */
00918   float bestsum;                 /*smallest diffsum */
00919   int bestindex;                 /*index of bestsum */
00920 
00921   diffsum = 0.0f;
00922   bestindex = 0;
00923   bestsum = (float) MAX_INT32;
00924   drift = 0.0f;
00925   lastx = blobcoords[0].left ();
00926                                  /*do each blob in row */
00927   for (blobindex = 0; blobindex < blobcount; blobindex++) {
00928                                  /*centre of blob */
00929     xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1;
00930                                  //step functions in spline
00931     drift += spline->step (lastx, xcentre);
00932     lastx = xcentre;
00933     diff = blobcoords[blobindex].bottom ();
00934     diff -= spline->y (xcentre);
00935     diff += drift;
00936     ydiffs[blobindex] = diff;    /*store difference */
00937     if (blobindex > 2)
00938                                  /*remove old one */
00939       diffsum -= ABS (ydiffs[blobindex - 3]);
00940     diffsum += ABS (diff);       /*add new one */
00941     if (blobindex >= 2 && diffsum < bestsum) {
00942       bestsum = diffsum;         /*find min sum */
00943       bestindex = blobindex - 1; /*middle of set */
00944     }
00945   }
00946   return bestindex;
00947 }
00948 
00949 
00955 int
00956 choose_partition (               //select partition
00957 register float diff,             /*diff from spline */
00958 float partdiffs[],               /*diff on all parts */
00959 int lastpart,                    /*last assigned partition */
00960 float jumplimit,                 /*new part threshold */
00961 int *partcount                   /*no of partitions */
00962 ) {
00963   register int partition;        /*partition no */
00964   int bestpart;                  /*best new partition */
00965   float bestdelta;               /*best gap from a part */
00966   static float drift;            /*drift from spline */
00967   float delta;                   /*diff from part */
00968   static float lastdelta;        /*previous delta */
00969 
00970   if (lastpart < 0) {
00971     partdiffs[0] = diff;
00972     lastpart = 0;                /*first point */
00973     drift = 0.0f;
00974     lastdelta = 0.0f;
00975   }
00976                                  /*adjusted diff from part */
00977   delta = diff - partdiffs[lastpart] - drift;
00978   if (textord_oldbl_debug) {
00979     tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, drift);
00980   }
00981   if (ABS (delta) > jumplimit / 2) {
00982                                  /*delta on part 0 */
00983     bestdelta = diff - partdiffs[0] - drift;
00984     bestpart = 0;                /*0 best so far */
00985     for (partition = 1; partition < *partcount; partition++) {
00986       delta = diff - partdiffs[partition] - drift;
00987       if (ABS (delta) < ABS (bestdelta)) {
00988         bestdelta = delta;
00989         bestpart = partition;    /*part with nearest jump */
00990       }
00991     }
00992     delta = bestdelta;
00993                                  /*too far away */
00994     if (ABS (bestdelta) > jumplimit
00995     && *partcount < MAXPARTS) {  /*and spare part left */
00996       bestpart = (*partcount)++; /*best was new one */
00997                                  /*start new one */
00998       partdiffs[bestpart] = diff - drift;
00999       delta = 0.0f;
01000     }
01001   }
01002   else {
01003     bestpart = lastpart;         /*best was last one */
01004   }
01005 
01006   if (bestpart == lastpart
01007     && (ABS (delta - lastdelta) < jumplimit / 2
01008     || ABS (delta) < jumplimit / 2))
01009                                  /*smooth the drift */
01010     drift = (3 * drift + delta) / 3;
01011   lastdelta = delta;
01012 
01013   if (textord_oldbl_debug) {
01014     tprintf ("P=%d\n", bestpart);
01015   }
01016 
01017   return bestpart;
01018 }
01019 
01020 
01022 //discards funny looking partitions and gives all the rest partid 0*/
01023 //
01024 //merge_partitions(partids,partcount,blobcount,bestpart)
01025 //register char              *partids;                     /*partition numbers*/
01026 //int                        partcount;                    /*no of partitions*/
01027 //int                        blobcount;                    /*no of blobs*/
01028 //int                        bestpart;                     /*best partition*/
01029 //{
01030 //   register int            blobindex;                    /*no along text line*/
01031 //   int                     runlength;                    /*run of same partition*/
01032 //   int                     bestrun;                      /*biggest runlength*/
01033 //
01034 //   bestrun=0;                                            /*no runs yet*/
01035 //   runlength=1;
01036 //   for (blobindex=1;blobindex<blobcount;blobindex++)
01037 //   {  if (partids[blobindex]!=partids[blobindex-1])
01038 //      {  if (runlength>bestrun)
01039 //            bestrun=runlength;                           /*find biggest run*/
01040 //         runlength=1;                                    /*new run*/
01041 //      }
01042 //      else
01043 //      {  runlength++;
01044 //      }
01045 //   }
01046 //   if (runlength>bestrun)
01047 //      bestrun=runlength;
01048 //
01049 //   for (blobindex=0;blobindex<blobcount;blobindex++)
01050 //   {  if (blobindex<1
01051 //      || partids[blobindex]!=partids[blobindex-1])
01052 //      {  if ((blobindex+1>=blobcount
01053 //         || partids[blobindex]!=partids[blobindex+1])
01054 //                                                         /*loner*/
01055 //         && (bestrun>2 || partids[blobindex]!=bestpart))
01056 //         {  partids[blobindex]=partcount;                /*discard loner*/
01057 //         }
01058 //         else if (blobindex+1<blobcount
01059 //         && partids[blobindex]==partids[blobindex+1]
01060 //                                                         /*pair*/
01061 //         && (blobindex+2>=blobcount
01062 //         || partids[blobindex]!=partids[blobindex+2])
01063 //         && (bestrun>3 || partids[blobindex]!=bestpart))
01064 //         {  partids[blobindex]=partcount;                /*discard both*/
01065 //            partids[blobindex+1]=partcount;
01066 //         }
01067 //      }
01068 //   }
01069 //   for (blobindex=0;blobindex<blobcount;blobindex++)
01070 //   {  if (partids[blobindex]<partcount)
01071 //         partids[blobindex]=0;                           /*all others together*/
01072 //   }
01073 //}
01074 
01083 int
01084 partition_coords (               //find relevant coords
01085 BOX blobcoords[],                //bounding boxes
01086 int blobcount,                   /*no of blobs in row */
01087 char partids[],                  /*partition no of each blob */
01088 int bestpart,                    /*best new partition */
01089 int xcoords[],                   /*points to work on */
01090 int ycoords[]                    /*points to work on */
01091 ) {
01092   register int blobindex;        /*no along text line */
01093   int pointcount;                /*no of points */
01094 
01095   pointcount = 0;
01096   for (blobindex = 0; blobindex < blobcount; blobindex++) {
01097     if (partids[blobindex] == bestpart) {
01098                                  /*centre of blob */
01099       xcoords[pointcount] = (blobcoords[blobindex].left () 
01100       + blobcoords[blobindex].right ()) >> 1;
01101       ycoords[pointcount++] = blobcoords[blobindex].bottom ();
01102     }
01103   }
01104   return pointcount;             /*no of points found */
01105 }
01106 
01107 
01114 int
01115 segment_spline (                 //make xstarts
01116 BOX blobcoords[],                //boundign boxes
01117 int blobcount,                   /*no of blobs in row */
01118 int xcoords[],                   /*points to work on */
01119 int ycoords[],                   /*points to work on */
01120 int degree, int pointcount,      /*no of points */
01121 int xstarts[]                    //result
01122 ) {
01123   register int ptindex;          /*no along text line */
01124   register int segment;          /*partition no */
01125   int lastmin, lastmax;          /*possible turn points */
01126   int turnpoints[SPLINESIZE];    /*good turning points */
01127   int turncount;                 /*no of turning points */
01128   int max_x;                     //max specified coord
01129 
01130   xstarts[0] = xcoords[0] - 1;   //leftmost defined pt
01131   max_x = xcoords[pointcount - 1] + 1;
01132   if (degree < 2)
01133     pointcount = 0;
01134   turncount = 0;                 /*no turning points yet */
01135   if (pointcount > 3) {
01136     ptindex = 1;
01137     lastmax = lastmin = 0;       /*start with first one */
01138     while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
01139                                  /*minimum */
01140       if (ycoords[ptindex - 1] > ycoords[ptindex] 
01141       && ycoords[ptindex] <= ycoords[ptindex + 1]) {
01142         if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
01143           if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
01144                                  /*new max point */
01145             turnpoints[turncount++] = lastmax;
01146           lastmin = ptindex;     /*latest minimum */
01147         }
01148         else if (ycoords[ptindex] < ycoords[lastmin]) {
01149           lastmin = ptindex;     /*lower minimum */
01150         }
01151       }
01152 
01153                                  /*maximum */
01154       if (ycoords[ptindex - 1] < ycoords[ptindex] 
01155       && ycoords[ptindex] >= ycoords[ptindex + 1]) {
01156         if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
01157           if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
01158                                  /*new min point */
01159             turnpoints[turncount++] = lastmin;
01160           lastmax = ptindex;     /*latest maximum */
01161         }
01162         else if (ycoords[ptindex] > ycoords[lastmax]) {
01163           lastmax = ptindex;     /*higher maximum */
01164         }
01165       }
01166       ptindex++;
01167     }
01168                                  /*possible global min */
01169     if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT
01170     && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
01171       if (turncount < SPLINESIZE - 1)
01172                                  /*2 more turns */
01173         turnpoints[turncount++] = lastmax;
01174       if (turncount < SPLINESIZE - 1)
01175         turnpoints[turncount++] = ptindex;
01176     }
01177     else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
01178       /*possible global max */
01179     && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
01180       if (turncount < SPLINESIZE - 1)
01181                                  /*2 more turns */
01182         turnpoints[turncount++] = lastmin;
01183       if (turncount < SPLINESIZE - 1)
01184         turnpoints[turncount++] = ptindex;
01185     }
01186     else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
01187     && turncount < SPLINESIZE - 1) {
01188       if (ycoords[ptindex] > ycoords[lastmax])
01189         turnpoints[turncount++] = ptindex;
01190       else
01191         turnpoints[turncount++] = lastmax;
01192     }
01193     else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
01194     && turncount < SPLINESIZE - 1) {
01195       if (ycoords[ptindex] < ycoords[lastmin])
01196         turnpoints[turncount++] = ptindex;
01197       else
01198         turnpoints[turncount++] = lastmin;
01199     }
01200   }
01201 
01202   if (textord_oldbl_debug && turncount > 0)
01203     tprintf ("First turn is %d at (%d,%d)\n",
01204       turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
01205   for (segment = 1; segment < turncount; segment++) {
01206                                  /*centre y coord */
01207     lastmax = (ycoords[turnpoints[segment - 1]]
01208     + ycoords[turnpoints[segment]]) / 2;
01209 
01210     /* fix alg so that it works with both rising and falling sections */
01211     if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
01212                                  /*find rising y centre */
01213       for (ptindex = turnpoints[segment - 1] + 1;
01214        ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax;
01215        ptindex++);
01216     else
01217                                  /*find falling y centre */
01218       for (ptindex = turnpoints[segment - 1] + 1;
01219        ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax;
01220        ptindex++);
01221                                  /*centre x */
01222     xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
01223       + xcoords[turnpoints[segment - 1]]
01224       + xcoords[turnpoints[segment]] + 2) / 4;
01225     /*halfway between turns */
01226     if (textord_oldbl_debug)
01227       tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n",
01228         segment, turnpoints[segment],
01229         xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
01230         ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
01231   }
01232 
01233   xstarts[segment] = max_x;
01234   return segment;                /*no of splines */
01235 }
01236 
01237 
01245 BOOL8
01246 split_stepped_spline (           //make xstarts
01247 QSPLINE * baseline,              //current shot
01248 float jumplimit,                 //max step fuction
01249 int xcoords[],                   /*points to work on */
01250 int xstarts[],                   //result
01251 int &segments                    //no of segments
01252 ) {
01253   BOOL8 doneany;                 //return value
01254   register int segment;          /*partition no */
01255   int startindex, centreindex, endindex;
01256   float leftcoord, rightcoord;
01257   int leftindex, rightindex;
01258   float step;                    //spline step
01259 
01260   doneany = FALSE;
01261   startindex = 0;
01262   for (segment = 1; segment < segments - 1; segment++) {
01263     step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0,
01264       (xstarts[segment] + xstarts[segment + 1]) / 2.0);
01265     if (step < 0)
01266       step = -step;
01267     if (step > jumplimit) {
01268       while (xcoords[startindex] < xstarts[segment - 1])
01269         startindex++;
01270       centreindex = startindex;
01271       while (xcoords[centreindex] < xstarts[segment])
01272         centreindex++;
01273       endindex = centreindex;
01274       while (xcoords[endindex] < xstarts[segment + 1])
01275         endindex++;
01276       if (segments >= SPLINESIZE) {
01277         if (textord_debug_baselines)
01278           tprintf ("Too many segments to resegment spline!!\n");
01279       }
01280       else if (endindex - startindex >= textord_spline_medianwin * 3) {
01281         while (centreindex - startindex <
01282           textord_spline_medianwin * 3 / 2)
01283           centreindex++;
01284         while (endindex - centreindex <
01285           textord_spline_medianwin * 3 / 2)
01286           centreindex--;
01287         leftindex = (startindex + startindex + centreindex) / 3;
01288         rightindex = (centreindex + endindex + endindex) / 3;
01289         leftcoord =
01290           (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
01291         rightcoord =
01292           (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
01293         while (xcoords[leftindex] > leftcoord
01294           && leftindex - startindex > textord_spline_medianwin)
01295           leftindex--;
01296         while (xcoords[leftindex] < leftcoord
01297           && centreindex - leftindex >
01298           textord_spline_medianwin / 2)
01299           leftindex++;
01300         if (xcoords[leftindex] - leftcoord >
01301           leftcoord - xcoords[leftindex - 1])
01302           leftindex--;
01303         while (xcoords[rightindex] > rightcoord
01304           && rightindex - centreindex >
01305           textord_spline_medianwin / 2)
01306           rightindex--;
01307         while (xcoords[rightindex] < rightcoord
01308           && endindex - rightindex > textord_spline_medianwin)
01309           rightindex++;
01310         if (xcoords[rightindex] - rightcoord >
01311           rightcoord - xcoords[rightindex - 1])
01312           rightindex--;
01313         if (textord_debug_baselines)
01314           tprintf ("Splitting spline at %d with step %g at (%d,%d)\n",
01315             xstarts[segment],
01316             baseline->
01317             step ((xstarts[segment - 1] +
01318             xstarts[segment]) / 2.0,
01319             (xstarts[segment] +
01320             xstarts[segment + 1]) / 2.0),
01321             (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
01322             (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
01323         insert_spline_point (xstarts, segment,
01324           (xcoords[leftindex - 1] +
01325           xcoords[leftindex]) / 2,
01326           (xcoords[rightindex - 1] +
01327           xcoords[rightindex]) / 2, segments);
01328         doneany = TRUE;
01329       }
01330       else if (textord_debug_baselines) {
01331         tprintf
01332           ("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",
01333           startindex, centreindex, endindex,
01334           (INT32) textord_spline_medianwin);
01335       }
01336     }
01337     //  else tprintf("Spline step at %d is %g\n",
01338     //               xstarts[segment],
01339     //               baseline->step((xstarts[segment-1]+xstarts[segment])/2.0,
01340     //               (xstarts[segment]+xstarts[segment+1])/2.0));
01341   }
01342   return doneany;
01343 }
01344 
01345 
01351 void
01352 insert_spline_point (            //get descenders
01353 int xstarts[],                   //starts to shuffle
01354 int segment,                     //insertion pt
01355 int coord1,                      //coords to add
01356 int coord2, int &segments        //total segments
01357 ) {
01358   int index;                     //for shuffling
01359 
01360   for (index = segments; index > segment; index--)
01361     xstarts[index + 1] = xstarts[index];
01362   segments++;
01363   xstarts[segment] = coord1;
01364   xstarts[segment + 1] = coord2;
01365 }
01366 
01367 
01374 void
01375 find_lesser_parts (              //get descenders
01376 TO_ROW * row,                    //row to process
01377 BOX blobcoords[],                //bounding boxes
01378 int blobcount,                   /*no of blobs */
01379 char partids[],                  /*partition of each blob */
01380 int partsizes[],                 /*size of each part */
01381 int partcount,                   /*no of partitions */
01382 int bestpart                     /*biggest partition */
01383 ) {
01384   register int blobindex;        /*index of blob */
01385   register int partition;        /*current partition */
01386   int xcentre;                   /*centre of blob */
01387   int poscount;                  /*count of best up step */
01388   int negcount;                  /*count of best down step */
01389   float partsteps[MAXPARTS];     /*average step to part */
01390   float bestpos;                 /*best up step */
01391   float bestneg;                 /*best down step */
01392   int runlength;                 /*length of bad run */
01393   int biggestrun;                /*biggest bad run */
01394 
01395   biggestrun = 0;
01396   for (partition = 0; partition < partcount; partition++)
01397     partsteps[partition] = 0.0;  /*zero accumulators */
01398   for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
01399     xcentre = (blobcoords[blobindex].left ()
01400       + blobcoords[blobindex].right ()) >> 1;
01401                                  /*in other parts */
01402     if (partids[blobindex] != bestpart) {
01403       runlength++;               /*run of non bests */
01404       if (runlength > biggestrun)
01405         biggestrun = runlength;
01406       partsteps[partids[blobindex]] += blobcoords[blobindex].bottom ()
01407         - row->baseline.y (xcentre);
01408     }
01409     else
01410       runlength = 0;
01411   }
01412   if (biggestrun > MAXBADRUN)
01413     row->xheight = -1.0f;        /*failed */
01414   else
01415     row->xheight = 1.0f;         /*success */
01416   poscount = negcount = 0;
01417   bestpos = bestneg = 0.0;       /*no step yet */
01418   for (partition = 0; partition < partcount; partition++) {
01419     if (partition != bestpart) {
01420       partsteps[partition] /= partsizes[partition];
01421       if (partsteps[partition] >= MINASCRISE
01422       && partsizes[partition] > poscount) {
01423                                  /*ascender rise */
01424         bestpos = partsteps[partition];
01425                                  /*2nd most popular */
01426         poscount = partsizes[partition];
01427       }
01428       if (partsteps[partition] <= -MINASCRISE
01429       && partsizes[partition] > negcount) {
01430                                  /*ascender rise */
01431         bestneg = partsteps[partition];
01432                                  /*2nd most popular */
01433         negcount = partsizes[partition];
01434       }
01435     }
01436   }
01437                                  /*average x-height */
01438   partsteps[bestpart] /= blobcount;
01439   row->descdrop = bestneg;
01440 }
01441 
01442 
01450 void
01451 old_first_xheight (              //the wiseowl way
01452 TO_ROW * row,                    /*current row */
01453 BOX blobcoords[],                /*blob bounding boxes */
01454 int initialheight,               //initial guess
01455 int blobcount,                   /*blobs in blobcoords */
01456 QSPLINE * baseline,              /*established */
01457 float jumplimit                  /*min ascender height */
01458 ) {
01459   register int blobindex;        /*current blob */
01460                                  /*height statistics */
01461   STATS heightstat (0, MAXHEIGHT);
01462   int height;                    /*height of blob */
01463   int xcentre;                   /*centre of blob */
01464   int lineheight;                /*approx xheight */
01465   float ascenders;               /*ascender sum */
01466   int asccount;                  /*no of ascenders */
01467   float xsum;                    /*xheight sum */
01468   int xcount;                    /*xheight count */
01469   register float diff;           /*height difference */
01470 
01471   if (blobcount > 1) {
01472     for (blobindex = 0; blobindex < blobcount; blobindex++) {
01473       xcentre = (blobcoords[blobindex].left ()
01474         + blobcoords[blobindex].right ()) / 2;
01475                                  /*height of blob */
01476       height = (int) (blobcoords[blobindex].top ()
01477        - baseline->y (xcentre) + 0.5);
01478       if (height > initialheight * oldbl_xhfract
01479         && height > textord_min_xheight)
01480         heightstat.add (height, 1);
01481     }
01482     if (heightstat.get_total () > 3) {
01483       lineheight = (int) heightstat.ile (0.25);
01484       if (lineheight <= 0)
01485         lineheight = (int) heightstat.ile (0.5);
01486     }
01487     else
01488       lineheight = initialheight;
01489   }
01490   else {
01491     lineheight = (int) (blobcoords[0].top ()
01492       - baseline->y ((blobcoords[0].left ()
01493       + blobcoords[0].right ()) / 2) +
01494       0.5);
01495   }
01496 
01497   xsum = 0.0f;
01498   xcount = 0;
01499   for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount;
01500   blobindex++) {
01501     xcentre = (blobcoords[blobindex].left ()
01502       + blobcoords[blobindex].right ()) / 2;
01503     diff = blobcoords[blobindex].top () - baseline->y (xcentre);
01504                                  /*is it ascender */
01505     if (diff > lineheight + jumplimit) {
01506       ascenders += diff;
01507       asccount++;                /*count ascenders */
01508     }
01509     else if (diff > lineheight - jumplimit) {
01510       xsum += diff;              /*mean xheight */
01511       xcount++;
01512     }
01513   }
01514   if (xcount > 0)
01515     xsum /= xcount;              /*average xheight */
01516   else
01517     xsum = (float) lineheight;   /*guess it */
01518   row->xheight *= xsum;
01519   if (asccount > 0)
01520     row->ascrise = ascenders / asccount - xsum;
01521   else
01522     row->ascrise = 0.0f;         /*had none */
01523   if (row->xheight == 0)
01524     row->xheight = -1.0f;
01525 }
01526 
01527 
01535 void
01536 make_first_xheight (             //find xheight
01537 TO_ROW * row,                    /*current row */
01538 BOX blobcoords[],                /*blob bounding boxes */
01539 int lineheight,                  //initial guess
01540 int init_lineheight,             //block level guess
01541 int blobcount,                   /*blobs in blobcoords */
01542 QSPLINE * baseline,              /*established */
01543 float jumplimit                  /*min ascender height */
01544 ) {
01545   int *heights;
01546   STATS heightstat (0, HEIGHTBUCKETS);
01547   int modelist[MODENUM];
01548   int blobindex;
01549   int mode_count;                //blobs to count in thr
01550   int sign_bit;
01551   int mode_threshold;
01552 
01553   sign_bit = row->xheight > 0 ? 1 : -1;
01554   heights = make_height_array (blobcoords, blobcount, baseline);
01555 
01556   mode_count = 0;
01557   for (blobindex = 0; blobindex < blobcount; blobindex++) {
01558     if (heights[blobindex] > lineheight * oldbl_xhfract
01559       && blobcoords[blobindex].height () > init_lineheight * 0.25
01560       && heights[blobindex] > textord_min_xheight)
01561       heightstat.add (heights[blobindex], 1);
01562     if (blobcoords[blobindex].height () > init_lineheight * 0.25)
01563       mode_count++;
01564   }
01565 
01566   mode_threshold = (int) (blobcount * 0.1);
01567   if (oldbl_dot_error_size > 1 || oldbl_xhfix)
01568     mode_threshold = (int) (mode_count * 0.1);
01569 
01570   if (textord_oldbl_debug) {
01571     tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n",
01572       blobcount, mode_count, mode_threshold);
01573   }
01574   find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);
01575   if (textord_oldbl_debug) {
01576     for (blobindex = 0; blobindex < MODENUM; blobindex++)
01577       tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]);
01578     tprintf ("\n");
01579   }
01580   pick_x_height(row, modelist, &heightstat, mode_threshold);
01581 
01582   if (textord_oldbl_debug)
01583     tprintf ("Output xheight=%g\n", row->xheight);
01584   if (row->xheight < 0 && textord_oldbl_debug)
01585     tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight);
01586 
01587   free_mem(heights);
01588 
01589   if (sign_bit < 0)
01590     row->xheight = -row->xheight;
01591 }
01592 
01593 
01599 int *
01600 make_height_array (              //get array of heights
01601 BOX blobcoords[],                /*blob bounding boxes */
01602 int blobcount,                   /*blobs in blobcoords */
01603 QSPLINE * baseline               /*established */
01604 ) {
01605   int blobindex;
01606   int xcenter;
01607   int *heights;
01608 
01609   heights = (int *) alloc_mem (sizeof (int) * blobcount);
01610 
01611   for (blobindex = 0; blobindex < blobcount; blobindex++) {
01612     xcenter = (blobcoords[blobindex].left () +
01613       blobcoords[blobindex].right ()) / 2;
01614     heights[blobindex] = (int) (blobcoords[blobindex].top () -
01615       baseline->y (xcenter) + 0.5);
01616   }
01617 
01618   return (heights);
01619 }
01620 
01621 
01622 const int kMinModeFactor = 12;
01623 
01630 void
01631 find_top_modes (                 //get modes
01632 STATS * stats,                   //stats to hack
01633 int statnum,                     //no of piles
01634 int modelist[], int modenum      //no of modes to get
01635 ) {
01636   int mode_count;
01637   int last_i = 0;
01638   int last_max = MAX_INT32;
01639   int i;
01640   int mode;
01641   int total_max = 0;
01642 
01643   for (mode_count = 0; mode_count < modenum; mode_count++) {
01644     mode = 0;
01645     for (i = 0; i < statnum; i++) {
01646       if (stats->pile_count (i) > stats->pile_count (mode)) {
01647         if ((stats->pile_count (i) < last_max) ||
01648         ((stats->pile_count (i) == last_max) && (i > last_i))) {
01649           mode = i;
01650         }
01651       }
01652     }
01653     last_i = mode;
01654     last_max = stats->pile_count (last_i);
01655     total_max += last_max;
01656     if (last_max <= total_max / kMinModeFactor)
01657       mode = 0;
01658     modelist[mode_count] = mode;
01659   }
01660 }
01661 
01662 
01668 void
01669 pick_x_height (                  //find xheight
01670 TO_ROW * row,                    //row to do
01671                                  //height stats
01672 int modelist[], STATS * heightstat,
01673 int mode_threshold) {
01674   int x;
01675   int y;
01676   int z;
01677   float ratio;
01678   int found_one_bigger = FALSE;
01679   int best_x_height = 0;
01680   int best_asc = 0;
01681   int num_in_best;
01682 
01683   for (x = 0; x < MODENUM; x++) {
01684     for (y = 0; y < MODENUM; y++) {
01685       /* Check for two modes */
01686       if (modelist[x] && modelist[y] &&
01687       heightstat->pile_count (modelist[x]) > mode_threshold) {
01688         ratio = (float) modelist[y] / (float) modelist[x];
01689         if (1.2 < ratio && ratio < 1.8) {
01690           if (modelist[y] && modelist[x]) {
01691             /* Two modes found */
01692             best_x_height = modelist[x];
01693             num_in_best = heightstat->pile_count (modelist[x]);
01694 
01695             /* Try to get one higher */
01696             do {
01697               found_one_bigger = FALSE;
01698               for (z = 0; z < MODENUM; z++) {
01699                 if (modelist[z] == best_x_height + 1) {
01700                   ratio =
01701                     (float) modelist[y] / (float) modelist[z];
01702                   if ((1.2 < ratio && ratio < 1.8) &&
01703                                  /* Should be half of best */
01704                     heightstat->pile_count (modelist[z]) >
01705                   num_in_best * 0.5) {
01706                     best_x_height++;
01707                     found_one_bigger = TRUE;
01708                     break;
01709                   }
01710                 }
01711               }
01712             }
01713             while (found_one_bigger);
01714 
01715             /* try to get a higher ascender */
01716 
01717             best_asc = modelist[y];
01718             num_in_best = heightstat->pile_count (modelist[y]);
01719 
01720             /* Try to get one higher */
01721             do {
01722               found_one_bigger = FALSE;
01723               for (z = 0; z < MODENUM; z++) {
01724                 if (modelist[z] > best_asc) {
01725                   ratio =
01726                     (float) modelist[z] /
01727                     (float) best_x_height;
01728                   if ((1.2 < ratio && ratio < 1.8) &&
01729                                  /* Should be half of best */
01730                     heightstat->pile_count (modelist[z]) >
01731                   num_in_best * 0.5) {
01732                     best_asc = modelist[z];
01733                     found_one_bigger = TRUE;
01734                     break;
01735                   }
01736                 }
01737               }
01738             }
01739             while (found_one_bigger);
01740 
01741             row->xheight = (float) best_x_height;
01742             row->ascrise = (float) best_asc - best_x_height;
01743             return;
01744           }
01745         }
01746       }
01747     }
01748   }
01749 
01750   best_x_height = modelist[0];   /* Single Mode found */
01751   num_in_best = heightstat->pile_count (best_x_height);
01752   do {
01753                                  /* Try to get one higher */
01754     found_one_bigger = FALSE;
01755     for (z = 1; z < MODENUM; z++) {
01756       /* Should be half of best */
01757       if ((modelist[z] == best_x_height + 1) &&
01758       (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) {
01759         best_x_height++;
01760         found_one_bigger = TRUE;
01761         break;
01762       }
01763     }
01764   }
01765   while (found_one_bigger);
01766 
01767   row->ascrise = 0.0f;
01768   row->xheight = (float) best_x_height;
01769   if (row->xheight == 0)
01770     row->xheight = -1.0f;
01771 }

Generated on Wed Feb 28 19:49:11 2007 for Tesseract by  doxygen 1.5.1