00001
00020 #include "mfcpch.h"
00021 #include "statistc.h"
00022 #include "quadlsq.h"
00023 #include "lmedsq.h"
00024 #include "makerow.h"
00025 #include "drawtord.h"
00026 #include "oldbasel.h"
00027 #include "tprintf.h"
00028
00029 #define EXTERN
00030
00033 EXTERN BOOL_VAR (textord_really_old_xheight, FALSE,
00034 "Use original wiseowl xheight");
00035 EXTERN BOOL_VAR (textord_oldbl_debug, FALSE, "Debug old baseline generation");
00036 EXTERN BOOL_VAR (textord_debug_baselines, FALSE, "Debug baseline generation");
00037 EXTERN BOOL_VAR (textord_oldbl_paradef, TRUE, "Use para default mechanism");
00038 EXTERN BOOL_VAR (textord_oldbl_split_splines, TRUE, "Split stepped splines");
00039 EXTERN BOOL_VAR (textord_oldbl_merge_parts, TRUE, "Merge suspect partitions");
00040 EXTERN BOOL_VAR (oldbl_corrfix, TRUE, "Improve correlation of heights");
00041 EXTERN BOOL_VAR (oldbl_xhfix, FALSE,
00042 "Fix bug in modes threshold for xheights");
00043 EXTERN double_VAR (oldbl_xhfract, 0.4, "Fraction of est allowed in calc");
00044 EXTERN INT_VAR (oldbl_holed_losscount, 10,
00045 "Max lost before fallback line used");
00046 EXTERN double_VAR (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot");
00047 EXTERN double_VAR (textord_oldbl_jumplimit, 0.15,
00048 "X fraction for new partition");
00051
00052 #define TURNLIMIT 1
00054 #define X_HEIGHT_FRACTION 0.7
00056 #define DESCENDER_FRACTION 0.5
00058 #define MIN_ASC_FRACTION 0.20
00060 #define MIN_DESC_FRACTION 0.25
00062 #define MINASCRISE 2.0
00064 #define MAXHEIGHTVARIANCE 0.15
00066 #define MAXHEIGHT 300
00068 #define MAXOVERLAP 0.1
00070 #define MAXBADRUN 2
00072 #define HEIGHTBUCKETS 200
00074 #define DELTAHEIGHT 5.0
00075
00076 #define GOODHEIGHT 5
00077 #define MAXLOOPS 10
00078 #define MODENUM 10
00079 #define MAXPARTS 6
00080 #define SPLINESIZE 23
00081
00082 #define ABS(x) ((x)<0 ? (-(x)) : (x))
00083
00089 void make_old_baselines(
00090 TO_BLOCK *block,
00091 BOOL8 testing_on
00092 ) {
00093 QSPLINE *prev_baseline;
00094 TO_ROW *row;
00095 TO_ROW_IT row_it = block->get_rows ();
00096 BLOBNBOX_IT blob_it;
00097
00098 prev_baseline = NULL;
00099 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00100 row = row_it.data ();
00101 find_textlines (block, row, 2, NULL);
00102 if (row->xheight <= 0 && prev_baseline != NULL)
00103 find_textlines (block, row, 2, prev_baseline);
00104 if (row->xheight > 0)
00105
00106 prev_baseline = &row->baseline;
00107 else {
00108 prev_baseline = NULL;
00109 blob_it.set_to_list (row->blob_list ());
00110 if (textord_debug_baselines)
00111 tprintf ("Row baseline generation failed on row at (%d,%d)\n",
00112 blob_it.data ()->bounding_box ().left (),
00113 blob_it.data ()->bounding_box ().bottom ());
00114 }
00115 }
00116 correlate_lines(block);
00117 }
00118
00119
00127 void correlate_lines(
00128 TO_BLOCK *block
00129 ) {
00130 TO_ROW **rows;
00131 int rowcount;
00132 register int rowindex;
00133
00134 TO_ROW_IT row_it = block->get_rows ();
00135
00136 rowcount = row_it.length ();
00137 if (rowcount == 0) {
00138
00139 block->xheight = block->line_size;
00140 return;
00141 }
00142 rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
00143 rowindex = 0;
00144 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
00145
00146 rows[rowindex++] = row_it.data ();
00147
00148
00149 correlate_neighbours(block, rows, rowcount);
00150
00151 block->xheight = (float) correlate_with_stats (rows, rowcount);
00152
00153 if (block->xheight <= 0)
00154
00155 block->xheight = block->line_size * textord_merge_x;
00156 if (block->xheight < textord_min_xheight)
00157 block->xheight = (float) textord_min_xheight;
00158
00159 free_mem(rows);
00160 }
00161
00162
00168 void correlate_neighbours(
00169 TO_BLOCK *block,
00170 TO_ROW **rows,
00171 int rowcount
00172 ) {
00173 TO_ROW *row;
00174 register int rowindex;
00175 register int otherrow;
00176 int upperrow;
00177 int lowerrow;
00178 float biggest;
00179
00180 for (rowindex = 0; rowindex < rowcount; rowindex++) {
00181 row = rows[rowindex];
00182 if (row->xheight < 0) {
00183
00184 for (otherrow = rowindex - 2;
00185 otherrow >= 0
00186 && (rows[otherrow]->xheight < 0.0
00187 || !row->baseline.overlap (&rows[otherrow]->baseline,
00188 MAXOVERLAP)); otherrow--);
00189 upperrow = otherrow;
00190 for (otherrow = rowindex + 1;
00191 otherrow < rowcount
00192 && (rows[otherrow]->xheight < 0.0
00193 || !row->baseline.overlap (&rows[otherrow]->baseline,
00194 MAXOVERLAP)); otherrow++);
00195 lowerrow = otherrow;
00196 if (upperrow >= 0)
00197 find_textlines (block, row, 2, &rows[upperrow]->baseline);
00198 if (row->xheight < 0 && lowerrow < rowcount)
00199 find_textlines (block, row, 2, &rows[lowerrow]->baseline);
00200 if (row->xheight < 0) {
00201 if (upperrow >= 0)
00202 find_textlines (block, row, 1, &rows[upperrow]->baseline);
00203 else if (lowerrow < rowcount)
00204 find_textlines (block, row, 1, &rows[lowerrow]->baseline);
00205 }
00206 }
00207 }
00208
00209 for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) {
00210 row = rows[rowindex];
00211 if (row->xheight < 0)
00212
00213 row->xheight = -row->xheight;
00214 biggest = MAX (biggest, row->xheight);
00215 }
00216 }
00217
00218
00225 int correlate_with_stats(
00226 TO_ROW **rows,
00227 int rowcount
00228 ) {
00229 TO_ROW *row;
00230 register int rowindex;
00231 float lineheight;
00232 float ascheight;
00233 float minascheight;
00234 int xcount;
00235 float fullheight;
00236 int fullcount;
00237 float descheight;
00238 float mindescheight;
00239 int desccount;
00240 float xshift;
00241
00242
00243 xcount = fullcount = desccount = 0;
00244 lineheight = ascheight = fullheight = descheight = 0.0;
00245 for (rowindex = 0; rowindex < rowcount; rowindex++) {
00246 row = rows[rowindex];
00247 if (row->ascrise > 0.0) {
00248 lineheight += row->xheight;
00249 ascheight += row->ascrise;
00250 xcount++;
00251 }
00252 else {
00253 fullheight += row->xheight;
00254 fullcount++;
00255 }
00256 if (row->descdrop < 0.0) {
00257
00258 descheight += row->descdrop;
00259 desccount++;
00260 }
00261 }
00262
00263 if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) {
00264 lineheight /= xcount;
00265
00266 fullheight = lineheight + ascheight / xcount;
00267
00268 if (fullheight < lineheight * (1 + MIN_ASC_FRACTION))
00269 fullheight = lineheight * (1 + MIN_ASC_FRACTION);
00270 }
00271 else {
00272 fullheight /= fullcount;
00273
00274 lineheight = fullheight * X_HEIGHT_FRACTION;
00275 }
00276 if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2))
00277 descheight /= desccount;
00278 else
00279
00280 descheight = -lineheight * DESCENDER_FRACTION;
00281
00282 minascheight = lineheight * MIN_ASC_FRACTION;
00283 mindescheight = -lineheight * MIN_DESC_FRACTION;
00284 for (rowindex = 0; rowindex < rowcount; rowindex++) {
00285 row = rows[rowindex];
00286 row->all_caps = FALSE;
00287 if (row->ascrise / row->xheight < MIN_ASC_FRACTION) {
00288
00289 if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
00290 && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) {
00291 row->ascrise = fullheight - lineheight;
00292
00293 xshift = lineheight - row->xheight;
00294
00295 row->xheight = lineheight;
00296
00297 }
00298 else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE)
00299 && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) {
00300 row->ascrise = row->xheight - lineheight;
00301 xshift = -row->ascrise;
00302
00303 row->xheight = lineheight;
00304 row->all_caps = TRUE;
00305 }
00306 else {
00307 row->ascrise = (fullheight - lineheight) * row->xheight
00308 / fullheight;
00309 xshift = -row->ascrise;
00310
00311 row->xheight -= row->ascrise;
00312 row->all_caps = TRUE;
00313 }
00314 if (row->ascrise < minascheight)
00315 row->ascrise =
00316 row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION);
00317 }
00318 if (row->descdrop > mindescheight) {
00319 if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
00320 && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE))
00321
00322 row->descdrop = descheight;
00323 else
00324 row->descdrop = -row->xheight * DESCENDER_FRACTION;
00325 }
00326 }
00327 return (int) lineheight;
00328 }
00329
00330
00336 void find_textlines(
00337 TO_BLOCK *block,
00338 TO_ROW *row,
00339 int degree,
00340 QSPLINE *spline
00341 ) {
00342 int partcount;
00343 BOOL8 holed_line;
00344 int bestpart;
00345 char *partids;
00346 int partsizes[MAXPARTS];
00347 int lineheight;
00348 float jumplimit;
00349 int *xcoords;
00350 int *ycoords;
00351 BOX *blobcoords;
00352 int blobcount;
00353 float *ydiffs;
00354 int pointcount;
00355 int xstarts[SPLINESIZE + 1];
00356 int segments;
00357
00358
00359 blobcount = row->blob_list ()->length ();
00360 partids = (char *) alloc_mem (blobcount * sizeof (char));
00361 xcoords = (int *) alloc_mem (blobcount * sizeof (int));
00362 ycoords = (int *) alloc_mem (blobcount * sizeof (int));
00363 blobcoords = (BOX *) alloc_mem (blobcount * sizeof (BOX));
00364 ydiffs = (float *) alloc_mem (blobcount * sizeof (float));
00365
00366 lineheight = get_blob_coords (row, (int) block->line_size, blobcoords,
00367 holed_line, blobcount);
00368
00369 jumplimit = lineheight * textord_oldbl_jumplimit;
00370 if (jumplimit < MINASCRISE)
00371 jumplimit = MINASCRISE;
00372
00373 if (textord_oldbl_debug) {
00374 tprintf
00375 ("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n",
00376 block->line_size, lineheight, jumplimit);
00377 }
00378 if (holed_line)
00379 make_holed_baseline (blobcoords, blobcount, spline, &row->baseline,
00380 row->line_m ());
00381 else
00382 make_first_baseline (blobcoords, blobcount,
00383 xcoords, ycoords, spline, &row->baseline, jumplimit);
00384 #ifndef GRAPHICS_DISABLED
00385 if (textord_show_final_rows)
00386 row->baseline.plot (to_win, GOLDENROD);
00387 #endif
00388 if (blobcount > 1) {
00389 bestpart = partition_line (blobcoords, blobcount,
00390 &partcount, partids, partsizes,
00391 &row->baseline, jumplimit, ydiffs);
00392 pointcount = partition_coords (blobcoords, blobcount,
00393 partids, bestpart, xcoords, ycoords);
00394 segments = segment_spline (blobcoords, blobcount,
00395 xcoords, ycoords,
00396 degree, pointcount, xstarts);
00397 if (!holed_line) {
00398 do {
00399 row->baseline = QSPLINE (xstarts, segments,
00400 xcoords, ycoords, pointcount, degree);
00401 }
00402 while (textord_oldbl_split_splines
00403 && split_stepped_spline (&row->baseline, jumplimit / 2,
00404 xcoords, xstarts, segments));
00405 }
00406 find_lesser_parts(row,
00407 blobcoords,
00408 blobcount,
00409 partids,
00410 partsizes,
00411 partcount,
00412 bestpart);
00413
00414 }
00415 else {
00416 row->xheight = -1.0f;
00417 row->descdrop = 0.0f;
00418 row->ascrise = 0.0f;
00419 }
00420 row->baseline.extrapolate (row->line_m (),
00421 block->block->bounding_box ().left (),
00422 block->block->bounding_box ().right ());
00423 if (textord_really_old_xheight)
00424 old_first_xheight (row, blobcoords, lineheight,
00425 blobcount, &row->baseline, jumplimit);
00426 else
00427 make_first_xheight (row, blobcoords, lineheight, (int) block->line_size,
00428 blobcount, &row->baseline, jumplimit);
00429 free_mem(partids);
00430 free_mem(xcoords);
00431 free_mem(ycoords);
00432 free_mem(blobcoords);
00433 free_mem(ydiffs);
00434 }
00435
00436
00447 int get_blob_coords(
00448 TO_ROW *row,
00449 INT32 lineheight,
00450 BOX *blobcoords,
00451 BOOL8 &holed_line,
00452 int &outcount
00453 ) {
00454
00455 BLOBNBOX_IT blob_it = row->blob_list ();
00456 register int blobindex;
00457 int losscount;
00458 int maxlosscount;
00459
00460 STATS heightstat (0, MAXHEIGHT);
00461
00462 if (blob_it.empty ())
00463 return 0;
00464 maxlosscount = 0;
00465 losscount = 0;
00466 blob_it.mark_cycle_pt ();
00467 blobindex = 0;
00468 do {
00469 blobcoords[blobindex] = box_next_pre_chopped (&blob_it);
00470 if (blobcoords[blobindex].height () > lineheight * 0.25)
00471 heightstat.add (blobcoords[blobindex].height (), 1);
00472 if (blobindex == 0
00473 || blobcoords[blobindex].height () > lineheight * 0.25
00474 || blob_it.cycled_list ()) {
00475 blobindex++;
00476 losscount = 0;
00477 }
00478 else {
00479 if (blobcoords[blobindex].height ()
00480 < blobcoords[blobindex].width () * oldbl_dot_error_size
00481 && blobcoords[blobindex].width ()
00482 < blobcoords[blobindex].height () * oldbl_dot_error_size) {
00483
00484 blobindex++;
00485 losscount = 0;
00486 }
00487 else {
00488 losscount++;
00489 if (losscount > maxlosscount)
00490
00491 maxlosscount = losscount;
00492 }
00493 }
00494 }
00495 while (!blob_it.cycled_list ());
00496
00497 holed_line = maxlosscount > oldbl_holed_losscount;
00498 outcount = blobindex;
00499
00500 if (heightstat.get_total () > 1)
00501
00502 return (int) heightstat.ile (0.25);
00503 else
00504 return blobcoords[0].height ();
00505 }
00506
00507
00515 void
00516 make_first_baseline (
00517 BOX blobcoords[],
00518 int blobcount,
00519 int xcoords[],
00520 int ycoords[],
00521 QSPLINE * spline,
00522 QSPLINE * baseline,
00523 float jumplimit
00524 ) {
00525 int leftedge;
00526 int rightedge;
00527 int blobindex;
00528 int segment;
00529 float prevy, thisy, nexty;
00530 float y1, y2, y3;
00531 float maxmax, minmin;
00532 int x2 = 0;
00533 int ycount;
00534 float yturns[SPLINESIZE];
00535 int xturns[SPLINESIZE];
00536 int xstarts[SPLINESIZE + 1];
00537 int segments;
00538 ICOORD shift;
00539
00540 prevy = 0;
00541
00542 leftedge = blobcoords[0].left ();
00543
00544 rightedge = blobcoords[blobcount - 1].right ();
00545 if (spline == NULL
00546 || spline->segments < 3
00547
00548 || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
00549 || spline->xcoords[spline->segments - 1] < rightedge
00550 - MAXOVERLAP * (rightedge - leftedge)) {
00551 if (textord_oldbl_paradef)
00552 return;
00553 xstarts[0] = blobcoords[0].left () - 1;
00554 for (blobindex = 0; blobindex < blobcount; blobindex++) {
00555 xcoords[blobindex] = (blobcoords[blobindex].left ()
00556 + blobcoords[blobindex].right ()) / 2;
00557 ycoords[blobindex] = blobcoords[blobindex].bottom ();
00558 }
00559 xstarts[1] = blobcoords[blobcount - 1].right () + 1;
00560 segments = 1;
00561
00562
00563 *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
00564
00565 if (blobcount >= 3) {
00566 y1 = y2 = y3 = 0.0f;
00567 ycount = 0;
00568 segment = 0;
00569 maxmax = minmin = 0.0f;
00570 thisy = ycoords[0] - baseline->y (xcoords[0]);
00571 nexty = ycoords[1] - baseline->y (xcoords[1]);
00572 for (blobindex = 2; blobindex < blobcount; blobindex++) {
00573 prevy = thisy;
00574 thisy = nexty;
00575 nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]);
00576
00577 if (ABS (thisy - prevy) < jumplimit
00578 && ABS (thisy - nexty) < jumplimit) {
00579 y1 = y2;
00580 y2 = y3;
00581 y3 = thisy;
00582 ycount++;
00583
00584 if (ycount >= 3 && (y1 < y2 && y2 >= y3
00585
00586 || y1 > y2 && y2 <= y3)) {
00587 if (segment < SPLINESIZE - 2) {
00588
00589 xturns[segment] = x2;
00590 yturns[segment] = y2;
00591 segment++;
00592 }
00593 }
00594 if (ycount == 1) {
00595 maxmax = minmin = y3;
00596 }
00597 else {
00598 if (y3 > maxmax)
00599 maxmax = y3;
00600 if (y3 < minmin)
00601 minmin = y3;
00602 }
00603
00604 x2 = blobcoords[blobindex - 1].right ();
00605 }
00606 }
00607
00608 jumplimit *= 1.2;
00609
00610 if (maxmax - minmin > jumplimit) {
00611 ycount = segment;
00612 for (blobindex = 0, segment = 1; blobindex < ycount;
00613 blobindex++) {
00614 if (yturns[blobindex] > minmin + jumplimit
00615 || yturns[blobindex] < maxmax - jumplimit) {
00616
00617 if (segment == 1
00618 || yturns[blobindex] > prevy + jumplimit
00619 || yturns[blobindex] < prevy - jumplimit) {
00620
00621 xstarts[segment] = xturns[blobindex];
00622 segment++;
00623 prevy = yturns[blobindex];
00624 }
00625
00626 else if (prevy > minmin + jumplimit && yturns[blobindex] > prevy
00627
00628 || prevy < maxmax - jumplimit && yturns[blobindex] < prevy) {
00629 xstarts[segment - 1] = xturns[blobindex];
00630
00631 prevy = yturns[blobindex];
00632 }
00633 }
00634 }
00635 xstarts[segment] = blobcoords[blobcount - 1].right () + 1;
00636 segments = segment;
00637
00638 *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
00639 }
00640 }
00641 }
00642 else {
00643 *baseline = *spline;
00644 shift = ICOORD (0, (INT16) (blobcoords[0].bottom ()
00645 - spline->y (blobcoords[0].right ())));
00646 baseline->move (shift);
00647 }
00648 }
00649
00650
00658 void
00659 make_holed_baseline (
00660 BOX blobcoords[],
00661 int blobcount,
00662 QSPLINE * spline,
00663 QSPLINE * baseline,
00664 float gradient
00665 ) {
00666 int leftedge;
00667 int rightedge;
00668 int blobindex;
00669 float x;
00670 ICOORD shift;
00671
00672 LMS lms(blobcount);
00673 INT32 xstarts[2];
00674 double coeffs[3];
00675 float c;
00676
00677
00678 leftedge = blobcoords[0].left ();
00679
00680 rightedge = blobcoords[blobcount - 1].right ();
00681 for (blobindex = 0; blobindex < blobcount; blobindex++) {
00682 lms.add (FCOORD ((blobcoords[blobindex].left () +
00683 blobcoords[blobindex].right ()) / 2.0,
00684 blobcoords[blobindex].bottom ()));
00685 }
00686 lms.constrained_fit (gradient, c);
00687 xstarts[0] = leftedge;
00688 xstarts[1] = rightedge;
00689 coeffs[0] = 0;
00690 coeffs[1] = gradient;
00691 coeffs[2] = c;
00692 *baseline = QSPLINE (1, xstarts, coeffs);
00693 if (spline != NULL
00694 && spline->segments >= 3
00695
00696 && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge)
00697 && spline->xcoords[spline->segments - 1] >= rightedge
00698 - MAXOVERLAP * (rightedge - leftedge)) {
00699 *baseline = *spline;
00700 x = (leftedge + rightedge) / 2.0;
00701 shift = ICOORD (0, (INT16) (gradient * x + c - spline->y (x)));
00702 baseline->move (shift);
00703 }
00704 }
00705
00706
00716 int
00717 partition_line (
00718 BOX blobcoords[],
00719 int blobcount,
00720 int *numparts,
00721 char partids[],
00722 int partsizes[],
00723 QSPLINE * spline,
00724 float jumplimit,
00725 float ydiffs[]
00726 ) {
00727 register int blobindex;
00728 int bestpart;
00729 int biggestpart;
00730 float diff;
00731 int startx;
00732 float partdiffs[MAXPARTS];
00733
00734 for (bestpart = 0; bestpart < MAXPARTS; bestpart++)
00735 partsizes[bestpart] = 0;
00736
00737 startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs);
00738 *numparts = 1;
00739 bestpart = -1;
00740 for (blobindex = startx; blobindex < blobcount; blobindex++) {
00741
00742 diff = ydiffs[blobindex];
00743 if (textord_oldbl_debug) {
00744 tprintf ("%d(%d,%d), ", blobindex,
00745 blobcoords[blobindex].left (),
00746 blobcoords[blobindex].bottom ());
00747 }
00748 bestpart =
00749 choose_partition(diff, partdiffs, bestpart, jumplimit, numparts);
00750
00751 partids[blobindex] = bestpart;
00752 partsizes[bestpart]++;
00753 }
00754
00755 bestpart = -1;
00756 partsizes[0]--;
00757
00758 for (blobindex = startx; blobindex >= 0; blobindex--) {
00759 diff = ydiffs[blobindex];
00760 if (textord_oldbl_debug) {
00761 tprintf ("%d(%d,%d), ", blobindex,
00762 blobcoords[blobindex].left (),
00763 blobcoords[blobindex].bottom ());
00764 }
00765 bestpart =
00766 choose_partition(diff, partdiffs, bestpart, jumplimit, numparts);
00767
00768 partids[blobindex] = bestpart;
00769 partsizes[bestpart]++;
00770 }
00771
00772 for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++)
00773 if (partsizes[bestpart] >= partsizes[biggestpart])
00774 biggestpart = bestpart;
00775 if (textord_oldbl_merge_parts)
00776 merge_oldbl_parts(blobcoords,
00777 blobcount,
00778 partids,
00779 partsizes,
00780 biggestpart,
00781 jumplimit);
00782 return biggestpart;
00783 }
00784
00785
00792 void
00793 merge_oldbl_parts (
00794 BOX blobcoords[],
00795 int blobcount,
00796 char partids[],
00797 int partsizes[],
00798 int biggestpart,
00799 float jumplimit
00800 ) {
00801 BOOL8 found_one;
00802 BOOL8 close_one;
00803 register int blobindex;
00804 int prevpart;
00805 int runlength;
00806 float diff;
00807 int startx;
00808 int test_blob;
00809 FCOORD coord;
00810 float m, c;
00811 QLSQ stats;
00812
00813 prevpart = biggestpart;
00814 runlength = 0;
00815 startx = 0;
00816 for (blobindex = 0; blobindex < blobcount; blobindex++) {
00817 if (partids[blobindex] != prevpart) {
00818
00819
00820
00821 if (prevpart != biggestpart && runlength > MAXBADRUN) {
00822 stats.clear ();
00823 for (test_blob = startx; test_blob < blobindex; test_blob++) {
00824 coord = FCOORD ((blobcoords[test_blob].left ()
00825 + blobcoords[test_blob].right ()) / 2.0,
00826 blobcoords[test_blob].bottom ());
00827 stats.add (coord.x (), coord.y ());
00828 }
00829 stats.fit (1);
00830 m = stats.get_b ();
00831 c = stats.get_c ();
00832 if (textord_oldbl_debug)
00833 tprintf ("Fitted line y=%g x + %g\n", m, c);
00834 found_one = FALSE;
00835 close_one = FALSE;
00836 for (test_blob = 1; !found_one
00837 && (startx - test_blob >= 0
00838 || blobindex + test_blob <= blobcount); test_blob++) {
00839 if (startx - test_blob >= 0
00840 && partids[startx - test_blob] == biggestpart) {
00841 found_one = TRUE;
00842 coord = FCOORD ((blobcoords[startx - test_blob].left ()
00843 + blobcoords[startx -
00844 test_blob].right ()) /
00845 2.0,
00846 blobcoords[startx -
00847 test_blob].bottom ());
00848 diff = m * coord.x () + c - coord.y ();
00849 if (textord_oldbl_debug)
00850 tprintf
00851 ("Diff of common blob to suspect part=%g at (%g,%g)\n",
00852 diff, coord.x (), coord.y ());
00853 if (diff < jumplimit && -diff < jumplimit)
00854 close_one = TRUE;
00855 }
00856 if (blobindex + test_blob <= blobcount
00857 && partids[blobindex + test_blob - 1] == biggestpart) {
00858 found_one = TRUE;
00859 coord =
00860 FCOORD ((blobcoords[blobindex + test_blob - 1].
00861 left () + blobcoords[blobindex + test_blob -
00862 1].right ()) / 2.0,
00863 blobcoords[blobindex + test_blob -
00864 1].bottom ());
00865 diff = m * coord.x () + c - coord.y ();
00866 if (textord_oldbl_debug)
00867 tprintf
00868 ("Diff of common blob to suspect part=%g at (%g,%g)\n",
00869 diff, coord.x (), coord.y ());
00870 if (diff < jumplimit && -diff < jumplimit)
00871 close_one = TRUE;
00872 }
00873 }
00874 if (close_one) {
00875 if (textord_oldbl_debug)
00876 tprintf
00877 ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n",
00878 runlength, biggestpart, prevpart,
00879 blobcoords[startx].left (),
00880 blobcoords[startx].bottom ());
00881
00882 partsizes[prevpart] -= runlength;
00883 for (test_blob = startx; test_blob < blobindex; test_blob++)
00884 partids[test_blob] = biggestpart;
00885 }
00886 }
00887 prevpart = partids[blobindex];
00888 runlength = 1;
00889 startx = blobindex;
00890 }
00891 else
00892 runlength++;
00893 }
00894 }
00895
00896
00905 int
00906 get_ydiffs (
00907 BOX blobcoords[],
00908 int blobcount,
00909 QSPLINE * spline,
00910 float ydiffs[]
00911 ) {
00912 register int blobindex;
00913 int xcentre;
00914 int lastx;
00915 float diffsum;
00916 float diff;
00917 float drift;
00918 float bestsum;
00919 int bestindex;
00920
00921 diffsum = 0.0f;
00922 bestindex = 0;
00923 bestsum = (float) MAX_INT32;
00924 drift = 0.0f;
00925 lastx = blobcoords[0].left ();
00926
00927 for (blobindex = 0; blobindex < blobcount; blobindex++) {
00928
00929 xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1;
00930
00931 drift += spline->step (lastx, xcentre);
00932 lastx = xcentre;
00933 diff = blobcoords[blobindex].bottom ();
00934 diff -= spline->y (xcentre);
00935 diff += drift;
00936 ydiffs[blobindex] = diff;
00937 if (blobindex > 2)
00938
00939 diffsum -= ABS (ydiffs[blobindex - 3]);
00940 diffsum += ABS (diff);
00941 if (blobindex >= 2 && diffsum < bestsum) {
00942 bestsum = diffsum;
00943 bestindex = blobindex - 1;
00944 }
00945 }
00946 return bestindex;
00947 }
00948
00949
00955 int
00956 choose_partition (
00957 register float diff,
00958 float partdiffs[],
00959 int lastpart,
00960 float jumplimit,
00961 int *partcount
00962 ) {
00963 register int partition;
00964 int bestpart;
00965 float bestdelta;
00966 static float drift;
00967 float delta;
00968 static float lastdelta;
00969
00970 if (lastpart < 0) {
00971 partdiffs[0] = diff;
00972 lastpart = 0;
00973 drift = 0.0f;
00974 lastdelta = 0.0f;
00975 }
00976
00977 delta = diff - partdiffs[lastpart] - drift;
00978 if (textord_oldbl_debug) {
00979 tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, drift);
00980 }
00981 if (ABS (delta) > jumplimit / 2) {
00982
00983 bestdelta = diff - partdiffs[0] - drift;
00984 bestpart = 0;
00985 for (partition = 1; partition < *partcount; partition++) {
00986 delta = diff - partdiffs[partition] - drift;
00987 if (ABS (delta) < ABS (bestdelta)) {
00988 bestdelta = delta;
00989 bestpart = partition;
00990 }
00991 }
00992 delta = bestdelta;
00993
00994 if (ABS (bestdelta) > jumplimit
00995 && *partcount < MAXPARTS) {
00996 bestpart = (*partcount)++;
00997
00998 partdiffs[bestpart] = diff - drift;
00999 delta = 0.0f;
01000 }
01001 }
01002 else {
01003 bestpart = lastpart;
01004 }
01005
01006 if (bestpart == lastpart
01007 && (ABS (delta - lastdelta) < jumplimit / 2
01008 || ABS (delta) < jumplimit / 2))
01009
01010 drift = (3 * drift + delta) / 3;
01011 lastdelta = delta;
01012
01013 if (textord_oldbl_debug) {
01014 tprintf ("P=%d\n", bestpart);
01015 }
01016
01017 return bestpart;
01018 }
01019
01020
01022
01023
01024
01025
01026
01027
01028
01029
01030
01031
01032
01033
01034
01035
01036
01037
01038
01039
01040
01041
01042
01043
01044
01045
01046
01047
01048
01049
01050
01051
01052
01053
01054
01055
01056
01057
01058
01059
01060
01061
01062
01063
01064
01065
01066
01067
01068
01069
01070
01071
01072
01073
01074
01083 int
01084 partition_coords (
01085 BOX blobcoords[],
01086 int blobcount,
01087 char partids[],
01088 int bestpart,
01089 int xcoords[],
01090 int ycoords[]
01091 ) {
01092 register int blobindex;
01093 int pointcount;
01094
01095 pointcount = 0;
01096 for (blobindex = 0; blobindex < blobcount; blobindex++) {
01097 if (partids[blobindex] == bestpart) {
01098
01099 xcoords[pointcount] = (blobcoords[blobindex].left ()
01100 + blobcoords[blobindex].right ()) >> 1;
01101 ycoords[pointcount++] = blobcoords[blobindex].bottom ();
01102 }
01103 }
01104 return pointcount;
01105 }
01106
01107
01114 int
01115 segment_spline (
01116 BOX blobcoords[],
01117 int blobcount,
01118 int xcoords[],
01119 int ycoords[],
01120 int degree, int pointcount,
01121 int xstarts[]
01122 ) {
01123 register int ptindex;
01124 register int segment;
01125 int lastmin, lastmax;
01126 int turnpoints[SPLINESIZE];
01127 int turncount;
01128 int max_x;
01129
01130 xstarts[0] = xcoords[0] - 1;
01131 max_x = xcoords[pointcount - 1] + 1;
01132 if (degree < 2)
01133 pointcount = 0;
01134 turncount = 0;
01135 if (pointcount > 3) {
01136 ptindex = 1;
01137 lastmax = lastmin = 0;
01138 while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
01139
01140 if (ycoords[ptindex - 1] > ycoords[ptindex]
01141 && ycoords[ptindex] <= ycoords[ptindex + 1]) {
01142 if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
01143 if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
01144
01145 turnpoints[turncount++] = lastmax;
01146 lastmin = ptindex;
01147 }
01148 else if (ycoords[ptindex] < ycoords[lastmin]) {
01149 lastmin = ptindex;
01150 }
01151 }
01152
01153
01154 if (ycoords[ptindex - 1] < ycoords[ptindex]
01155 && ycoords[ptindex] >= ycoords[ptindex + 1]) {
01156 if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
01157 if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
01158
01159 turnpoints[turncount++] = lastmin;
01160 lastmax = ptindex;
01161 }
01162 else if (ycoords[ptindex] > ycoords[lastmax]) {
01163 lastmax = ptindex;
01164 }
01165 }
01166 ptindex++;
01167 }
01168
01169 if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT
01170 && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
01171 if (turncount < SPLINESIZE - 1)
01172
01173 turnpoints[turncount++] = lastmax;
01174 if (turncount < SPLINESIZE - 1)
01175 turnpoints[turncount++] = ptindex;
01176 }
01177 else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
01178
01179 && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
01180 if (turncount < SPLINESIZE - 1)
01181
01182 turnpoints[turncount++] = lastmin;
01183 if (turncount < SPLINESIZE - 1)
01184 turnpoints[turncount++] = ptindex;
01185 }
01186 else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
01187 && turncount < SPLINESIZE - 1) {
01188 if (ycoords[ptindex] > ycoords[lastmax])
01189 turnpoints[turncount++] = ptindex;
01190 else
01191 turnpoints[turncount++] = lastmax;
01192 }
01193 else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
01194 && turncount < SPLINESIZE - 1) {
01195 if (ycoords[ptindex] < ycoords[lastmin])
01196 turnpoints[turncount++] = ptindex;
01197 else
01198 turnpoints[turncount++] = lastmin;
01199 }
01200 }
01201
01202 if (textord_oldbl_debug && turncount > 0)
01203 tprintf ("First turn is %d at (%d,%d)\n",
01204 turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
01205 for (segment = 1; segment < turncount; segment++) {
01206
01207 lastmax = (ycoords[turnpoints[segment - 1]]
01208 + ycoords[turnpoints[segment]]) / 2;
01209
01210
01211 if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
01212
01213 for (ptindex = turnpoints[segment - 1] + 1;
01214 ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax;
01215 ptindex++);
01216 else
01217
01218 for (ptindex = turnpoints[segment - 1] + 1;
01219 ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax;
01220 ptindex++);
01221
01222 xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
01223 + xcoords[turnpoints[segment - 1]]
01224 + xcoords[turnpoints[segment]] + 2) / 4;
01225
01226 if (textord_oldbl_debug)
01227 tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n",
01228 segment, turnpoints[segment],
01229 xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
01230 ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
01231 }
01232
01233 xstarts[segment] = max_x;
01234 return segment;
01235 }
01236
01237
01245 BOOL8
01246 split_stepped_spline (
01247 QSPLINE * baseline,
01248 float jumplimit,
01249 int xcoords[],
01250 int xstarts[],
01251 int &segments
01252 ) {
01253 BOOL8 doneany;
01254 register int segment;
01255 int startindex, centreindex, endindex;
01256 float leftcoord, rightcoord;
01257 int leftindex, rightindex;
01258 float step;
01259
01260 doneany = FALSE;
01261 startindex = 0;
01262 for (segment = 1; segment < segments - 1; segment++) {
01263 step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0,
01264 (xstarts[segment] + xstarts[segment + 1]) / 2.0);
01265 if (step < 0)
01266 step = -step;
01267 if (step > jumplimit) {
01268 while (xcoords[startindex] < xstarts[segment - 1])
01269 startindex++;
01270 centreindex = startindex;
01271 while (xcoords[centreindex] < xstarts[segment])
01272 centreindex++;
01273 endindex = centreindex;
01274 while (xcoords[endindex] < xstarts[segment + 1])
01275 endindex++;
01276 if (segments >= SPLINESIZE) {
01277 if (textord_debug_baselines)
01278 tprintf ("Too many segments to resegment spline!!\n");
01279 }
01280 else if (endindex - startindex >= textord_spline_medianwin * 3) {
01281 while (centreindex - startindex <
01282 textord_spline_medianwin * 3 / 2)
01283 centreindex++;
01284 while (endindex - centreindex <
01285 textord_spline_medianwin * 3 / 2)
01286 centreindex--;
01287 leftindex = (startindex + startindex + centreindex) / 3;
01288 rightindex = (centreindex + endindex + endindex) / 3;
01289 leftcoord =
01290 (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
01291 rightcoord =
01292 (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
01293 while (xcoords[leftindex] > leftcoord
01294 && leftindex - startindex > textord_spline_medianwin)
01295 leftindex--;
01296 while (xcoords[leftindex] < leftcoord
01297 && centreindex - leftindex >
01298 textord_spline_medianwin / 2)
01299 leftindex++;
01300 if (xcoords[leftindex] - leftcoord >
01301 leftcoord - xcoords[leftindex - 1])
01302 leftindex--;
01303 while (xcoords[rightindex] > rightcoord
01304 && rightindex - centreindex >
01305 textord_spline_medianwin / 2)
01306 rightindex--;
01307 while (xcoords[rightindex] < rightcoord
01308 && endindex - rightindex > textord_spline_medianwin)
01309 rightindex++;
01310 if (xcoords[rightindex] - rightcoord >
01311 rightcoord - xcoords[rightindex - 1])
01312 rightindex--;
01313 if (textord_debug_baselines)
01314 tprintf ("Splitting spline at %d with step %g at (%d,%d)\n",
01315 xstarts[segment],
01316 baseline->
01317 step ((xstarts[segment - 1] +
01318 xstarts[segment]) / 2.0,
01319 (xstarts[segment] +
01320 xstarts[segment + 1]) / 2.0),
01321 (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
01322 (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
01323 insert_spline_point (xstarts, segment,
01324 (xcoords[leftindex - 1] +
01325 xcoords[leftindex]) / 2,
01326 (xcoords[rightindex - 1] +
01327 xcoords[rightindex]) / 2, segments);
01328 doneany = TRUE;
01329 }
01330 else if (textord_debug_baselines) {
01331 tprintf
01332 ("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",
01333 startindex, centreindex, endindex,
01334 (INT32) textord_spline_medianwin);
01335 }
01336 }
01337
01338
01339
01340
01341 }
01342 return doneany;
01343 }
01344
01345
01351 void
01352 insert_spline_point (
01353 int xstarts[],
01354 int segment,
01355 int coord1,
01356 int coord2, int &segments
01357 ) {
01358 int index;
01359
01360 for (index = segments; index > segment; index--)
01361 xstarts[index + 1] = xstarts[index];
01362 segments++;
01363 xstarts[segment] = coord1;
01364 xstarts[segment + 1] = coord2;
01365 }
01366
01367
01374 void
01375 find_lesser_parts (
01376 TO_ROW * row,
01377 BOX blobcoords[],
01378 int blobcount,
01379 char partids[],
01380 int partsizes[],
01381 int partcount,
01382 int bestpart
01383 ) {
01384 register int blobindex;
01385 register int partition;
01386 int xcentre;
01387 int poscount;
01388 int negcount;
01389 float partsteps[MAXPARTS];
01390 float bestpos;
01391 float bestneg;
01392 int runlength;
01393 int biggestrun;
01394
01395 biggestrun = 0;
01396 for (partition = 0; partition < partcount; partition++)
01397 partsteps[partition] = 0.0;
01398 for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {
01399 xcentre = (blobcoords[blobindex].left ()
01400 + blobcoords[blobindex].right ()) >> 1;
01401
01402 if (partids[blobindex] != bestpart) {
01403 runlength++;
01404 if (runlength > biggestrun)
01405 biggestrun = runlength;
01406 partsteps[partids[blobindex]] += blobcoords[blobindex].bottom ()
01407 - row->baseline.y (xcentre);
01408 }
01409 else
01410 runlength = 0;
01411 }
01412 if (biggestrun > MAXBADRUN)
01413 row->xheight = -1.0f;
01414 else
01415 row->xheight = 1.0f;
01416 poscount = negcount = 0;
01417 bestpos = bestneg = 0.0;
01418 for (partition = 0; partition < partcount; partition++) {
01419 if (partition != bestpart) {
01420 partsteps[partition] /= partsizes[partition];
01421 if (partsteps[partition] >= MINASCRISE
01422 && partsizes[partition] > poscount) {
01423
01424 bestpos = partsteps[partition];
01425
01426 poscount = partsizes[partition];
01427 }
01428 if (partsteps[partition] <= -MINASCRISE
01429 && partsizes[partition] > negcount) {
01430
01431 bestneg = partsteps[partition];
01432
01433 negcount = partsizes[partition];
01434 }
01435 }
01436 }
01437
01438 partsteps[bestpart] /= blobcount;
01439 row->descdrop = bestneg;
01440 }
01441
01442
01450 void
01451 old_first_xheight (
01452 TO_ROW * row,
01453 BOX blobcoords[],
01454 int initialheight,
01455 int blobcount,
01456 QSPLINE * baseline,
01457 float jumplimit
01458 ) {
01459 register int blobindex;
01460
01461 STATS heightstat (0, MAXHEIGHT);
01462 int height;
01463 int xcentre;
01464 int lineheight;
01465 float ascenders;
01466 int asccount;
01467 float xsum;
01468 int xcount;
01469 register float diff;
01470
01471 if (blobcount > 1) {
01472 for (blobindex = 0; blobindex < blobcount; blobindex++) {
01473 xcentre = (blobcoords[blobindex].left ()
01474 + blobcoords[blobindex].right ()) / 2;
01475
01476 height = (int) (blobcoords[blobindex].top ()
01477 - baseline->y (xcentre) + 0.5);
01478 if (height > initialheight * oldbl_xhfract
01479 && height > textord_min_xheight)
01480 heightstat.add (height, 1);
01481 }
01482 if (heightstat.get_total () > 3) {
01483 lineheight = (int) heightstat.ile (0.25);
01484 if (lineheight <= 0)
01485 lineheight = (int) heightstat.ile (0.5);
01486 }
01487 else
01488 lineheight = initialheight;
01489 }
01490 else {
01491 lineheight = (int) (blobcoords[0].top ()
01492 - baseline->y ((blobcoords[0].left ()
01493 + blobcoords[0].right ()) / 2) +
01494 0.5);
01495 }
01496
01497 xsum = 0.0f;
01498 xcount = 0;
01499 for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount;
01500 blobindex++) {
01501 xcentre = (blobcoords[blobindex].left ()
01502 + blobcoords[blobindex].right ()) / 2;
01503 diff = blobcoords[blobindex].top () - baseline->y (xcentre);
01504
01505 if (diff > lineheight + jumplimit) {
01506 ascenders += diff;
01507 asccount++;
01508 }
01509 else if (diff > lineheight - jumplimit) {
01510 xsum += diff;
01511 xcount++;
01512 }
01513 }
01514 if (xcount > 0)
01515 xsum /= xcount;
01516 else
01517 xsum = (float) lineheight;
01518 row->xheight *= xsum;
01519 if (asccount > 0)
01520 row->ascrise = ascenders / asccount - xsum;
01521 else
01522 row->ascrise = 0.0f;
01523 if (row->xheight == 0)
01524 row->xheight = -1.0f;
01525 }
01526
01527
01535 void
01536 make_first_xheight (
01537 TO_ROW * row,
01538 BOX blobcoords[],
01539 int lineheight,
01540 int init_lineheight,
01541 int blobcount,
01542 QSPLINE * baseline,
01543 float jumplimit
01544 ) {
01545 int *heights;
01546 STATS heightstat (0, HEIGHTBUCKETS);
01547 int modelist[MODENUM];
01548 int blobindex;
01549 int mode_count;
01550 int sign_bit;
01551 int mode_threshold;
01552
01553 sign_bit = row->xheight > 0 ? 1 : -1;
01554 heights = make_height_array (blobcoords, blobcount, baseline);
01555
01556 mode_count = 0;
01557 for (blobindex = 0; blobindex < blobcount; blobindex++) {
01558 if (heights[blobindex] > lineheight * oldbl_xhfract
01559 && blobcoords[blobindex].height () > init_lineheight * 0.25
01560 && heights[blobindex] > textord_min_xheight)
01561 heightstat.add (heights[blobindex], 1);
01562 if (blobcoords[blobindex].height () > init_lineheight * 0.25)
01563 mode_count++;
01564 }
01565
01566 mode_threshold = (int) (blobcount * 0.1);
01567 if (oldbl_dot_error_size > 1 || oldbl_xhfix)
01568 mode_threshold = (int) (mode_count * 0.1);
01569
01570 if (textord_oldbl_debug) {
01571 tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n",
01572 blobcount, mode_count, mode_threshold);
01573 }
01574 find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);
01575 if (textord_oldbl_debug) {
01576 for (blobindex = 0; blobindex < MODENUM; blobindex++)
01577 tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]);
01578 tprintf ("\n");
01579 }
01580 pick_x_height(row, modelist, &heightstat, mode_threshold);
01581
01582 if (textord_oldbl_debug)
01583 tprintf ("Output xheight=%g\n", row->xheight);
01584 if (row->xheight < 0 && textord_oldbl_debug)
01585 tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight);
01586
01587 free_mem(heights);
01588
01589 if (sign_bit < 0)
01590 row->xheight = -row->xheight;
01591 }
01592
01593
01599 int *
01600 make_height_array (
01601 BOX blobcoords[],
01602 int blobcount,
01603 QSPLINE * baseline
01604 ) {
01605 int blobindex;
01606 int xcenter;
01607 int *heights;
01608
01609 heights = (int *) alloc_mem (sizeof (int) * blobcount);
01610
01611 for (blobindex = 0; blobindex < blobcount; blobindex++) {
01612 xcenter = (blobcoords[blobindex].left () +
01613 blobcoords[blobindex].right ()) / 2;
01614 heights[blobindex] = (int) (blobcoords[blobindex].top () -
01615 baseline->y (xcenter) + 0.5);
01616 }
01617
01618 return (heights);
01619 }
01620
01621
01622 const int kMinModeFactor = 12;
01623
01630 void
01631 find_top_modes (
01632 STATS * stats,
01633 int statnum,
01634 int modelist[], int modenum
01635 ) {
01636 int mode_count;
01637 int last_i = 0;
01638 int last_max = MAX_INT32;
01639 int i;
01640 int mode;
01641 int total_max = 0;
01642
01643 for (mode_count = 0; mode_count < modenum; mode_count++) {
01644 mode = 0;
01645 for (i = 0; i < statnum; i++) {
01646 if (stats->pile_count (i) > stats->pile_count (mode)) {
01647 if ((stats->pile_count (i) < last_max) ||
01648 ((stats->pile_count (i) == last_max) && (i > last_i))) {
01649 mode = i;
01650 }
01651 }
01652 }
01653 last_i = mode;
01654 last_max = stats->pile_count (last_i);
01655 total_max += last_max;
01656 if (last_max <= total_max / kMinModeFactor)
01657 mode = 0;
01658 modelist[mode_count] = mode;
01659 }
01660 }
01661
01662
01668 void
01669 pick_x_height (
01670 TO_ROW * row,
01671
01672 int modelist[], STATS * heightstat,
01673 int mode_threshold) {
01674 int x;
01675 int y;
01676 int z;
01677 float ratio;
01678 int found_one_bigger = FALSE;
01679 int best_x_height = 0;
01680 int best_asc = 0;
01681 int num_in_best;
01682
01683 for (x = 0; x < MODENUM; x++) {
01684 for (y = 0; y < MODENUM; y++) {
01685
01686 if (modelist[x] && modelist[y] &&
01687 heightstat->pile_count (modelist[x]) > mode_threshold) {
01688 ratio = (float) modelist[y] / (float) modelist[x];
01689 if (1.2 < ratio && ratio < 1.8) {
01690 if (modelist[y] && modelist[x]) {
01691
01692 best_x_height = modelist[x];
01693 num_in_best = heightstat->pile_count (modelist[x]);
01694
01695
01696 do {
01697 found_one_bigger = FALSE;
01698 for (z = 0; z < MODENUM; z++) {
01699 if (modelist[z] == best_x_height + 1) {
01700 ratio =
01701 (float) modelist[y] / (float) modelist[z];
01702 if ((1.2 < ratio && ratio < 1.8) &&
01703
01704 heightstat->pile_count (modelist[z]) >
01705 num_in_best * 0.5) {
01706 best_x_height++;
01707 found_one_bigger = TRUE;
01708 break;
01709 }
01710 }
01711 }
01712 }
01713 while (found_one_bigger);
01714
01715
01716
01717 best_asc = modelist[y];
01718 num_in_best = heightstat->pile_count (modelist[y]);
01719
01720
01721 do {
01722 found_one_bigger = FALSE;
01723 for (z = 0; z < MODENUM; z++) {
01724 if (modelist[z] > best_asc) {
01725 ratio =
01726 (float) modelist[z] /
01727 (float) best_x_height;
01728 if ((1.2 < ratio && ratio < 1.8) &&
01729
01730 heightstat->pile_count (modelist[z]) >
01731 num_in_best * 0.5) {
01732 best_asc = modelist[z];
01733 found_one_bigger = TRUE;
01734 break;
01735 }
01736 }
01737 }
01738 }
01739 while (found_one_bigger);
01740
01741 row->xheight = (float) best_x_height;
01742 row->ascrise = (float) best_asc - best_x_height;
01743 return;
01744 }
01745 }
01746 }
01747 }
01748 }
01749
01750 best_x_height = modelist[0];
01751 num_in_best = heightstat->pile_count (best_x_height);
01752 do {
01753
01754 found_one_bigger = FALSE;
01755 for (z = 1; z < MODENUM; z++) {
01756
01757 if ((modelist[z] == best_x_height + 1) &&
01758 (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) {
01759 best_x_height++;
01760 found_one_bigger = TRUE;
01761 break;
01762 }
01763 }
01764 }
01765 while (found_one_bigger);
01766
01767 row->ascrise = 0.0f;
01768 row->xheight = (float) best_x_height;
01769 if (row->xheight == 0)
01770 row->xheight = -1.0f;
01771 }