#include "mfcpch.h"
#include "statistc.h"
#include "quadlsq.h"
#include "lmedsq.h"
#include "makerow.h"
#include "drawtord.h"
#include "oldbasel.h"
#include "tprintf.h"
Go to the source code of this file.
#define ABS(x) ((x)<0 ? (-(x)) : (x))
Definition at line 82 of file oldbasel.cpp.
Referenced by choose_partition(), get_ydiffs(), and make_first_baseline().
#define DELTAHEIGHT 5.0
#define DESCENDER_FRACTION 0.5
#define EXTERN
(C) Copyright 1993, Hewlett-Packard Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Definition at line 29 of file oldbasel.cpp.
#define GOODHEIGHT 5
Definition at line 76 of file oldbasel.cpp.
#define HEIGHTBUCKETS 200
#define MAXBADRUN 2
Max non best for failed.
Definition at line 70 of file oldbasel.cpp.
Referenced by find_lesser_parts(), and merge_oldbl_parts().
#define MAXHEIGHT 300
Max blob height.
Definition at line 66 of file oldbasel.cpp.
Referenced by get_blob_coords(), and old_first_xheight().
#define MAXHEIGHTVARIANCE 0.15
#define MAXLOOPS 10
Definition at line 77 of file oldbasel.cpp.
#define MAXOVERLAP 0.1
max 10% missed overlap
Definition at line 68 of file oldbasel.cpp.
Referenced by correlate_neighbours(), make_first_baseline(), and make_holed_baseline().
#define MAXPARTS 6
Definition at line 79 of file oldbasel.cpp.
Referenced by choose_partition(), find_lesser_parts(), find_textlines(), and partition_line().
#define MIN_ASC_FRACTION 0.20
#define MIN_DESC_FRACTION 0.25
#define MINASCRISE 2.0
Min ascender/desc step.
Definition at line 62 of file oldbasel.cpp.
Referenced by find_lesser_parts(), and find_textlines().
#define MODENUM 10
Definition at line 78 of file oldbasel.cpp.
Referenced by make_first_xheight(), and pick_x_height().
#define SPLINESIZE 23
Definition at line 80 of file oldbasel.cpp.
#define TURNLIMIT 1
Min size for turning point.
Definition at line 52 of file oldbasel.cpp.
Referenced by segment_spline().
#define X_HEIGHT_FRACTION 0.7
int choose_partition | ( | register float | diff, | |
float | partdiffs[], | |||
int | lastpart, | |||
float | jumplimit, | |||
int * | partcount | |||
) |
Select partition.
Choose a partition for the point and return the index.
Definition at line 956 of file oldbasel.cpp.
References ABS, MAXPARTS, and tprintf().
Referenced by partition_line().
00962 { 00963 register int partition; /*partition no */ 00964 int bestpart; /*best new partition */ 00965 float bestdelta; /*best gap from a part */ 00966 static float drift; /*drift from spline */ 00967 float delta; /*diff from part */ 00968 static float lastdelta; /*previous delta */ 00969 00970 if (lastpart < 0) { 00971 partdiffs[0] = diff; 00972 lastpart = 0; /*first point */ 00973 drift = 0.0f; 00974 lastdelta = 0.0f; 00975 } 00976 /*adjusted diff from part */ 00977 delta = diff - partdiffs[lastpart] - drift; 00978 if (textord_oldbl_debug) { 00979 tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, drift); 00980 } 00981 if (ABS (delta) > jumplimit / 2) { 00982 /*delta on part 0 */ 00983 bestdelta = diff - partdiffs[0] - drift; 00984 bestpart = 0; /*0 best so far */ 00985 for (partition = 1; partition < *partcount; partition++) { 00986 delta = diff - partdiffs[partition] - drift; 00987 if (ABS (delta) < ABS (bestdelta)) { 00988 bestdelta = delta; 00989 bestpart = partition; /*part with nearest jump */ 00990 } 00991 } 00992 delta = bestdelta; 00993 /*too far away */ 00994 if (ABS (bestdelta) > jumplimit 00995 && *partcount < MAXPARTS) { /*and spare part left */ 00996 bestpart = (*partcount)++; /*best was new one */ 00997 /*start new one */ 00998 partdiffs[bestpart] = diff - drift; 00999 delta = 0.0f; 01000 } 01001 } 01002 else { 01003 bestpart = lastpart; /*best was last one */ 01004 } 01005 01006 if (bestpart == lastpart 01007 && (ABS (delta - lastdelta) < jumplimit / 2 01008 || ABS (delta) < jumplimit / 2)) 01009 /*smooth the drift */ 01010 drift = (3 * drift + delta) / 3; 01011 lastdelta = delta; 01012 01013 if (textord_oldbl_debug) { 01014 tprintf ("P=%d\n", bestpart); 01015 } 01016 01017 return bestpart; 01018 }
void correlate_lines | ( | TO_BLOCK * | block | ) |
Cleanup lines.
Correlate the x-heights and ascender heights of a block to fill-in the ascender height and descender height for rows without one. Also fix baselines of rows without a decent fit.
Definition at line 127 of file oldbasel.cpp.
References alloc_mem(), correlate_neighbours(), correlate_with_stats(), and free_mem().
Referenced by make_old_baselines().
00129 { 00130 TO_ROW **rows; //array of ptrs 00131 int rowcount; /*no of rows to do */ 00132 register int rowindex; /*no of row */ 00133 //iterator 00134 TO_ROW_IT row_it = block->get_rows (); 00135 00136 rowcount = row_it.length (); 00137 if (rowcount == 0) { 00138 //default value 00139 block->xheight = block->line_size; 00140 return; /*none to do */ 00141 } 00142 rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *)); 00143 rowindex = 0; 00144 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) 00145 //make array 00146 rows[rowindex++] = row_it.data (); 00147 00148 /*try to fix bad lines */ 00149 correlate_neighbours(block, rows, rowcount); 00150 00151 block->xheight = (float) correlate_with_stats (rows, rowcount); 00152 /*use stats */ 00153 if (block->xheight <= 0) 00154 //desperate 00155 block->xheight = block->line_size * textord_merge_x; 00156 if (block->xheight < textord_min_xheight) 00157 block->xheight = (float) textord_min_xheight; 00158 00159 free_mem(rows); 00160 }
Fix bad rows.
Try to fix rows that had a bad spline fit by using neighbours.
Definition at line 168 of file oldbasel.cpp.
References TO_ROW::baseline, baseline, find_textlines(), MAXOVERLAP, QSPLINE::overlap(), and TO_ROW::xheight.
Referenced by correlate_lines().
00172 { 00173 TO_ROW *row; /*current row */ 00174 register int rowindex; /*no of row */ 00175 register int otherrow; /*second row */ 00176 int upperrow; /*row above to use */ 00177 int lowerrow; /*row below to use */ 00178 float biggest; 00179 00180 for (rowindex = 0; rowindex < rowcount; rowindex++) { 00181 row = rows[rowindex]; /*current row */ 00182 if (row->xheight < 0) { 00183 /*quadratic failed */ 00184 for (otherrow = rowindex - 2; 00185 otherrow >= 0 00186 && (rows[otherrow]->xheight < 0.0 00187 || !row->baseline.overlap (&rows[otherrow]->baseline, 00188 MAXOVERLAP)); otherrow--); 00189 upperrow = otherrow; /*decent row above */ 00190 for (otherrow = rowindex + 1; 00191 otherrow < rowcount 00192 && (rows[otherrow]->xheight < 0.0 00193 || !row->baseline.overlap (&rows[otherrow]->baseline, 00194 MAXOVERLAP)); otherrow++); 00195 lowerrow = otherrow; /*decent row below */ 00196 if (upperrow >= 0) 00197 find_textlines (block, row, 2, &rows[upperrow]->baseline); 00198 if (row->xheight < 0 && lowerrow < rowcount) 00199 find_textlines (block, row, 2, &rows[lowerrow]->baseline); 00200 if (row->xheight < 0) { 00201 if (upperrow >= 0) 00202 find_textlines (block, row, 1, &rows[upperrow]->baseline); 00203 else if (lowerrow < rowcount) 00204 find_textlines (block, row, 1, &rows[lowerrow]->baseline); 00205 } 00206 } 00207 } 00208 00209 for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) { 00210 row = rows[rowindex]; /*current row */ 00211 if (row->xheight < 0) /*linear failed */ 00212 /*make do */ 00213 row->xheight = -row->xheight; 00214 biggest = MAX (biggest, row->xheight); 00215 } 00216 }
int correlate_with_stats | ( | TO_ROW ** | rows, | |
int | rowcount | |||
) |
Fix xheights.
Correlate the x-heights and ascender heights of a block to fill-in the ascender height and descender height for rows without one.
Definition at line 225 of file oldbasel.cpp.
References TO_ROW::ascrise, TO_ROW::descdrop, and TO_ROW::xheight.
Referenced by correlate_lines().
00228 { 00229 TO_ROW *row; /*current row */ 00230 register int rowindex; /*no of row */ 00231 float lineheight; /*mean x-height */ 00232 float ascheight; /*average ascenders */ 00233 float minascheight; /*min allowed ascheight */ 00234 int xcount; /*no of samples for xheight */ 00235 float fullheight; /*mean top height */ 00236 int fullcount; /*no of samples */ 00237 float descheight; /*mean descender drop */ 00238 float mindescheight; /*min allowed descheight */ 00239 int desccount; /*no of samples */ 00240 float xshift; /*shift in xheight */ 00241 00242 /*no samples */ 00243 xcount = fullcount = desccount = 0; 00244 lineheight = ascheight = fullheight = descheight = 0.0; 00245 for (rowindex = 0; rowindex < rowcount; rowindex++) { 00246 row = rows[rowindex]; /*current row */ 00247 if (row->ascrise > 0.0) { /*got ascenders? */ 00248 lineheight += row->xheight;/*average x-heights */ 00249 ascheight += row->ascrise; /*average ascenders */ 00250 xcount++; 00251 } 00252 else { 00253 fullheight += row->xheight;/*assume full height */ 00254 fullcount++; 00255 } 00256 if (row->descdrop < 0.0) { /*got descenders? 
*/ 00257 /*average descenders */ 00258 descheight += row->descdrop; 00259 desccount++; 00260 } 00261 } 00262 00263 if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) { 00264 lineheight /= xcount; /*average x-height */ 00265 /*average caps height */ 00266 fullheight = lineheight + ascheight / xcount; 00267 /*must be decent size */ 00268 if (fullheight < lineheight * (1 + MIN_ASC_FRACTION)) 00269 fullheight = lineheight * (1 + MIN_ASC_FRACTION); 00270 } 00271 else { 00272 fullheight /= fullcount; /*average max height */ 00273 /*guess x-height */ 00274 lineheight = fullheight * X_HEIGHT_FRACTION; 00275 } 00276 if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2)) 00277 descheight /= desccount; /*average descenders */ 00278 else 00279 /*guess descenders */ 00280 descheight = -lineheight * DESCENDER_FRACTION; 00281 00282 minascheight = lineheight * MIN_ASC_FRACTION; 00283 mindescheight = -lineheight * MIN_DESC_FRACTION; 00284 for (rowindex = 0; rowindex < rowcount; rowindex++) { 00285 row = rows[rowindex]; /*do each row */ 00286 row->all_caps = FALSE; 00287 if (row->ascrise / row->xheight < MIN_ASC_FRACTION) { 00288 /*no ascenders */ 00289 if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE) 00290 && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) { 00291 row->ascrise = fullheight - lineheight; 00292 /*shift in x */ 00293 xshift = lineheight - row->xheight; 00294 /*set to average */ 00295 row->xheight = lineheight; 00296 00297 } 00298 else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE) 00299 && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) { 00300 row->ascrise = row->xheight - lineheight; 00301 xshift = -row->ascrise; /*shift in x */ 00302 /*set to average */ 00303 row->xheight = lineheight; 00304 row->all_caps = TRUE; 00305 } 00306 else { 00307 row->ascrise = (fullheight - lineheight) * row->xheight 00308 / fullheight; 00309 xshift = -row->ascrise; /*shift in x */ 00310 /*scale it */ 00311 row->xheight -= row->ascrise; 
00312 row->all_caps = TRUE; 00313 } 00314 if (row->ascrise < minascheight) 00315 row->ascrise = 00316 row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION); 00317 } 00318 if (row->descdrop > mindescheight) { 00319 if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE) 00320 && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) 00321 /*set to average */ 00322 row->descdrop = descheight; 00323 else 00324 row->descdrop = -row->xheight * DESCENDER_FRACTION; 00325 } 00326 } 00327 return (int) lineheight; //block xheight 00328 }
void find_lesser_parts | ( | TO_ROW * | row, | |
BOX | blobcoords[], | |||
int | blobcount, | |||
char | partids[], | |||
int | partsizes[], | |||
int | partcount, | |||
int | bestpart | |||
) |
Get descenders.
Average the step from the spline for the other partitions and find the commonest partition which has a descender.
Definition at line 1375 of file oldbasel.cpp.
References TO_ROW::baseline, BOX::bottom(), TO_ROW::descdrop, MAXBADRUN, MAXPARTS, MINASCRISE, TO_ROW::xheight, and QSPLINE::y().
Referenced by find_textlines().
01383 { 01384 register int blobindex; /*index of blob */ 01385 register int partition; /*current partition */ 01386 int xcentre; /*centre of blob */ 01387 int poscount; /*count of best up step */ 01388 int negcount; /*count of best down step */ 01389 float partsteps[MAXPARTS]; /*average step to part */ 01390 float bestpos; /*best up step */ 01391 float bestneg; /*best down step */ 01392 int runlength; /*length of bad run */ 01393 int biggestrun; /*biggest bad run */ 01394 01395 biggestrun = 0; 01396 for (partition = 0; partition < partcount; partition++) 01397 partsteps[partition] = 0.0; /*zero accumulators */ 01398 for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) { 01399 xcentre = (blobcoords[blobindex].left () 01400 + blobcoords[blobindex].right ()) >> 1; 01401 /*in other parts */ 01402 if (partids[blobindex] != bestpart) { 01403 runlength++; /*run of non bests */ 01404 if (runlength > biggestrun) 01405 biggestrun = runlength; 01406 partsteps[partids[blobindex]] += blobcoords[blobindex].bottom () 01407 - row->baseline.y (xcentre); 01408 } 01409 else 01410 runlength = 0; 01411 } 01412 if (biggestrun > MAXBADRUN) 01413 row->xheight = -1.0f; /*failed */ 01414 else 01415 row->xheight = 1.0f; /*success */ 01416 poscount = negcount = 0; 01417 bestpos = bestneg = 0.0; /*no step yet */ 01418 for (partition = 0; partition < partcount; partition++) { 01419 if (partition != bestpart) { 01420 partsteps[partition] /= partsizes[partition]; 01421 if (partsteps[partition] >= MINASCRISE 01422 && partsizes[partition] > poscount) { 01423 /*ascender rise */ 01424 bestpos = partsteps[partition]; 01425 /*2nd most popular */ 01426 poscount = partsizes[partition]; 01427 } 01428 if (partsteps[partition] <= -MINASCRISE 01429 && partsizes[partition] > negcount) { 01430 /*ascender rise */ 01431 bestneg = partsteps[partition]; 01432 /*2nd most popular */ 01433 negcount = partsizes[partition]; 01434 } 01435 } 01436 } 01437 /*average x-height */ 01438 partsteps[bestpart] 
/= blobcount; 01439 row->descdrop = bestneg; 01440 }
Get baseline.
Compute the baseline for the given row.
Definition at line 336 of file oldbasel.cpp.
References alloc_mem(), TO_ROW::ascrise, TO_ROW::baseline, TO_ROW::blob_list(), TO_ROW::descdrop, QSPLINE::extrapolate(), find_lesser_parts(), free_mem(), get_blob_coords(), GOLDENROD, TO_ROW::line_m(), make_first_baseline(), make_first_xheight(), make_holed_baseline(), MAXPARTS, MINASCRISE, old_first_xheight(), partition_coords(), partition_line(), QSPLINE::plot(), segment_spline(), SPLINESIZE, split_stepped_spline(), to_win, tprintf(), and TO_ROW::xheight.
Referenced by correlate_neighbours(), and make_old_baselines().
00341 { 00342 int partcount; /*no of partitions of */ 00343 BOOL8 holed_line; //lost too many blobs 00344 int bestpart; /*biggest partition */ 00345 char *partids; /*partition no of each blob */ 00346 int partsizes[MAXPARTS]; /*no in each partition */ 00347 int lineheight; /*guessed x-height */ 00348 float jumplimit; /*allowed delta change */ 00349 int *xcoords; /*useful sample points */ 00350 int *ycoords; /*useful sample points */ 00351 BOX *blobcoords; /*edges of blob rectangles */ 00352 int blobcount; /*no of blobs on line */ 00353 float *ydiffs; /*diffs from 1st approx */ 00354 int pointcount; /*no of coords */ 00355 int xstarts[SPLINESIZE + 1]; //segment boundaries 00356 int segments; //no of segments 00357 00358 //no of blobs in row 00359 blobcount = row->blob_list ()->length (); 00360 partids = (char *) alloc_mem (blobcount * sizeof (char)); 00361 xcoords = (int *) alloc_mem (blobcount * sizeof (int)); 00362 ycoords = (int *) alloc_mem (blobcount * sizeof (int)); 00363 blobcoords = (BOX *) alloc_mem (blobcount * sizeof (BOX)); 00364 ydiffs = (float *) alloc_mem (blobcount * sizeof (float)); 00365 00366 lineheight = get_blob_coords (row, (int) block->line_size, blobcoords, 00367 holed_line, blobcount); 00368 /*limit for line change */ 00369 jumplimit = lineheight * textord_oldbl_jumplimit; 00370 if (jumplimit < MINASCRISE) 00371 jumplimit = MINASCRISE; 00372 00373 if (textord_oldbl_debug) { 00374 tprintf 00375 ("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n", 00376 block->line_size, lineheight, jumplimit); 00377 } 00378 if (holed_line) 00379 make_holed_baseline (blobcoords, blobcount, spline, &row->baseline, 00380 row->line_m ()); 00381 else 00382 make_first_baseline (blobcoords, blobcount, 00383 xcoords, ycoords, spline, &row->baseline, jumplimit); 00384 #ifndef GRAPHICS_DISABLED 00385 if (textord_show_final_rows) 00386 row->baseline.plot (to_win, GOLDENROD); 00387 #endif 00388 if (blobcount > 1) { 00389 bestpart = partition_line 
(blobcoords, blobcount, 00390 &partcount, partids, partsizes, 00391 &row->baseline, jumplimit, ydiffs); 00392 pointcount = partition_coords (blobcoords, blobcount, 00393 partids, bestpart, xcoords, ycoords); 00394 segments = segment_spline (blobcoords, blobcount, 00395 xcoords, ycoords, 00396 degree, pointcount, xstarts); 00397 if (!holed_line) { 00398 do { 00399 row->baseline = QSPLINE (xstarts, segments, 00400 xcoords, ycoords, pointcount, degree); 00401 } 00402 while (textord_oldbl_split_splines 00403 && split_stepped_spline (&row->baseline, jumplimit / 2, 00404 xcoords, xstarts, segments)); 00405 } 00406 find_lesser_parts(row, 00407 blobcoords, 00408 blobcount, 00409 partids, 00410 partsizes, 00411 partcount, 00412 bestpart); 00413 00414 } 00415 else { 00416 row->xheight = -1.0f; /*failed */ 00417 row->descdrop = 0.0f; 00418 row->ascrise = 0.0f; 00419 } 00420 row->baseline.extrapolate (row->line_m (), 00421 block->block->bounding_box ().left (), 00422 block->block->bounding_box ().right ()); 00423 if (textord_really_old_xheight) 00424 old_first_xheight (row, blobcoords, lineheight, 00425 blobcount, &row->baseline, jumplimit); 00426 else 00427 make_first_xheight (row, blobcoords, lineheight, (int) block->line_size, 00428 blobcount, &row->baseline, jumplimit); 00429 free_mem(partids); 00430 free_mem(xcoords); 00431 free_mem(ycoords); 00432 free_mem(blobcoords); 00433 free_mem(ydiffs); 00434 }
void find_top_modes | ( | STATS * | stats, | |
int | statnum, | |||
int | modelist[], | |||
int | modenum | |||
) |
Get modes.
Fill the input array with the indices of the top ten modes of the input distribution.
Definition at line 1631 of file oldbasel.cpp.
References MAX_INT32, mode, and STATS::pile_count().
Referenced by make_first_xheight().
01635 { 01636 int mode_count; 01637 int last_i = 0; 01638 int last_max = MAX_INT32; 01639 int i; 01640 int mode; 01641 int total_max = 0; 01642 01643 for (mode_count = 0; mode_count < modenum; mode_count++) { 01644 mode = 0; 01645 for (i = 0; i < statnum; i++) { 01646 if (stats->pile_count (i) > stats->pile_count (mode)) { 01647 if ((stats->pile_count (i) < last_max) || 01648 ((stats->pile_count (i) == last_max) && (i > last_i))) { 01649 mode = i; 01650 } 01651 } 01652 } 01653 last_i = mode; 01654 last_max = stats->pile_count (last_i); 01655 total_max += last_max; 01656 if (last_max <= total_max / kMinModeFactor) 01657 mode = 0; 01658 modelist[mode_count] = mode; 01659 } 01660 }
int get_blob_coords | ( | TO_ROW * | row, | |
INT32 | lineheight, | |||
BOX * | blobcoords, | |||
BOOL8 & | holed_line, | |||
int & | outcount | |||
) |
Get boxes.
Definition at line 447 of file oldbasel.cpp.
References STATS::add(), TO_ROW::blob_list(), box_next_pre_chopped(), STATS::get_total(), BOX::height(), STATS::ile(), and MAXHEIGHT.
Referenced by find_textlines().
00453 { 00454 //blobs 00455 BLOBNBOX_IT blob_it = row->blob_list (); 00456 register int blobindex; /*no along text line */ 00457 int losscount; //lost blobs 00458 int maxlosscount; //greatest lost blobs 00459 /*height stat collection */ 00460 STATS heightstat (0, MAXHEIGHT); 00461 00462 if (blob_it.empty ()) 00463 return 0; //none 00464 maxlosscount = 0; 00465 losscount = 0; 00466 blob_it.mark_cycle_pt (); 00467 blobindex = 0; 00468 do { 00469 blobcoords[blobindex] = box_next_pre_chopped (&blob_it); 00470 if (blobcoords[blobindex].height () > lineheight * 0.25) 00471 heightstat.add (blobcoords[blobindex].height (), 1); 00472 if (blobindex == 0 00473 || blobcoords[blobindex].height () > lineheight * 0.25 00474 || blob_it.cycled_list ()) { 00475 blobindex++; /*no of merged blobs */ 00476 losscount = 0; 00477 } 00478 else { 00479 if (blobcoords[blobindex].height () 00480 < blobcoords[blobindex].width () * oldbl_dot_error_size 00481 && blobcoords[blobindex].width () 00482 < blobcoords[blobindex].height () * oldbl_dot_error_size) { 00483 //counts as dot 00484 blobindex++; 00485 losscount = 0; 00486 } 00487 else { 00488 losscount++; //lost it 00489 if (losscount > maxlosscount) 00490 //remember max 00491 maxlosscount = losscount; 00492 } 00493 } 00494 } 00495 while (!blob_it.cycled_list ()); 00496 00497 holed_line = maxlosscount > oldbl_holed_losscount; 00498 outcount = blobindex; /*total blobs */ 00499 00500 if (heightstat.get_total () > 1) 00501 /*guess x-height */ 00502 return (int) heightstat.ile (0.25); 00503 else 00504 return blobcoords[0].height (); 00505 }
Evaluate differences.
Definition at line 906 of file oldbasel.cpp.
References ABS, BOX::bottom(), BOX::left(), MAX_INT32, QSPLINE::step(), and QSPLINE::y().
Referenced by partition_line().
00911 { 00912 register int blobindex; /*current blob */ 00913 int xcentre; /*xcoord */ 00914 int lastx; /*last xcentre */ 00915 float diffsum; /*sum of diffs */ 00916 float diff; /*current difference */ 00917 float drift; /*sum of spline steps */ 00918 float bestsum; /*smallest diffsum */ 00919 int bestindex; /*index of bestsum */ 00920 00921 diffsum = 0.0f; 00922 bestindex = 0; 00923 bestsum = (float) MAX_INT32; 00924 drift = 0.0f; 00925 lastx = blobcoords[0].left (); 00926 /*do each blob in row */ 00927 for (blobindex = 0; blobindex < blobcount; blobindex++) { 00928 /*centre of blob */ 00929 xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1; 00930 //step functions in spline 00931 drift += spline->step (lastx, xcentre); 00932 lastx = xcentre; 00933 diff = blobcoords[blobindex].bottom (); 00934 diff -= spline->y (xcentre); 00935 diff += drift; 00936 ydiffs[blobindex] = diff; /*store difference */ 00937 if (blobindex > 2) 00938 /*remove old one */ 00939 diffsum -= ABS (ydiffs[blobindex - 3]); 00940 diffsum += ABS (diff); /*add new one */ 00941 if (blobindex >= 2 && diffsum < bestsum) { 00942 bestsum = diffsum; /*find min sum */ 00943 bestindex = blobindex - 1; /*middle of set */ 00944 } 00945 } 00946 return bestindex; 00947 }
/**
 * Insert a new spline point and shuffle up the others.
 *
 * Entries xstarts[segment + 1 .. segments] are each moved up by one place,
 * segments is incremented, and then xstarts[segment] and
 * xstarts[segment + 1] are set to coord1 and coord2. The net effect is that
 * the old value at xstarts[segment] is replaced by the pair coord1, coord2.
 * The array must have room for one more entry.
 *
 * @param xstarts   array of segment boundaries, modified in place
 * @param segment   index at which coord1 is written
 * @param coord1    value for xstarts[segment]
 * @param coord2    value for xstarts[segment + 1]
 * @param segments  in/out: number of segments, incremented by one
 *
 * Doxygen: definition at line 1352 of file oldbasel.cpp.
 * Referenced by split_stepped_spline().
 */
void insert_spline_point(int xstarts[],
                         int segment,
                         int coord1,
                         int coord2,
                         int &segments) {
  /* open a gap by moving the tail up one slot, highest entry first */
  int slot = segments;
  while (slot > segment) {
    xstarts[slot + 1] = xstarts[slot];
    slot--;
  }
  segments++;
  xstarts[segment] = coord1;
  xstarts[segment + 1] = coord2;
}
void make_first_baseline | ( | BOX | blobcoords[], | |
int | blobcount, | |||
int | xcoords[], | |||
int | ycoords[], | |||
QSPLINE * | spline, | |||
QSPLINE * | baseline, | |||
float | jumplimit | |||
) |
Initial approximation.
Make the first estimate at a baseline, either by shifting a supplied previous spline, or by doing a piecewise linear approximation using all the blobs.
Definition at line 516 of file oldbasel.cpp.
References ABS, baseline, BOX::bottom(), BOX::left(), MAXOVERLAP, NULL, BOX::right(), QSPLINE::segments, SPLINESIZE, QSPLINE::xcoords, and QSPLINE::y().
Referenced by find_textlines().
00524 { 00525 int leftedge; /*left edge of line */ 00526 int rightedge; /*right edge of line */ 00527 int blobindex; /*current blob */ 00528 int segment; /*current segment */ 00529 float prevy, thisy, nexty; /*3 y coords */ 00530 float y1, y2, y3; /*3 smooth blobs */ 00531 float maxmax, minmin; /*absolute limits */ 00532 int x2 = 0; /*right edge of old y3 */ 00533 int ycount; /*no of ycoords in use */ 00534 float yturns[SPLINESIZE]; /*y coords of turn pts */ 00535 int xturns[SPLINESIZE]; /*xcoords of turn pts */ 00536 int xstarts[SPLINESIZE + 1]; 00537 int segments; //no of segments 00538 ICOORD shift; //shift of spline 00539 00540 prevy = 0; 00541 /*left edge of row */ 00542 leftedge = blobcoords[0].left (); 00543 /*right edge of line */ 00544 rightedge = blobcoords[blobcount - 1].right (); 00545 if (spline == NULL /*no given spline */ 00546 || spline->segments < 3 /*or trivial */ 00547 /*or too non-overlap */ 00548 || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge) 00549 || spline->xcoords[spline->segments - 1] < rightedge 00550 - MAXOVERLAP * (rightedge - leftedge)) { 00551 if (textord_oldbl_paradef) 00552 return; //use default 00553 xstarts[0] = blobcoords[0].left () - 1; 00554 for (blobindex = 0; blobindex < blobcount; blobindex++) { 00555 xcoords[blobindex] = (blobcoords[blobindex].left () 00556 + blobcoords[blobindex].right ()) / 2; 00557 ycoords[blobindex] = blobcoords[blobindex].bottom (); 00558 } 00559 xstarts[1] = blobcoords[blobcount - 1].right () + 1; 00560 segments = 1; /*no of segments */ 00561 00562 /*linear */ 00563 *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1); 00564 00565 if (blobcount >= 3) { 00566 y1 = y2 = y3 = 0.0f; 00567 ycount = 0; 00568 segment = 0; /*no of segments */ 00569 maxmax = minmin = 0.0f; 00570 thisy = ycoords[0] - baseline->y (xcoords[0]); 00571 nexty = ycoords[1] - baseline->y (xcoords[1]); 00572 for (blobindex = 2; blobindex < blobcount; blobindex++) { 00573 prevy = thisy; /*shift 
ycoords */ 00574 thisy = nexty; 00575 nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]); 00576 /*middle of smooth y */ 00577 if (ABS (thisy - prevy) < jumplimit 00578 && ABS (thisy - nexty) < jumplimit) { 00579 y1 = y2; /*shift window */ 00580 y2 = y3; 00581 y3 = thisy; /*middle point */ 00582 ycount++; 00583 /*local max */ 00584 if (ycount >= 3 && (y1 < y2 && y2 >= y3 00585 /*local min */ 00586 || y1 > y2 && y2 <= y3)) { 00587 if (segment < SPLINESIZE - 2) { 00588 /*turning pt */ 00589 xturns[segment] = x2; 00590 yturns[segment] = y2; 00591 segment++; /*no of spline segs */ 00592 } 00593 } 00594 if (ycount == 1) { 00595 maxmax = minmin = y3;/*initialise limits */ 00596 } 00597 else { 00598 if (y3 > maxmax) 00599 maxmax = y3; /*biggest max */ 00600 if (y3 < minmin) 00601 minmin = y3; /*smallest min */ 00602 } 00603 /*possible turning pt */ 00604 x2 = blobcoords[blobindex - 1].right (); 00605 } 00606 } 00607 00608 jumplimit *= 1.2; 00609 /*must be wavy */ 00610 if (maxmax - minmin > jumplimit) { 00611 ycount = segment; /*no of segments */ 00612 for (blobindex = 0, segment = 1; blobindex < ycount; 00613 blobindex++) { 00614 if (yturns[blobindex] > minmin + jumplimit 00615 || yturns[blobindex] < maxmax - jumplimit) { 00616 /*significant peak */ 00617 if (segment == 1 00618 || yturns[blobindex] > prevy + jumplimit 00619 || yturns[blobindex] < prevy - jumplimit) { 00620 /*different to previous */ 00621 xstarts[segment] = xturns[blobindex]; 00622 segment++; 00623 prevy = yturns[blobindex]; 00624 } 00625 /*bigger max */ 00626 else if (prevy > minmin + jumplimit && yturns[blobindex] > prevy 00627 /*smaller min */ 00628 || prevy < maxmax - jumplimit && yturns[blobindex] < prevy) { 00629 xstarts[segment - 1] = xturns[blobindex]; 00630 /*improved previous */ 00631 prevy = yturns[blobindex]; 00632 } 00633 } 00634 } 00635 xstarts[segment] = blobcoords[blobcount - 1].right () + 1; 00636 segments = segment; /*no of segments */ 00637 /*linear */ 00638 *baseline = 
QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1); 00639 } 00640 } 00641 } 00642 else { 00643 *baseline = *spline; /*copy it */ 00644 shift = ICOORD (0, (INT16) (blobcoords[0].bottom () 00645 - spline->y (blobcoords[0].right ()))); 00646 baseline->move (shift); 00647 } 00648 }
void make_first_xheight | ( | TO_ROW * | row, | |
BOX | blobcoords[], | |||
int | lineheight, | |||
int | init_lineheight, | |||
int | blobcount, | |||
QSPLINE * | baseline, | |||
float | jumplimit | |||
) |
Find xheight.
Makes an x-height spline by copying the baseline and shifting it. It estimates the x-height across the line to use as the shift. It also finds the ascender height if it can.
Definition at line 1536 of file oldbasel.cpp.
References STATS::add(), baseline, find_top_modes(), free_mem(), HEIGHTBUCKETS, make_height_array(), MODENUM, pick_x_height(), tprintf(), and TO_ROW::xheight.
Referenced by find_textlines().
01544 { 01545 int *heights; 01546 STATS heightstat (0, HEIGHTBUCKETS); 01547 int modelist[MODENUM]; 01548 int blobindex; 01549 int mode_count; //blobs to count in thr 01550 int sign_bit; 01551 int mode_threshold; 01552 01553 sign_bit = row->xheight > 0 ? 1 : -1; 01554 heights = make_height_array (blobcoords, blobcount, baseline); 01555 01556 mode_count = 0; 01557 for (blobindex = 0; blobindex < blobcount; blobindex++) { 01558 if (heights[blobindex] > lineheight * oldbl_xhfract 01559 && blobcoords[blobindex].height () > init_lineheight * 0.25 01560 && heights[blobindex] > textord_min_xheight) 01561 heightstat.add (heights[blobindex], 1); 01562 if (blobcoords[blobindex].height () > init_lineheight * 0.25) 01563 mode_count++; 01564 } 01565 01566 mode_threshold = (int) (blobcount * 0.1); 01567 if (oldbl_dot_error_size > 1 || oldbl_xhfix) 01568 mode_threshold = (int) (mode_count * 0.1); 01569 01570 if (textord_oldbl_debug) { 01571 tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n", 01572 blobcount, mode_count, mode_threshold); 01573 } 01574 find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM); 01575 if (textord_oldbl_debug) { 01576 for (blobindex = 0; blobindex < MODENUM; blobindex++) 01577 tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]); 01578 tprintf ("\n"); 01579 } 01580 pick_x_height(row, modelist, &heightstat, mode_threshold); 01581 01582 if (textord_oldbl_debug) 01583 tprintf ("Output xheight=%g\n", row->xheight); 01584 if (row->xheight < 0 && textord_oldbl_debug) 01585 tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight); 01586 01587 free_mem(heights); 01588 01589 if (sign_bit < 0) 01590 row->xheight = -row->xheight; 01591 }
Get array of heights.
Create an array of the number of blobs and each of their heights.
Definition at line 1600 of file oldbasel.cpp.
References alloc_mem(), and baseline.
Referenced by make_first_xheight().
01604 { 01605 int blobindex; 01606 int xcenter; 01607 int *heights; 01608 01609 heights = (int *) alloc_mem (sizeof (int) * blobcount); 01610 01611 for (blobindex = 0; blobindex < blobcount; blobindex++) { 01612 xcenter = (blobcoords[blobindex].left () + 01613 blobcoords[blobindex].right ()) / 2; 01614 heights[blobindex] = (int) (blobcoords[blobindex].top () - 01615 baseline->y (xcenter) + 0.5); 01616 } 01617 01618 return (heights); 01619 }
void make_holed_baseline | ( | BOX | blobcoords[], | |
int | blobcount, | |||
QSPLINE * | spline, | |||
QSPLINE * | baseline, | |||
float | gradient | |||
) |
Initial approximation.
Make the first estimate at a baseline, either by shifting a supplied previous spline, or by doing a piecewise linear approximation using all the blobs.
Definition at line 659 of file oldbasel.cpp.
References LMS::add(), baseline, LMS::constrained_fit(), BOX::left(), MAXOVERLAP, NULL, BOX::right(), QSPLINE::segments, QSPLINE::xcoords, and QSPLINE::y().
Referenced by find_textlines().
00665 { 00666 int leftedge; /*left edge of line */ 00667 int rightedge; /*right edge of line */ 00668 int blobindex; /*current blob */ 00669 float x; //centre of row 00670 ICOORD shift; //shift of spline 00671 00672 LMS lms(blobcount); //straight baseline 00673 INT32 xstarts[2]; //straight line 00674 double coeffs[3]; 00675 float c; //line parameter 00676 00677 /*left edge of row */ 00678 leftedge = blobcoords[0].left (); 00679 /*right edge of line */ 00680 rightedge = blobcoords[blobcount - 1].right (); 00681 for (blobindex = 0; blobindex < blobcount; blobindex++) { 00682 lms.add (FCOORD ((blobcoords[blobindex].left () + 00683 blobcoords[blobindex].right ()) / 2.0, 00684 blobcoords[blobindex].bottom ())); 00685 } 00686 lms.constrained_fit (gradient, c); 00687 xstarts[0] = leftedge; 00688 xstarts[1] = rightedge; 00689 coeffs[0] = 0; 00690 coeffs[1] = gradient; 00691 coeffs[2] = c; 00692 *baseline = QSPLINE (1, xstarts, coeffs); 00693 if (spline != NULL /*no given spline */ 00694 && spline->segments >= 3 /*or trivial */ 00695 /*or too non-overlap */ 00696 && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge) 00697 && spline->xcoords[spline->segments - 1] >= rightedge 00698 - MAXOVERLAP * (rightedge - leftedge)) { 00699 *baseline = *spline; /*copy it */ 00700 x = (leftedge + rightedge) / 2.0; 00701 shift = ICOORD (0, (INT16) (gradient * x + c - spline->y (x))); 00702 baseline->move (shift); 00703 } 00704 }
Make splines.
Top level function to make baselines the old way.
Definition at line 89 of file oldbasel.cpp.
References TO_ROW::baseline, TO_ROW::blob_list(), correlate_lines(), find_textlines(), NULL, tprintf(), and TO_ROW::xheight.
Referenced by make_spline_rows().
00092 { 00093 QSPLINE *prev_baseline; //baseline of previous row 00094 TO_ROW *row; //current row 00095 TO_ROW_IT row_it = block->get_rows (); 00096 BLOBNBOX_IT blob_it; 00097 00098 prev_baseline = NULL; //nothing yet 00099 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00100 row = row_it.data (); 00101 find_textlines (block, row, 2, NULL); 00102 if (row->xheight <= 0 && prev_baseline != NULL) 00103 find_textlines (block, row, 2, prev_baseline); 00104 if (row->xheight > 0) 00105 //was a good one 00106 prev_baseline = &row->baseline; 00107 else { 00108 prev_baseline = NULL; 00109 blob_it.set_to_list (row->blob_list ()); 00110 if (textord_debug_baselines) 00111 tprintf ("Row baseline generation failed on row at (%d,%d)\n", 00112 blob_it.data ()->bounding_box ().left (), 00113 blob_it.data ()->bounding_box ().bottom ()); 00114 } 00115 } 00116 correlate_lines(block); 00117 }
void merge_oldbl_parts | ( | BOX | blobcoords[], | |
int | blobcount, | |||
char | partids[], | |||
int | partsizes[], | |||
int | biggestpart, | |||
float | jumplimit | |||
) |
Partition blobs.
For any adjacent group of blobs in a different part, put them in the main part if they fit closely to neighbours in the main part.
Definition at line 793 of file oldbasel.cpp.
References QLSQ::add(), QLSQ::clear(), FALSE, QLSQ::fit(), QLSQ::get_b(), QLSQ::get_c(), MAXBADRUN, tprintf(), and TRUE.
Referenced by partition_line().
00800 { 00801 BOOL8 found_one; //found a bestpart blob 00802 BOOL8 close_one; //found was close enough 00803 register int blobindex; /*no along text line */ 00804 int prevpart; //previous iteration 00805 int runlength; //no in this part 00806 float diff; /*difference from line */ 00807 int startx; /*index of start blob */ 00808 int test_blob; //another index 00809 FCOORD coord; //blob coordinate 00810 float m, c; //fitted line 00811 QLSQ stats; //line stuff 00812 00813 prevpart = biggestpart; 00814 runlength = 0; 00815 startx = 0; 00816 for (blobindex = 0; blobindex < blobcount; blobindex++) { 00817 if (partids[blobindex] != prevpart) { 00818 //tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n", 00819 // blobcoords[blobindex].left(),blobcoords[blobindex].bottom(), 00820 // prevpart,partids[blobindex],runlength); 00821 if (prevpart != biggestpart && runlength > MAXBADRUN) { 00822 stats.clear (); 00823 for (test_blob = startx; test_blob < blobindex; test_blob++) { 00824 coord = FCOORD ((blobcoords[test_blob].left () 00825 + blobcoords[test_blob].right ()) / 2.0, 00826 blobcoords[test_blob].bottom ()); 00827 stats.add (coord.x (), coord.y ()); 00828 } 00829 stats.fit (1); 00830 m = stats.get_b (); 00831 c = stats.get_c (); 00832 if (textord_oldbl_debug) 00833 tprintf ("Fitted line y=%g x + %g\n", m, c); 00834 found_one = FALSE; 00835 close_one = FALSE; 00836 for (test_blob = 1; !found_one 00837 && (startx - test_blob >= 0 00838 || blobindex + test_blob <= blobcount); test_blob++) { 00839 if (startx - test_blob >= 0 00840 && partids[startx - test_blob] == biggestpart) { 00841 found_one = TRUE; 00842 coord = FCOORD ((blobcoords[startx - test_blob].left () 00843 + blobcoords[startx - 00844 test_blob].right ()) / 00845 2.0, 00846 blobcoords[startx - 00847 test_blob].bottom ()); 00848 diff = m * coord.x () + c - coord.y (); 00849 if (textord_oldbl_debug) 00850 tprintf 00851 ("Diff of common blob to suspect part=%g at (%g,%g)\n", 00852 diff, coord.x (), 
coord.y ()); 00853 if (diff < jumplimit && -diff < jumplimit) 00854 close_one = TRUE; 00855 } 00856 if (blobindex + test_blob <= blobcount 00857 && partids[blobindex + test_blob - 1] == biggestpart) { 00858 found_one = TRUE; 00859 coord = 00860 FCOORD ((blobcoords[blobindex + test_blob - 1]. 00861 left () + blobcoords[blobindex + test_blob - 00862 1].right ()) / 2.0, 00863 blobcoords[blobindex + test_blob - 00864 1].bottom ()); 00865 diff = m * coord.x () + c - coord.y (); 00866 if (textord_oldbl_debug) 00867 tprintf 00868 ("Diff of common blob to suspect part=%g at (%g,%g)\n", 00869 diff, coord.x (), coord.y ()); 00870 if (diff < jumplimit && -diff < jumplimit) 00871 close_one = TRUE; 00872 } 00873 } 00874 if (close_one) { 00875 if (textord_oldbl_debug) 00876 tprintf 00877 ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n", 00878 runlength, biggestpart, prevpart, 00879 blobcoords[startx].left (), 00880 blobcoords[startx].bottom ()); 00881 //switch sides 00882 partsizes[prevpart] -= runlength; 00883 for (test_blob = startx; test_blob < blobindex; test_blob++) 00884 partids[test_blob] = biggestpart; 00885 } 00886 } 00887 prevpart = partids[blobindex]; 00888 runlength = 1; 00889 startx = blobindex; 00890 } 00891 else 00892 runlength++; 00893 } 00894 }
void old_first_xheight | ( | TO_ROW * | row, | |
BOX | blobcoords[], | |||
int | initialheight, | |||
int | blobcount, | |||
QSPLINE * | baseline, | |||
float | jumplimit | |||
) |
The Wise Owl way.
Makes an x-height spline by copying the baseline and shifting it. It estimates the x-height across the line to use as the shift. It also finds the ascender height if it can.
Definition at line 1451 of file oldbasel.cpp.
References STATS::add(), TO_ROW::ascrise, baseline, f, STATS::get_total(), STATS::ile(), BOX::left(), MAXHEIGHT, BOX::right(), BOX::top(), and TO_ROW::xheight.
Referenced by find_textlines().
01458 { 01459 register int blobindex; /*current blob */ 01460 /*height statistics */ 01461 STATS heightstat (0, MAXHEIGHT); 01462 int height; /*height of blob */ 01463 int xcentre; /*centre of blob */ 01464 int lineheight; /*approx xheight */ 01465 float ascenders; /*ascender sum */ 01466 int asccount; /*no of ascenders */ 01467 float xsum; /*xheight sum */ 01468 int xcount; /*xheight count */ 01469 register float diff; /*height difference */ 01470 01471 if (blobcount > 1) { 01472 for (blobindex = 0; blobindex < blobcount; blobindex++) { 01473 xcentre = (blobcoords[blobindex].left () 01474 + blobcoords[blobindex].right ()) / 2; 01475 /*height of blob */ 01476 height = (int) (blobcoords[blobindex].top () 01477 - baseline->y (xcentre) + 0.5); 01478 if (height > initialheight * oldbl_xhfract 01479 && height > textord_min_xheight) 01480 heightstat.add (height, 1); 01481 } 01482 if (heightstat.get_total () > 3) { 01483 lineheight = (int) heightstat.ile (0.25); 01484 if (lineheight <= 0) 01485 lineheight = (int) heightstat.ile (0.5); 01486 } 01487 else 01488 lineheight = initialheight; 01489 } 01490 else { 01491 lineheight = (int) (blobcoords[0].top () 01492 - baseline->y ((blobcoords[0].left () 01493 + blobcoords[0].right ()) / 2) + 01494 0.5); 01495 } 01496 01497 xsum = 0.0f; 01498 xcount = 0; 01499 for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount; 01500 blobindex++) { 01501 xcentre = (blobcoords[blobindex].left () 01502 + blobcoords[blobindex].right ()) / 2; 01503 diff = blobcoords[blobindex].top () - baseline->y (xcentre); 01504 /*is it ascender */ 01505 if (diff > lineheight + jumplimit) { 01506 ascenders += diff; 01507 asccount++; /*count ascenders */ 01508 } 01509 else if (diff > lineheight - jumplimit) { 01510 xsum += diff; /*mean xheight */ 01511 xcount++; 01512 } 01513 } 01514 if (xcount > 0) 01515 xsum /= xcount; /*average xheight */ 01516 else 01517 xsum = (float) lineheight; /*guess it */ 01518 row->xheight *= xsum; 01519 if 
(asccount > 0) 01520 row->ascrise = ascenders / asccount - xsum; 01521 else 01522 row->ascrise = 0.0f; /*had none */ 01523 if (row->xheight == 0) 01524 row->xheight = -1.0f; 01525 }
int partition_coords | ( | BOX | blobcoords[], | |
int | blobcount, | |||
char | partids[], | |||
int | bestpart, | |||
int | xcoords[], | |||
int | ycoords[] | |||
) |
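Find relevant coords. For every blob whose partition id equals bestpart, stores the x-centre of the blob in xcoords and its bottom in ycoords, and returns the number of points stored.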
Definition at line 1084 of file oldbasel.cpp.
References BOX::bottom().
Referenced by find_textlines().
01091 { 01092 register int blobindex; /*no along text line */ 01093 int pointcount; /*no of points */ 01094 01095 pointcount = 0; 01096 for (blobindex = 0; blobindex < blobcount; blobindex++) { 01097 if (partids[blobindex] == bestpart) { 01098 /*centre of blob */ 01099 xcoords[pointcount] = (blobcoords[blobindex].left () 01100 + blobcoords[blobindex].right ()) >> 1; 01101 ycoords[pointcount++] = blobcoords[blobindex].bottom (); 01102 } 01103 } 01104 return pointcount; /*no of points found */ 01105 }
int partition_line | ( | BOX | blobcoords[], | |
int | blobcount, | |||
int * | numparts, | |||
char | partids[], | |||
int | partsizes[], | |||
QSPLINE * | spline, | |||
float | jumplimit, | |||
float | ydiffs[] | |||
) |
Partition blobs.
Definition at line 717 of file oldbasel.cpp.
References choose_partition(), get_ydiffs(), MAXPARTS, merge_oldbl_parts(), and tprintf().
Referenced by find_textlines().
00726 { 00727 register int blobindex; /*no along text line */ 00728 int bestpart; /*best new partition */ 00729 int biggestpart; /*part with most members */ 00730 float diff; /*difference from line */ 00731 int startx; /*index of start blob */ 00732 float partdiffs[MAXPARTS]; /*step between parts */ 00733 00734 for (bestpart = 0; bestpart < MAXPARTS; bestpart++) 00735 partsizes[bestpart] = 0; /*zero them all */ 00736 00737 startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs); 00738 *numparts = 1; /*1 partition */ 00739 bestpart = -1; /*first point */ 00740 for (blobindex = startx; blobindex < blobcount; blobindex++) { 00741 /*do each blob in row */ 00742 diff = ydiffs[blobindex]; /*diff from line */ 00743 if (textord_oldbl_debug) { 00744 tprintf ("%d(%d,%d), ", blobindex, 00745 blobcoords[blobindex].left (), 00746 blobcoords[blobindex].bottom ()); 00747 } 00748 bestpart = 00749 choose_partition(diff, partdiffs, bestpart, jumplimit, numparts); 00750 /*record partition */ 00751 partids[blobindex] = bestpart; 00752 partsizes[bestpart]++; /*another in it */ 00753 } 00754 00755 bestpart = -1; /*first point */ 00756 partsizes[0]--; /*doing 1st pt again */ 00757 /*do each blob in row */ 00758 for (blobindex = startx; blobindex >= 0; blobindex--) { 00759 diff = ydiffs[blobindex]; /*diff from line */ 00760 if (textord_oldbl_debug) { 00761 tprintf ("%d(%d,%d), ", blobindex, 00762 blobcoords[blobindex].left (), 00763 blobcoords[blobindex].bottom ()); 00764 } 00765 bestpart = 00766 choose_partition(diff, partdiffs, bestpart, jumplimit, numparts); 00767 /*record partition */ 00768 partids[blobindex] = bestpart; 00769 partsizes[bestpart]++; /*another in it */ 00770 } 00771 00772 for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++) 00773 if (partsizes[bestpart] >= partsizes[biggestpart]) 00774 biggestpart = bestpart; /*new biggest */ 00775 if (textord_oldbl_merge_parts) 00776 merge_oldbl_parts(blobcoords, 00777 blobcount, 00778 partids, 00779 partsizes, 
00780 biggestpart, 00781 jumplimit); 00782 return biggestpart; /*biggest partition */ 00783 }
Find xheight.
Choose based on the height modes the best x height value.
Definition at line 1669 of file oldbasel.cpp.
References TO_ROW::ascrise, FALSE, MODENUM, STATS::pile_count(), TRUE, and TO_ROW::xheight.
Referenced by make_first_xheight().
01673 { 01674 int x; 01675 int y; 01676 int z; 01677 float ratio; 01678 int found_one_bigger = FALSE; 01679 int best_x_height = 0; 01680 int best_asc = 0; 01681 int num_in_best; 01682 01683 for (x = 0; x < MODENUM; x++) { 01684 for (y = 0; y < MODENUM; y++) { 01685 /* Check for two modes */ 01686 if (modelist[x] && modelist[y] && 01687 heightstat->pile_count (modelist[x]) > mode_threshold) { 01688 ratio = (float) modelist[y] / (float) modelist[x]; 01689 if (1.2 < ratio && ratio < 1.8) { 01690 if (modelist[y] && modelist[x]) { 01691 /* Two modes found */ 01692 best_x_height = modelist[x]; 01693 num_in_best = heightstat->pile_count (modelist[x]); 01694 01695 /* Try to get one higher */ 01696 do { 01697 found_one_bigger = FALSE; 01698 for (z = 0; z < MODENUM; z++) { 01699 if (modelist[z] == best_x_height + 1) { 01700 ratio = 01701 (float) modelist[y] / (float) modelist[z]; 01702 if ((1.2 < ratio && ratio < 1.8) && 01703 /* Should be half of best */ 01704 heightstat->pile_count (modelist[z]) > 01705 num_in_best * 0.5) { 01706 best_x_height++; 01707 found_one_bigger = TRUE; 01708 break; 01709 } 01710 } 01711 } 01712 } 01713 while (found_one_bigger); 01714 01715 /* try to get a higher ascender */ 01716 01717 best_asc = modelist[y]; 01718 num_in_best = heightstat->pile_count (modelist[y]); 01719 01720 /* Try to get one higher */ 01721 do { 01722 found_one_bigger = FALSE; 01723 for (z = 0; z < MODENUM; z++) { 01724 if (modelist[z] > best_asc) { 01725 ratio = 01726 (float) modelist[z] / 01727 (float) best_x_height; 01728 if ((1.2 < ratio && ratio < 1.8) && 01729 /* Should be half of best */ 01730 heightstat->pile_count (modelist[z]) > 01731 num_in_best * 0.5) { 01732 best_asc = modelist[z]; 01733 found_one_bigger = TRUE; 01734 break; 01735 } 01736 } 01737 } 01738 } 01739 while (found_one_bigger); 01740 01741 row->xheight = (float) best_x_height; 01742 row->ascrise = (float) best_asc - best_x_height; 01743 return; 01744 } 01745 } 01746 } 01747 } 01748 } 01749 01750 
best_x_height = modelist[0]; /* Single Mode found */ 01751 num_in_best = heightstat->pile_count (best_x_height); 01752 do { 01753 /* Try to get one higher */ 01754 found_one_bigger = FALSE; 01755 for (z = 1; z < MODENUM; z++) { 01756 /* Should be half of best */ 01757 if ((modelist[z] == best_x_height + 1) && 01758 (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) { 01759 best_x_height++; 01760 found_one_bigger = TRUE; 01761 break; 01762 } 01763 } 01764 } 01765 while (found_one_bigger); 01766 01767 row->ascrise = 0.0f; 01768 row->xheight = (float) best_x_height; 01769 if (row->xheight == 0) 01770 row->xheight = -1.0f; 01771 }
int segment_spline | ( | BOX | blobcoords[], | |
int | blobcount, | |||
int | xcoords[], | |||
int | ycoords[], | |||
int | degree, | |||
int | pointcount, | |||
int | xstarts[] | |||
) |
Make xstarts.
Segment the row at midpoints between maxima and minima of the x,y pairs. The xstarts of the segments are returned and the number found.
Definition at line 1115 of file oldbasel.cpp.
References SPLINESIZE, tprintf(), and TURNLIMIT.
Referenced by find_textlines().
01122 { 01123 register int ptindex; /*no along text line */ 01124 register int segment; /*partition no */ 01125 int lastmin, lastmax; /*possible turn points */ 01126 int turnpoints[SPLINESIZE]; /*good turning points */ 01127 int turncount; /*no of turning points */ 01128 int max_x; //max specified coord 01129 01130 xstarts[0] = xcoords[0] - 1; //leftmost defined pt 01131 max_x = xcoords[pointcount - 1] + 1; 01132 if (degree < 2) 01133 pointcount = 0; 01134 turncount = 0; /*no turning points yet */ 01135 if (pointcount > 3) { 01136 ptindex = 1; 01137 lastmax = lastmin = 0; /*start with first one */ 01138 while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) { 01139 /*minimum */ 01140 if (ycoords[ptindex - 1] > ycoords[ptindex] 01141 && ycoords[ptindex] <= ycoords[ptindex + 1]) { 01142 if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) { 01143 if (turncount == 0 || turnpoints[turncount - 1] != lastmax) 01144 /*new max point */ 01145 turnpoints[turncount++] = lastmax; 01146 lastmin = ptindex; /*latest minimum */ 01147 } 01148 else if (ycoords[ptindex] < ycoords[lastmin]) { 01149 lastmin = ptindex; /*lower minimum */ 01150 } 01151 } 01152 01153 /*maximum */ 01154 if (ycoords[ptindex - 1] < ycoords[ptindex] 01155 && ycoords[ptindex] >= ycoords[ptindex + 1]) { 01156 if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) { 01157 if (turncount == 0 || turnpoints[turncount - 1] != lastmin) 01158 /*new min point */ 01159 turnpoints[turncount++] = lastmin; 01160 lastmax = ptindex; /*latest maximum */ 01161 } 01162 else if (ycoords[ptindex] > ycoords[lastmax]) { 01163 lastmax = ptindex; /*higher maximum */ 01164 } 01165 } 01166 ptindex++; 01167 } 01168 /*possible global min */ 01169 if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT 01170 && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) { 01171 if (turncount < SPLINESIZE - 1) 01172 /*2 more turns */ 01173 turnpoints[turncount++] = lastmax; 01174 if (turncount < SPLINESIZE - 1) 01175 
turnpoints[turncount++] = ptindex; 01176 } 01177 else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT 01178 /*possible global max */ 01179 && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) { 01180 if (turncount < SPLINESIZE - 1) 01181 /*2 more turns */ 01182 turnpoints[turncount++] = lastmin; 01183 if (turncount < SPLINESIZE - 1) 01184 turnpoints[turncount++] = ptindex; 01185 } 01186 else if (turncount > 0 && turnpoints[turncount - 1] == lastmin 01187 && turncount < SPLINESIZE - 1) { 01188 if (ycoords[ptindex] > ycoords[lastmax]) 01189 turnpoints[turncount++] = ptindex; 01190 else 01191 turnpoints[turncount++] = lastmax; 01192 } 01193 else if (turncount > 0 && turnpoints[turncount - 1] == lastmax 01194 && turncount < SPLINESIZE - 1) { 01195 if (ycoords[ptindex] < ycoords[lastmin]) 01196 turnpoints[turncount++] = ptindex; 01197 else 01198 turnpoints[turncount++] = lastmin; 01199 } 01200 } 01201 01202 if (textord_oldbl_debug && turncount > 0) 01203 tprintf ("First turn is %d at (%d,%d)\n", 01204 turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]); 01205 for (segment = 1; segment < turncount; segment++) { 01206 /*centre y coord */ 01207 lastmax = (ycoords[turnpoints[segment - 1]] 01208 + ycoords[turnpoints[segment]]) / 2; 01209 01210 /* fix alg so that it works with both rising and falling sections */ 01211 if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]]) 01212 /*find rising y centre */ 01213 for (ptindex = turnpoints[segment - 1] + 1; 01214 ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; 01215 ptindex++); 01216 else 01217 /*find falling y centre */ 01218 for (ptindex = turnpoints[segment - 1] + 1; 01219 ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; 01220 ptindex++); 01221 /*centre x */ 01222 xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex] 01223 + xcoords[turnpoints[segment - 1]] 01224 + xcoords[turnpoints[segment]] + 2) / 4; 01225 /*halfway between turns */ 01226 if 
(textord_oldbl_debug) 01227 tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n", 01228 segment, turnpoints[segment], 01229 xcoords[turnpoints[segment]], ycoords[turnpoints[segment]], 01230 ptindex - 1, xcoords[ptindex - 1], xstarts[segment]); 01231 } 01232 01233 xstarts[segment] = max_x; 01234 return segment; /*no of splines */ 01235 }
BOOL8 split_stepped_spline | ( | QSPLINE * | baseline, | |
float | jumplimit, | |||
int | xcoords[], | |||
int | xstarts[], | |||
int & | segments | |||
) |
Make xstarts.
Definition at line 1246 of file oldbasel.cpp.
References baseline, FALSE, insert_spline_point(), SPLINESIZE, tprintf(), and TRUE.
Referenced by find_textlines().
01252 { 01253 BOOL8 doneany; //return value 01254 register int segment; /*partition no */ 01255 int startindex, centreindex, endindex; 01256 float leftcoord, rightcoord; 01257 int leftindex, rightindex; 01258 float step; //spline step 01259 01260 doneany = FALSE; 01261 startindex = 0; 01262 for (segment = 1; segment < segments - 1; segment++) { 01263 step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0, 01264 (xstarts[segment] + xstarts[segment + 1]) / 2.0); 01265 if (step < 0) 01266 step = -step; 01267 if (step > jumplimit) { 01268 while (xcoords[startindex] < xstarts[segment - 1]) 01269 startindex++; 01270 centreindex = startindex; 01271 while (xcoords[centreindex] < xstarts[segment]) 01272 centreindex++; 01273 endindex = centreindex; 01274 while (xcoords[endindex] < xstarts[segment + 1]) 01275 endindex++; 01276 if (segments >= SPLINESIZE) { 01277 if (textord_debug_baselines) 01278 tprintf ("Too many segments to resegment spline!!\n"); 01279 } 01280 else if (endindex - startindex >= textord_spline_medianwin * 3) { 01281 while (centreindex - startindex < 01282 textord_spline_medianwin * 3 / 2) 01283 centreindex++; 01284 while (endindex - centreindex < 01285 textord_spline_medianwin * 3 / 2) 01286 centreindex--; 01287 leftindex = (startindex + startindex + centreindex) / 3; 01288 rightindex = (centreindex + endindex + endindex) / 3; 01289 leftcoord = 01290 (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0; 01291 rightcoord = 01292 (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0; 01293 while (xcoords[leftindex] > leftcoord 01294 && leftindex - startindex > textord_spline_medianwin) 01295 leftindex--; 01296 while (xcoords[leftindex] < leftcoord 01297 && centreindex - leftindex > 01298 textord_spline_medianwin / 2) 01299 leftindex++; 01300 if (xcoords[leftindex] - leftcoord > 01301 leftcoord - xcoords[leftindex - 1]) 01302 leftindex--; 01303 while (xcoords[rightindex] > rightcoord 01304 && rightindex - centreindex > 01305 
textord_spline_medianwin / 2) 01306 rightindex--; 01307 while (xcoords[rightindex] < rightcoord 01308 && endindex - rightindex > textord_spline_medianwin) 01309 rightindex++; 01310 if (xcoords[rightindex] - rightcoord > 01311 rightcoord - xcoords[rightindex - 1]) 01312 rightindex--; 01313 if (textord_debug_baselines) 01314 tprintf ("Splitting spline at %d with step %g at (%d,%d)\n", 01315 xstarts[segment], 01316 baseline-> 01317 step ((xstarts[segment - 1] + 01318 xstarts[segment]) / 2.0, 01319 (xstarts[segment] + 01320 xstarts[segment + 1]) / 2.0), 01321 (xcoords[leftindex - 1] + xcoords[leftindex]) / 2, 01322 (xcoords[rightindex - 1] + xcoords[rightindex]) / 2); 01323 insert_spline_point (xstarts, segment, 01324 (xcoords[leftindex - 1] + 01325 xcoords[leftindex]) / 2, 01326 (xcoords[rightindex - 1] + 01327 xcoords[rightindex]) / 2, segments); 01328 doneany = TRUE; 01329 } 01330 else if (textord_debug_baselines) { 01331 tprintf 01332 ("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n", 01333 startindex, centreindex, endindex, 01334 (INT32) textord_spline_medianwin); 01335 } 01336 } 01337 // else tprintf("Spline step at %d is %g\n", 01338 // xstarts[segment], 01339 // baseline->step((xstarts[segment-1]+xstarts[segment])/2.0, 01340 // (xstarts[segment]+xstarts[segment+1])/2.0)); 01341 } 01342 return doneany; 01343 }
const int kMinModeFactor = 12 |
Definition at line 1622 of file oldbasel.cpp.