00001
00020 #include "mfcpch.h"
00021
00022 #include "scanedg.h"
00023 #include "drawedg.h"
00024 #include "edgloop.h"
00025 #include "edgblob.h"
00026
00027 #ifdef TEXT_VERBOSE
00028 #include "../cutil/callcpp.h"
00029 #endif
00030
00031 #define EXTERN
00032
00035 EXTERN INT_VAR (edges_children_per_grandchild, 10,
00036 "Importance ratio for chucking outlines");
00037 EXTERN INT_VAR (edges_children_count_limit, 45, "Max holes allowed in blob");
00038 EXTERN BOOL_VAR (edges_children_fix, FALSE,
00039 "Remove boxy parents of char-like children");
00040 EXTERN INT_VAR (edges_min_nonhole, 12,
00041 "Min pixels for potential char in box");
00042 EXTERN INT_VAR (edges_patharea_ratio, 40,
00043 "Max lensq/area for acceptable child outline");
00044 EXTERN double_VAR (edges_childarea, 0.5,
00045 "Max area fraction of child outline");
00046 EXTERN double_VAR (edges_boxarea, 0.8,
00047 "Min area fraction of grandchild for box");
00053 OL_BUCKETS::OL_BUCKETS (
00054 ICOORD bleft,
00055 ICOORD tright): bl (bleft), tr (tright) {
00056
00057 bxdim = (tright.x () - bleft.x ()) / BUCKETSIZE + 1;
00058 bydim = (tright.y () - bleft.y ()) / BUCKETSIZE + 1;
00059
00060 buckets = new C_OUTLINE_LIST[bxdim * bydim];
00061 index = 0;
00062 }
00063
00064
00073 C_OUTLINE_LIST *
00074 OL_BUCKETS::operator () (
00075 INT16 x,
00076 INT16 y) {
00077 return &buckets[(y - bl.y ()) / BUCKETSIZE * bxdim +
00078 (x - bl.x ()) / BUCKETSIZE];
00079 }
00080
00081
00104 INT32 OL_BUCKETS::count_children(
00105 C_OUTLINE *outline,
00106 INT32 max_count
00107 ) {
00108 BOOL8 parent_box;
00109 INT16 xmin, xmax;
00110 INT16 ymin, ymax;
00111 INT16 xindex, yindex;
00112 C_OUTLINE *child;
00113 INT32 child_count;
00114 INT32 grandchild_count;
00115 INT32 parent_area;
00116 FLOAT32 max_parent_area;
00117 INT32 child_area;
00118 INT32 child_length;
00119 BOX olbox;
00120 C_OUTLINE_IT child_it;
00121
00122 olbox = outline->bounding_box ();
00123 xmin = (olbox.left () - bl.x ()) / BUCKETSIZE;
00124 xmax = (olbox.right () - bl.x ()) / BUCKETSIZE;
00125 ymin = (olbox.bottom () - bl.y ()) / BUCKETSIZE;
00126 ymax = (olbox.top () - bl.y ()) / BUCKETSIZE;
00127
00128 child_count = 0;
00129 grandchild_count = 0;
00130 parent_area = 0;
00131 max_parent_area = 0;
00132 parent_box = TRUE;
00133 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSG)
00134 cprintf("\n1(bb=%d,%d,%d,%d)",xmin,ymin,xmax,ymax);
00135 #endif
00136
00137 for (yindex = ymin; yindex <= ymax; yindex++) {
00138 for (xindex = xmin; xindex <= xmax; xindex++) {
00139 child_it.set_to_list (&buckets[yindex * bxdim + xindex]);
00140 if (child_it.empty ())
00141 continue;
00142 for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
00143 child_it.forward ()) {
00144 child = child_it.data ();
00145 if (child != outline && *child < *outline) {
00146 child_count++;
00147 if (child_count <= max_count)
00148 grandchild_count += count_children (child,
00149 (max_count -
00150 child_count) /
00151 edges_children_per_grandchild)
00152 * edges_children_per_grandchild;
00153 if (child_count + grandchild_count > max_count) {
00154
00155
00156
00157
00158 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSG)
00159 cprintf("2(%d,%d)",child_count,grandchild_count);
00160 #endif
00161 return child_count + grandchild_count;
00162 }
00163 if (parent_area == 0) {
00164 parent_area = outline->outer_area ();
00165 if (parent_area < 0)
00166 parent_area = -parent_area;
00167 max_parent_area = outline->bounding_box ().width ()
00168 * outline->bounding_box ().height () * edges_boxarea;
00169 if (parent_area < max_parent_area)
00170 parent_box = FALSE;
00171 }
00172 if (parent_box
00173 && (!edges_children_fix
00174 || child->bounding_box ().height () >
00175 edges_min_nonhole) ) {
00176 child_area = child->outer_area ();
00177 if (child_area < 0)
00178 child_area = -child_area;
00179 if (edges_children_fix) {
00180 if (parent_area - child_area < max_parent_area) {
00181 parent_box = FALSE;
00182 continue;
00183 }
00184 if (grandchild_count > 0) {
00185
00186
00187
00188
00189
00190
00191 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSG)
00192 cprintf("5(%d)",max_count+1);
00193 #endif
00194 return max_count + 1;
00195 }
00196 child_length = child->pathlength ();
00197 if (child_length * child_length >
00198 child_area * edges_patharea_ratio) {
00199
00200
00201
00202
00203
00204
00205 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSG)
00206 cprintf("6(%d)",max_count+1);
00207 #endif
00208 return max_count + 1;
00209 }
00210 }
00211 if (child_area < child->bounding_box ().width ()
00212 * child->bounding_box ().height () *
00213 edges_childarea) {
00214
00215
00216
00217
00218
00219
00220
00221 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSG)
00222 cprintf("7(%d)",max_count+1);
00223 #endif
00224 return max_count + 1;
00225 }
00226 }
00227 }
00228 }
00229 }
00230 }
00231 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSG)
00232 cprintf("8[%d,%d=%d|%d,%d,%d]",xindex,yindex,child_count,parent_area,grandchild_count,child_length);
00233 #endif
00234 return child_count + grandchild_count;
00235 }
00236
00237
00250 void OL_BUCKETS::extract_children(
00251 C_OUTLINE *outline,
00252 C_OUTLINE_IT *it
00253 ) {
00254 INT16 xmin, xmax;
00255 INT16 ymin, ymax;
00256 INT16 xindex, yindex;
00257 BOX olbox;
00258 C_OUTLINE_IT child_it;
00259
00260 olbox = outline->bounding_box ();
00261 xmin = (olbox.left () - bl.x ()) / BUCKETSIZE;
00262 xmax = (olbox.right () - bl.x ()) / BUCKETSIZE;
00263 ymin = (olbox.bottom () - bl.y ()) / BUCKETSIZE;
00264 ymax = (olbox.top () - bl.y ()) / BUCKETSIZE;
00265 for (yindex = ymin; yindex <= ymax; yindex++) {
00266 for (xindex = xmin; xindex <= xmax; xindex++) {
00267 child_it.set_to_list (&buckets[yindex * bxdim + xindex]);
00268 for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
00269 child_it.forward ()) {
00270 if (*child_it.data () < *outline) {
00271 it->add_after_then_move (child_it.extract ());
00272 }
00273 }
00274 }
00275 }
00276 }
00277
00278
00289 void extract_edges(
00290 #ifndef GRAPHICS_DISABLED
00291 WINDOW window,
00292 #endif
00293 IMAGE *image,
00294 IMAGE *t_image,
00295 ICOORD page_tr,
00296 BLOCK *block
00297 ) {
00298 ICOORD bleft;
00299 ICOORD tright;
00300 C_OUTLINE_LIST outlines;
00301
00302 C_OUTLINE_IT out_it = &outlines;
00303
00304 #ifndef GRAPHICS_DISABLED
00305 get_outlines (window, image, t_image, page_tr, (PDBLK *) block, &out_it);
00306 #else
00307 get_outlines (image, t_image, page_tr, (PDBLK *) block, &out_it);
00308 #endif
00309
00310 block->bounding_box (bleft, tright);
00311
00312 outlines_to_blobs(block, bleft, tright, &outlines);
00313
00314 }
00315
00316
00328 void outlines_to_blobs(
00329 BLOCK *block,
00330 ICOORD bleft,
00331 ICOORD tright,
00332 C_OUTLINE_LIST *outlines) {
00333
00334 OL_BUCKETS buckets(bleft, tright);
00335
00336 fill_buckets(outlines, &buckets);
00337 empty_buckets(block, &buckets);
00338 }
00339
00340
00349 void fill_buckets(
00350 C_OUTLINE_LIST *outlines,
00351 OL_BUCKETS *buckets
00352 ) {
00353 BOX ol_box;
00354 C_OUTLINE_IT out_it = outlines;
00355 C_OUTLINE_IT bucket_it;
00356 C_OUTLINE *outline;
00357
00358 int t=0;
00359 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00360 outline = out_it.extract ();
00361
00362 ol_box = outline->bounding_box ();
00363 bucket_it.set_to_list ((*buckets) (ol_box.left (), ol_box.bottom ()));
00364 bucket_it.add_to_end (outline);
00365 }
00366 }
00367
00368
00377 void empty_buckets(
00378 BLOCK *block,
00379 OL_BUCKETS *buckets
00380 ) {
00381 BOOL8 good_blob;
00382 C_OUTLINE_LIST outlines;
00383
00384 C_OUTLINE_IT out_it = &outlines;
00385 C_OUTLINE_IT bucket_it = buckets->start_scan ();
00386 C_OUTLINE_IT parent_it;
00387 C_BLOB *blob;
00388 C_BLOB_IT good_blobs = block->blob_list ();
00389 C_BLOB_IT junk_blobs = block->reject_blobs ();
00390
00391 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00392 cprintf("1");
00393 #endif
00394 while (!bucket_it.empty ()) {
00395 out_it.set_to_list (&outlines);
00396 do {
00397 parent_it = bucket_it;
00398 do
00399 bucket_it.forward ();
00400 while (!bucket_it.at_first ()
00401 && !(*parent_it.data () < *bucket_it.data ()));
00402 }
00403 while (!bucket_it.at_first ());
00404
00405
00406 out_it.add_after_then_move (parent_it.extract ());
00407 good_blob = capture_children (buckets, &junk_blobs, &out_it);
00408 blob = new C_BLOB (&outlines);
00409 if (good_blob) {
00410 good_blobs.add_after_then_move (blob);
00411 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00412 cprintf("2");
00413 #endif
00414 }
00415 else {
00416 junk_blobs.add_after_then_move (blob);
00417 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00418 cprintf("3");
00419 #endif
00420 }
00421
00422 bucket_it.set_to_list (buckets->scan_next ());
00423 }
00424 }
00425
00426
00444 BOOL8 capture_children(
00445 OL_BUCKETS *buckets,
00446 C_BLOB_IT *reject_it,
00447 C_OUTLINE_IT *blob_it
00448 ) {
00449 BOOL8 anydone;
00450 C_OUTLINE *outline;
00451 C_OUTLINE *child;
00452 C_OUTLINE_IT test_it;
00453 INT32 child_count;
00454 C_BLOB *blob;
00455 C_OUTLINE_LIST r_list;
00456 C_OUTLINE_IT r_it;
00457
00458 outline = blob_it->data ();
00459 child_count = buckets->count_children (outline, edges_children_count_limit);
00460 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00461 cprintf("L");
00462 #endif
00463 if (child_count > edges_children_count_limit)
00464 return FALSE;
00465 if (child_count == 0)
00466 return TRUE;
00467
00468 buckets->extract_children (outline, blob_it);
00469 if (child_count == 1) {
00470 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00471 cprintf("K");
00472 #endif
00473 return TRUE;
00474 }
00475 do {
00476 anydone = FALSE;
00477 blob_it->move_to_first ();
00478 for (blob_it->mark_cycle_pt (); !blob_it->cycled_list ();
00479 blob_it->forward ()) {
00480 child = blob_it->data ();
00481 if (child != outline) {
00482 for (test_it = *blob_it, test_it.mark_cycle_pt ();
00483 !test_it.cycled_list (); test_it.forward ()) {
00484 if (test_it.data () != child && *test_it.data () < *child) {
00485 r_it.set_to_list (&r_list);
00486 r_it.add_after_then_move (test_it.extract ());
00487
00488 blob = new C_BLOB (&r_list);
00489 reject_it->add_after_then_move (blob);
00490 anydone = TRUE;
00491 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00492 cprintf("N");
00493 #endif
00494 }
00495 }
00496 if (anydone) {
00497 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00498 cprintf("M");
00499 #endif
00500 break;
00501 }
00502 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00503 cprintf("L");
00504 #endif
00505 }
00506 }
00507 }
00508 while (anydone);
00509 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00510 cprintf("O\n");
00511 #endif
00512 return TRUE;
00513 }