00001
00020
00021
00022
00023
00024
00025
00026 #include "mfcpch.h"
00027 #include <ctype.h>
00028 #include <math.h>
00029 #include "errcode.h"
00030 #include "grphics.h"
00031 #include "drawtord.h"
00032 #include "blkocc.h"
00033 #include "notdll.h"
00034
00035 const ERRCODE BLOCKOCC = "BlockOcc";
00036
00037 ELISTIZE (REGION_OCC)
00038 #define EXTERN
00039
00042 EXTERN BOOL_VAR (blockocc_show_result, FALSE,
00043 "Show intermediate results");
00044
00045
00046
00047 EXTERN INT_VAR (blockocc_desc_height, 0,
00048 "Descender height after normalisation");
00049 EXTERN INT_VAR (blockocc_asc_height, 255,
00050 "Ascender height after normalisation");
00051 EXTERN INT_VAR (blockocc_band_count, 4, "Number of bands used");
00052 EXTERN double_VAR (textord_underline_threshold, 0.5,
00053 "Fraction of width occupied");
00098 #define REGION_TYPE_EMPTY 0
00099 #define REGION_TYPE_OPEN_RIGHT 1
00100 #define REGION_TYPE_OPEN_LEFT 2
00101 #define REGION_TYPE_UPPER_BOUND 3
00102 #define REGION_TYPE_UPPER_UNBOUND 4
00103 #define REGION_TYPE_LOWER_BOUND 5
00104 #define REGION_TYPE_LOWER_UNBOUND 6
00105 #define REGION_TYPE_ENCLOSED 7
00106
00107 BAND bands[MAX_NUM_BANDS + 1];
00108
00121 BOOL8 test_underline(
00122 BOOL8 testing_on,
00123 PBLOB *blob,
00124 float baseline,
00125 float xheight
00126 ) {
00127 INT16 occ;
00128 INT16 blob_width;
00129 BOX blob_box;
00130 float occs[MAX_NUM_BANDS + 1];
00131
00132 blob_box = blob->bounding_box ();
00133 set_bands(baseline, xheight);
00134 blob_width = blob->bounding_box ().width ();
00135 if (testing_on) {
00136
00137
00138
00139
00140
00141
00142 tprintf
00143 ("Testing underline on blob at (%d,%d)->(%d,%d), base=%g\nOccs:",
00144 blob->bounding_box ().left (), blob->bounding_box ().bottom (),
00145 blob->bounding_box ().right (), blob->bounding_box ().top (),
00146 baseline);
00147 }
00148 block_occ(blob, occs);
00149 if (testing_on) {
00150 for (occ = 0; occ <= MAX_NUM_BANDS; occ++)
00151 tprintf ("%g ", occs[occ]);
00152 tprintf ("\n");
00153 }
00154 if (occs[1] > occs[2] + occs[2] && occs[1] > occs[3] + occs[3]
00155 && occs[1] > blob_width * textord_underline_threshold)
00156 return TRUE;
00157 if (occs[4] > occs[2] + occs[2]
00158 && occs[4] > blob_width * textord_underline_threshold)
00159 return TRUE;
00160 return FALSE;
00161 }
00162
00173 BOOL8 test_underline(
00174 BOOL8 testing_on,
00175 C_BLOB *blob,
00176 INT16 baseline,
00177 INT16 xheight
00178 ) {
00179 INT16 occ;
00180 INT16 blob_width;
00181 BOX blob_box;
00182 INT32 desc_occ;
00183 INT32 x_occ;
00184 INT32 asc_occ;
00185 STATS projection;
00186
00187 blob_box = blob->bounding_box ();
00188 blob_width = blob->bounding_box ().width ();
00189 projection.set_range (blob_box.bottom (), blob_box.top () + 1);
00190 if (testing_on) {
00191
00192
00193
00194
00195
00196
00197 tprintf
00198 ("Testing underline on blob at (%d,%d)->(%d,%d), base=%d\nOccs:",
00199 blob->bounding_box ().left (), blob->bounding_box ().bottom (),
00200 blob->bounding_box ().right (), blob->bounding_box ().top (),
00201 baseline);
00202 }
00203 horizontal_cblob_projection(blob, &projection);
00204 desc_occ = 0;
00205 for (occ = blob_box.bottom (); occ < baseline; occ++)
00206 if (occ <= blob_box.top () && projection.pile_count (occ) > desc_occ)
00207
00208 desc_occ = projection.pile_count (occ);
00209 x_occ = 0;
00210 for (occ = baseline; occ <= baseline + xheight; occ++)
00211 if (occ >= blob_box.bottom () && occ <= blob_box.top ()
00212 && projection.pile_count (occ) > x_occ)
00213
00214 x_occ = projection.pile_count (occ);
00215 asc_occ = 0;
00216 for (occ = baseline + xheight + 1; occ <= blob_box.top (); occ++)
00217 if (occ >= blob_box.bottom () && projection.pile_count (occ) > asc_occ)
00218 asc_occ = projection.pile_count (occ);
00219 if (testing_on) {
00220 tprintf ("%d %d %d\n", desc_occ, x_occ, asc_occ);
00221 }
00222 if (desc_occ == 0 && x_occ == 0 && asc_occ == 0) {
00223 tprintf ("Bottom=%d, top=%d, base=%d, x=%d\n",
00224 blob_box.bottom (), blob_box.top (), baseline, xheight);
00225 projection.print (stdout, TRUE);
00226 }
00227 if (desc_occ > x_occ + x_occ
00228 && desc_occ > blob_width * textord_underline_threshold)
00229 return TRUE;
00230 if (asc_occ > x_occ + x_occ
00231 && asc_occ > blob_width * textord_underline_threshold)
00232 return TRUE;
00233 return FALSE;
00234 }
00235
00236
00241 void horizontal_cblob_projection(
00242 C_BLOB *blob,
00243 STATS *stats
00244 ) {
00245
00246 C_OUTLINE_IT out_it = blob->out_list ();
00247
00248 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00249 horizontal_coutline_projection (out_it.data (), stats);
00250 }
00251 }
00252
00253
00264 void horizontal_coutline_projection(
00265 C_OUTLINE *outline,
00266 STATS *stats
00267 ) {
00268 ICOORD pos;
00269 ICOORD step;
00270 INT32 length;
00271 INT16 stepindex;
00272 C_OUTLINE_IT out_it = outline->child ();
00273
00274 pos = outline->start_pos ();
00275 length = outline->pathlength ();
00276 for (stepindex = 0; stepindex < length; stepindex++) {
00277 step = outline->step (stepindex);
00278 if (step.y () > 0) {
00279 stats->add (pos.y (), pos.x ());
00280 }
00281 else if (step.y () < 0) {
00282 stats->add (pos.y () - 1, -pos.x ());
00283 }
00284 pos += step;
00285 }
00286
00287 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00288 horizontal_coutline_projection (out_it.data (), stats);
00289 }
00290 }
00291
00295 void set_bands(
00296 float baseline,
00297 float xheight
00298 ) {
00299 INT16 int_bl, int_xh;
00300
00301 bands[DOT_BAND].set (0, 0, 0, 0, 0, 0);
00302
00303 int_bl = (INT16) baseline;
00304 int_xh = (INT16) xheight;
00305 bands[1].set (int_bl, int_bl, int_bl,
00306 NO_LOWER_LIMIT, NO_LOWER_LIMIT, NO_LOWER_LIMIT);
00307
00308 bands[2].set (int_bl + int_xh / 2, int_bl + int_xh / 2, int_bl + int_xh / 2,
00309 int_bl, int_bl, int_bl);
00310
00311 bands[3].set (int_bl + int_xh, int_bl + int_xh, int_bl + int_xh,
00312 int_bl + int_xh / 2, int_bl + int_xh / 2,
00313 int_bl + int_xh / 2);
00314
00315 bands[4].set (NO_UPPER_LIMIT, NO_UPPER_LIMIT, NO_UPPER_LIMIT,
00316 int_bl + int_xh, int_bl + int_xh, int_bl + int_xh);
00317 }
00318
00326 void
00327 block_occ (PBLOB * blob,
00328 float occs[]
00329 ) {
00330 int band_index;
00331 REGION_OCC *region;
00332 REGION_OCC_LIST region_occ_list[MAX_NUM_BANDS + 1];
00333 REGION_OCC_IT region_it;
00334
00335 find_transitions(blob, region_occ_list);
00336 compress_region_list(region_occ_list);
00337 for (band_index = 0; band_index <= MAX_NUM_BANDS; band_index++) {
00338 occs[band_index] = 0.0f;
00339 region_it.set_to_list (®ion_occ_list[band_index]);
00340 for (region_it.mark_cycle_pt (); !region_it.cycled_list ();
00341 region_it.forward ()) {
00342 region = region_it.data ();
00343 occs[band_index] += region->max_x - region->min_x;
00344 }
00345 }
00346 }
00347
00348
00371 void find_transitions(PBLOB *blob,
00372 REGION_OCC_LIST *region_occ_list) {
00373 OUTLINE_IT outline_it;
00374 BOX box;
00375 POLYPT_IT pt_it;
00376 FCOORD point1;
00377 FCOORD point2;
00378 FCOORD *entry_pt = &point1;
00379 FCOORD *exit_pt = &point2;
00380 FCOORD *temp_pt;
00381 INT16 increment;
00382 INT16 prev_band;
00383 INT16 band;
00384 INT16 next_band;
00385 float min_x;
00386 float max_x;
00387 float min_y;
00388 float max_y;
00389 BOOL8 doubly_contained;
00390
00391 outline_it = blob->out_list ();
00392 for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
00393 outline_it.forward ()) {
00394 find_fbox(&outline_it, &min_x, &min_y, &max_x, &max_y);
00395
00396 if (bands[DOT_BAND].range_in_nominal (max_y, min_y)) {
00397 record_region(DOT_BAND,
00398 min_x,
00399 max_x,
00400 REGION_TYPE_ENCLOSED,
00401 region_occ_list);
00402 }
00403 else {
00404 band = find_containing_maximal_band (max_y, min_y,
00405 &doubly_contained);
00406 if (band != UNDEFINED_BAND) {
00407
00408 if (!doubly_contained)
00409 record_region(band,
00410 min_x,
00411 max_x,
00412 REGION_TYPE_ENCLOSED,
00413 region_occ_list);
00414 else {
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424 }
00425 }
00426 else {
00427
00428
00429 pt_it = outline_it.data ()->polypts ();
00430
00431 find_significant_line(pt_it, &band);
00432 *entry_pt = pt_it.data ()->pos;
00433 next_region(&pt_it,
00434 band,
00435 &next_band,
00436 &min_x,
00437 &max_x,
00438 &increment,
00439 exit_pt);
00440 pt_it.mark_cycle_pt ();
00441
00442
00443
00444 do {
00445 prev_band = band;
00446 band = band + increment;
00447
00448 while (band != next_band) {
00449 temp_pt = entry_pt;
00450 entry_pt = exit_pt;
00451 exit_pt = temp_pt;
00452 min_x = max_x = entry_pt->x ();
00453
00454 find_trans_point (&pt_it, band, band + increment,
00455 exit_pt);
00456 maintain_limits (&min_x, &max_x, exit_pt->x ());
00457
00458 record_region (band,
00459 min_x,
00460 max_x,
00461 find_region_type (prev_band,
00462 band,
00463 band + increment,
00464 entry_pt->x (),
00465 exit_pt->x ()),
00466 region_occ_list);
00467 prev_band = band;
00468 band = band + increment;
00469 }
00470
00471 temp_pt = entry_pt;
00472 entry_pt = exit_pt;
00473 exit_pt = temp_pt;
00474 min_x = max_x = entry_pt->x ();
00475 next_region(&pt_it,
00476 band,
00477 &next_band,
00478 &min_x,
00479 &max_x,
00480 &increment,
00481 exit_pt);
00482
00483 record_region (band,
00484 min_x,
00485 max_x,
00486 find_region_type (prev_band,
00487 band,
00488 band + increment,
00489 entry_pt->x (),
00490 exit_pt->x ()),
00491 region_occ_list);
00492 }
00493 while (!pt_it.cycled_list ());
00494 }
00495 }
00496 }
00497 }
00498
00504 void record_region(
00505 INT16 band,
00506 float new_min,
00507 float new_max,
00508 INT16 region_type,
00509 REGION_OCC_LIST *region_occ_list) {
00510 REGION_OCC_IT it (&(region_occ_list[band]));
00511
00512
00513
00514
00515
00516 if ((region_type == REGION_TYPE_UPPER_UNBOUND) ||
00517 (region_type == REGION_TYPE_LOWER_UNBOUND) ||
00518 (region_type == REGION_TYPE_EMPTY))
00519 return;
00520
00521 if (it.empty ()) {
00522 it.add_after_stay_put (new REGION_OCC (new_min, new_max, region_type));
00523 }
00524 else {
00525
00526
00527
00528 while ((new_min + new_max > it.data ()->min_x + it.data ()->max_x) &&
00529 (!it.at_last ()))
00530 it.forward ();
00531
00532 if ((it.at_last ()) &&
00533 (new_min + new_max > it.data ()->min_x + it.data ()->max_x))
00534
00535 it.add_after_stay_put (new REGION_OCC (new_min,
00536 new_max, region_type));
00537 else {
00538 it.add_before_stay_put (new REGION_OCC (new_min,
00539 new_max, region_type));
00540 }
00541 }
00542 }
00543
00544
00548 INT16 find_containing_maximal_band(
00549 float y1,
00550 float y2,
00551 BOOL8 *doubly_contained) {
00552 INT16 band;
00553
00554 *doubly_contained = FALSE;
00555
00556 for (band = 1; band <= blockocc_band_count; band++) {
00557 if (bands[band].range_in_maximal (y1, y2)) {
00558 if ((band < blockocc_band_count) &&
00559 (bands[band + 1].range_in_maximal (y1, y2)))
00560 *doubly_contained = TRUE;
00561 return band;
00562 }
00563 }
00564 return UNDEFINED_BAND;
00565 }
00566
00567
00572 void find_significant_line(POLYPT_IT it, INT16 *band) {
00573 *band = find_overlapping_minimal_band (it.data ()->pos.y (),
00574 it.data ()->pos.y () +
00575 it.data ()->vec.y ());
00576
00577 while (*band == UNDEFINED_BAND) {
00578 it.forward ();
00579 *band = find_overlapping_minimal_band (it.data ()->pos.y (),
00580 it.data ()->pos.y () +
00581 it.data ()->vec.y ());
00582 }
00583 }
00584
00585
00589 INT16 find_overlapping_minimal_band(
00590 float y1,
00591 float y2) {
00592 INT16 band;
00593
00594 for (band = 1; band <= blockocc_band_count; band++) {
00595 if (bands[band].range_overlaps_minimal (y1, y2))
00596 return band;
00597 }
00598 return UNDEFINED_BAND;
00599 }
00600
00601
00605 INT16 find_region_type(INT16 entry_band,
00606 INT16 current_band,
00607 INT16 exit_band,
00608 float entry_x,
00609 float exit_x) {
00610 if (entry_band > exit_band)
00611 return REGION_TYPE_OPEN_RIGHT;
00612 if (entry_band < exit_band)
00613 return REGION_TYPE_OPEN_LEFT;
00614 if (entry_x == exit_x)
00615 return REGION_TYPE_EMPTY;
00616 if (entry_band > current_band) {
00617 if (entry_x < exit_x)
00618 return REGION_TYPE_UPPER_BOUND;
00619 else
00620 return REGION_TYPE_UPPER_UNBOUND;
00621 }
00622 else {
00623 if (entry_x > exit_x)
00624 return REGION_TYPE_LOWER_BOUND;
00625 else
00626 return REGION_TYPE_LOWER_UNBOUND;
00627 }
00628 }
00629
00630
00634 void find_trans_point(POLYPT_IT *pt_it,
00635 INT16 current_band,
00636 INT16 next_band,
00637 FCOORD *transition_pt) {
00638 float x1, x2, y1, y2;
00639 float gradient;
00640 float offset;
00641
00642 if (current_band < next_band)
00643 transition_pt->set_y (bands[current_band].max);
00644
00645 else
00646 transition_pt->set_y (bands[current_band].min);
00647
00648
00649 x1 = pt_it->data ()->pos.x ();
00650 x2 = x1 + pt_it->data ()->vec.x ();
00651 y1 = pt_it->data ()->pos.y ();
00652 y2 = y1 + pt_it->data ()->vec.y ();
00653
00654 if (x1 == x2)
00655 transition_pt->set_x (x1);
00656 else {
00657 if (y1 == y2)
00658 transition_pt->set_x ((x1 + x2) / 2.0);
00659 else {
00660 gradient = (y1 - y2) / (float) (x1 - x2);
00661 offset = y1 - x1 * gradient;
00662 transition_pt->set_x ((transition_pt->y () - offset) / gradient);
00663 }
00664 }
00665 }
00666
00681 void next_region(POLYPT_IT *start_pt_it,
00682 INT16 start_band,
00683 INT16 *to_band,
00684 float *min_x,
00685 float *max_x,
00686 INT16 *increment,
00687 FCOORD *exit_pt) {
00688
00689 INT16 band;
00690 INT16 prev_band = start_band;
00691 POLYPT_IT last_transition_out_it;
00692 INT16 last_trans_out_to_band = 0;
00693 float ext_min_x = 0.0f;
00694 float ext_max_x = 0.0f;
00695
00696 start_pt_it->forward ();
00697 band = find_band (start_pt_it->data ()->pos.y ());
00698
00699 while ((band == start_band) ||
00700 bands[start_band].in_maximal (start_pt_it->data ()->pos.y ())) {
00701 if (band == start_band) {
00702
00703 if (prev_band != start_band) {
00704 *min_x = ext_min_x;
00705 *max_x = ext_max_x;
00706 }
00707 maintain_limits (min_x, max_x, start_pt_it->data ()->pos.x ());
00708 }
00709 else {
00710 if (prev_band == start_band) {
00711
00712
00713 last_transition_out_it = *start_pt_it;
00714
00715 last_transition_out_it.backward ();
00716
00717 last_trans_out_to_band = band;
00718 ext_min_x = *min_x;
00719 ext_max_x = *max_x;
00720 }
00721 maintain_limits (&ext_min_x, &ext_max_x,
00722 start_pt_it->data ()->pos.x ());
00723 }
00724 prev_band = band;
00725 start_pt_it->forward ();
00726 band = find_band (start_pt_it->data ()->pos.y ());
00727 }
00728
00729 if (prev_band == start_band) {
00730 *to_band = band;
00731
00732 last_transition_out_it = *start_pt_it;
00733
00734 last_transition_out_it.backward ();
00735 }
00736 else {
00737 *to_band = last_trans_out_to_band;
00738 }
00739
00740 if (*to_band > start_band)
00741 *increment = 1;
00742 else
00743 *increment = -1;
00744
00745 find_trans_point (&last_transition_out_it, start_band,
00746 start_band + *increment, exit_pt);
00747 maintain_limits (min_x, max_x, exit_pt->x ());
00748 *start_pt_it = last_transition_out_it;
00749 }
00750
00751
00755 INT16 find_band(
00756 float y) {
00757 INT16 band;
00758
00759 for (band = 1; band <= blockocc_band_count; band++) {
00760 if (bands[band].in_nominal (y))
00761 return band;
00762 }
00763 BLOCKOCC.error ("find_band", ABORT, "Cant find band for %d", y);
00764 return 0;
00765 }
00766
00767
00771 void compress_region_list(
00772 REGION_OCC_LIST *region_occ_list) {
00773 REGION_OCC_IT it (&(region_occ_list[0]));
00774 REGION_OCC *open_right = NULL;
00775
00776 INT16 i = 0;
00777
00778 for (i = 0; i <= blockocc_band_count; i++) {
00779 it.set_to_list (&(region_occ_list[i]));
00780 if (!it.empty ()) {
00781
00782
00783 open_right = NULL;
00784 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
00785 switch (it.data ()->region_type) {
00786 case REGION_TYPE_OPEN_RIGHT:
00787 {
00788 if (open_right != NULL)
00789 BLOCKOCC.error ("compress_region_list", ABORT,
00790 "unmatched right");
00791 else
00792 open_right = it.data ();
00793 break;
00794 }
00795 case REGION_TYPE_OPEN_LEFT:
00796 {
00797 if (open_right == NULL)
00798 BLOCKOCC.error ("compress_region_list", ABORT,
00799 "unmatched left");
00800 else {
00801 open_right->max_x = it.data ()->max_x;
00802 open_right = NULL;
00803 delete it.extract ();
00804 }
00805 break;
00806 }
00807 default:
00808 break;
00809 }
00810 }
00811 if (open_right != NULL)
00812 BLOCKOCC.error ("compress_region_list", ABORT,
00813 "unmatched right remaining");
00814
00815
00816
00817 it.move_to_first ();
00818 open_right = it.data ();
00819 while (!it.at_last ()) {
00820 it.forward ();
00821 if (it.data ()->min_x <= open_right->max_x) {
00822
00823 if (it.data ()->max_x > open_right->max_x)
00824 open_right->max_x = it.data ()->max_x;
00825
00826 delete it.extract ();
00827 }
00828 else
00829 open_right = it.data ();
00830 }
00831 }
00832 }
00833 }
00834
00835
00841 void find_fbox(OUTLINE_IT *out_it,
00842 float *min_x,
00843 float *min_y,
00844 float *max_x,
00845 float *max_y) {
00846 POLYPT_IT pt_it = out_it->data ()->polypts ();
00847 FCOORD pt;
00848 *min_x = 9999.0f;
00849 *min_y = 9999.0f;
00850 *max_x = 0.0f;
00851 *max_y = 0.0f;
00852
00853 for (pt_it.mark_cycle_pt (); !pt_it.cycled_list (); pt_it.forward ()) {
00854 pt = pt_it.data ()->pos;
00855 maintain_limits (min_x, max_x, pt.x ());
00856 maintain_limits (min_y, max_y, pt.y ());
00857 }
00858 }
00859
00860
00864 void maintain_limits(float *min_x, float *max_x, float x) {
00865 if (x > *max_x)
00866 *max_x = x;
00867 if (x < *min_x)
00868 *min_x = x;
00869 }