textord/gap_map.cpp

Go to the documentation of this file.
00001 
00018 #include "mfcpch.h"
00019 #include          "statistc.h"
00020 #include          "gap_map.h"
00021 
00022 #define EXTERN
00023 
00026 EXTERN BOOL_VAR (gapmap_debug, FALSE, "Say which blocks have tables");
00027 EXTERN BOOL_VAR (gapmap_use_ends, FALSE,
00028 "Use large space at start and end of rows");
00029 EXTERN BOOL_VAR (gapmap_no_isolated_quanta, FALSE,
00030 "Ensure gaps not less than 2quanta wide");
00031 EXTERN double_VAR (gapmap_big_gaps, 1.75, "xht multiplier");
00034 /*
00035 moved explanation of what is a gapmap to the glossary
00036 see ccmain/tessvars.dox
00037 */
00038 
00042 GAPMAP::GAPMAP(                 //Constructor
00043                TO_BLOCK *block  //block
00044               ) {
00045   TO_ROW_IT row_it;              //row iterator
00046   TO_ROW *row;                   //current row
00047   BLOBNBOX_IT blob_it;           //iterator
00048   BOX blob_box;
00049   BOX prev_blob_box;
00050   INT16 gap_width;
00051   INT16 start_of_row;
00052   INT16 end_of_row;
00053   STATS xht_stats (0, 128);
00054   INT16 min_quantum;
00055   INT16 max_quantum;
00056   INT16 i;
00057 
00058   row_it.set_to_list (block->get_rows ());
00059   /*
00060     Find left and right extremes and bucket size
00061   */
00062   map = NULL;
00063   min_left = MAX_INT16;
00064   max_right = -MAX_INT16;
00065   total_rows = 0;
00066   any_tabs = FALSE;
00067   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00068     row = row_it.data ();
00069     if (!row->blob_list ()->empty ()) {
00070       total_rows++;
00071       xht_stats.add ((INT16) floor (row->xheight + 0.5), 1);
00072       blob_it.set_to_list (row->blob_list ());
00073       start_of_row = blob_it.data ()->bounding_box ().left ();
00074       end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
00075       if (min_left > start_of_row)
00076         min_left = start_of_row;
00077       if (max_right < end_of_row)
00078         max_right = end_of_row;
00079     }
00080   }
00081   if ((total_rows < 3) || (min_left >= max_right)) {
00082     total_rows = 0;
00083     min_left = max_right = 0;
00084     return;
00085   }
00086   bucket_size = (INT16) floor (xht_stats.median () + 0.5) / 2;
00087   map_max = (max_right - min_left) / bucket_size;
00088   map = (INT16 *) alloc_mem ((map_max + 1) * sizeof (INT16));
00089   for (i = 0; i <= map_max; i++)
00090     map[i] = 0;
00091 
00092   for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
00093     row = row_it.data ();
00094     if (!row->blob_list ()->empty ()) {
00095       blob_it.set_to_list (row->blob_list ());
00096       blob_it.mark_cycle_pt ();
00097       blob_box = box_next (&blob_it);
00098       prev_blob_box = blob_box;
00099       if (gapmap_use_ends) {
00100         /* Leading space */
00101         gap_width = blob_box.left () - min_left;
00102         if ((gap_width > gapmap_big_gaps * row->xheight)
00103         && gap_width > 2) {
00104           max_quantum = (blob_box.left () - min_left) / bucket_size;
00105           for (i = 0; i <= max_quantum; i++)
00106             map[i]++;
00107         }
00108       }
00109       while (!blob_it.cycled_list ()) {
00110         blob_box = box_next (&blob_it);
00111         gap_width = blob_box.left () - prev_blob_box.right ();
00112         if ((gap_width > gapmap_big_gaps * row->xheight)
00113         && gap_width > 2) {
00114           min_quantum =
00115             (prev_blob_box.right () - min_left) / bucket_size;
00116           max_quantum = (blob_box.left () - min_left) / bucket_size;
00117           for (i = min_quantum; i <= max_quantum; i++)
00118             map[i]++;
00119         }
00120         prev_blob_box = blob_box;
00121       }
00122       if (gapmap_use_ends) {
00123         /* Trailing space */
00124         gap_width = max_right - prev_blob_box.right ();
00125         if ((gap_width > gapmap_big_gaps * row->xheight)
00126         && gap_width > 2) {
00127           min_quantum =
00128             (prev_blob_box.right () - min_left) / bucket_size;
00129           for (i = min_quantum; i <= map_max; i++)
00130             map[i]++;
00131         }
00132       }
00133     }
00134   }
00135   for (i = 0; i <= map_max; i++) {
00136     if (map[i] > total_rows / 2) {
00137       if (gapmap_no_isolated_quanta &&
00138         (((i == 0) &&
00139         (map[i + 1] <= total_rows / 2)) ||
00140         ((i == map_max) &&
00141         (map[i - 1] <= total_rows / 2)) ||
00142         ((i > 0) &&
00143         (i < map_max) &&
00144         (map[i - 1] <= total_rows / 2) &&
00145       (map[i + 1] <= total_rows / 2)))) {
00146         map[i] = 0;              //prevent isolated quantum
00147       }
00148       else
00149         any_tabs = TRUE;
00150     }
00151   }
00152   if (gapmap_debug && any_tabs)
00153     tprintf ("Table found\n");
00154 }
00155 
00156 
00163 BOOL8 GAPMAP::table_gap(             //Is gap a table?
00164                         INT16 left,  //From here
00165                         INT16 right  //To here
00166                        ) {
00167   INT16 min_quantum;
00168   INT16 max_quantum;
00169   INT16 i;
00170   BOOL8 tab_found = FALSE;
00171 
00172   if (!any_tabs)
00173     return FALSE;
00174 
00175   min_quantum = (left - min_left) / bucket_size;
00176   max_quantum = (right - min_left) / bucket_size;
00177   for (i = min_quantum; (!tab_found && (i <= max_quantum)); i++)
00178     if (map[i] > total_rows / 2)
00179       tab_found = TRUE;
00180   return tab_found;
00181 }

Generated on Wed Feb 28 19:49:11 2007 for Tesseract by  doxygen 1.5.1