textord/edgblob.h File Reference

#include "grphics.h"
#include "varable.h"
#include "img.h"
#include "ocrblock.h"
#include "coutln.h"
#include "crakedge.h"
#include "notdll.h"

Go to the source code of this file.

Classes

Defines

Functions


Define Documentation

#define BUCKETSIZE   16

FIX: Must be the same size as an INT16

Definition at line 31 of file edgblob.h.

Referenced by OL_BUCKETS::count_children(), OL_BUCKETS::extract_children(), OL_BUCKETS::OL_BUCKETS(), and OL_BUCKETS::operator()().


Function Documentation

BOOL8 capture_children ( OL_BUCKETS *  buckets,
C_BLOB_IT *  reject_it,
C_OUTLINE_IT *  blob_it 
)

Find children.

Parameters:
buckets Bucket-sort Class
reject_it Dead grandchildren
blob_it Output outlines
Returns:
TRUE if outline legal
Finds all neighbouring outlines that are children of this outline, distinguishing true children (e.g. the two inner outlines of 'B' or the inner outline of 'O') from other relationships. If the children are valid, moves them to the output list and returns TRUE; otherwise, declares this outline illegal and returns FALSE.

Only CLEARLY illegal outlines are rejected - e.g., a box around a letter - but when that happens, the outlines overlapped are ALSO rejected (so both the box around the letter and the letter itself are gone).

Definition at line 444 of file edgblob.cpp.

References OL_BUCKETS::count_children(), cprintf(), OL_BUCKETS::extract_children(), FALSE, and TRUE.

Referenced by empty_buckets().

00448                         {  //validate and collect the children of the outline at blob_it
00449   BOOL8 anydone;                 //TRUE if this pass rejected an outline
00450   C_OUTLINE *outline;            //master (parent) outline
00451   C_OUTLINE *child;              //child under test
00452   C_OUTLINE_IT test_it;          //scans the list for grandchildren
00453   INT32 child_count;             //no of children
00454   C_BLOB *blob;                  //blob wrapping a rejected outline
00455   C_OUTLINE_LIST r_list;         //holds the outline being rejected
00456   C_OUTLINE_IT r_it;             //iterator over r_list
00457 
00458   outline = blob_it->data ();    //parent is the current outline of the output list
00459   child_count = buckets->count_children (outline, edges_children_count_limit);
00460 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00461   cprintf("L");
00462 #endif
00463   if (child_count > edges_children_count_limit)
00464     return FALSE;                //over the limit: outline is declared illegal
00465   if (child_count == 0)
00466     return TRUE;                 //no children, nothing to capture
00467                                  //get single level: move the children into the output list
00468   buckets->extract_children (outline, blob_it);
00469   if (child_count == 1) {
00470 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00471   cprintf("K");
00472 #endif
00473     return TRUE;                 //single child: grandchild scan is skipped
00474   }
00475   do {                           //repeat until a full pass rejects nothing
00476     anydone = FALSE;
00477     blob_it->move_to_first ();
00478     for (blob_it->mark_cycle_pt (); !blob_it->cycled_list ();
00479     blob_it->forward ()) {
00480       child = blob_it->data ();
00481       if (child != outline) {    //the parent itself is not a candidate child
00482         for (test_it = *blob_it, test_it.mark_cycle_pt ();
00483         !test_it.cycled_list (); test_it.forward ()) {
00484           if (test_it.data () != child && *test_it.data () < *child) {
00485             r_it.set_to_list (&r_list);  //NOTE(review): operator< presumably means "lies inside" - confirm in coutln.h
00486             r_it.add_after_then_move (test_it.extract ());
00487                                  //turn the extracted outline into a reject blob
00488             blob = new C_BLOB (&r_list);
00489             reject_it->add_after_then_move (blob);
00490             anydone = TRUE;
00491 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00492   cprintf("N");
00493 #endif
00494           }
00495         }
00496         if (anydone) {
00497 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00498   cprintf("M");
00499 #endif
00500           break;                 //list was modified: leave the for loop and restart the pass
00501       }
00502 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00503   cprintf("L");
00504 #endif
00505       }
00506     }
00507   }
00508   while (anydone);               //got to restart while outlines are still being rejected
00509 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00510   cprintf("O\n");
00511 #endif
00512   return TRUE;                   //outline and its remaining children are legal
00513 }//capture_children

void empty_buckets ( BLOCK *  block,
OL_BUCKETS *  buckets 
)

Find blobs.

Parameters:
block Block to scan
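buckets Buckets to empty
Empty the buckets by repeatedly taking an outermost outline, capturing its children, and turning them into a blob. Blobs judged good are added to the block's blob list; the others are added to the block's reject list.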

Definition at line 377 of file edgblob.cpp.

References capture_children(), cprintf(), OL_BUCKETS::scan_next(), and OL_BUCKETS::start_scan().

Referenced by outlines_to_blobs().

00380                     {  //turn the bucketed outlines into blobs on the block
00381   BOOL8 good_blob;               //TRUE if capture_children accepted the outline
00382   C_OUTLINE_LIST outlines;       //outlines of the blob being built
00383                                  //iterator over outlines
00384   C_OUTLINE_IT out_it = &outlines;
00385   C_OUTLINE_IT bucket_it = buckets->start_scan ();
00386   C_OUTLINE_IT parent_it;        //parent outline
00387   C_BLOB *blob;                  //new blob
00388   C_BLOB_IT good_blobs = block->blob_list ();
00389   C_BLOB_IT junk_blobs = block->reject_blobs ();
00390 
00391 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00392   cprintf("1");
00393 #endif
00394   while (!bucket_it.empty ()) {  //one blob is produced per iteration
00395     out_it.set_to_list (&outlines);
00396     do {
00397       parent_it = bucket_it;     //find outermost: current candidate parent
00398       do
00399          bucket_it.forward ();
00400       while (!bucket_it.at_first ()
00401         && !(*parent_it.data () < *bucket_it.data ()));
00402     }
00403     while (!bucket_it.at_first ());  //stop once the scan has wrapped to the list start
00404 
00405                                  //move the parent outline to the new list
00406     out_it.add_after_then_move (parent_it.extract ());
00407     good_blob = capture_children (buckets, &junk_blobs, &out_it);
00408     blob = new C_BLOB (&outlines);   //built whether or not the outline was accepted
00409     if (good_blob) {
00410       good_blobs.add_after_then_move (blob);
00411 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00412   cprintf("2");
00413 #endif
00414     }
00415    else {
00416       junk_blobs.add_after_then_move (blob);
00417 #if defined(TEXT_VERBOSE) && defined(TV_FOCUSI)
00418   cprintf("3");
00419 #endif
00420     }
00421 
00422     bucket_it.set_to_list (buckets->scan_next ());  //NOTE(review): presumably the next non-empty bucket - confirm in OL_BUCKETS
00423   }
00424 }

void extract_edges ( WINDOW  window,
IMAGE image,
IMAGE t_image,
ICOORD  page_tr,
BLOCK *  block 
)

Find blobs by running edge-detector over block.

Parameters:
window Window for output
image Image to scan
t_image Thresholded image
page_tr Corner of page
block Pointer to BLOCK (will be updated)
Returns:
None; the parameter block is updated.

Definition at line 289 of file edgblob.cpp.

References get_outlines(), and outlines_to_blobs().

Referenced by edges_and_textord().

00297                     {  //get the block's outlines, then group them into blobs
00298   ICOORD bleft;                  //block box, bottom-left
00299   ICOORD tright;                 //block box, top-right
00300   C_OUTLINE_LIST outlines;       //outlines in block
00301                                  //iterator over outlines
00302   C_OUTLINE_IT out_it = &outlines;
00303 
00304 #ifndef GRAPHICS_DISABLED
00305   get_outlines (window, image, t_image, page_tr, (PDBLK *) block, &out_it);
00306 #else
00307   get_outlines (image, t_image, page_tr, (PDBLK *) block, &out_it);  //no window argument in this build
00308 #endif
00309                                  //fetch block box into bleft/tright
00310   block->bounding_box (bleft, tright);
00311                                  //make blobs from the collected outlines
00312   outlines_to_blobs(block, bleft, tright, &outlines); 
00313 
00314 }

void fill_buckets ( C_OUTLINE_LIST *  outlines,
OL_BUCKETS *  buckets 
)

Find blobs.

Parameters:
outlines Outlines in block
buckets Output buckets
Move every outline from the list into the bucket selected by the left and bottom coordinates of its bounding box.

Definition at line 349 of file edgblob.cpp.

References BOX::bottom(), and BOX::left().

Referenced by outlines_to_blobs().

00352                    {  //distribute the outlines into buckets by bounding-box position
00353   BOX ol_box;                    //outline box
00354   C_OUTLINE_IT out_it = outlines;//iterator over the input list
00355   C_OUTLINE_IT bucket_it;        //iterator in bucket
00356   C_OUTLINE *outline;            //current outline
00357 
00358   int t=0;                       //NOTE(review): not used anywhere in this function
00359   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00360     outline = out_it.extract (); //take off the input list
00361                                  //get box; its left/bottom pick the bucket
00362     ol_box = outline->bounding_box ();
00363     bucket_it.set_to_list ((*buckets) (ol_box.left (), ol_box.bottom ()));
00364     bucket_it.add_to_end (outline);  //append to the chosen bucket
00365   }
00366 }

void outlines_to_blobs ( BLOCK *  block,
ICOORD  bleft,
ICOORD  tright,
C_OUTLINE_LIST *  outlines 
)

Find blobs.

Parameters:
block Block to scan
bleft Block box, bottom-left
tright Block box, top-right
outlines Outlines in block
Returns:
None; the parameter outlines is modified (its outlines are moved out of the list).
Gather together outlines into blobs using the usual bucket sort.

Definition at line 328 of file edgblob.cpp.

References empty_buckets(), and fill_buckets().

Referenced by extract_edges().

00332                                                  {  //bucket the outlines, then build blobs from the buckets
00333                                  //make buckets from the block box corners
00334   OL_BUCKETS buckets(bleft, tright); 
00335 
00336   fill_buckets(outlines, &buckets);  //moves every outline off the list into a bucket
00337   empty_buckets(block, &buckets);//Makes blobs WHILE emptying buckets
00338 }


Generated on Wed Feb 28 19:49:24 2007 for Tesseract by  doxygen 1.5.1