ccstruct/blread.cpp

Go to the documentation of this file.
00001 
00020 #include "mfcpch.h"
00021 #include          <stdlib.h>
00022 #ifdef __UNIX__
00023 #include          <assert.h>
00024 #endif
00025 #include          "scanutils.h"
00026 #include          "fileerr.h"
00027 #include          "imgtiff.h"
00028 #include          "pdclass.h"
00029 #include          "rwpoly.h"
00030 #include          "blread.h"
00031 
00032 #define PD_EXT        ".pd"      //raw INT32 block file (see read_pd_file)
00033 #define VEC_EXT       ".vec"     //accupage file (see read_vec_file)
00034 #define HPD_EXT       ".bl"      //hand pd file (see read_hpd_file)
00035                                  //unlv zone file (see read_unlv_file)
00036 #define UNLV_EXT            ".uzn"
00037 #define BLOCK_EXPANSION   8      //boundary expansion applied around vertices
00038 #define EXTERN                   //expands to nothing in this file
00039 
                                 //when TRUE, scan_hpd_blocks does not turn
                                 //PB_WEIRD page blocks into BLOCKs
00042 EXTERN BOOL_EVAR (ignore_weird_blocks, TRUE, "Don't read weird blocks");
                                 //file-local helper used by read_vec_file;
                                 //returns the box assigned to block->box
00045 static BOX convert_vec_block(                        //make non-rect block
00046                              VEC_ENTRY *entries,     //vectors
00047                              UINT16 entry_count,     //no of entries
00048                              INT32 ysize,            //image size
00049                              ICOORDELT_IT *left_it,  //block sides
00050                              ICOORDELT_IT *right_it);
00051 
00055 BOOL8 read_pd_file(                    //print list of sides
00056                    STRING name,        //basename of file
00057                    INT32 xsize,        //image size
00058                    INT32 ysize,        //image size
00059                    BLOCK_LIST *blocks  //output list
00060                   ) {
00061   FILE *pdfp;                    //file pointer
00062   BLOCK *block;                  //current block
00063   INT32 block_count;             //no of blocks
00064   INT32 junk_count;              //no of junks to read
00065   INT32 junks[4];                //junk elements
00066   INT32 vertex_count;            //boundary vertices
00067   INT32 xcoord;                  //current coords
00068   INT32 ycoord;
00069   INT32 prevx;                   //previous coords
00070   INT32 prevy;
00071   BLOCK_IT block_it = blocks;    //block iterator
00072   ICOORDELT_LIST dummy;          //for constructor
00073   ICOORDELT_IT left_it = &dummy; //iterator
00074   ICOORDELT_IT right_it = &dummy;//iterator
00075 
00076   if (read_hpd_file (name, xsize, ysize, blocks))
00077     return TRUE;                 //succeeded
00078   if (read_vec_file (name, xsize, ysize, blocks))
00079     return TRUE;                 //succeeded
00080   if (read_unlv_file (name, xsize, ysize, blocks))
00081     return TRUE;                 //succeeded
00082   name += PD_EXT;                //add extension
00083   if ((pdfp = fopen (name.string (), "r")) == NULL) {
00084                                  //make rect block
00085     block = new BLOCK (name.string (), TRUE, 0, 0, 0, 0, xsize, ysize);
00086     block_it.add_to_end (block); //on end of list
00087     return FALSE;                //didn't read one
00088   }
00089   else {
00090     if (fread (&block_count, sizeof (block_count), 1, pdfp) != 1)
00091       READFAILED.error ("read_pd_file", EXIT, "Block count");
00092     tprintf ("%d blocks in .pd file.\n", block_count);
00093     while (block_count > 0) {
00094       if (fread (&junk_count, sizeof (junk_count), 1, pdfp) != 1)
00095         READFAILED.error ("read_pd_file", EXIT, "Junk count");
00096       if (fread (&vertex_count, sizeof (vertex_count), 1, pdfp) != 1)
00097         READFAILED.error ("read_pd_file", EXIT, "Vertex count");
00098       block = new BLOCK;         //make a block
00099                                  //on end of list
00100       block_it.add_to_end (block);
00101       left_it.set_to_list (&block->leftside);
00102       right_it.set_to_list (&block->rightside);
00103 
00104                                  //read a pair
00105       get_pd_vertex (pdfp, xsize, ysize, &block->box, xcoord, ycoord);
00106       vertex_count -= 2;         //count read ones
00107       prevx = xcoord;
00108       do {
00109         if (xcoord == prevx) {
00110           if (!right_it.empty ()) {
00111             if (right_it.data ()->x () <= xcoord + BLOCK_EXPANSION)
00112               right_it.data ()->set_y (right_it.data ()->y () +
00113                 BLOCK_EXPANSION);
00114             else
00115               right_it.data ()->set_y (right_it.data ()->y () -
00116                 BLOCK_EXPANSION);
00117           }
00118           right_it.
00119             add_before_then_move (new
00120             ICOORDELT (xcoord + BLOCK_EXPANSION,
00121             ycoord));
00122         }
00123         prevx = xcoord;          //remember previous
00124         prevy = ycoord;
00125         get_pd_vertex (pdfp, xsize, ysize, &block->box, xcoord, ycoord);
00126         vertex_count -= 2;       //count read ones
00127       }
00128       while (ycoord <= prevy);
00129       right_it.data ()->set_y (right_it.data ()->y () - BLOCK_EXPANSION);
00130 
00131                                  //start of left
00132       left_it.add_to_end (new ICOORDELT (prevx - BLOCK_EXPANSION, prevy - BLOCK_EXPANSION));
00133 
00134       do {
00135         prevx = xcoord;          //remember previous
00136         get_pd_vertex (pdfp, xsize, ysize, &block->box, xcoord, ycoord);
00137         vertex_count -= 2;
00138         if (xcoord != prevx && vertex_count > 0) {
00139           if (xcoord > prevx)
00140             left_it.
00141               add_to_end (new
00142               ICOORDELT (xcoord - BLOCK_EXPANSION,
00143               ycoord + BLOCK_EXPANSION));
00144           else
00145             left_it.
00146               add_to_end (new
00147               ICOORDELT (xcoord - BLOCK_EXPANSION,
00148               ycoord - BLOCK_EXPANSION));
00149         }
00150         else if (vertex_count == 0)
00151           left_it.add_to_end (new ICOORDELT (prevx - BLOCK_EXPANSION,
00152               ycoord + BLOCK_EXPANSION));
00153       }
00154       while (vertex_count > 0);  //until all read
00155 
00156       while (junk_count > 0) {
00157         if (fread (junks, sizeof (INT32), 4, pdfp) != 4)
00158           READFAILED.error ("read_pd_file", EXIT, "Junk coords");
00159         junk_count--;
00160       }
00161       block_count--;             //count read blocks
00162     }
00163   }
00164   fclose(pdfp);
00165   return TRUE;                   //read one
00166 }
00167 
00168 
00176 void get_pd_vertex(                //get new vertex
00177                    FILE *pdfp,     //file to read
00178                    INT32 xsize,    //image size
00179                    INT32 ysize,    //image size
00180                    BOX *box,       //bounding box
00181                    INT32 &xcoord,  //output coords
00182                    INT32 &ycoord) {
00183   BOX new_coord;                 //expansion box
00184 
00185                                  //get new coords
00186   if (fread (&xcoord, sizeof (xcoord), 1, pdfp) != 1)
00187     READFAILED.error ("read_pd_file", EXIT, "Xcoord");
00188   if (fread (&ycoord, sizeof (ycoord), 1, pdfp) != 1)
00189     READFAILED.error ("read_pd_file", EXIT, "Xcoord");
00190   ycoord = ysize - ycoord;       //invert y
00191   if (xcoord < BLOCK_EXPANSION)
00192     xcoord = BLOCK_EXPANSION;    //clip to limits
00193   if (xcoord > xsize - BLOCK_EXPANSION)
00194     xcoord = xsize - BLOCK_EXPANSION;
00195   if (ycoord < BLOCK_EXPANSION)
00196     ycoord = BLOCK_EXPANSION;
00197   if (ycoord > ysize - BLOCK_EXPANSION)
00198     ycoord = ysize - BLOCK_EXPANSION;
00199 
00200   new_coord =
00201     BOX (ICOORD (xcoord - BLOCK_EXPANSION, ycoord - BLOCK_EXPANSION),
00202     ICOORD (xcoord + BLOCK_EXPANSION, ycoord + BLOCK_EXPANSION));
00203   (*box) += new_coord;
00204 }
00205 
00206 
00212 BOOL8 read_hpd_file(                    //print list of sides
00213                     STRING name,        //basename of file
00214                     INT32 xsize,        //image size
00215                     INT32 ysize,        //image size
00216                     BLOCK_LIST *blocks  //output list
00217                    ) {
00218   FILE *pdfp;                    //file pointer
00219   PAGE_BLOCK_LIST *page_blocks;
00220   INT32 block_no;                //no of blocks
00221   BLOCK_IT block_it = blocks;    //block iterator
00222 
00223   name += HPD_EXT;               //add extension
00224   if ((pdfp = fopen (name.string (), "r")) == NULL) {
00225     return FALSE;                //can't find it
00226   }
00227   fclose(pdfp);
00228   page_blocks = read_poly_blocks (name.string ());
00229   block_no = 0;
00230   scan_hpd_blocks (name.string (), page_blocks, block_no, &block_it);
00231   tprintf ("Text region count=%d\n", block_no);
00232   return TRUE;                   //read one
00233 }
00234 
00235 
00241 void scan_hpd_blocks(                               //print list of sides
00242                      const char *name,              //block label
00243                      PAGE_BLOCK_LIST *page_blocks,  //head of full pag
00244                      INT32 &block_no,               //no of blocks
00245                      BLOCK_IT *block_it             //block iterator
00246                     ) {
00247   BLOCK *block;                  //current block
00248                                  //page blocks
00249   PAGE_BLOCK_IT pb_it = page_blocks;
00250   PAGE_BLOCK *current_block;
00251   TEXT_REGION_IT tr_it;
00252   TEXT_BLOCK *tb;
00253   TEXT_REGION *tr;
00254   BOX *block_box;                //from text region
00255 
00256   for (pb_it.mark_cycle_pt (); !pb_it.cycled_list (); pb_it.forward ()) {
00257     current_block = pb_it.data ();
00258     if (current_block->type () == PB_TEXT) {
00259       tb = (TEXT_BLOCK *) current_block;
00260       if (!tb->regions ()->empty ()) {
00261         tr_it.set_to_list (tb->regions ());
00262         for (tr_it.mark_cycle_pt ();
00263         !tr_it.cycled_list (); tr_it.forward ()) {
00264           block_no++;
00265           tr = tr_it.data ();
00266           block_box = tr->bounding_box ();
00267           block = new BLOCK (name, TRUE, 0, 0,
00268             block_box->left (), block_box->bottom (),
00269             block_box->right (), block_box->top ());
00270           block->hand_block = tr;
00271           block->hand_poly = tr;
00272           block_it->add_after_then_move (block);
00273         }
00274       }
00275     }
00276     else if (current_block->type () == PB_WEIRD
00277       && !ignore_weird_blocks
00278     && ((WEIRD_BLOCK *) current_block)->id_no () > 0) {
00279       block_no++;
00280       block_box = current_block->bounding_box ();
00281       block = new BLOCK (name, TRUE, 0, 0,
00282         block_box->left (), block_box->bottom (),
00283         block_box->right (), block_box->top ());
00284       block->hand_block = NULL;
00285       block->hand_poly = current_block;
00286       block_it->add_after_then_move (block);
00287     }
00288     if (!current_block->child ()->empty ())
00289       scan_hpd_blocks (name, current_block->child (), block_no, block_it);
00290   }
00291 }
00292 
00293 
00299 BOOL8 read_vec_file(                    //print list of sides
00300                     STRING name,        //basename of file
00301                     INT32 xsize,        //image size
00302                     INT32 ysize,        //image size
00303                     BLOCK_LIST *blocks  //output list
00304                    ) {
00305   FILE *pdfp;                    //file pointer
00306   BLOCK *block;                  //current block
00307   INT32 block_no;                //no of blocks
00308   INT32 block_index;             //current blocks
00309   INT32 vector_count;            //total vectors
00310   VEC_HEADER header;             //file header
00311   BLOCK_HEADER *vec_blocks;      //blocks from file
00312   VEC_ENTRY *vec_entries;        //vectors from file
00313   BLOCK_IT block_it = blocks;    //block iterator
00314   ICOORDELT_IT left_it;          //iterators
00315   ICOORDELT_IT right_it;
00316 
00317   name += VEC_EXT;               //add extension
00318   if ((pdfp = fopen (name.string (), "r")) == NULL) {
00319     return FALSE;                //can't find it
00320   }
00321   if (fread (&header, sizeof (header), 1, pdfp) != 1)
00322     READFAILED.error ("read_vec_file", EXIT, "Header");
00323                                  //from intel
00324   header.filesize = reverse32 (header.filesize);
00325   header.bytesize = reverse16 (header.bytesize);
00326   header.arraysize = reverse16 (header.arraysize);
00327   header.width = reverse16 (header.width);
00328   header.height = reverse16 (header.height);
00329   header.res = reverse16 (header.res);
00330   header.bpp = reverse16 (header.bpp);
00331   tprintf ("%d blocks in %s file:", header.arraysize, VEC_EXT);
00332   vector_count = header.filesize - header.arraysize * sizeof (BLOCK_HEADER);
00333   vector_count /= sizeof (VEC_ENTRY);
00334   vec_blocks =
00335     (BLOCK_HEADER *) alloc_mem (header.arraysize * sizeof (BLOCK_HEADER));
00336   vec_entries = (VEC_ENTRY *) alloc_mem (vector_count * sizeof (VEC_ENTRY));
00337   xsize = header.width;          //real image size
00338   ysize = header.height;
00339   if (fread (vec_blocks, sizeof (BLOCK_HEADER), header.arraysize, pdfp)
00340     != static_cast<size_t>(header.arraysize))
00341     READFAILED.error ("read_vec_file", EXIT, "Blocks");
00342   if (fread (vec_entries, sizeof (VEC_ENTRY), vector_count, pdfp)
00343     != static_cast<size_t>(vector_count))
00344     READFAILED.error ("read_vec_file", EXIT, "Vectors");
00345   for (block_index = 0; block_index < header.arraysize; block_index++) {
00346     vec_blocks[block_index].offset =
00347       reverse16 (vec_blocks[block_index].offset);
00348     vec_blocks[block_index].order =
00349       reverse16 (vec_blocks[block_index].order);
00350     vec_blocks[block_index].entries =
00351       reverse16 (vec_blocks[block_index].entries);
00352     vec_blocks[block_index].charsize =
00353       reverse16 (vec_blocks[block_index].charsize);
00354   }
00355   for (block_index = 0; block_index < vector_count; block_index++) {
00356     vec_entries[block_index].start =
00357       ICOORD (reverse16 (vec_entries[block_index].start.x ()),
00358       reverse16 (vec_entries[block_index].start.y ()));
00359     vec_entries[block_index].end =
00360       ICOORD (reverse16 (vec_entries[block_index].end.x ()),
00361       reverse16 (vec_entries[block_index].end.y ()));
00362   }
00363   for (block_no = 1; block_no <= header.arraysize; block_no++) {
00364     for (block_index = 0; block_index < header.arraysize; block_index++) {
00365       if (vec_blocks[block_index].order == block_no
00366       && vec_blocks[block_index].valid) {
00367         block = new BLOCK;
00368         left_it.set_to_list (&block->leftside);
00369         right_it.set_to_list (&block->rightside);
00370         block->box =
00371           convert_vec_block (&vec_entries
00372           [vec_blocks[block_index].offset],
00373           vec_blocks[block_index].entries, ysize,
00374           &left_it, &right_it);
00375         block->set_xheight (vec_blocks[block_index].charsize);
00376                                  //on end of list
00377         block_it.add_to_end (block);
00378         //                              tprintf("Block at (%d,%d)->(%d,%d) has index %d and order %d\n",
00379         //                                      block->box.left(),
00380         //                                      block->box.bottom(),
00381         //                                      block->box.right(),
00382         //                                      block->box.top(),
00383         //                                      block_index,vec_blocks[block_index].order);
00384       }
00385     }
00386   }
00387   free_mem(vec_blocks);
00388   free_mem(vec_entries);
00389   tprintf ("%d valid\n", block_it.length ());
00390   fclose(pdfp);
00391   return TRUE;                   //read one
00392 }
00393 
00394 
00400 static BOX convert_vec_block(                        //make non-rect block
00401                              VEC_ENTRY *entries,     //vectors
00402                              UINT16 entry_count,     //no of entries
00403                              INT32 ysize,            //image size
00404                              ICOORDELT_IT *left_it,  //block sides
00405                              ICOORDELT_IT *right_it) {
00406   BOX block_box;                 //bounding box
00407   BOX vec_box;                   //box of vec
00408   ICOORD box_point;              //expanded coord
00409   ICOORD shift_vec;              //for box expansion
00410   ICOORD prev_pt;                //previous coord
00411   ICOORD end_pt;                 //end of vector
00412   INT32 vertex_index;            //boundary vertices
00413 
00414   for (vertex_index = 0; vertex_index < entry_count; vertex_index++) {
00415     entries[vertex_index].start = ICOORD (entries[vertex_index].start.x (),
00416       ysize - 1 -
00417       entries[vertex_index].start.y ());
00418     entries[vertex_index].end =
00419       ICOORD (entries[vertex_index].end.x (),
00420       ysize - 1 - entries[vertex_index].end.y ());
00421     vec_box = BOX (entries[vertex_index].start, entries[vertex_index].end);
00422     block_box += vec_box;        //find total bounds
00423   }
00424 
00425   for (vertex_index = 0; vertex_index < entry_count
00426     && (entries[vertex_index].start.y () != block_box.bottom ()
00427     || entries[vertex_index].end.y () != block_box.bottom ());
00428     vertex_index++);
00429   ASSERT_HOST (vertex_index < entry_count);
00430   prev_pt = entries[vertex_index].start;
00431   end_pt = entries[vertex_index].end;
00432   do {
00433     for (vertex_index = 0; vertex_index < entry_count
00434       && entries[vertex_index].start != end_pt; vertex_index++);
00435                                  //found start of vertical
00436     ASSERT_HOST (vertex_index < entry_count);
00437     box_point = entries[vertex_index].start;
00438     if (box_point.x () <= prev_pt.x ())
00439       shift_vec = ICOORD (-BLOCK_EXPANSION, -BLOCK_EXPANSION);
00440     else
00441       shift_vec = ICOORD (-BLOCK_EXPANSION, BLOCK_EXPANSION);
00442     left_it->add_to_end (new ICOORDELT (box_point + shift_vec));
00443     prev_pt = box_point;
00444     for (vertex_index = 0; vertex_index < entry_count
00445       && entries[vertex_index].start != end_pt; vertex_index++);
00446                                  //found horizontal
00447     ASSERT_HOST (vertex_index < entry_count);
00448     end_pt = entries[vertex_index].end;
00449   }
00450   while (end_pt.y () < block_box.top ());
00451   shift_vec = ICOORD (-BLOCK_EXPANSION, BLOCK_EXPANSION);
00452   left_it->add_to_end (new ICOORDELT (end_pt + shift_vec));
00453 
00454   for (vertex_index = 0; vertex_index < entry_count
00455     && (entries[vertex_index].start.y () != block_box.top ()
00456     || entries[vertex_index].end.y () != block_box.top ());
00457     vertex_index++);
00458   ASSERT_HOST (vertex_index < entry_count);
00459   prev_pt = entries[vertex_index].start;
00460   end_pt = entries[vertex_index].end;
00461   do {
00462     for (vertex_index = 0; vertex_index < entry_count
00463       && entries[vertex_index].start != end_pt; vertex_index++);
00464                                  //found start of vertical
00465     ASSERT_HOST (vertex_index < entry_count);
00466     box_point = entries[vertex_index].start;
00467     if (box_point.x () < prev_pt.x ())
00468       shift_vec = ICOORD (BLOCK_EXPANSION, -BLOCK_EXPANSION);
00469     else
00470       shift_vec = ICOORD (BLOCK_EXPANSION, BLOCK_EXPANSION);
00471     right_it->add_before_then_move (new ICOORDELT (box_point + shift_vec));
00472     prev_pt = box_point;
00473     for (vertex_index = 0; vertex_index < entry_count
00474       && entries[vertex_index].start != end_pt; vertex_index++);
00475                                  //found horizontal
00476     ASSERT_HOST (vertex_index < entry_count);
00477     end_pt = entries[vertex_index].end;
00478   }
00479   while (end_pt.y () > block_box.bottom ());
00480   shift_vec = ICOORD (BLOCK_EXPANSION, -BLOCK_EXPANSION);
00481   right_it->add_before_then_move (new ICOORDELT (end_pt + shift_vec));
00482 
00483   shift_vec = ICOORD (BLOCK_EXPANSION, BLOCK_EXPANSION);
00484   box_point = block_box.botleft () - shift_vec;
00485   end_pt = block_box.topright () + shift_vec;
00486   return BOX (box_point, end_pt);
00487 }
00488 
00489 
00493 BOOL8 read_unlv_file(                    //print list of sides
00494                      STRING name,        //basename of file
00495                      INT32 xsize,        //image size
00496                      INT32 ysize,        //image size
00497                      BLOCK_LIST *blocks  //output list
00498                     ) {
00499   FILE *pdfp;                    //file pointer
00500   BLOCK *block;                  //current block
00501   int x;                         //current top-down coords
00502   int y;
00503   int width;                     //of current block
00504   int height;
00505   BLOCK_IT block_it = blocks;    //block iterator
00506 
00507   name += UNLV_EXT;              //add extension
00508   if ((pdfp = fopen (name.string (), "r")) == NULL) {
00509     return FALSE;                //didn't read one
00510   }
00511   else {
00512     while (fscanf (pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) {
00513                                  //make rect block
00514       block = new BLOCK (name.string (), TRUE, 0, 0, (INT16) x, (INT16) (ysize - 1 - y - height), (INT16) (x + width), (INT16) (ysize - 1 - y));
00515                                  //on end of list
00516       block_it.add_to_end (block);
00517     }
00518     fclose(pdfp);
00519   }
00520   return true;
00521 }

Generated on Wed Feb 28 19:49:08 2007 for Tesseract by  doxygen 1.5.1