00001 
00020 #include "mfcpch.h"
00021 #include          <stdlib.h>
00022 #ifdef __UNIX__
00023 #include          <assert.h>
00024 #endif
00025 #include          "scanutils.h"
00026 #include          "fileerr.h"
00027 #include          "imgtiff.h"
00028 #include          "pdclass.h"
00029 #include          "rwpoly.h"
00030 #include          "blread.h"
00031 
00032 #define PD_EXT        ".pd"      //binary block file read by read_pd_file
00033 #define VEC_EXT       ".vec"     //accupage file, read by read_vec_file
00034 #define HPD_EXT       ".bl"      //hand pd file, read by read_hpd_file
00035                                  
00036 #define UNLV_EXT            ".uzn"  //text file of "x y width height label" lines, read by read_unlv_file
00037 #define BLOCK_EXPANSION   8      //amount added to/subtracted from vertex coordinates to push block boundaries outwards
00038 #define EXTERN                   //expands to nothing in this file
00039 
00042 EXTERN BOOL_EVAR (ignore_weird_blocks, TRUE, "Don't read weird blocks");  //when set, scan_hpd_blocks does not create blocks for PB_WEIRD page blocks
00045 static BOX convert_vec_block(                        //forward declaration; the definition appears later in this file
00046                              VEC_ENTRY *entries,     //vectors of one block (y coordinates are flipped in place)
00047                              UINT16 entry_count,     //number of vectors in entries
00048                              INT32 ysize,            //image height used for the y flip
00049                              ICOORDELT_IT *left_it,  //receives the expanded left side points
00050                              ICOORDELT_IT *right_it);  //receives the expanded right side points
00051 
00055 BOOL8 read_pd_file(                    
00056                    STRING name,        
00057                    INT32 xsize,        
00058                    INT32 ysize,        
00059                    BLOCK_LIST *blocks  
00060                   ) {
00061   FILE *pdfp;                    
00062   BLOCK *block;                  
00063   INT32 block_count;             
00064   INT32 junk_count;              
00065   INT32 junks[4];                
00066   INT32 vertex_count;            
00067   INT32 xcoord;                  
00068   INT32 ycoord;
00069   INT32 prevx;                   
00070   INT32 prevy;
00071   BLOCK_IT block_it = blocks;    
00072   ICOORDELT_LIST dummy;          
00073   ICOORDELT_IT left_it = &dummy; 
00074   ICOORDELT_IT right_it = &dummy;
00075 
00076   if (read_hpd_file (name, xsize, ysize, blocks))
00077     return TRUE;                 
00078   if (read_vec_file (name, xsize, ysize, blocks))
00079     return TRUE;                 
00080   if (read_unlv_file (name, xsize, ysize, blocks))
00081     return TRUE;                 
00082   name += PD_EXT;                
00083   if ((pdfp = fopen (name.string (), "r")) == NULL) {
00084                                  
00085     block = new BLOCK (name.string (), TRUE, 0, 0, 0, 0, xsize, ysize);
00086     block_it.add_to_end (block); 
00087     return FALSE;                
00088   }
00089   else {
00090     if (fread (&block_count, sizeof (block_count), 1, pdfp) != 1)
00091       READFAILED.error ("read_pd_file", EXIT, "Block count");
00092     tprintf ("%d blocks in .pd file.\n", block_count);
00093     while (block_count > 0) {
00094       if (fread (&junk_count, sizeof (junk_count), 1, pdfp) != 1)
00095         READFAILED.error ("read_pd_file", EXIT, "Junk count");
00096       if (fread (&vertex_count, sizeof (vertex_count), 1, pdfp) != 1)
00097         READFAILED.error ("read_pd_file", EXIT, "Vertex count");
00098       block = new BLOCK;         
00099                                  
00100       block_it.add_to_end (block);
00101       left_it.set_to_list (&block->leftside);
00102       right_it.set_to_list (&block->rightside);
00103 
00104                                  
00105       get_pd_vertex (pdfp, xsize, ysize, &block->box, xcoord, ycoord);
00106       vertex_count -= 2;         
00107       prevx = xcoord;
00108       do {
00109         if (xcoord == prevx) {
00110           if (!right_it.empty ()) {
00111             if (right_it.data ()->x () <= xcoord + BLOCK_EXPANSION)
00112               right_it.data ()->set_y (right_it.data ()->y () +
00113                 BLOCK_EXPANSION);
00114             else
00115               right_it.data ()->set_y (right_it.data ()->y () -
00116                 BLOCK_EXPANSION);
00117           }
00118           right_it.
00119             add_before_then_move (new
00120             ICOORDELT (xcoord + BLOCK_EXPANSION,
00121             ycoord));
00122         }
00123         prevx = xcoord;          
00124         prevy = ycoord;
00125         get_pd_vertex (pdfp, xsize, ysize, &block->box, xcoord, ycoord);
00126         vertex_count -= 2;       
00127       }
00128       while (ycoord <= prevy);
00129       right_it.data ()->set_y (right_it.data ()->y () - BLOCK_EXPANSION);
00130 
00131                                  
00132       left_it.add_to_end (new ICOORDELT (prevx - BLOCK_EXPANSION, prevy - BLOCK_EXPANSION));
00133 
00134       do {
00135         prevx = xcoord;          
00136         get_pd_vertex (pdfp, xsize, ysize, &block->box, xcoord, ycoord);
00137         vertex_count -= 2;
00138         if (xcoord != prevx && vertex_count > 0) {
00139           if (xcoord > prevx)
00140             left_it.
00141               add_to_end (new
00142               ICOORDELT (xcoord - BLOCK_EXPANSION,
00143               ycoord + BLOCK_EXPANSION));
00144           else
00145             left_it.
00146               add_to_end (new
00147               ICOORDELT (xcoord - BLOCK_EXPANSION,
00148               ycoord - BLOCK_EXPANSION));
00149         }
00150         else if (vertex_count == 0)
00151           left_it.add_to_end (new ICOORDELT (prevx - BLOCK_EXPANSION,
00152               ycoord + BLOCK_EXPANSION));
00153       }
00154       while (vertex_count > 0);  
00155 
00156       while (junk_count > 0) {
00157         if (fread (junks, sizeof (INT32), 4, pdfp) != 4)
00158           READFAILED.error ("read_pd_file", EXIT, "Junk coords");
00159         junk_count--;
00160       }
00161       block_count--;             
00162     }
00163   }
00164   fclose(pdfp);
00165   return TRUE;                   
00166 }
00167 
00168 
00176 void get_pd_vertex(                
00177                    FILE *pdfp,     
00178                    INT32 xsize,    
00179                    INT32 ysize,    
00180                    BOX *box,       
00181                    INT32 &xcoord,  
00182                    INT32 &ycoord) {
00183   BOX new_coord;                 
00184 
00185                                  
00186   if (fread (&xcoord, sizeof (xcoord), 1, pdfp) != 1)
00187     READFAILED.error ("read_pd_file", EXIT, "Xcoord");
00188   if (fread (&ycoord, sizeof (ycoord), 1, pdfp) != 1)
00189     READFAILED.error ("read_pd_file", EXIT, "Xcoord");
00190   ycoord = ysize - ycoord;       
00191   if (xcoord < BLOCK_EXPANSION)
00192     xcoord = BLOCK_EXPANSION;    
00193   if (xcoord > xsize - BLOCK_EXPANSION)
00194     xcoord = xsize - BLOCK_EXPANSION;
00195   if (ycoord < BLOCK_EXPANSION)
00196     ycoord = BLOCK_EXPANSION;
00197   if (ycoord > ysize - BLOCK_EXPANSION)
00198     ycoord = ysize - BLOCK_EXPANSION;
00199 
00200   new_coord =
00201     BOX (ICOORD (xcoord - BLOCK_EXPANSION, ycoord - BLOCK_EXPANSION),
00202     ICOORD (xcoord + BLOCK_EXPANSION, ycoord + BLOCK_EXPANSION));
00203   (*box) += new_coord;
00204 }
00205 
00206 
00212 BOOL8 read_hpd_file(                    
00213                     STRING name,        
00214                     INT32 xsize,        
00215                     INT32 ysize,        
00216                     BLOCK_LIST *blocks  
00217                    ) {
00218   FILE *pdfp;                    
00219   PAGE_BLOCK_LIST *page_blocks;
00220   INT32 block_no;                
00221   BLOCK_IT block_it = blocks;    
00222 
00223   name += HPD_EXT;               
00224   if ((pdfp = fopen (name.string (), "r")) == NULL) {
00225     return FALSE;                
00226   }
00227   fclose(pdfp);
00228   page_blocks = read_poly_blocks (name.string ());
00229   block_no = 0;
00230   scan_hpd_blocks (name.string (), page_blocks, block_no, &block_it);
00231   tprintf ("Text region count=%d\n", block_no);
00232   return TRUE;                   
00233 }
00234 
00235 
00241 void scan_hpd_blocks(                               
00242                      const char *name,              
00243                      PAGE_BLOCK_LIST *page_blocks,  
00244                      INT32 &block_no,               
00245                      BLOCK_IT *block_it             
00246                     ) {
00247   BLOCK *block;                  
00248                                  
00249   PAGE_BLOCK_IT pb_it = page_blocks;
00250   PAGE_BLOCK *current_block;
00251   TEXT_REGION_IT tr_it;
00252   TEXT_BLOCK *tb;
00253   TEXT_REGION *tr;
00254   BOX *block_box;                
00255 
00256   for (pb_it.mark_cycle_pt (); !pb_it.cycled_list (); pb_it.forward ()) {
00257     current_block = pb_it.data ();
00258     if (current_block->type () == PB_TEXT) {
00259       tb = (TEXT_BLOCK *) current_block;
00260       if (!tb->regions ()->empty ()) {
00261         tr_it.set_to_list (tb->regions ());
00262         for (tr_it.mark_cycle_pt ();
00263         !tr_it.cycled_list (); tr_it.forward ()) {
00264           block_no++;
00265           tr = tr_it.data ();
00266           block_box = tr->bounding_box ();
00267           block = new BLOCK (name, TRUE, 0, 0,
00268             block_box->left (), block_box->bottom (),
00269             block_box->right (), block_box->top ());
00270           block->hand_block = tr;
00271           block->hand_poly = tr;
00272           block_it->add_after_then_move (block);
00273         }
00274       }
00275     }
00276     else if (current_block->type () == PB_WEIRD
00277       && !ignore_weird_blocks
00278     && ((WEIRD_BLOCK *) current_block)->id_no () > 0) {
00279       block_no++;
00280       block_box = current_block->bounding_box ();
00281       block = new BLOCK (name, TRUE, 0, 0,
00282         block_box->left (), block_box->bottom (),
00283         block_box->right (), block_box->top ());
00284       block->hand_block = NULL;
00285       block->hand_poly = current_block;
00286       block_it->add_after_then_move (block);
00287     }
00288     if (!current_block->child ()->empty ())
00289       scan_hpd_blocks (name, current_block->child (), block_no, block_it);
00290   }
00291 }
00292 
00293 
00299 BOOL8 read_vec_file(                    
00300                     STRING name,        
00301                     INT32 xsize,        
00302                     INT32 ysize,        
00303                     BLOCK_LIST *blocks  
00304                    ) {
00305   FILE *pdfp;                    
00306   BLOCK *block;                  
00307   INT32 block_no;                
00308   INT32 block_index;             
00309   INT32 vector_count;            
00310   VEC_HEADER header;             
00311   BLOCK_HEADER *vec_blocks;      
00312   VEC_ENTRY *vec_entries;        
00313   BLOCK_IT block_it = blocks;    
00314   ICOORDELT_IT left_it;          
00315   ICOORDELT_IT right_it;
00316 
00317   name += VEC_EXT;               
00318   if ((pdfp = fopen (name.string (), "r")) == NULL) {
00319     return FALSE;                
00320   }
00321   if (fread (&header, sizeof (header), 1, pdfp) != 1)
00322     READFAILED.error ("read_vec_file", EXIT, "Header");
00323                                  
00324   header.filesize = reverse32 (header.filesize);
00325   header.bytesize = reverse16 (header.bytesize);
00326   header.arraysize = reverse16 (header.arraysize);
00327   header.width = reverse16 (header.width);
00328   header.height = reverse16 (header.height);
00329   header.res = reverse16 (header.res);
00330   header.bpp = reverse16 (header.bpp);
00331   tprintf ("%d blocks in %s file:", header.arraysize, VEC_EXT);
00332   vector_count = header.filesize - header.arraysize * sizeof (BLOCK_HEADER);
00333   vector_count /= sizeof (VEC_ENTRY);
00334   vec_blocks =
00335     (BLOCK_HEADER *) alloc_mem (header.arraysize * sizeof (BLOCK_HEADER));
00336   vec_entries = (VEC_ENTRY *) alloc_mem (vector_count * sizeof (VEC_ENTRY));
00337   xsize = header.width;          
00338   ysize = header.height;
00339   if (fread (vec_blocks, sizeof (BLOCK_HEADER), header.arraysize, pdfp)
00340     != static_cast<size_t>(header.arraysize))
00341     READFAILED.error ("read_vec_file", EXIT, "Blocks");
00342   if (fread (vec_entries, sizeof (VEC_ENTRY), vector_count, pdfp)
00343     != static_cast<size_t>(vector_count))
00344     READFAILED.error ("read_vec_file", EXIT, "Vectors");
00345   for (block_index = 0; block_index < header.arraysize; block_index++) {
00346     vec_blocks[block_index].offset =
00347       reverse16 (vec_blocks[block_index].offset);
00348     vec_blocks[block_index].order =
00349       reverse16 (vec_blocks[block_index].order);
00350     vec_blocks[block_index].entries =
00351       reverse16 (vec_blocks[block_index].entries);
00352     vec_blocks[block_index].charsize =
00353       reverse16 (vec_blocks[block_index].charsize);
00354   }
00355   for (block_index = 0; block_index < vector_count; block_index++) {
00356     vec_entries[block_index].start =
00357       ICOORD (reverse16 (vec_entries[block_index].start.x ()),
00358       reverse16 (vec_entries[block_index].start.y ()));
00359     vec_entries[block_index].end =
00360       ICOORD (reverse16 (vec_entries[block_index].end.x ()),
00361       reverse16 (vec_entries[block_index].end.y ()));
00362   }
00363   for (block_no = 1; block_no <= header.arraysize; block_no++) {
00364     for (block_index = 0; block_index < header.arraysize; block_index++) {
00365       if (vec_blocks[block_index].order == block_no
00366       && vec_blocks[block_index].valid) {
00367         block = new BLOCK;
00368         left_it.set_to_list (&block->leftside);
00369         right_it.set_to_list (&block->rightside);
00370         block->box =
00371           convert_vec_block (&vec_entries
00372           [vec_blocks[block_index].offset],
00373           vec_blocks[block_index].entries, ysize,
00374           &left_it, &right_it);
00375         block->set_xheight (vec_blocks[block_index].charsize);
00376                                  
00377         block_it.add_to_end (block);
00378         
00379         
00380         
00381         
00382         
00383         
00384       }
00385     }
00386   }
00387   free_mem(vec_blocks);
00388   free_mem(vec_entries);
00389   tprintf ("%d valid\n", block_it.length ());
00390   fclose(pdfp);
00391   return TRUE;                   
00392 }
00393 
00394 
00400 static BOX convert_vec_block(                        
00401                              VEC_ENTRY *entries,     
00402                              UINT16 entry_count,     
00403                              INT32 ysize,            
00404                              ICOORDELT_IT *left_it,  
00405                              ICOORDELT_IT *right_it) {
00406   BOX block_box;                 
00407   BOX vec_box;                   
00408   ICOORD box_point;              
00409   ICOORD shift_vec;              
00410   ICOORD prev_pt;                
00411   ICOORD end_pt;                 
00412   INT32 vertex_index;            
00413 
00414   for (vertex_index = 0; vertex_index < entry_count; vertex_index++) {
00415     entries[vertex_index].start = ICOORD (entries[vertex_index].start.x (),
00416       ysize - 1 -
00417       entries[vertex_index].start.y ());
00418     entries[vertex_index].end =
00419       ICOORD (entries[vertex_index].end.x (),
00420       ysize - 1 - entries[vertex_index].end.y ());
00421     vec_box = BOX (entries[vertex_index].start, entries[vertex_index].end);
00422     block_box += vec_box;        
00423   }
00424 
00425   for (vertex_index = 0; vertex_index < entry_count
00426     && (entries[vertex_index].start.y () != block_box.bottom ()
00427     || entries[vertex_index].end.y () != block_box.bottom ());
00428     vertex_index++);
00429   ASSERT_HOST (vertex_index < entry_count);
00430   prev_pt = entries[vertex_index].start;
00431   end_pt = entries[vertex_index].end;
00432   do {
00433     for (vertex_index = 0; vertex_index < entry_count
00434       && entries[vertex_index].start != end_pt; vertex_index++);
00435                                  
00436     ASSERT_HOST (vertex_index < entry_count);
00437     box_point = entries[vertex_index].start;
00438     if (box_point.x () <= prev_pt.x ())
00439       shift_vec = ICOORD (-BLOCK_EXPANSION, -BLOCK_EXPANSION);
00440     else
00441       shift_vec = ICOORD (-BLOCK_EXPANSION, BLOCK_EXPANSION);
00442     left_it->add_to_end (new ICOORDELT (box_point + shift_vec));
00443     prev_pt = box_point;
00444     for (vertex_index = 0; vertex_index < entry_count
00445       && entries[vertex_index].start != end_pt; vertex_index++);
00446                                  
00447     ASSERT_HOST (vertex_index < entry_count);
00448     end_pt = entries[vertex_index].end;
00449   }
00450   while (end_pt.y () < block_box.top ());
00451   shift_vec = ICOORD (-BLOCK_EXPANSION, BLOCK_EXPANSION);
00452   left_it->add_to_end (new ICOORDELT (end_pt + shift_vec));
00453 
00454   for (vertex_index = 0; vertex_index < entry_count
00455     && (entries[vertex_index].start.y () != block_box.top ()
00456     || entries[vertex_index].end.y () != block_box.top ());
00457     vertex_index++);
00458   ASSERT_HOST (vertex_index < entry_count);
00459   prev_pt = entries[vertex_index].start;
00460   end_pt = entries[vertex_index].end;
00461   do {
00462     for (vertex_index = 0; vertex_index < entry_count
00463       && entries[vertex_index].start != end_pt; vertex_index++);
00464                                  
00465     ASSERT_HOST (vertex_index < entry_count);
00466     box_point = entries[vertex_index].start;
00467     if (box_point.x () < prev_pt.x ())
00468       shift_vec = ICOORD (BLOCK_EXPANSION, -BLOCK_EXPANSION);
00469     else
00470       shift_vec = ICOORD (BLOCK_EXPANSION, BLOCK_EXPANSION);
00471     right_it->add_before_then_move (new ICOORDELT (box_point + shift_vec));
00472     prev_pt = box_point;
00473     for (vertex_index = 0; vertex_index < entry_count
00474       && entries[vertex_index].start != end_pt; vertex_index++);
00475                                  
00476     ASSERT_HOST (vertex_index < entry_count);
00477     end_pt = entries[vertex_index].end;
00478   }
00479   while (end_pt.y () > block_box.bottom ());
00480   shift_vec = ICOORD (BLOCK_EXPANSION, -BLOCK_EXPANSION);
00481   right_it->add_before_then_move (new ICOORDELT (end_pt + shift_vec));
00482 
00483   shift_vec = ICOORD (BLOCK_EXPANSION, BLOCK_EXPANSION);
00484   box_point = block_box.botleft () - shift_vec;
00485   end_pt = block_box.topright () + shift_vec;
00486   return BOX (box_point, end_pt);
00487 }
00488 
00489 
00493 BOOL8 read_unlv_file(                    
00494                      STRING name,        
00495                      INT32 xsize,        
00496                      INT32 ysize,        
00497                      BLOCK_LIST *blocks  
00498                     ) {
00499   FILE *pdfp;                    
00500   BLOCK *block;                  
00501   int x;                         
00502   int y;
00503   int width;                     
00504   int height;
00505   BLOCK_IT block_it = blocks;    
00506 
00507   name += UNLV_EXT;              
00508   if ((pdfp = fopen (name.string (), "r")) == NULL) {
00509     return FALSE;                
00510   }
00511   else {
00512     while (fscanf (pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) {
00513                                  
00514       block = new BLOCK (name.string (), TRUE, 0, 0, (INT16) x, (INT16) (ysize - 1 - y - height), (INT16) (x + width), (INT16) (ysize - 1 - y));
00515                                  
00516       block_it.add_to_end (block);
00517     }
00518     fclose(pdfp);
00519   }
00520   return true;
00521 }