00001
00020 #include "mfcpch.h"
00021 #include <stdlib.h>
00022 #ifdef __UNIX__
00023 #include <assert.h>
00024 #endif
00025 #include "scanutils.h"
00026 #include "fileerr.h"
00027 #include "imgtiff.h"
00028 #include "pdclass.h"
00029 #include "rwpoly.h"
00030 #include "blread.h"
00031
00032 #define PD_EXT ".pd"
00033 #define VEC_EXT ".vec" //accupage file
00034 #define HPD_EXT ".bl" //hand pd file
00035
00036 #define UNLV_EXT ".uzn"
00037 #define BLOCK_EXPANSION 8 //boundary expansion
00038 #define EXTERN
00039
00042 EXTERN BOOL_EVAR (ignore_weird_blocks, TRUE, "Don't read weird blocks");
00045 static BOX convert_vec_block(
00046 VEC_ENTRY *entries,
00047 UINT16 entry_count,
00048 INT32 ysize,
00049 ICOORDELT_IT *left_it,
00050 ICOORDELT_IT *right_it);
00051
00055 BOOL8 read_pd_file(
00056 STRING name,
00057 INT32 xsize,
00058 INT32 ysize,
00059 BLOCK_LIST *blocks
00060 ) {
00061 FILE *pdfp;
00062 BLOCK *block;
00063 INT32 block_count;
00064 INT32 junk_count;
00065 INT32 junks[4];
00066 INT32 vertex_count;
00067 INT32 xcoord;
00068 INT32 ycoord;
00069 INT32 prevx;
00070 INT32 prevy;
00071 BLOCK_IT block_it = blocks;
00072 ICOORDELT_LIST dummy;
00073 ICOORDELT_IT left_it = &dummy;
00074 ICOORDELT_IT right_it = &dummy;
00075
00076 if (read_hpd_file (name, xsize, ysize, blocks))
00077 return TRUE;
00078 if (read_vec_file (name, xsize, ysize, blocks))
00079 return TRUE;
00080 if (read_unlv_file (name, xsize, ysize, blocks))
00081 return TRUE;
00082 name += PD_EXT;
00083 if ((pdfp = fopen (name.string (), "r")) == NULL) {
00084
00085 block = new BLOCK (name.string (), TRUE, 0, 0, 0, 0, xsize, ysize);
00086 block_it.add_to_end (block);
00087 return FALSE;
00088 }
00089 else {
00090 if (fread (&block_count, sizeof (block_count), 1, pdfp) != 1)
00091 READFAILED.error ("read_pd_file", EXIT, "Block count");
00092 tprintf ("%d blocks in .pd file.\n", block_count);
00093 while (block_count > 0) {
00094 if (fread (&junk_count, sizeof (junk_count), 1, pdfp) != 1)
00095 READFAILED.error ("read_pd_file", EXIT, "Junk count");
00096 if (fread (&vertex_count, sizeof (vertex_count), 1, pdfp) != 1)
00097 READFAILED.error ("read_pd_file", EXIT, "Vertex count");
00098 block = new BLOCK;
00099
00100 block_it.add_to_end (block);
00101 left_it.set_to_list (&block->leftside);
00102 right_it.set_to_list (&block->rightside);
00103
00104
00105 get_pd_vertex (pdfp, xsize, ysize, &block->box, xcoord, ycoord);
00106 vertex_count -= 2;
00107 prevx = xcoord;
00108 do {
00109 if (xcoord == prevx) {
00110 if (!right_it.empty ()) {
00111 if (right_it.data ()->x () <= xcoord + BLOCK_EXPANSION)
00112 right_it.data ()->set_y (right_it.data ()->y () +
00113 BLOCK_EXPANSION);
00114 else
00115 right_it.data ()->set_y (right_it.data ()->y () -
00116 BLOCK_EXPANSION);
00117 }
00118 right_it.
00119 add_before_then_move (new
00120 ICOORDELT (xcoord + BLOCK_EXPANSION,
00121 ycoord));
00122 }
00123 prevx = xcoord;
00124 prevy = ycoord;
00125 get_pd_vertex (pdfp, xsize, ysize, &block->box, xcoord, ycoord);
00126 vertex_count -= 2;
00127 }
00128 while (ycoord <= prevy);
00129 right_it.data ()->set_y (right_it.data ()->y () - BLOCK_EXPANSION);
00130
00131
00132 left_it.add_to_end (new ICOORDELT (prevx - BLOCK_EXPANSION, prevy - BLOCK_EXPANSION));
00133
00134 do {
00135 prevx = xcoord;
00136 get_pd_vertex (pdfp, xsize, ysize, &block->box, xcoord, ycoord);
00137 vertex_count -= 2;
00138 if (xcoord != prevx && vertex_count > 0) {
00139 if (xcoord > prevx)
00140 left_it.
00141 add_to_end (new
00142 ICOORDELT (xcoord - BLOCK_EXPANSION,
00143 ycoord + BLOCK_EXPANSION));
00144 else
00145 left_it.
00146 add_to_end (new
00147 ICOORDELT (xcoord - BLOCK_EXPANSION,
00148 ycoord - BLOCK_EXPANSION));
00149 }
00150 else if (vertex_count == 0)
00151 left_it.add_to_end (new ICOORDELT (prevx - BLOCK_EXPANSION,
00152 ycoord + BLOCK_EXPANSION));
00153 }
00154 while (vertex_count > 0);
00155
00156 while (junk_count > 0) {
00157 if (fread (junks, sizeof (INT32), 4, pdfp) != 4)
00158 READFAILED.error ("read_pd_file", EXIT, "Junk coords");
00159 junk_count--;
00160 }
00161 block_count--;
00162 }
00163 }
00164 fclose(pdfp);
00165 return TRUE;
00166 }
00167
00168
00176 void get_pd_vertex(
00177 FILE *pdfp,
00178 INT32 xsize,
00179 INT32 ysize,
00180 BOX *box,
00181 INT32 &xcoord,
00182 INT32 &ycoord) {
00183 BOX new_coord;
00184
00185
00186 if (fread (&xcoord, sizeof (xcoord), 1, pdfp) != 1)
00187 READFAILED.error ("read_pd_file", EXIT, "Xcoord");
00188 if (fread (&ycoord, sizeof (ycoord), 1, pdfp) != 1)
00189 READFAILED.error ("read_pd_file", EXIT, "Xcoord");
00190 ycoord = ysize - ycoord;
00191 if (xcoord < BLOCK_EXPANSION)
00192 xcoord = BLOCK_EXPANSION;
00193 if (xcoord > xsize - BLOCK_EXPANSION)
00194 xcoord = xsize - BLOCK_EXPANSION;
00195 if (ycoord < BLOCK_EXPANSION)
00196 ycoord = BLOCK_EXPANSION;
00197 if (ycoord > ysize - BLOCK_EXPANSION)
00198 ycoord = ysize - BLOCK_EXPANSION;
00199
00200 new_coord =
00201 BOX (ICOORD (xcoord - BLOCK_EXPANSION, ycoord - BLOCK_EXPANSION),
00202 ICOORD (xcoord + BLOCK_EXPANSION, ycoord + BLOCK_EXPANSION));
00203 (*box) += new_coord;
00204 }
00205
00206
00212 BOOL8 read_hpd_file(
00213 STRING name,
00214 INT32 xsize,
00215 INT32 ysize,
00216 BLOCK_LIST *blocks
00217 ) {
00218 FILE *pdfp;
00219 PAGE_BLOCK_LIST *page_blocks;
00220 INT32 block_no;
00221 BLOCK_IT block_it = blocks;
00222
00223 name += HPD_EXT;
00224 if ((pdfp = fopen (name.string (), "r")) == NULL) {
00225 return FALSE;
00226 }
00227 fclose(pdfp);
00228 page_blocks = read_poly_blocks (name.string ());
00229 block_no = 0;
00230 scan_hpd_blocks (name.string (), page_blocks, block_no, &block_it);
00231 tprintf ("Text region count=%d\n", block_no);
00232 return TRUE;
00233 }
00234
00235
00241 void scan_hpd_blocks(
00242 const char *name,
00243 PAGE_BLOCK_LIST *page_blocks,
00244 INT32 &block_no,
00245 BLOCK_IT *block_it
00246 ) {
00247 BLOCK *block;
00248
00249 PAGE_BLOCK_IT pb_it = page_blocks;
00250 PAGE_BLOCK *current_block;
00251 TEXT_REGION_IT tr_it;
00252 TEXT_BLOCK *tb;
00253 TEXT_REGION *tr;
00254 BOX *block_box;
00255
00256 for (pb_it.mark_cycle_pt (); !pb_it.cycled_list (); pb_it.forward ()) {
00257 current_block = pb_it.data ();
00258 if (current_block->type () == PB_TEXT) {
00259 tb = (TEXT_BLOCK *) current_block;
00260 if (!tb->regions ()->empty ()) {
00261 tr_it.set_to_list (tb->regions ());
00262 for (tr_it.mark_cycle_pt ();
00263 !tr_it.cycled_list (); tr_it.forward ()) {
00264 block_no++;
00265 tr = tr_it.data ();
00266 block_box = tr->bounding_box ();
00267 block = new BLOCK (name, TRUE, 0, 0,
00268 block_box->left (), block_box->bottom (),
00269 block_box->right (), block_box->top ());
00270 block->hand_block = tr;
00271 block->hand_poly = tr;
00272 block_it->add_after_then_move (block);
00273 }
00274 }
00275 }
00276 else if (current_block->type () == PB_WEIRD
00277 && !ignore_weird_blocks
00278 && ((WEIRD_BLOCK *) current_block)->id_no () > 0) {
00279 block_no++;
00280 block_box = current_block->bounding_box ();
00281 block = new BLOCK (name, TRUE, 0, 0,
00282 block_box->left (), block_box->bottom (),
00283 block_box->right (), block_box->top ());
00284 block->hand_block = NULL;
00285 block->hand_poly = current_block;
00286 block_it->add_after_then_move (block);
00287 }
00288 if (!current_block->child ()->empty ())
00289 scan_hpd_blocks (name, current_block->child (), block_no, block_it);
00290 }
00291 }
00292
00293
00299 BOOL8 read_vec_file(
00300 STRING name,
00301 INT32 xsize,
00302 INT32 ysize,
00303 BLOCK_LIST *blocks
00304 ) {
00305 FILE *pdfp;
00306 BLOCK *block;
00307 INT32 block_no;
00308 INT32 block_index;
00309 INT32 vector_count;
00310 VEC_HEADER header;
00311 BLOCK_HEADER *vec_blocks;
00312 VEC_ENTRY *vec_entries;
00313 BLOCK_IT block_it = blocks;
00314 ICOORDELT_IT left_it;
00315 ICOORDELT_IT right_it;
00316
00317 name += VEC_EXT;
00318 if ((pdfp = fopen (name.string (), "r")) == NULL) {
00319 return FALSE;
00320 }
00321 if (fread (&header, sizeof (header), 1, pdfp) != 1)
00322 READFAILED.error ("read_vec_file", EXIT, "Header");
00323
00324 header.filesize = reverse32 (header.filesize);
00325 header.bytesize = reverse16 (header.bytesize);
00326 header.arraysize = reverse16 (header.arraysize);
00327 header.width = reverse16 (header.width);
00328 header.height = reverse16 (header.height);
00329 header.res = reverse16 (header.res);
00330 header.bpp = reverse16 (header.bpp);
00331 tprintf ("%d blocks in %s file:", header.arraysize, VEC_EXT);
00332 vector_count = header.filesize - header.arraysize * sizeof (BLOCK_HEADER);
00333 vector_count /= sizeof (VEC_ENTRY);
00334 vec_blocks =
00335 (BLOCK_HEADER *) alloc_mem (header.arraysize * sizeof (BLOCK_HEADER));
00336 vec_entries = (VEC_ENTRY *) alloc_mem (vector_count * sizeof (VEC_ENTRY));
00337 xsize = header.width;
00338 ysize = header.height;
00339 if (fread (vec_blocks, sizeof (BLOCK_HEADER), header.arraysize, pdfp)
00340 != static_cast<size_t>(header.arraysize))
00341 READFAILED.error ("read_vec_file", EXIT, "Blocks");
00342 if (fread (vec_entries, sizeof (VEC_ENTRY), vector_count, pdfp)
00343 != static_cast<size_t>(vector_count))
00344 READFAILED.error ("read_vec_file", EXIT, "Vectors");
00345 for (block_index = 0; block_index < header.arraysize; block_index++) {
00346 vec_blocks[block_index].offset =
00347 reverse16 (vec_blocks[block_index].offset);
00348 vec_blocks[block_index].order =
00349 reverse16 (vec_blocks[block_index].order);
00350 vec_blocks[block_index].entries =
00351 reverse16 (vec_blocks[block_index].entries);
00352 vec_blocks[block_index].charsize =
00353 reverse16 (vec_blocks[block_index].charsize);
00354 }
00355 for (block_index = 0; block_index < vector_count; block_index++) {
00356 vec_entries[block_index].start =
00357 ICOORD (reverse16 (vec_entries[block_index].start.x ()),
00358 reverse16 (vec_entries[block_index].start.y ()));
00359 vec_entries[block_index].end =
00360 ICOORD (reverse16 (vec_entries[block_index].end.x ()),
00361 reverse16 (vec_entries[block_index].end.y ()));
00362 }
00363 for (block_no = 1; block_no <= header.arraysize; block_no++) {
00364 for (block_index = 0; block_index < header.arraysize; block_index++) {
00365 if (vec_blocks[block_index].order == block_no
00366 && vec_blocks[block_index].valid) {
00367 block = new BLOCK;
00368 left_it.set_to_list (&block->leftside);
00369 right_it.set_to_list (&block->rightside);
00370 block->box =
00371 convert_vec_block (&vec_entries
00372 [vec_blocks[block_index].offset],
00373 vec_blocks[block_index].entries, ysize,
00374 &left_it, &right_it);
00375 block->set_xheight (vec_blocks[block_index].charsize);
00376
00377 block_it.add_to_end (block);
00378
00379
00380
00381
00382
00383
00384 }
00385 }
00386 }
00387 free_mem(vec_blocks);
00388 free_mem(vec_entries);
00389 tprintf ("%d valid\n", block_it.length ());
00390 fclose(pdfp);
00391 return TRUE;
00392 }
00393
00394
00400 static BOX convert_vec_block(
00401 VEC_ENTRY *entries,
00402 UINT16 entry_count,
00403 INT32 ysize,
00404 ICOORDELT_IT *left_it,
00405 ICOORDELT_IT *right_it) {
00406 BOX block_box;
00407 BOX vec_box;
00408 ICOORD box_point;
00409 ICOORD shift_vec;
00410 ICOORD prev_pt;
00411 ICOORD end_pt;
00412 INT32 vertex_index;
00413
00414 for (vertex_index = 0; vertex_index < entry_count; vertex_index++) {
00415 entries[vertex_index].start = ICOORD (entries[vertex_index].start.x (),
00416 ysize - 1 -
00417 entries[vertex_index].start.y ());
00418 entries[vertex_index].end =
00419 ICOORD (entries[vertex_index].end.x (),
00420 ysize - 1 - entries[vertex_index].end.y ());
00421 vec_box = BOX (entries[vertex_index].start, entries[vertex_index].end);
00422 block_box += vec_box;
00423 }
00424
00425 for (vertex_index = 0; vertex_index < entry_count
00426 && (entries[vertex_index].start.y () != block_box.bottom ()
00427 || entries[vertex_index].end.y () != block_box.bottom ());
00428 vertex_index++);
00429 ASSERT_HOST (vertex_index < entry_count);
00430 prev_pt = entries[vertex_index].start;
00431 end_pt = entries[vertex_index].end;
00432 do {
00433 for (vertex_index = 0; vertex_index < entry_count
00434 && entries[vertex_index].start != end_pt; vertex_index++);
00435
00436 ASSERT_HOST (vertex_index < entry_count);
00437 box_point = entries[vertex_index].start;
00438 if (box_point.x () <= prev_pt.x ())
00439 shift_vec = ICOORD (-BLOCK_EXPANSION, -BLOCK_EXPANSION);
00440 else
00441 shift_vec = ICOORD (-BLOCK_EXPANSION, BLOCK_EXPANSION);
00442 left_it->add_to_end (new ICOORDELT (box_point + shift_vec));
00443 prev_pt = box_point;
00444 for (vertex_index = 0; vertex_index < entry_count
00445 && entries[vertex_index].start != end_pt; vertex_index++);
00446
00447 ASSERT_HOST (vertex_index < entry_count);
00448 end_pt = entries[vertex_index].end;
00449 }
00450 while (end_pt.y () < block_box.top ());
00451 shift_vec = ICOORD (-BLOCK_EXPANSION, BLOCK_EXPANSION);
00452 left_it->add_to_end (new ICOORDELT (end_pt + shift_vec));
00453
00454 for (vertex_index = 0; vertex_index < entry_count
00455 && (entries[vertex_index].start.y () != block_box.top ()
00456 || entries[vertex_index].end.y () != block_box.top ());
00457 vertex_index++);
00458 ASSERT_HOST (vertex_index < entry_count);
00459 prev_pt = entries[vertex_index].start;
00460 end_pt = entries[vertex_index].end;
00461 do {
00462 for (vertex_index = 0; vertex_index < entry_count
00463 && entries[vertex_index].start != end_pt; vertex_index++);
00464
00465 ASSERT_HOST (vertex_index < entry_count);
00466 box_point = entries[vertex_index].start;
00467 if (box_point.x () < prev_pt.x ())
00468 shift_vec = ICOORD (BLOCK_EXPANSION, -BLOCK_EXPANSION);
00469 else
00470 shift_vec = ICOORD (BLOCK_EXPANSION, BLOCK_EXPANSION);
00471 right_it->add_before_then_move (new ICOORDELT (box_point + shift_vec));
00472 prev_pt = box_point;
00473 for (vertex_index = 0; vertex_index < entry_count
00474 && entries[vertex_index].start != end_pt; vertex_index++);
00475
00476 ASSERT_HOST (vertex_index < entry_count);
00477 end_pt = entries[vertex_index].end;
00478 }
00479 while (end_pt.y () > block_box.bottom ());
00480 shift_vec = ICOORD (BLOCK_EXPANSION, -BLOCK_EXPANSION);
00481 right_it->add_before_then_move (new ICOORDELT (end_pt + shift_vec));
00482
00483 shift_vec = ICOORD (BLOCK_EXPANSION, BLOCK_EXPANSION);
00484 box_point = block_box.botleft () - shift_vec;
00485 end_pt = block_box.topright () + shift_vec;
00486 return BOX (box_point, end_pt);
00487 }
00488
00489
00493 BOOL8 read_unlv_file(
00494 STRING name,
00495 INT32 xsize,
00496 INT32 ysize,
00497 BLOCK_LIST *blocks
00498 ) {
00499 FILE *pdfp;
00500 BLOCK *block;
00501 int x;
00502 int y;
00503 int width;
00504 int height;
00505 BLOCK_IT block_it = blocks;
00506
00507 name += UNLV_EXT;
00508 if ((pdfp = fopen (name.string (), "r")) == NULL) {
00509 return FALSE;
00510 }
00511 else {
00512 while (fscanf (pdfp, "%d %d %d %d %*s", &x, &y, &width, &height) >= 4) {
00513
00514 block = new BLOCK (name.string (), TRUE, 0, 0, (INT16) x, (INT16) (ysize - 1 - y - height), (INT16) (x + width), (INT16) (ysize - 1 - y));
00515
00516 block_it.add_to_end (block);
00517 }
00518 fclose(pdfp);
00519 }
00520 return true;
00521 }