ccutil/ocrclass.h

Go to the documentation of this file.
00001 
00025 #ifndef           OCRCLASS_H
00026 #define           OCRCLASS_H
00027 
00028 #include          <time.h>
00029 #ifdef __MSW32__
00030 #include          <windows.h>
00031 #endif
00032 #include          "host.h"
00033 
00034 /* Maximum lengths of various strings; the arrays sized from these add 1 (presumably for a terminator) */
00035 #define MAX_FONT_NAME   34       /* name of font */
00036 #define MAX_OCR_NAME    32       /* name of engine */
00037 #define MAX_OCR_VERSION   17     /* version code of engine */
00038 
00039 /* Image parameters */
00040 #define MIN_IMAGE_SIZE    64     /* smallest image that will be passed */
00041 #define IMAGE_ROUNDING    32     /* all sizes are a multiple of this */
00042 
00043 #if defined(__SLOW_TIMES__)
00044 /* Maximum timeouts of various functions (in secs); longer limits used when __SLOW_TIMES__ is defined */
00045 #define STARTUP_TIMEOUT   100    /* start of OCR engine */
00046 #define SHUTDOWN_TIMEOUT  50     /* end of OCR engine */
00047 #define SENDIM_TIMEOUT    50     /* send of image */
00048 #define RELEASE_TIMEOUT   50     /* release of semaphore */
00049 #define READIM_TIMEOUT    100    /* read of image */
00050 #define READTEXT_TIMEOUT  50     /* read of text */
00051 #define PROGRESS_TIMEOUT  30     /* progress every 30 seconds */
00052 #define BADTIMES_TIMEOUT  7      /* max lack of progress (same value in both configurations) */
00053 #else
00054 /* Maximum timeouts of various functions (in secs); default limits */
00055 #define STARTUP_TIMEOUT   10     /* start of OCR engine */
00056 #define SHUTDOWN_TIMEOUT  6      /* end of OCR engine */
00057 #define SENDIM_TIMEOUT    5      /* send of image */
00058 #define RELEASE_TIMEOUT   5      /* release of semaphore */
00059 #define READIM_TIMEOUT    10     /* read of image */
00060 #define READTEXT_TIMEOUT  5      /* read of text */
00061 #define PROGRESS_TIMEOUT  3      /* progress every 3 seconds */
00062 #define BADTIMES_TIMEOUT  7      /* max lack of progress (same value in both configurations) */
00063 #endif
00064 
00065 /* Language definitions are identical to RTF */
00066 #define LANGE_NONE      0x0400   /* no language */
00067 #define LANGE_ALBANIAN    0x041c /* Albanian */
00068 #define LANGE_BRITISH   0x0809   /* International English */
00069 #define LANGE_BULGARIAN   0x0402 /* Bulgarian */
00070 #define LANGE_CROATIAN    0x041a /* Croatian (Latin alphabet) */
00071 #define LANGE_CZECH     0x0405   /* Czech */
00072 #define LANGE_DANISH    0x0406   /* Danish */
00073 #define LANGE_DUTCH     0x0413   /* Dutch */
00074 #define LANGE_FINNISH   0x040b   /* Finnish */
00075 #define LANGE_FRENCH    0x040c   /* French */
00076 #define LANGE_GERMAN    0x0407   /* German */
00077 #define LANGE_GREEK     0x0408   /* Greek */
00078 #define LANGE_HUNGARIAN   0x040e /* Hungarian */
00079 #define LANGE_ITALIAN   0x0410   /* Italian */
00080 #define LANGE_JAPANESE    0x0411 /* Japanese */
00081 #define LANGE_KOREAN    0x0412   /* Korean */
00082 #define LANGE_NORWEGIAN   0x0414 /* Bokmal */
00083 #define LANGE_POLISH    0x0415   /* Polish */
00084 #define LANGE_PORTUGESE   0x0416 /* Brazilian Portuguese */
00085 #define LANGE_ROMANIAN    0x0418 /* Romanian */
00086 #define LANGE_RUSSIAN   0x0419   /* Russian */
00087 #define LANGE_SCHINESE    0x0804 /* Simplified Chinese */
00088 #define LANGE_SLOVAK    0x041b   /* Slovak */
00089 #define LANGE_SPANISH   0x040a   /* Castilian */
00090 #define LANGE_SWEDISH   0x041d   /* Swedish */
00091 #define LANGE_TCHINESE    0x0404 /* Traditional Chinese */
00092 #define LANGE_TURKISH   0x041f   /* Turkish */
00093 #define LANGE_USENGLISH   0x0409 /* American */
00094 
00095 /* Font family definitions are identical to RTF */
00096 #define FFAM_NONE     0          /* unknown */
00097 #define FFAM_ROMAN      1        /* serifed proportional */
00098 #define FFAM_SWISS      2        /* sans-serif proportional */
00099 #define FFAM_MODERN     3        /* fixed pitch */
00100 
00101 /* Character set definitions are identical to RTF */
00102 #define CHSET_ANSI      0        /* Ansi efigs */
00103 #define CHSET_SHIFT_JIS   128    /* JIS X 0208-1990 */
00104 #define CHSET_KOREAN    129      /* KS C 5601-1992 */
00105 #define CHSET_SCHINESE    134    /* GB 2312-80 */
00106 #define CHSET_BIG5      136      /* Big Five */
00107 #define CHSET_CYRILLIC    204    /* Cyrillic */
00108 #define CHSET_EEUROPE   238      /* Eastern Europe */
00109 
00110 /* Pitch set definitions are identical to RTF */
00111 #define PITCH_DEF     0          /* default */
00112 #define PITCH_FIXED     1        /* fixed pitch */
00113 #define PITCH_VAR     2          /* variable pitch */
00114 
00115 /* Bitmasks for character enhancements; each value is a distinct bit.
00116 OR these together for enhancement in ocr_append_char */
00117 #define EUC_BOLD      1          /* bold character */
00118 #define EUC_ITALIC      2        /* italic character */
00119 #define EUC_UNDERLINE   4        /* underlined character */
00120 #define EUC_SUBSCRIPT   8        /* subscript character */
00121 #define EUC_SUPERSCRIPT   16     /* superscript character */
00122 
00127 enum OCR_CHAR_DIRECTION          /* character direction; enumerators take the implicit values 0..3 */
00128 {
00129   OCR_CDIR_RIGHT_LEFT,           /* right to left, horizontal */
00130   OCR_CDIR_LEFT_RIGHT,           /* left to right, horizontal */
00131   OCR_CDIR_TOP_BOTTOM,           /* top to bottom, vertical */
00132   OCR_CDIR_BOTTOM_TOP            /* bottom to top, vertical */
00133 };
00134 
00139 enum OCR_LINE_DIRECTION          /* line direction; enumerators take the implicit values 0..1 */
00140 {
00141   OCR_LDIR_DOWN_RIGHT,           /* horizontal lines go down... */
00142   /* ...and vertical lines go right (this comment describes OCR_LDIR_DOWN_RIGHT above) */
00143   OCR_LDIR_UP_LEFT               /* horizontal lines go up (presumably vertical lines go left, per the name) */
00144 };
00145 
00150 enum OCR_NEWLINE_TYPE            // kind of line break; enumerators take the implicit values 0..2
00151 {
00152   OCR_NL_NONE,                   // not a newline
00153   OCR_NL_NEWLINE,                // a newline, but not a new paragraph
00154   OCR_NL_NEWPARA                 // a newline and also a new paragraph
00155 };
00156 
00157 /* Error codes that can be returned from the API functions, other than OKAY and HPERR (those two are not defined in this file) */
00158 #define OCR_API_NO_MEM    (-2)   /* filled output buffer */
00159 #define OCR_API_BAD_CHAR  (-3)   /* whitespace sent to ocr_append_char */
00160 #define OCR_API_BAD_STATE (-4)   /* invalid call sequence */
00161 
00166 enum OCR_ERR_CODE                /* error codes; enumerators take implicit values starting at 0 */
00167 {
00168   OCR_ERR_NONE,                  /* no error */
00169   OCR_ERR_CLEAN_EXIT,            /* clean exit; no error */
00170   OCR_ERR_NO_MEM,                /* out of memory */
00171   OCR_ERR_FILE_READ,             /* failed to read data file */
00172   OCR_ERR_TMP_WRITE,             /* failed to write temp file */
00173   OCR_ERR_TMP_READ,              /* failed to read temp file */
00174   OCR_ERR_BAD_DLL,               /* missing or invalid dll subcomponent */
00175   OCR_ERR_BAD_EXE,               /* missing or invalid exe subcomponent */
00176   OCR_ERR_BAD_LOAD,              /* failed to load subcomponent */
00177   OCR_ERR_BAD_LANG,              /* unable to recognize requested language */
00178   OCR_ERR_BAD_STATE,             /* engine made a call out of sequence */
00179   OCR_ERR_INTERNAL1,             /* internal error type 1 */
00180   OCR_ERR_INTERNAL2,             /* internal error type 2 */
00181   OCR_ERR_INTERNAL3,             /* internal error type 3 */
00182   OCR_ERR_INTERNAL4,             /* internal error type 4 */
00183   OCR_ERR_INTERNAL5,             /* internal error type 5 */
00184   OCR_ERR_INTERNAL6,             /* internal error type 6 */
00185   OCR_ERR_INTERNAL7,             /* internal error type 7 */
00186   OCR_ERR_INTERNAL8,             /* internal error type 8 */
00187   OCR_ERR_TIMEOUT                /* timed out in comms */
00188 };                               /* for calls to ocr_error */
00189 
00198 typedef struct                   /* font description: one font's language, family, character set, pitch and name */
00199 {
00200   UINT16 language;               /* default language */
00201   UINT8 font_family;             /* serif/not, fixed/not */
00202   UINT8 char_set;                /* character set standard */
00203   UINT8 pitch;                   /* fixed or proportional */
00204   INT8 name[MAX_FONT_NAME + 1];  /* plain ascii name; MAX_FONT_NAME + 1 elements */
00205 } EFONT_DESC;                    /* font description */
00206 
00216 typedef struct                   /* startup info: protocol version, engine identity and its fonts */
00217 {
00218   INT32 protocol;                /* interface version */
00219   UINT32 font_count;             /* number of fonts */
00220   UINT16 language;               /* default language */
00221   UINT16 name[MAX_OCR_NAME + 1]; /* name of engine; 16-bit elements */
00222                                  /* version of engine; 16-bit elements */
00223   UINT16 version[MAX_OCR_VERSION + 1];
00224   EFONT_DESC fonts[1];           /* array of fonts; declared with 1 element, presumably font_count entries follow - TODO confirm */
00225 } EOCR_DESC;                     /* startup info */
00226 
00238 typedef struct                   /* bitmap strip: image dimensions plus the data of one strip */
00239 {
00240   INT16 x_size;                  /* width in pixels */
00241   INT16 y_size;                  /* height of full image */
00242   INT16 strip_size;              /* size of this strip */
00243   INT16 resolution;              /* pixels per inch */
00244   UINT8 data[8];                 /* image data; declared with 8 elements (actual extent not visible here) */
00245 } ESTRIP_DESC;                   /* bitmap strip */
00246 
00276 typedef struct                   /* single character; values in parentheses below look like defaults - TODO confirm */
00277 {
00278   UINT16 char_code;              /* character itself */
00279   INT16 left;                    /* left of char (-1) */
00280   INT16 right;                   /* right of char (-1) */
00281   INT16 top;                     /* top of char (-1) */
00282   INT16 bottom;                  /* bottom of char (-1) */
00283   INT16 font_index;              /* what font (0) */
00284   UINT8 confidence;              /* 0=perfect, 100=reject (0/100) */
00285   UINT8 point_size;              /* of char, 72=1 inch, (10) */
00286   INT8 blanks;                   /* number of spaces before this char (1) */
00287   UINT8 formatting;              /* char formatting (0) */
00288 } EANYCODE_CHAR;                 /* single character */
00289 
00290 typedef bool (*CANCEL_FUNC)(void* cancel_this, int words);  /* callback type; the ETEXT_DESC field of this type is documented as returning true to cancel */
00310 typedef struct                   /* output header: progress/status fields followed by character data */
00311 {
00312   INT16 count;                   /* chars in this buffer (0) */
00313   INT16 progress;                /* percent complete, increasing (0-100) */
00314   INT8 more_to_come;             /* true if not last */
00315   INT8 ocr_alive;                /* ocr sets to 1, HP to 0 */
00316   INT8 err_code;                 /* for errcode use */
00317   CANCEL_FUNC cancel;            /* callback; returns true to cancel */
00318   void* cancel_this;             /* this or other data for cancel */
00319   clock_t end_time;              /* time to stop if not 0 */
00320   EANYCODE_CHAR text[1];         /* character data; declared with 1 element, presumably count entries follow - TODO confirm */
00321 } ETEXT_DESC;                    /* output header */
00322 
00323 #ifdef __MSW32__
00324 
00329 typedef struct                   /* shared mem info, __MSW32__ variant (HANDLE-based) */
00330 {
00331   HANDLE shm_hand;               /* handle to shm */
00332   HANDLE mutex;                  /* alive check */
00333   HANDLE ocr_sem;                /* ocr semaphore */
00334   HANDLE hp_sem;                 /* hp semaphore */
00335   void *shm_mem;                 /* shared memory */
00336   INT32 shm_size;                /* size of shm */
00337 } ESHM_INFO;                     /* shared mem info */
00338 #elif defined (__MAC__)
00339 
00344 typedef struct                   /* shared mem info, __MAC__ variant (Boolean flags) */
00345 {
00346   Boolean mutex;                 /* alive check */
00347   Boolean ocr_sem;               /* ocr semaphore */
00348   Boolean hp_sem;                /* hp semaphore */
00349   void *shm_mem;                 /* shared memory */
00350   INT32 shm_size;                /* size of shm */
00351   INT16 language;                /* undocumented in the original; presumably a LANGE_* code - TODO confirm */
00352 
00353   // Process management information follows:
00354   ProcessSerialNumber IPEProcess;
00355   ProcessSerialNumber OCRProcess;
00356 } ESHM_INFO;
00357 #elif defined (__UNIX__)
00358 
00367 typedef struct                   /* shared mem info, __UNIX__ variant (memory pointer and size only) */
00368 {
00369   void *shm_mem;                 /* shared memory */
00370   INT32 shm_size;                /* size of shm */
00371 } ESHM_INFO;
00372 #endif                           /* no #else branch: ESHM_INFO is not declared when none of the three platform macros is defined */
00373 #endif

Generated on Wed Feb 28 19:49:09 2007 for Tesseract by  doxygen 1.5.1