00001
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050 #include "mfcpch.h"
00051 #include "applybox.h"
00052 #include "control.h"
00053 #include "tessvars.h"
00054 #include "tessedit.h"
00055 #include "baseapi.h"
00056 #include "pageres.h"
00057 #include "imgs.h"
00058 #include "varabled.h"
00059 #include "tprintf.h"
00060 #include "tesseractmain.h"
00061 #include "stderr.h"
00062 #include "notdll.h"
00063 #include "mainblk.h"
00064 #include "globals.h"
00065 #include "tfacep.h"
00066 #include "callnet.h"
00067
00069 #define VARDIR "configs/"
00071 #define API_CONFIG "configs/api_config"
00072
00073 #define EXTERN
00074
00076 EXTERN BOOL_VAR (tessedit_read_image, TRUE, "Ensure the image is read");
00077 EXTERN BOOL_VAR (tessedit_write_images, FALSE,
00078 "Capture the image from the IPE");
00079 EXTERN BOOL_VAR (tessedit_debug_to_screen, FALSE, "Dont use debug file");
00082 extern INT16 XOFFSET;
00083 extern INT16 YOFFSET;
00084 extern int NO_BLOCK;
00085
00086 const ERRCODE USAGE = "Usage";
00088 char szAppName[] = "Tessedit";
00089
00094 #ifndef GRAPHICS_DISABLED
00095 int main(int argc, char **argv) {
00096 STRING outfile;
00097
00098 if (argc < 3) {
00099 USAGE.error (argv[0], EXIT,
00100 "%s imagename outputbase [configfile [[+|-]varfile]...]\n", argv[0]);
00101 }
00102
00103 if (argc == 3)
00104 TessBaseAPI::Init(argv[0], argv[1], NULL, false, 0, argv + 2);
00105 else
00106 TessBaseAPI::Init(argv[0], argv[1], argv[3], false, argc - 4, argv + 4);
00107
00108 tprintf ("Tesseract Open Source OCR Engine\n");
00109
00110 IMAGE image;
00111 #ifdef _TIFFIO_
00112 TIFF* tif = TIFFOpen(argv[1], "r");
00113 if (tif) {
00114 read_tiff_image(tif, &image);
00115 TIFFClose(tif);
00116 } else {
00117 READFAILED.error (argv[0], EXIT, argv[1]);
00118 }
00119 #else
00120 if (image.read_header(argv[1]) < 0)
00121 READFAILED.error (argv[0], EXIT, argv[1]);
00122 if (image.read(image.get_ysize ()) < 0) {
00123 MEMORY_OUT.error(argv[0], EXIT, "Read of image %s",
00124 argv[1]);
00125 }
00126 #endif // _TIFFIO_
00127 int bytes_per_line = check_legal_image_size(image.get_xsize(),
00128 image.get_ysize(),
00129 image.get_bpp());
00130 char* text = TessBaseAPI::TesseractRect(image.get_buffer(), image.get_bpp()/8,
00131 bytes_per_line, 0, 0,
00132 image.get_xsize(), image.get_ysize());
00133 outfile = argv[2];
00134 outfile += ".txt";
00135 FILE* fp = fopen(outfile.string(), "w");
00136 if (fp != NULL) {
00137 fwrite(text, 1, strlen(text), fp);
00138 fclose(fp);
00139 }
00140 delete [] text;
00141 TessBaseAPI::End();
00142
00143 return 0;
00144 }
00145 #else // GRAPHICS_DISABLED
00146
00147 int main(int argc, char **argv) {
00148 UINT16 lang;
00149 STRING pagefile;
00150
00151 if (argc < 4) {
00152 USAGE.error (argv[0], EXIT,
00153 "%s imagename outputbase configfile [[+|-]varfile]...\n", argv[0]);
00154 }
00155
00156 time_t t_start = time(NULL);
00157
00158 init_tessembedded (argv[0], argv[2], argv[3], argc - 4, argv + 4);
00159
00160 tprintf ("Tesseract Open Source OCR Engine (graphics disabled)\n");
00161
00162 if (tessedit_read_image) {
00163 #ifdef _TIFFIO_
00164 TIFF* tif = TIFFOpen(argv[1], "r");
00165 if (tif) {
00166 read_tiff_image(tif);
00167 TIFFClose(tif);
00168 } else
00169 READFAILED.error (argv[0], EXIT, argv[1]);
00170
00171 #else
00172 if (page_image.read_header (argv[1]) < 0)
00173 READFAILED.error (argv[0], EXIT, argv[1]);
00174 if (page_image.read (page_image.get_ysize ()) < 0) {
00175 MEMORY_OUT.error (argv[0], EXIT, "Read of image %s",
00176 argv[1]);
00177 }
00178 #endif // _TIFFIO_
00179 }
00180
00181 pagefile = argv[1];
00182
00183 BLOCK_LIST current_block_list;
00184 tessembedded_read_file(pagefile, ¤t_block_list);
00185 tprintf ("Done reading files.\n");
00186
00187 PAGE_RES page_res(¤t_block_list);
00188
00189 recog_all_words(&page_res, NULL);
00190
00191 current_block_list.clear();
00192 ResetAdaptiveClassifier();
00193
00194 time_t t_end = time(NULL);
00195 double secs = difftime(t_end, t_start);
00196 tprintf ("Done. Number of seconds: %d\n", (int)secs);
00197 return 0;
00198 }
00199
00200 #endif // not def GRAPHICS_DISABLED
00201
// Set to 1 by WndProc when it receives WM_CREATE; WinMain checks it before
// calling main().
int initialized = 0;
00203
00204 #ifdef __MSW32__
00205
00214 int WINAPI WinMain(
00215 HINSTANCE hInstance,
00216 HINSTANCE hPrevInstance,
00217 LPSTR lpszCmdLine,
00218 int nCmdShow) {
00219 WNDCLASS wc;
00220 HWND hwnd;
00221 MSG msg;
00222
00223 char **argv;
00224 char *argsin[2];
00225 int argc;
00226 int exit_code;
00227
00228 wc.style = CS_NOCLOSE | CS_OWNDC;
00229 wc.lpfnWndProc = (WNDPROC) WndProc;
00230 wc.cbClsExtra = 0;
00231 wc.cbWndExtra = 0;
00232 wc.hInstance = hInstance;
00233 wc.hIcon = NULL;
00234 wc.hCursor = NULL;
00235 wc.hbrBackground = (HBRUSH) (COLOR_WINDOW + 1);
00236 wc.lpszMenuName = NULL;
00237 wc.lpszClassName = szAppName;
00238
00239 RegisterClass(&wc);
00240
00241 hwnd = CreateWindow (szAppName, szAppName,
00242 WS_OVERLAPPEDWINDOW | WS_DISABLED,
00243 CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT,
00244 CW_USEDEFAULT, HWND_DESKTOP, NULL, hInstance, NULL);
00245
00246 argsin[0] = strdup (szAppName);
00247 argsin[1] = strdup (lpszCmdLine);
00248
00249
00250 argv =
00251 (char **) malloc (((strlen (argsin[0]) + strlen (argsin[1])) / 2 + 1) *
00252 sizeof (char *));
00253
00254
00255 argc = parse_args (2, argsin, argv);
00256
00257
00258
00259
00260 if (initialized) {
00261 exit_code = main (argc, argv);
00262 free (argsin[0]);
00263 free (argsin[1]);
00264 free(argv);
00265 return exit_code;
00266 }
00267 while (GetMessage (&msg, NULL, 0, 0)) {
00268 TranslateMessage(&msg);
00269 DispatchMessage(&msg);
00270 if (initialized) {
00271 exit_code = main (argc, argv);
00272 break;
00273 }
00274 else
00275 exit_code = msg.wParam;
00276 }
00277 free (argsin[0]);
00278 free (argsin[1]);
00279 free(argv);
00280 return exit_code;
00281 }
00282
00283
00292 LONG WINAPI WndProc(
00293 HWND hwnd,
00294 UINT msg,
00295 WPARAM wParam,
00296 LPARAM lParam) {
00297 HDC hdc;
00298
00299 if (msg == WM_CREATE) {
00300
00301
00302
00303 hdc = GetDC (hwnd);
00304 ReleaseDC(hwnd, hdc);
00305 initialized = 1;
00306 return 0;
00307 }
00308 return DefWindowProc (hwnd, msg, wParam, lParam);
00309 }
00310
00311
/**
 * Splits each input string into arguments separated by spaces, tabs or
 * newlines.
 *
 * The input strings are modified in place: the separator following each
 * argument is overwritten with '\0', and arglist receives pointers into the
 * input strings (nothing is copied). NULL entries in argv are skipped.
 *
 * @param argc    number of strings in argv
 * @param argv    strings to split; their contents are overwritten
 * @param arglist output array; the caller must provide room for every
 *                argument found
 * @return number of arguments stored in arglist
 */
int
parse_args (
    int argc,
    char *argv[],
    char *arglist[]
) {
  int argcount = 0;

  for (int arg = 0; arg < argc; arg++) {
    char *cursor = argv[arg];
    if (cursor == NULL)
      continue;   // e.g. a failed strdup() in the caller

    while (*cursor) {
      // Skip the separators in front of the next argument.
      while (*cursor == ' ' || *cursor == '\n' || *cursor == '\t')
        cursor++;
      if (*cursor == '\0')
        break;   // only trailing separators were left

      arglist[argcount++] = cursor;

      // Advance to the end of this argument.
      while (*cursor && *cursor != ' ' && *cursor != '\n' && *cursor != '\t')
        cursor++;
      // Terminate the argument unless it already ends the string.
      if (*cursor)
        *cursor++ = '\0';
    }
  }
  return argcount;
}
00352 #endif