00001
00020 #include "mfcpch.h"
00021
00022
00023
00024
00025 #include "tfacep.h"
00026
00027 #include "stderr.h"
00028 #include "basedir.h"
00029 #include "tessvars.h"
00030
00031
00032 #include "control.h"
00033 #include "imgs.h"
00034 #include "reject.h"
00035 #include "pageres.h"
00036
00037 #include "mainblk.h"
00038 #include "nwmain.h"
00039 #include "pgedit.h"
00040 #include "varabled.h"
00041 #include "ocrshell.h"
00042 #include "tprintf.h"
00043
00044
00045 #include "tessedit.h"
00046
00047 #include "applybox.h"
00048 #include "permute.h"
00049 #include "permdawg.h"
00050 #include "permnum.h"
00051 #include "stopper.h"
00052 #include "adaptmatch.h"
00053 #include "intmatcher.h"
00054 #include "chop.h"
00055 #include "globals.h"
00056
00057 #include "fileerr.h"
00058 #ifdef __UNIX__
00059 #include <unistd.h>
00060 #include <signal.h>
00061 #else
00062 #include <io.h>
00063 #endif
00064 #include <stdlib.h>
00065 #include "basedir.h"
00066 #include "mainblk.h"
00067
00068
00069 #include "callnet.h"
00070
00071 #include "notdll.h"
00072
// Sub-directory (below the data directory) that holds variables/config files.
#define VARDIR "configs/"
// Config file used when running under the API.
#define API_CONFIG "configs/api_config"
// Expands to nothing, so declarations prefixed with EXTERN in this file are
// plain declarations/definitions.
#define EXTERN

#define BLOB_MATCHING_ON
00080
00081 EXTERN DLLSYM STRING datadir;
00082 EXTERN DLLSYM STRING imagebasename;
00083 const ERRCODE NO_PATH =
00084 "Warning:explicit path for executable will not be used for configs";
00085 static const ERRCODE USAGE = "Usage";
00086
00089 EXTERN BOOL_EVAR (tessedit_write_vars, FALSE, "Write all vars to file");
00090 EXTERN BOOL_VAR (tessedit_tweaking_tess_vars, FALSE,
00091 "Fiddle tess config values");
00092 BOOL_VAR(tessedit_resegment_from_boxes, FALSE,
00093 "Take segmentation and labeling from box file");
00094 BOOL_VAR(tessedit_train_from_boxes, FALSE,
00095 "Generate training data from boxed chars");
00096
00097 EXTERN INT_VAR (tweak_ReliableConfigThreshold, 2, "Tess VAR");
00098
00099 EXTERN double_VAR (tweak_garbage, 1.5, "Tess VAR");
00100 EXTERN double_VAR (tweak_ok_word, 1.25, "Tess VAR");
00101 EXTERN double_VAR (tweak_good_word, 1.1, "Tess VAR");
00102 EXTERN double_VAR (tweak_freq_word, 1.0, "Tess VAR");
00103 EXTERN double_VAR (tweak_ok_number, 1.4, "Tess VAR");
00104 EXTERN double_VAR (tweak_good_number, 1.1, "Tess VAR");
00105 EXTERN double_VAR (tweak_non_word, 1.25, "Tess VAR");
00106 EXTERN double_VAR (tweak_CertaintyPerChar, -0.5, "Tess VAR");
00107 EXTERN double_VAR (tweak_NonDictCertainty, -2.5, "Tess VAR");
00108 EXTERN double_VAR (tweak_RejectCertaintyOffset, 1.0, "Tess VAR");
00109 EXTERN double_VAR (tweak_GoodAdaptiveMatch, 0.125, "Tess VAR");
00110 EXTERN double_VAR (tweak_GreatAdaptiveMatch, 0.10, "Tess VAR");
00111 EXTERN INT_VAR (tweak_AdaptProtoThresh, 230, "Tess VAR");
00112 EXTERN INT_VAR (tweak_AdaptFeatureThresh, 230, "Tess VAR");
00113 EXTERN INT_VAR (tweak_min_outline_points, 6, "Tess VAR");
00114 EXTERN INT_VAR (tweak_min_outline_area, 2000, "Tess VAR");
00115 EXTERN double_VAR (tweak_good_split, 50.0, "Tess VAR");
00116 EXTERN double_VAR (tweak_ok_split, 100.0, "Tess VAR");
00117
00118
00119 EXTERN BOOL_VAR (m_print_variables, FALSE,
00120 "Print initial values of all variables");
00121 EXTERN STRING_VAR (m_data_sub_dir, "tessdata/", "Directory for data files");
00122 EXTERN INT_VAR (memgrab_size, 13000000, "Preallocation size for batch use");
00125 extern INT16 XOFFSET;
00126 extern INT16 YOFFSET;
00127 extern int NO_BLOCK;
00128
00130 ETEXT_DESC *global_monitor = NULL;
00131
00144 int init_tesseract(const char *arg0,
00145 const char *textbase,
00146 const char *configfile,
00147 int configc,
00148 const char *const *configv) {
00149 FILE *var_file;
00150 static char c_path[MAX_PATH];
00151
00152
00153
00154
00155 INT32 arg;
00156 INT32 offset;
00157 FILE *fp;
00158 char flag[2];
00159 STRING varfile;
00160
00161 imagebasename = textbase;
00162 if (getpath (arg0, datadir) < 0)
00163 #ifdef __UNIX__
00164 CANTOPENFILE.error ("main", ABORT, "%s to get path", arg0[0]);
00165 #else
00166 NO_PATH.error ("main", DBG, NULL);
00167 #endif
00168
00169 for (arg = 0; arg < configc; arg++) {
00170 if (configv[arg][0] == '+' || configv[arg][0] == '-') {
00171 offset = 1;
00172 flag[0] = configv[arg][0];
00173 }
00174 else {
00175 offset = 0;
00176 }
00177 flag[offset] = '\0';
00178 varfile = flag;
00179
00180 fp = fopen (configv[arg] + offset, "r");
00181 if (fp != NULL) {
00182 fclose(fp);
00183 }
00184 else {
00185 varfile += datadir;
00186 varfile += m_data_sub_dir;
00187 varfile += VARDIR;
00188 }
00189
00190 varfile += configv[arg] + offset;
00191 read_variables_file (varfile.string ());
00192 }
00193
00194 if (m_print_variables)
00195 print_variables(stdout);
00196
00197
00198
00199
00200 #define TESSDATA_PREFIX /usr/local/tesseract-1.02/
00201
00202 #ifdef TESSDATA_PREFIX
00203 # define _STR(a) #a
00204 # define _XSTR(a) _STR(a)
00205 datadir = _XSTR(TESSDATA_PREFIX);
00206 # undef _XSTR
00207 # undef _STR
00208 #endif
00209
00210 datadir += m_data_sub_dir;
00211 #ifdef __UNIX__
00212 if (memgrab_size > 0) {
00213 void *membuf;
00214
00215 membuf = malloc (memgrab_size);
00216 if (membuf == NULL) {
00217 raise(SIGTTOU);
00218 sleep (10);
00219 }
00220 else
00221 free(membuf);
00222 }
00223 #endif
00224 debug_window_on.set_value (FALSE);
00225
00226 #define FORCE_VAR_DUMP 0 // set to 1 to make variables file
00227
00228 if (tessedit_write_vars || FORCE_VAR_DUMP) {
00229 var_file = fopen ("edited.cfg", "w");
00230 if (var_file != NULL) {
00231 print_variables(var_file);
00232 fclose(var_file);
00233 }
00234 }
00235 strcpy (c_path, datadir.string ());
00236 c_path[strlen (c_path) - strlen (m_data_sub_dir.string ())] = '\0';
00237 demodir = c_path;
00238 start_recog(configfile, textbase);
00239
00240 ReliableConfigThreshold = tweak_ReliableConfigThreshold;
00241
00242 set_tess_tweak_vars();
00243
00244 if (tessedit_use_nn)
00245 init_net();
00246 return 0;
00247 }
00248