ccmain/tessedit2.cpp

Go to the documentation of this file.
00001 
00020 #include "mfcpch.h"
00021 //#include    <osfcn.h>
00022 //#include    <signal.h>
00023 //#include    <time.h>
00024 //#include    <unistd.h>
00025 #include "tfacep.h"     //must be before main.h
00026 //#include    "fileerr.h"
00027 #include "stderr.h"
00028 #include "basedir.h"
00029 #include "tessvars.h"
00030 //#include    "debgwin.h"
00031 //#include    "epapdest.h"
00032 #include "control.h"
00033 #include "imgs.h"
00034 #include "reject.h"
00035 #include "pageres.h"
00036 //#include    "gpapdest.h"
00037 #include "mainblk.h"
00038 #include "nwmain.h"
00039 #include "pgedit.h"
00040 #include "varabled.h"
00041 #include "ocrshell.h"
00042 #include "tprintf.h"
00043 //#include    "ipeerr.h"
00044 //#include    "restart.h"
00045 #include "tessedit.h"
00046 //#include    "fontfind.h"
00047 #include "applybox.h"
00048 #include "permute.h"
00049 #include "permdawg.h"
00050 #include "permnum.h"
00051 #include "stopper.h"
00052 #include "adaptmatch.h"
00053 #include "intmatcher.h"
00054 #include "chop.h"
00055 #include "globals.h"
00056 // from ccutil/mainblk.cpp
00057 #include "fileerr.h"
00058 #ifdef __UNIX__
00059 #include <unistd.h>
00060 #include <signal.h>
00061 #else
00062 #include <io.h>
00063 #endif
00064 #include <stdlib.h>
00065 #include "basedir.h"
00066 #include "mainblk.h"
00067 
00068 //extern "C" {
00069 #include "callnet.h"    //phils nn stuff
00070 //}
00071 #include "notdll.h"     //phils nn stuff
00072 
// Path fragments and build switches used by init_tesseract() below.
// VARDIR is appended to datadir + m_data_sub_dir when a variables file
// cannot be opened under the name given by the caller.
00073 #define VARDIR "configs/" //variables files
// API_CONFIG is not referenced in the visible part of this file.
00074 #define API_CONFIG "configs/api_config" //config under api
// EXTERN expands to nothing here, so the EXTERN-prefixed declarations that
// follow are emitted without any extra storage-class keyword.
00075 #define EXTERN
00076 
00077 // from ccutil/mainblk.cpp
00078 #define BLOB_MATCHING_ON
// NOTE(review): second, textually identical definition of VARDIR (see above);
// legal as a benign redefinition but redundant.
00079 #define VARDIR "configs/" //variables files
00080 
// Global strings set up by init_tesseract():
//  - datadir is filled in by getpath(), then replaced by the stringified
//    TESSDATA_PREFIX and extended with m_data_sub_dir.
//  - imagebasename is assigned from the textbase argument.
00081 EXTERN DLLSYM STRING datadir;    //dir for data files
00082 EXTERN DLLSYM STRING imagebasename; //name of image
// Reported (at DBG level) by init_tesseract() in non-__UNIX__ builds when
// getpath() fails.
00083 const ERRCODE NO_PATH =
00084 "Warning:explicit path for executable will not be used for configs";
// NOTE(review): USAGE is not referenced in the visible part of this file.
00085 static const ERRCODE USAGE = "Usage";
00086 
// Run-time variables declared through the project's *_VAR macros; each entry
// gives the variable name, its default value and a description string.
// When tessedit_write_vars is set, init_tesseract() dumps every variable to
// "edited.cfg".
00089 EXTERN BOOL_EVAR (tessedit_write_vars, FALSE, "Write all vars to file");
00090 EXTERN BOOL_VAR (tessedit_tweaking_tess_vars, FALSE,
00091 "Fiddle tess config values");
00092 BOOL_VAR(tessedit_resegment_from_boxes, FALSE,
00093          "Take segmentation and labeling from box file");
00094 BOOL_VAR(tessedit_train_from_boxes, FALSE,
00095          "Generate training data from boxed chars");
00096 
// Copied into ReliableConfigThreshold by init_tesseract().
00097 EXTERN INT_VAR (tweak_ReliableConfigThreshold, 2, "Tess VAR");
00098 
// The remaining tweak_* values are not read in the visible part of this file;
// presumably they are consumed by set_tess_tweak_vars() -- TODO confirm.
00099 EXTERN double_VAR (tweak_garbage, 1.5, "Tess VAR");
00100 EXTERN double_VAR (tweak_ok_word, 1.25, "Tess VAR");
00101 EXTERN double_VAR (tweak_good_word, 1.1, "Tess VAR");
00102 EXTERN double_VAR (tweak_freq_word, 1.0, "Tess VAR");
00103 EXTERN double_VAR (tweak_ok_number, 1.4, "Tess VAR");
00104 EXTERN double_VAR (tweak_good_number, 1.1, "Tess VAR");
00105 EXTERN double_VAR (tweak_non_word, 1.25, "Tess VAR");
00106 EXTERN double_VAR (tweak_CertaintyPerChar, -0.5, "Tess VAR");
00107 EXTERN double_VAR (tweak_NonDictCertainty, -2.5, "Tess VAR");
00108 EXTERN double_VAR (tweak_RejectCertaintyOffset, 1.0, "Tess VAR");
00109 EXTERN double_VAR (tweak_GoodAdaptiveMatch, 0.125, "Tess VAR");
00110 EXTERN double_VAR (tweak_GreatAdaptiveMatch, 0.10, "Tess VAR");
00111 EXTERN INT_VAR (tweak_AdaptProtoThresh, 230, "Tess VAR");
00112 EXTERN INT_VAR (tweak_AdaptFeatureThresh, 230, "Tess VAR");
00113 EXTERN INT_VAR (tweak_min_outline_points, 6, "Tess VAR");
00114 EXTERN INT_VAR (tweak_min_outline_area, 2000, "Tess VAR");
00115 EXTERN double_VAR (tweak_good_split, 50.0, "Tess VAR");
00116 EXTERN double_VAR (tweak_ok_split, 100.0, "Tess VAR");
00117 
00118 // from ccutil/mainblk.cpp
// When set, init_tesseract() prints every variable to stdout after the
// variables files have been read.
00119 EXTERN BOOL_VAR (m_print_variables, FALSE,
00120 "Print initial values of all variables");
// Appended to datadir; also used as the middle component when a variables
// file has to be looked up under the data directory.
00121 EXTERN STRING_VAR (m_data_sub_dir, "tessdata/", "Directory for data files");
// In __UNIX__ builds init_tesseract() test-allocates (and immediately frees)
// this many bytes when the value is positive.
00122 EXTERN INT_VAR (memgrab_size, 13000000, "Preallocation size for batch use");
// Defined elsewhere; not referenced in the visible part of this file.
00125 extern INT16 XOFFSET;
00126 extern INT16 YOFFSET;
00127 extern int NO_BLOCK;
00128 
// Starts out NULL; nothing in the visible part of this file assigns it.
00130 ETEXT_DESC *global_monitor = NULL;
00131 
00144 int init_tesseract(const char *arg0,
00145                    const char *textbase,
00146                    const char *configfile,
00147                    int configc,
00148                    const char *const *configv) {
00149   FILE *var_file;
00150   static char c_path[MAX_PATH];  //path for c code
00151 
00152   // Set the basename, compute the data directory and read C++ configs.
00153 
00154   // from ccutil/mainblk.cpp
00155   INT32 arg;                     /*argument */
00156   INT32 offset;                  //for flag
00157   FILE *fp;                      /*variables file */
00158   char flag[2];                  //+/-
00159   STRING varfile;                /*name of file */
00160 
00161   imagebasename = textbase;      /*name of image */
00162   if (getpath (arg0, datadir) < 0)
00163   #ifdef __UNIX__
00164     CANTOPENFILE.error ("main", ABORT, "%s to get path", arg0[0]);
00165   #else
00166   NO_PATH.error ("main", DBG, NULL);
00167   #endif
00168 
00169   for (arg = 0; arg < configc; arg++) {
00170     if (configv[arg][0] == '+' || configv[arg][0] == '-') {
00171       offset = 1;
00172       flag[0] = configv[arg][0];
00173     }
00174     else {
00175       offset = 0;
00176     }
00177     flag[offset] = '\0';
00178     varfile = flag;
00179                                  /*attempt open */
00180     fp = fopen (configv[arg] + offset, "r");
00181     if (fp != NULL) {
00182       fclose(fp);  /*was only to test */
00183     }
00184     else {
00185       varfile += datadir;
00186       varfile += m_data_sub_dir; /*data directory */
00187       varfile += VARDIR;         /*variables dir */
00188     }
00189                                  /*actual name */
00190     varfile += configv[arg] + offset;
00191     read_variables_file (varfile.string ());
00192   }
00193 
00194   if (m_print_variables)
00195     print_variables(stdout);  /*print them all */
00196 
00197 // Allows the required data files to be at a fixed location
00198 // not relative to the directory where the binary was executed
00199 // from a patch on the forums on sf.net - need to add attribution
00200 #define TESSDATA_PREFIX /usr/local/tesseract-1.02/
00201 
00202 #ifdef TESSDATA_PREFIX
00203 # define _STR(a) #a
00204 # define _XSTR(a) _STR(a)
00205   datadir = _XSTR(TESSDATA_PREFIX);
00206 # undef _XSTR
00207 # undef _STR
00208 #endif
00209 
00210   datadir += m_data_sub_dir;     /*data directory */
00211   #ifdef __UNIX__
00212   if (memgrab_size > 0) {
00213     void *membuf;                //test virtual mem
00214                                  //test memory
00215     membuf = malloc (memgrab_size);
00216     if (membuf == NULL) {
00217       raise(SIGTTOU);  //hangup for jobber
00218       sleep (10);
00219     }
00220     else
00221       free(membuf); 
00222   }
00223   #endif
00224   debug_window_on.set_value (FALSE);
00225 
00226 #define FORCE_VAR_DUMP 0   // set to 1 to make variables file
00227 
00228   if (tessedit_write_vars || FORCE_VAR_DUMP) {
00229     var_file = fopen ("edited.cfg", "w");
00230     if (var_file != NULL) {
00231       print_variables(var_file); 
00232       fclose(var_file); 
00233     }
00234   }
00235   strcpy (c_path, datadir.string ());
00236   c_path[strlen (c_path) - strlen (m_data_sub_dir.string ())] = '\0';
00237   demodir = c_path;
00238   start_recog(configfile, textbase); 
00239 
00240   ReliableConfigThreshold = tweak_ReliableConfigThreshold;
00241 
00242   set_tess_tweak_vars(); 
00243 
00244   if (tessedit_use_nn)           //Phils NeuralNet stuff
00245     init_net(); 
00246   return 0;                      //Normal exit
00247 }
00248 

Generated on Wed Feb 28 19:49:07 2007 for Tesseract by  doxygen 1.5.1