ccmain/tessedit.cpp

Go to the documentation of this file.
00001 
00020 #include "mfcpch.h"
00021 //#include    <osfcn.h>
00022 //#include    <signal.h>
00023 //#include    <time.h>
00024 //#include    <unistd.h>
00025 #include "tfacep.h"     //must be before main.h
00026 //#include    "fileerr.h"
00027 #include "stderr.h"
00028 #include "basedir.h"
00029 #include "tessvars.h"
00030 //#include    "debgwin.h"
00031 //#include    "epapdest.h"
00032 #include "control.h"
00033 #include "imgs.h"
00034 #include "reject.h"
00035 #include "pageres.h"
00036 //#include    "gpapdest.h"
00037 #include "mainblk.h"
00038 #include "nwmain.h"
00039 #include "pgedit.h"
00040 #include "ocrshell.h"
00041 #include "tprintf.h"
00042 //#include    "ipeerr.h"
00043 //#include    "restart.h"
00044 #include "tessedit.h"
00045 //#include    "fontfind.h"
00046 #include "permute.h"
00047 #include "permdawg.h"
00048 #include "permnum.h"
00049 #include "stopper.h"
00050 #include "adaptmatch.h"
00051 #include "intmatcher.h"
00052 #include "chop.h"
00053 #include "globals.h"
00054 
00055 //extern "C" {
00056 #include "callnet.h"    //phils nn stuff
00057 //}
00058 #include "notdll.h"     //phils nn stuff
00059 
// Relative directory holding the variables (config) files.
#define VARDIR "configs/" //variables files

// Relative path of the config file used when running under the API.
#define API_CONFIG "configs/api_config" //config under api
// EXTERN is defined empty here, so the EXTERN-prefixed variable lines that
// follow in this file are plain definitions rather than extern declarations.
#define EXTERN
00064 
// Run-time variables owned by this file. Each line gives a name, a default
// value and a description string; the *_VAR / *_EVAR macros themselves are
// defined elsewhere (NOTE(review): presumably they register the variable so
// config files can override it - confirm against the macro definitions).

// When set, init_tesseract() writes all variables to "edited.cfg".
EXTERN BOOL_EVAR (tessedit_write_vars, FALSE, "Write all vars to file");
// When set, set_tess_tweak_vars() copies the tweak_* values below over the
// like-named classifier/dictionary globals.
EXTERN BOOL_VAR (tessedit_tweaking_tess_vars, FALSE,
"Fiddle tess config values");

// Copied into ReliableConfigThreshold by init_tesseract() unconditionally
// (it is not gated by tessedit_tweaking_tess_vars).
EXTERN INT_VAR (tweak_ReliableConfigThreshold, 2, "Tess VAR");

// Each tweak_X below is the replacement value for the global X that
// set_tess_tweak_vars() assigns when tweaking is enabled.
EXTERN double_VAR (tweak_garbage, 1.5, "Tess VAR");
EXTERN double_VAR (tweak_ok_word, 1.25, "Tess VAR");
EXTERN double_VAR (tweak_good_word, 1.1, "Tess VAR");
EXTERN double_VAR (tweak_freq_word, 1.0, "Tess VAR");
EXTERN double_VAR (tweak_ok_number, 1.4, "Tess VAR");
EXTERN double_VAR (tweak_good_number, 1.1, "Tess VAR");
EXTERN double_VAR (tweak_non_word, 1.25, "Tess VAR");
EXTERN double_VAR (tweak_CertaintyPerChar, -0.5, "Tess VAR");
EXTERN double_VAR (tweak_NonDictCertainty, -2.5, "Tess VAR");
EXTERN double_VAR (tweak_RejectCertaintyOffset, 1.0, "Tess VAR");
EXTERN double_VAR (tweak_GoodAdaptiveMatch, 0.125, "Tess VAR");
EXTERN double_VAR (tweak_GreatAdaptiveMatch, 0.10, "Tess VAR");
EXTERN INT_VAR (tweak_AdaptProtoThresh, 230, "Tess VAR");
EXTERN INT_VAR (tweak_AdaptFeatureThresh, 230, "Tess VAR");
EXTERN INT_VAR (tweak_min_outline_points, 6, "Tess VAR");
EXTERN INT_VAR (tweak_min_outline_area, 2000, "Tess VAR");
EXTERN double_VAR (tweak_good_split, 50.0, "Tess VAR");
EXTERN double_VAR (tweak_ok_split, 100.0, "Tess VAR");
// \endcond
00092 
// Globals defined in other translation units. None of the three is used by
// the code visible in this listing.
extern INT16 XOFFSET;
extern INT16 YOFFSET;
extern int NO_BLOCK;

// Global ETEXT_DESC pointer, defined here and initially NULL. Nothing in the
// visible part of this file assigns or reads it.
ETEXT_DESC *global_monitor = NULL;
00099 
00100 int init_tesseract(const char *arg0,
00101                    const char *textbase,
00102                    const char *configfile,
00103                    int configc,
00104                    const char *const *configv) {
00105   FILE *var_file;
00106   static char c_path[MAX_PATH];  //path for c code
00107 
00108   // Set the basename, compute the data directory and read C++ configs.
00109   main_setup(arg0, textbase, configc, configv);
00110   debug_window_on.set_value (FALSE);
00111 
00112   if (tessedit_write_vars) {
00113     var_file = fopen ("edited.cfg", "w");
00114     if (var_file != NULL) {
00115       print_variables(var_file);
00116       fclose(var_file);
00117     }
00118   }
00119   strcpy (c_path, datadir.string ());
00120   c_path[strlen (c_path) - strlen (m_data_sub_dir.string ())] = '\0';
00121   demodir = c_path;
00122   start_recog(configfile, textbase);
00123 
00124   ReliableConfigThreshold = tweak_ReliableConfigThreshold;
00125 
00126   set_tess_tweak_vars();
00127 
00128   if (tessedit_use_nn)           //phils nn stuff
00129     init_net();
00130   return 0;                      //Normal exit
00131 }
00132 
// Shut the recogniser down; simply forwards to end_recog().
void end_tesseract() {
  end_recog();
}
00136 
#ifdef _TIFFIO_
/**
 * Read the pixels of an open TIFF file into a Tesseract IMAGE.
 *
 * The image is (re)created with the TIFF's width, height and bits per
 * sample, then filled one scanline at a time. If the TIFF is not
 * "min is black" the result is inverted, because Tesseract's internal
 * representation is 0-is-black.
 *
 * Only simple layouts are handled: each scanline is copied verbatim, so the
 * more exotic TIFF formats will not come out right.
 *
 * @param tif   open libtiff handle to read from
 * @param image destination image; its previous contents are replaced
 *
 * Exits the process if either buffer cannot be allocated.
 */
void read_tiff_image(TIFF* tif, IMAGE* image) {
  // Initialise everything TIFFGetField may leave untouched when a tag is
  // absent. A missing photometric tag is treated as min-is-black, i.e. the
  // pixels are copied without inversion.
  uint32 image_width = 0;
  uint32 image_height = 0;
  uint16 photometric = PHOTOMETRIC_MINISBLACK;
  uint16 bpp = 1;  // libtiff stores this tag through a uint16 pointer
  TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &image_width);
  TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &image_height);
  TIFFGetField(tif, TIFFTAG_BITSPERSAMPLE, &bpp);
  TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometric);
  /*
  Tesseract's internal representation is 0-is-black,
  so if the photometric is 1 (min is black) then high-valued pixels
  are 1 (white), otherwise they are 0 (black).
  */
  UINT8 high_value = photometric == PHOTOMETRIC_MINISBLACK;
  image->create(image_width, image_height, bpp);

  // Check both buffers before doing any pointer arithmetic on them.
  UINT8* dest_buf = image->get_buffer();
  tdata_t buf = _TIFFmalloc(TIFFScanlineSize(tif));
  if (dest_buf == NULL || buf == NULL) {
    tprintf ("Error: out of memory in read_tiff_image()\n");
    exit(1);
  }

  int bytes_per_line = (image_width*bpp + 7)/8;
  // Fill the image from the start of its buffer, one row per scanline.
  // Starting at get_buffer() + bytes_per_line*image_height and stepping
  // forwards would put every row beyond the pixel data.
  // NOTE(review): assumes IMAGE keeps rows in the same top-to-bottom order
  // as TIFF scanlines - confirm against IMAGE.
  // This will go badly wrong with one of the more exotic tiff formats,
  // but the majority will work OK.
  for (uint32 y = 0; y < image_height; ++y) {
    if (TIFFReadScanline(tif, buf, y) < 0) {
      tprintf ("Error: failed to read a scanline in read_tiff_image()\n");
      break;
    }
    memcpy(dest_buf, buf, bytes_per_line);
    dest_buf += bytes_per_line;
  }
  if (high_value == 0)
    invert_image(image);
  _TIFFfree(buf);
}
#endif
00183 
// Offsets of the menu commands this file adds to the editor. extend_menu()
// adds them to the id bases it is given when creating the menu entries.
enum CMD_EVENTS
{
  ACTION_1_CMD_EVENT,            // not used by the code visible in this file
  RECOG_WERDS,                   // "Recog Words" mode entry
  RECOG_PSEUDO,                  // "Recog Blobs" mode entry
  ACTION_2_CMD_EVENT             // toggle handled by extend_unmoded_commands()
};
00195 
00216 void extend_menu( 
00217                  RADIO_MENU *modes_menu,
00218                  INT16 modes_id_base,
00219                  NON_RADIO_MENU *other_menu,
00220                  INT16 other_id_base
00221                 ) {
00222   /* Example new mode */
00223 
00224   modes_menu->add_child (new RADIO_MENU_LEAF ("Recog Words",
00225     modes_id_base + RECOG_WERDS));
00226   modes_menu->add_child (new RADIO_MENU_LEAF ("Recog Blobs",
00227     modes_id_base + RECOG_PSEUDO));
00228 
00229   /* Example toggle
00230 
00231   other_menu->add_child(
00232     new TOGGLE_MENU_LEAF( "Action 2",              //Display string
00233             other_id_base + ACTION_2_CMD_EVENT, //offset command id
00234             FALSE ) );                    //Initial value
00235 
00236    Example text parm  (commented out)
00237 
00238     other_menu->add_child(
00239     new VARIABLE_MENU_LEAF( "Parm change",            //Display string
00240             other_id_base + ACTION_3_CMD_EVENT, //offset command id
00241             "default value" ) );          //default value string
00242   */
00243 }
00244 
00245 
00255 void extend_moded_commands( 
00256                            INT32 mode,
00257                            BOX selection_box
00258                           ) {
00259   char msg[MAX_CHARS + 1];
00260 
00261   switch (mode) {
00262     case RECOG_WERDS:
00263       command_window->msg ("Recogging selected words");
00264 
00265       /* This is how to apply a "word processor" function to each selected word */
00266 
00267       process_selected_words(current_block_list,
00268                              selection_box,
00269                              &recog_interactive);
00270       break;
00271     case RECOG_PSEUDO:
00272       command_window->msg ("Recogging selected blobs");
00273 
00274       /* This is how to apply a "word processor" function to each selected word */
00275 
00276       recog_pseudo_word(current_block_list, selection_box);
00277       break;
00278     default:
00279       sprintf (msg, "Unexpected extended mode " INT32FORMAT, mode);
00280       command_window->msg (msg);
00281   }
00282 }
00283 
00284 
00289 void extend_unmoded_commands(                 //current mode
00290                              INT32 cmd_event,
00291                              char *new_value  //changed value if any
00292                             ) {
00293   char msg[MAX_CHARS + 1];
00294 
00295   switch (cmd_event) {
00296     case ACTION_2_CMD_EVENT:     //a toggle event
00297       if (new_value[0] == 'T')
00298                                  //Display message
00299         command_window->msg ("Extended Action 2 ON!!");
00300       else
00301         command_window->msg ("Extended Action 2 OFF!!");
00302       break;
00303     default:
00304       sprintf (msg, "Unrecognised extended command " INT32FORMAT " (%s)",
00305         cmd_event, new_value);
00306       command_window->msg (msg);
00307       break;
00308   }
00309 }
00310 
00311 
00330 void set_tess_tweak_vars() { 
00331   if (tessedit_tweaking_tess_vars) {
00332     garbage = tweak_garbage;
00333     ok_word = tweak_ok_word;
00334     good_word = tweak_good_word;
00335     freq_word = tweak_freq_word;
00336     ok_number = tweak_ok_number;
00337     good_number = tweak_good_number;
00338     non_word = tweak_non_word;
00339     CertaintyPerChar = tweak_CertaintyPerChar;
00340     NonDictCertainty = tweak_NonDictCertainty;
00341     RejectCertaintyOffset = tweak_RejectCertaintyOffset;
00342     GoodAdaptiveMatch = tweak_GoodAdaptiveMatch;
00343     GreatAdaptiveMatch = tweak_GreatAdaptiveMatch;
00344     AdaptProtoThresh = tweak_AdaptProtoThresh;
00345     AdaptFeatureThresh = tweak_AdaptFeatureThresh;
00346     min_outline_points = tweak_min_outline_points;
00347     min_outline_area = tweak_min_outline_area;
00348     good_split = tweak_good_split;
00349     ok_split = tweak_ok_split;
00350   }
00351   //   if (expiry_day * 24 * 60 * 60 < time(NULL))
00352   //         err_exit();
00353 }

Generated on Wed Feb 28 19:49:07 2007 for Tesseract by  doxygen 1.5.1