00001
00020 #ifndef TORDMAIN_H
00021 #define TORDMAIN_H
00022
00023 #include <time.h>
00024 #include "varable.h"
00025 #include "ocrblock.h"
00026 #include "tessclas.h"
00027 #include "blobbox.h"
00028 #include "notdll.h"
00029
/*
 * Externally visible tunable parameters, declared through the
 * BOOL_VAR_H / INT_VAR_H / double_VAR_H / STRING_EVAR_H macros.
 * Each use passes three arguments: an identifier, a value, and a
 * descriptive string.
 * NOTE(review): the macros are not defined in this file (presumably they
 * come from "varable.h"); whether the value and string arguments are used
 * by the header form of the macro should be confirmed there.
 */
00032 extern BOOL_VAR_H (textord_show_blobs, FALSE, "Display unsorted blobs");
00033 extern BOOL_VAR_H (textord_new_initial_xheight, TRUE,
00034 "Use test xheight mechanism");
00035 extern BOOL_VAR_H (textord_exit_after, FALSE,
00036 "Exit after completing textord");
/* Blob size / noise classification parameters. */
00037 extern INT_VAR_H (textord_max_noise_size, 7, "Pixel size of noise");
00038 extern double_VAR_H (textord_blob_size_bigile, 95,
00039 "Percentile for large blobs");
00040 extern double_VAR_H (textord_noise_area_ratio, 0.7,
00041 "Fraction of bounding box for noise");
00042 extern double_VAR_H (textord_blob_size_smallile, 20,
00043 "Percentile for small blobs");
00044 extern double_VAR_H (textord_initialx_ile, 0.75,
00045 "Ile of sizes for xheight guess");
/*
 * NOTE(review): the description below is identical to the one given for
 * textord_initialx_ile above; confirm whether that is intended.
 */
00046 extern double_VAR_H (textord_initialasc_ile, 0.90,
00047 "Ile of sizes for xheight guess");
/* Parameters whose names share the textord_noise_ prefix. */
00048 extern INT_VAR_H (textord_noise_sizefraction, 10,
00049 "Fraction of size for maxima");
00050 extern double_VAR_H (textord_noise_sizelimit, 0.5,
00051 "Fraction of x for big t count");
00052 extern INT_VAR_H (textord_noise_translimit, 16,
00053 "Transitions for normal blob");
00054 extern double_VAR_H (textord_noise_normratio, 2.0,
00055 "Dot to norm ratio for deletion");
00056 extern BOOL_VAR_H (textord_noise_rejwords, TRUE, "Reject noise-like words");
00057 extern BOOL_VAR_H (textord_noise_rejrows, TRUE, "Reject noise-like rows");
00058 extern double_VAR_H (textord_noise_syfract, 0.2,
00059 "xh fract error for norm blobs");
00060 extern double_VAR_H (textord_noise_sxfract, 0.4,
00061 "xh fract width error for norm blobs");
00062 extern INT_VAR_H (textord_noise_sncount, 1, "super norm blobs to save row");
/*
 * NOTE(review): the description below is identical to the one given for
 * textord_noise_normratio above; confirm whether that is intended.
 */
00063 extern double_VAR_H (textord_noise_rowratio, 6.0,
00064 "Dot to norm ratio for deletion");
00065 extern BOOL_VAR_H (textord_noise_debug, FALSE, "Debug row garbage detector");
/* Baseline shift parameters. */
00066 extern double_VAR_H (textord_blshift_maxshift, 0.00, "Max baseline shift");
00067 extern double_VAR_H (textord_blshift_xfraction, 9.99,
00068 "Min size of baseline shift");
00069
/* String parameter holding the image file extension (value given: ".tif"). */
00070 extern STRING_EVAR_H (tessedit_image_ext, ".tif", "Extension for image file");
/*
 * Global clock_t value declared here and defined in another translation unit.
 * NOTE(review): the name suggests a CPU-time sample kept between
 * measurements; confirm at the definition.
 */
00073 extern clock_t previous_cpu;
/*
 * make_blocks_from_blobs
 *
 * Prototype only; the definition is not in this header.
 *
 * Parameters:
 *   tessblobs - pointer to a TBLOB.
 *   filename  - read-only C string.
 *   page_tr   - ICOORD passed by value.
 *               NOTE(review): name suggests the page's top-right
 *               coordinate; confirm at the definition.
 *   do_shift  - BOOL8 flag; its effect is not visible from here.
 *   blocks    - pointer to a BLOCK_LIST.
 *               NOTE(review): presumably receives the resulting blocks;
 *               confirm at the definition.
 *
 * Returns nothing.
 */
00074 void make_blocks_from_blobs(
00075 TBLOB *tessblobs,
00076 const char *filename,
00077 ICOORD page_tr,
00078 BOOL8 do_shift,
00079 BLOCK_LIST *blocks
00080 );
/*
 * read_and_textord
 *
 * Prototype only; the definition is not in this header.
 *
 * Parameters:
 *   filename - read-only C string.
 *   blocks   - pointer to a BLOCK_LIST.
 *
 * Returns nothing.
 * NOTE(review): the name suggests it reads input identified by filename and
 * runs text ordering on the given blocks; confirm at the definition.
 */
00081 void read_and_textord(
00082 const char *filename,
00083 BLOCK_LIST *blocks);
/*
 * edges_and_textord
 *
 * Prototype only; the definition is not in this header.
 * Takes the same parameter list as read_and_textord.
 *
 * Parameters:
 *   filename - read-only C string.
 *   blocks   - pointer to a BLOCK_LIST.
 *
 * Returns nothing.
 * NOTE(review): the name suggests edge extraction followed by text
 * ordering; confirm at the definition.
 */
00084 void edges_and_textord(
00085 const char *filename,
00086 BLOCK_LIST *blocks);
/*
 * assign_blobs_to_blocks
 *
 * This prototype is enclosed in "#if 0" ... "#endif", so it is compiled out.
 * Compared with assign_blobs_to_blocks2, which is declared in this header,
 * it takes an additional leading PBLOB_LIST pointer.
 */
00087 #if 0
00088 void assign_blobs_to_blocks(
00089 PBLOB_LIST *blobs,
00090 BLOCK_LIST *blocks,
00091 TO_BLOCK_LIST *land_blocks,
00092 TO_BLOCK_LIST *port_blocks
00093 );
00094 #endif
00095
/*
 * assign_blobs_to_blocks2
 *
 * Prototype only; the definition is not in this header.
 *
 * Parameters:
 *   blocks      - pointer to a BLOCK_LIST.
 *   land_blocks - pointer to a TO_BLOCK_LIST.
 *   port_blocks - pointer to a TO_BLOCK_LIST.
 *                 NOTE(review): "land"/"port" presumably stand for
 *                 landscape/portrait; confirm at the definition.
 *
 * Returns nothing.
 */
00096 void assign_blobs_to_blocks2(
00097 BLOCK_LIST *blocks,
00098 TO_BLOCK_LIST *land_blocks,
00099 TO_BLOCK_LIST *port_blocks
00100 );
/*
 * filter_blobs
 *
 * Prototype only; the definition is not in this header.
 *
 * Parameters:
 *   page_tr    - ICOORD passed by value.
 *                NOTE(review): name suggests the page's top-right
 *                coordinate; confirm at the definition.
 *   blocks     - pointer to a TO_BLOCK_LIST.
 *   testing_on - BOOL8 flag.
 *                NOTE(review): presumably enables debug/test output;
 *                confirm at the definition.
 *
 * Returns nothing.
 */
00101 void filter_blobs(
00102 ICOORD page_tr,
00103 TO_BLOCK_LIST *blocks,
00104 BOOL8 testing_on
00105 );
/*
 * filter_noise_blobs
 *
 * Prototype only; the definition is not in this header.
 *
 * Parameters (all pointers to BLOBNBOX_LIST):
 *   src_list   - NOTE(review): presumably the list being filtered.
 *   noise_list - NOTE(review): presumably receives blobs judged to be noise.
 *   small_list - NOTE(review): presumably receives small blobs.
 *   large_list - NOTE(review): presumably receives large blobs.
 *
 * Returns a float; what it measures is not visible from this declaration.
 * TODO confirm the list roles and the return value at the definition.
 */
00106 float filter_noise_blobs(
00107 BLOBNBOX_LIST *src_list,
00108 BLOBNBOX_LIST *noise_list,
00109 BLOBNBOX_LIST *small_list,
00110 BLOBNBOX_LIST *large_list
00111 );
/*
 * filter_noise_blobs2
 *
 * Prototype only; the definition is not in this header.
 * Takes four BLOBNBOX_LIST pointers (src_list, noise_list, small_list,
 * large_list) and returns a float.
 * NOTE(review): the "2" suffix suggests an alternative implementation of a
 * noise-blob filter; how it differs, the role of each list, and the meaning
 * of the return value must be confirmed at the definition.
 */
00112 float filter_noise_blobs2(
00113 BLOBNBOX_LIST *src_list,
00114 BLOBNBOX_LIST *noise_list,
00115 BLOBNBOX_LIST *small_list,
00116 BLOBNBOX_LIST *large_list
00117 );
/*
 * textord_page
 *
 * Prototype only; the definition is not in this header.
 *
 * Parameters:
 *   page_tr     - ICOORD passed by value.
 *                 NOTE(review): name suggests the page's top-right
 *                 coordinate; confirm at the definition.
 *   blocks      - pointer to a BLOCK_LIST.
 *   land_blocks - pointer to a TO_BLOCK_LIST.
 *   port_blocks - pointer to a TO_BLOCK_LIST.
 *                 NOTE(review): "land"/"port" presumably stand for
 *                 landscape/portrait; confirm at the definition.
 *
 * Returns nothing.
 */
00118 void textord_page(
00119 ICOORD page_tr,
00120 BLOCK_LIST *blocks,
00121 TO_BLOCK_LIST *land_blocks,
00122 TO_BLOCK_LIST *port_blocks
00123 );
/*
 * cleanup_blocks
 *
 * Prototype only; the definition is not in this header.
 * Takes a pointer to a BLOCK_LIST and returns nothing.
 * NOTE(review): what "cleanup" removes or modifies is not visible here;
 * confirm at the definition.
 */
00124 void cleanup_blocks(
00125 BLOCK_LIST *blocks
00126 );
/*
 * clean_noise_from_row
 *
 * Prototype only; the definition is not in this header.
 * Takes a pointer to a ROW and returns a BOOL8.
 * NOTE(review): the meaning of the returned flag (for example, whether the
 * row should be rejected) is not visible here; confirm at the definition.
 */
00127 BOOL8 clean_noise_from_row(
00128 ROW *row
00129 );
/*
 * clean_noise_from_words
 *
 * Prototype only; the definition is not in this header.
 * Takes a pointer to a ROW and returns nothing.
 * NOTE(review): presumably operates on the words belonging to the row;
 * confirm at the definition.
 */
00130 void clean_noise_from_words(
00131 ROW *row
00132 );
/*
 * tweak_row_baseline
 *
 * Prototype only; the definition is not in this header.
 * Takes a pointer to a ROW and returns nothing.
 * NOTE(review): presumably adjusts the row's baseline, possibly governed by
 * the textord_blshift_* parameters; confirm at the definition.
 */
00133 void tweak_row_baseline(
00134 ROW *row
00135 );
/*
 * blob_y_order
 *
 * Prototype only; the definition is not in this header.
 * Takes two untyped (void *) pointers and returns an INT32.
 * NOTE(review): the signature resembles a comparison callback that orders
 * two items by y position; the pointed-to type and the sign convention of
 * the result must be confirmed at the definition.
 */
00136 INT32 blob_y_order(
00137 void *item1,
00138 void *item2);
00139 #endif