wordrec/heuristic.cpp File Reference

#include "heuristic.h"
#include "baseline.h"
#include "metrics.h"
#include "freelist.h"
#include <math.h>

Go to the source code of this file.

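Defines

#define BAD_RATING   1000.0
#define MAX_SQUAT   2.0

Functions

FLOAT32 prioritize_state ()
FLOAT32 rating_priority ()
WIDTH_RECORD* state_char_widths ()
FLOAT32 width_priority ()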


Define Documentation

#define BAD_RATING   1000.0

Definition at line 33 of file heuristic.cpp.

Referenced by rating_priority().

#define MAX_SQUAT   2.0

Note:
File: heuristic.cpp (Formerly heuristic.c)
On segmentation & priority
Author:
Mark Seaman, OCR Technology
Date:
Created Fri Oct 16 14:37:00 1987; modified Wed Jul 10 14:15:08 1991 (Mark Seaman) marks
(c) Copyright 1987, Hewlett-Packard Company.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Definition at line 32 of file heuristic.cpp.

Referenced by width_priority().


Function Documentation

FLOAT32 prioritize_state ( CHUNKS_RECORD * chunks_record,
SEARCH_RECORD * the_search,
STATE old_state 
)

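Compute a priority for this state, representing how urgently the state should be checked. The result is the sum of the rating priority and the width priority, the latter multiplied by 1000.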

Definition at line 43 of file heuristic.cpp.

References SEARCH_RECORD::num_joints, rating_priority(), record_priorities(), SEARCH_RECORD::this_state, and width_priority().

Referenced by expand_node().

00045                                            {  // prioritize_state(): combine the rating-based and width-based priorities of a state
00046   FLOAT32 width_pri;  // width_priority() result after scaling by 1000.0
00047   FLOAT32 match_pri;  // rating_priority() result
00048   // Rating-based component, computed for the_search->this_state.
00049   match_pri = rating_priority (chunks_record, the_search->this_state,
00050     old_state, the_search->num_joints);
00051   // Width-based component; multiplied by 1000.0 before being added to match_pri.
00052   width_pri = width_priority (chunks_record, the_search->this_state,
00053     the_search->num_joints) * 1000.0;
00054 
00055   record_priorities(the_search, old_state, match_pri, width_pri);  // NOTE(review): helper not shown here; presumably records both components - confirm
00056   // The returned priority is the plain sum of the two components.
00057   return (width_pri + match_pri);
00058 }

FLOAT32 rating_priority ( CHUNKS_RECORD * chunks_record,
STATE state,
STATE old_state,
int  num_joints 
)

Assign a segmentation priority based on the ratings of the blobs (in that segmentation) that have been classified.

The average "goodness" (i.e. rating / weight) for each blob is used to indicate the segmentation priority.

Definition at line 69 of file heuristic.cpp.

References BAD_RATING, best_probability, bin_to_pieces(), matrix_get, NIL, NOT_CLASSIFIED, ratings, CHUNKS_RECORD::ratings, and CHUNKS_RECORD::weights.

Referenced by prioritize_state().

00072                                         {  // rating_priority(): summed blob ratings divided by summed chunk weights
00073   PIECES_STATE blob_chunks;  // filled by bin_to_pieces(); read below until the first zero entry
00074   INT16 x;                   // index into blob_chunks
00075   INT16 y;                   // chunk index within the current blob
00076   CHOICES this_choice;
00077   INT16 first_chunk = 0;     // index of the first chunk of the current blob
00078   INT16 last_chunk;          // inclusive index of the last chunk of the current blob
00079   INT16 ratings = 0;         // running sum of best_probability() values, each truncated to INT16
00080   INT16 weights = 0;         // running sum of chunk weights, each truncated to INT16
00081 
00082   bin_to_pieces(state, num_joints, blob_chunks);  // NOTE(review): presumably expands the state into per-blob chunk counts - confirm
00083 
00084   for (x = 0; blob_chunks[x]; x++) {
00085                                  // Iterate over each blob; the loop ends at the first zero entry
00086     last_chunk = first_chunk + blob_chunks[x] - 1;
00087     // Look up the ratings-matrix entry for the chunk range [first_chunk, last_chunk].
00088     this_choice = matrix_get (chunks_record->ratings,
00089       first_chunk, last_chunk);
00090     // A NIL entry ends the scan immediately with the fixed BAD_RATING value.
00091     if (this_choice == NIL)
00092       return (BAD_RATING);
00093     // Entries equal to NOT_CLASSIFIED add nothing to either sum.
00094     if (this_choice != NOT_CLASSIFIED) {
00095 
00096       ratings += (INT16) best_probability (this_choice);
00097       for (y = first_chunk; y <= last_chunk; y++) {
00098         weights += (INT16) (chunks_record->weights[y]);
00099       }
00100     }
00101     first_chunk += blob_chunks[x];  // advance to the first chunk of the next blob
00102   }
00103   if (weights <= 0)  // keep the divisor positive (also avoids dividing by zero)
00104     weights = 1;
00105   return ((FLOAT32) ratings / weights);
00106 }

WIDTH_RECORD* state_char_widths ( WIDTH_RECORD chunk_widths,
STATE state,
int  num_joints,
SEARCH_STATE * search_state 
)

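Return a width record holding the character widths, and the gaps between adjacent characters, that this segmentation state would produce. The chunk list decoded from the state is handed back to the caller through search_state.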

Definition at line 114 of file heuristic.cpp.

References bin_to_chunks(), chunks_gap, chunks_width(), memalloc(), WIDTH_RECORD::num_chars, and WIDTH_RECORD::widths.

Referenced by width_priority().

00117                                                             {  // state_char_widths(): build the width/gap record for one segmentation state
00118   WIDTH_RECORD *width_record;
00119   int num_blobs;  // new_chunks[0] + 1
00120   int x;          // start index passed to chunks_width() for the current character
00121   int y;          // end index passed to chunks_width() for the current character
00122   int i;          // 1-based character counter
00123   SEARCH_STATE new_chunks;
00124 
00125   new_chunks = bin_to_chunks (state, num_joints);
00126   // new_chunks[0] is used as a count; entries 1..new_chunks[0] are read in the loop below.
00127   num_blobs = new_chunks[0] + 1;
00128   width_record = (WIDTH_RECORD *) memalloc (sizeof (int) * num_blobs * 2);  // room for 2 * num_blobs ints; NOTE(review): assumes WIDTH_RECORD is an int count followed by int widths - confirm
00129   width_record->num_chars = num_blobs;
00130 
00131   x = 0;
00132   for (i = 1; i <= new_chunks[0] + 1; i++) {
00133     if (i > new_chunks[0])
00134       y = num_joints;  // final character: its range ends at num_joints
00135     else
00136       y = x + new_chunks[i];
00137     // Even slots (2*i - 2) hold the value of chunks_width() for [x, y].
00138     width_record->widths[2 * i - 2] = chunks_width (chunk_widths, x, y);
00139     // Odd slots (2*i - 1) hold chunks_gap() at y; none is stored after the final character.
00140     if (i <= new_chunks[0])
00141       width_record->widths[2 * i - 1] = chunks_gap (chunk_widths, y);
00142 
00143     x = y + 1;  // the next character starts just past y
00144   }
00145   // new_chunks is not freed here; it is passed out through search_state.
00146   *search_state = new_chunks;
00147   return (width_record);
00148 }

FLOAT32 width_priority ( CHUNKS_RECORD * chunks_record,
STATE state,
int  num_joints 
)

Return a priority value for this word segmentation based on the character widths present in the new segmentation.

Definition at line 156 of file heuristic.cpp.

References baseline_enable, BASELINE_SCALE, CHUNKS_RECORD::chunk_widths, free_widths, textrowstruct::lineheight, MAX_SQUAT, memfree(), WIDTH_RECORD::num_chars, CHUNKS_RECORD::row, state_char_widths(), and WIDTH_RECORD::widths.

Referenced by prioritize_state().

00158                                        {  // width_priority(): total amount by which scaled widths exceed MAX_SQUAT
00159   SEARCH_STATE new_chunks;     // received from state_char_widths(); freed before returning
00160   FLOAT32 result = 0.0;        // accumulated excess over MAX_SQUAT
00161   WIDTH_RECORD *width_record;
00162   FLOAT32 squat;               // one width after division by lineheight or BASELINE_SCALE
00163   int x;
00164 
00165   width_record = state_char_widths (chunks_record->chunk_widths,
00166     state, num_joints, &new_chunks);
00167   for (x = 0; x < width_record->num_chars; x++) {
00168     // Only the even-indexed entries of widths[] are read here.
00169     squat = width_record->widths[2 * x];
00170     if (!baseline_enable) {
00171       squat /= chunks_record->row->lineheight;
00172     }
00173     else {
00174       squat /= BASELINE_SCALE;
00175     }
00176     // Only the portion above MAX_SQUAT adds to the result.
00177     if (squat > MAX_SQUAT)
00178       result += squat - MAX_SQUAT;
00179 
00180   }
00181   // Release both the chunk list and the width record obtained above.
00182   memfree(new_chunks); 
00183   free_widths(width_record); 
00184 
00185   return (result);
00186 }


Generated on Wed Feb 28 19:49:28 2007 for Tesseract by  doxygen 1.5.1