#include "trie.h"
#include "callcpp.h"
#include <stdio.h>
Go to the source code of this file.
void add_edge_linkage | ( | EDGE_ARRAY | dawg, | |
NODE_REF | node1, | |||
NODE_REF | node2, | |||
INT32 | direction, | |||
char | character, | |||
INT32 | word_end | |||
) |
Add a single edge linkage to between the two nodes;.
can be used to add either forward or backward links.
Definition at line 46 of file trie.cpp.
References backward_edge, copy_edges, cprintf(), debug, DIRECTION_FLAG, edge_loop, edges_in_node(), FORWARD_EDGE, forward_edge, LAST_FLAG, link_edge, and WERD_END_FLAG.
Referenced by add_new_edge().
00051 { 00052 EDGE_REF edge1 = node1; 00053 EDGE_REF edge2; 00054 INT32 num_edges = edges_in_node (dawg, node1); 00055 INT32 last_one; 00056 00057 word_end = (word_end ? WERD_END_FLAG : 0); 00058 00059 if (num_edges == 0) { /* No edges yet */ 00060 direction = ((direction == FORWARD_EDGE) ? DIRECTION_FLAG : 0); 00061 link_edge (dawg, edge1, node2, character, 00062 LAST_FLAG | direction | word_end); 00063 } 00064 else { 00065 /* Forward links */ 00066 if (direction == FORWARD_EDGE) { 00067 last_one = (forward_edge (dawg, edge1) ? 0 : LAST_FLAG); 00068 if (debug) 00069 cprintf ("moving edges (nodes = %ld, %dl, num = %ld)\n", 00070 edge1, edge1+1, num_edges); 00071 copy_edges (dawg, edge1, edge1+1, num_edges); 00072 link_edge (dawg, edge1, node2, character, 00073 last_one | DIRECTION_FLAG | word_end); 00074 } 00075 else { /* Backward links */ 00076 00077 if (forward_edge (dawg, edge1)) 00078 edge_loop(dawg, edge1); 00079 00080 /* Existing back edges */ 00081 if (backward_edge (dawg,edge1)) { 00082 num_edges = 0; 00083 edge2 = edge1; 00084 do { num_edges++; } 00085 edge_loop(dawg, edge2); 00086 00087 if (debug) 00088 cprintf ("moving edges (nodes = %ld, %ld, num = %ld)\n", 00089 edge1, edge1+1, num_edges); 00090 copy_edges (dawg, edge1, edge1+1, num_edges); 00091 link_edge(dawg, edge1, node2, character, word_end); 00092 } 00093 else { /* First back edge */ 00094 link_edge (dawg, edge1, node2, character, 00095 LAST_FLAG | word_end); 00096 } 00097 } 00098 } 00099 }
void add_new_edge | ( | EDGE_ARRAY | dawg, | |
NODE_REF * | node1, | |||
NODE_REF * | node2, | |||
char | character, | |||
INT32 | word_end, | |||
INT32 | max_num_edges, | |||
INT32 | reserved_edges | |||
) |
Add an edge between two nodes in the DAWG & link the nodes both ways.
Definition at line 105 of file trie.cpp.
References add_edge_linkage(), BACKWARD_EDGE, cprintf(), debug, direction(), edge_counter, FORWARD_EDGE, and move_node_if_needed.
Referenced by add_word_to_dawg().
00111 { 00112 int direction; 00113 00114 if (debug) 00115 cprintf ("add_new_edge (nodes = %ld, %ld, ch = '%c', end = %d)\n", 00116 *node1, *node2, character, word_end); 00117 edge_counter++; 00118 00119 move_node_if_needed(dawg, *node1, max_num_edges, reserved_edges); 00120 move_node_if_needed(dawg, *node2, max_num_edges, reserved_edges); 00121 00122 direction = (int) FORWARD_EDGE; 00123 00124 add_edge_linkage(dawg, *node1, *node2, direction, character, word_end); 00125 00126 direction = (int) BACKWARD_EDGE; 00127 add_edge_linkage(dawg, *node2, *node1, direction, character, word_end); 00128 }
void add_word_to_dawg | ( | EDGE_ARRAY | dawg, | |
char * | string, | |||
INT32 | max_num_edges, | |||
INT32 | reserved_edges | |||
) |
Add in a word by creating the necessary nodes and edges.
Definition at line 134 of file trie.cpp.
References add_new_edge(), case_sensative, cprintf(), debug, DEFAULT_NODE_SIZE, edge_char_of(), edges_in_node(), FALSE, new_dawg_node(), next_node, NO_EDGE, remove_edge(), and TRUE.
Referenced by add_document_word(), and read_word_list().
00137 { 00138 EDGE_REF edge; 00139 NODE_REF last_node = 0; 00140 NODE_REF the_next_node; 00141 INT32 i; 00142 INT32 still_finding_chars = TRUE; 00143 INT32 word_end = FALSE; 00144 00145 for (i=0; i<strlen(string)-1; i++) { 00146 if (still_finding_chars) { 00147 edge = edge_char_of (dawg, last_node, string[i], word_end); 00148 if (debug) cprintf ("exploring edge = %d\n", edge); 00149 if (edge == NO_EDGE) 00150 still_finding_chars = FALSE; 00151 else 00152 if (next_node (dawg, edge) == 0) { 00153 word_end = TRUE; 00154 still_finding_chars = FALSE; 00155 if (! case_sensative) string[i] = tolower (string[i]); 00156 remove_edge (dawg, last_node, 0, string[i], word_end); 00157 } 00158 else { 00159 last_node = next_node (dawg, edge); 00160 } 00161 } 00162 00163 if (! still_finding_chars) { 00164 the_next_node = new_dawg_node (dawg, DEFAULT_NODE_SIZE, 00165 max_num_edges, reserved_edges); 00166 if (edges_in_node (dawg, last_node) + last_node == the_next_node) { 00167 cprintf ("Node collision at %d\n", the_next_node); 00168 the_next_node = new_dawg_node (dawg, DEFAULT_NODE_SIZE, 00169 max_num_edges, reserved_edges); 00170 } 00171 if (! case_sensative) string[i] = tolower (string[i]); 00172 add_new_edge (dawg, &last_node, &the_next_node, 00173 string[i], word_end, max_num_edges, reserved_edges); 00174 word_end = FALSE; 00175 if (debug) 00176 cprintf ("adding node = %ld\n", the_next_node); 00177 last_node = the_next_node; 00178 } 00179 } 00180 00181 the_next_node = 0; 00182 if (! case_sensative) string[i] = tolower (string[i]); 00183 add_new_edge (dawg, &last_node, &the_next_node, 00184 string[i], TRUE, max_num_edges, reserved_edges); 00185 00186 if (edges_in_node (dawg, 0) > reserved_edges) { 00187 cprintf ("error: Not enough room in root node, %d\n", 00188 edges_in_node (dawg, 0)); 00189 exit (1); 00190 } 00191 }
void initialize_dawg | ( | EDGE_ARRAY | dawg, | |
INT32 | max_num_edges | |||
) |
Initialize the DAWG data structure for further used & reset each of the edge cells to NO_EDGE.
Definition at line 198 of file trie.cpp.
References clear_all_edges.
Referenced by init_permute(), and read_word_list().
00198 { 00199 INT32 x; 00200 00201 clear_all_edges(dawg, x, max_num_edges); 00202 }
NODE_REF move_node | ( | EDGE_ARRAY | dawg, | |
NODE_REF | node, | |||
INT32 | max_num_edges, | |||
INT32 | reserved_edges | |||
) |
Move the location in the edge array of this node in the DAWG.
Definition at line 208 of file trie.cpp.
References backward_edge, cprintf(), debug, edge_loop, EDGE_NUM_MARGIN, edges_in_node(), forward_edge, move_counter, move_edges, new_dawg_node(), next_node, print_dawg_node(), and relocate_edge().
00211 { 00212 NODE_REF this_new_node; 00213 EDGE_REF edge; 00214 INT32 num_edges = edges_in_node (dawg, node); 00215 00216 if (debug) 00217 print_dawg_node(dawg, node); 00218 00219 this_new_node = new_dawg_node (dawg, num_edges + EDGE_NUM_MARGIN, 00220 max_num_edges, reserved_edges); 00221 00222 if (debug) 00223 cprintf ("move_node (from = %ld, to = %ld, num = %ld)\n", 00224 node, this_new_node, num_edges); 00225 00226 move_edges(dawg, node, this_new_node, num_edges); 00227 00228 if (debug) 00229 print_dawg_node(dawg, this_new_node); 00230 00231 edge = this_new_node; 00232 if (forward_edge (dawg, edge)) { 00233 do { 00234 relocate_edge (dawg, next_node (dawg, edge), node, this_new_node); 00235 } edge_loop (dawg, edge); 00236 } 00237 if (backward_edge (dawg, edge)) { 00238 do { 00239 relocate_edge (dawg, next_node (dawg, edge), node, this_new_node); 00240 } edge_loop (dawg, edge); 00241 } 00242 00243 move_counter++; 00244 return (this_new_node); 00245 }
NODE_REF new_dawg_node | ( | EDGE_ARRAY | dawg, | |
INT32 | num_edges, | |||
INT32 | max_num_edges, | |||
INT32 | reserved_edges | |||
) |
Make space for node in DAWG.
Create a space within the DAWG data structure to house a node that consists of the requested number of edges.
Definition at line 254 of file trie.cpp.
References cprintf(), edge_counter, edge_occupied, FALSE, long_rand(), MAX_WERD_LENGTH, move_counter, new_counter, NUM_PLACEMENT_ATTEMPTS, and TRUE.
Referenced by add_word_to_dawg(), and move_node().
00257 { 00258 INT32 i; 00259 INT32 n; 00260 INT32 edge_index; 00261 INT32 edge_collision; 00262 /* Try several times */ 00263 for (i=0; i<NUM_PLACEMENT_ATTEMPTS; i++) { 00264 00265 edge_index = reserved_edges + 00266 long_rand (max_num_edges - MAX_WERD_LENGTH - reserved_edges); 00267 edge_collision = FALSE; 00268 /* Find N adjacent slots */ 00269 for (n=0; n<num_edges && !edge_collision; n++) { 00270 if (edge_occupied (dawg, edge_index + n)) 00271 edge_collision = TRUE; 00272 } 00273 00274 if (! edge_collision) { 00275 new_counter++; 00276 //? max_new_attempts = max (max_new_attempts, i); 00277 return (edge_index); 00278 } 00279 } 00280 00281 cprintf ("DAWG Table is too full, nodes = %ld, edges = %ld, moves %ld\n", 00282 new_counter, edge_counter, move_counter); 00283 exit (1); 00284 return 0; 00285 }
void read_word_list | ( | char * | filename, | |
EDGE_ARRAY | dawg, | |||
INT32 | max_num_edges, | |||
INT32 | reserved_edges | |||
) |
Read the requested file (containing a list of words) and add all the words to the DAWG.
Definition at line 292 of file trie.cpp.
References add_word_to_dawg(), CHARS_PER_LINE, cprintf(), initialize_dawg(), NULL, open_file(), and word_in_dawg().
Referenced by init_permute().
00295 { 00296 FILE *word_file; 00297 char string [CHARS_PER_LINE]; 00298 00299 word_file = open_file (filename, "r"); 00300 00301 initialize_dawg(dawg, max_num_edges); 00302 00303 while (fgets (string, CHARS_PER_LINE, word_file) != NULL) { 00304 string [strlen (string) - 1] = (char) 0; 00305 if (strlen (string)) { 00306 add_word_to_dawg(dawg, string, max_num_edges, reserved_edges); 00307 00308 if (! word_in_dawg (dawg, string)) { 00309 cprintf ("error: word not in DAWG after adding it '%s'\n", string); 00310 return; 00311 } 00312 } 00313 } 00314 }
void relocate_edge | ( | EDGE_ARRAY | dawg, | |
NODE_REF | node, | |||
NODE_REF | old_node, | |||
NODE_REF | new_node | |||
) |
The location of a node has moved, so an edge entry in another node must be changed & change the value of this edge entry to match the new location of the node.
Definition at line 322 of file trie.cpp.
References backward_edge, cprintf(), debug, edge_loop, forward_edge, next_node, and set_next_edge.
Referenced by move_node().
00325 { 00326 EDGE_REF edge; 00327 00328 if (debug) cprintf ("relocate (%ld, %ld ==> %ld)\n", 00329 node, old_node, new_node); 00330 00331 edge = node; 00332 if (forward_edge (dawg, edge)) { 00333 do { 00334 if (next_node (dawg, edge) == old_node) { 00335 if (debug) 00336 cprintf ("forward assign (%ld, %ld ==> %ld)\n", 00337 edge, old_node, new_node); 00338 00339 set_next_edge(dawg, edge, new_node); 00340 } 00341 } edge_loop (dawg, edge); 00342 } 00343 00344 if (backward_edge (dawg, edge)) { 00345 do { 00346 if (next_node (dawg, edge) == old_node) { 00347 if (debug) 00348 cprintf ("backward assign (%ld, %ld ==> %ld)\n", 00349 edge, old_node, new_node); 00350 00351 set_next_edge(dawg, edge, new_node); 00352 } 00353 } 00354 edge_loop(dawg, edge); 00355 } 00356 }
void remove_edge | ( | EDGE_ARRAY | dawg, | |
NODE_REF | node1, | |||
NODE_REF | node2, | |||
char | character, | |||
INT32 | word_end | |||
) |
Add a single edge linkage to between the two nodes; can be used to add either forward or backward links.
Definition at line 363 of file trie.cpp.
References BACKWARD_EDGE, FORWARD_EDGE, and remove_edge_linkage().
Referenced by add_word_to_dawg().
00367 { 00368 remove_edge_linkage(dawg, node1, node2, FORWARD_EDGE, character, word_end); 00369 00370 remove_edge_linkage(dawg, node2, node1, BACKWARD_EDGE, character, word_end); 00371 }
void remove_edge_linkage | ( | EDGE_ARRAY | dawg, | |
NODE_REF | node, | |||
NODE_REF | next, | |||
INT32 | direction, | |||
char | character, | |||
INT32 | word_end | |||
) |
Remove this edge reference from this node & move the edge entries in this node to fill the gap.
Definition at line 378 of file trie.cpp.
References backward_edge, cprintf(), debug, edge_letter, edges_in_node(), end_of_word, FORWARD_EDGE, forward_edge, last_edge, move_edges, next_node, num_forward_edges(), print_dawg_node(), set_empty_edge, and set_last_flag.
Referenced by remove_edge().
00383 { 00384 INT32 forward_edges; 00385 INT32 num_edges; 00386 INT32 e = node; 00387 INT32 last_flag; 00388 00389 forward_edges = num_forward_edges (dawg, node); 00390 num_edges = edges_in_node (dawg, node); 00391 00392 for (e=node; e<node+num_edges; e++) { 00393 /* Is this the edge*/ 00394 if ((edge_letter (dawg, e) == character) && 00395 ((direction == FORWARD_EDGE) ? 00396 forward_edge(dawg,e) : backward_edge(dawg,e)) && 00397 (next_node (dawg, e) == next) && 00398 (word_end ? end_of_word(dawg,e) : (! end_of_word(dawg,e)))) { 00399 00400 if (debug) 00401 cprintf ("remove (edge = %ld, character is '%c')\n", 00402 e, edge_letter (dawg, e)); 00403 00404 /* Delete the slot */ 00405 last_flag = last_edge (dawg, e); 00406 set_empty_edge(dawg, e); 00407 move_edges (dawg, e+1, e, num_edges+node-e-1); 00408 /* Restore 'last' flag */ 00409 if (direction == FORWARD_EDGE) { 00410 if ((forward_edges - 1) && 00411 (forward_edges - 1 == e - node)) { 00412 set_last_flag (dawg, e - 1); 00413 } 00414 } 00415 else { 00416 if ((num_edges - forward_edges - 1) && 00417 (num_edges - 1 == e - node)) { 00418 set_last_flag (dawg, e - 1); 00419 } 00420 } 00421 if (debug) 00422 print_dawg_node(dawg, node); 00423 return; 00424 } 00425 } 00426 cprintf ("error: Could not find the edge to remove, %d\n", next); 00427 print_dawg_node(dawg, node); 00428 exit (1); 00429 }
INT32 room_in_node | ( | EDGE_ARRAY | dawg, | |
NODE_REF | node | |||
) |
Check to see if there is enough room left in this node for one more edge link which may be a forward or backward link.
Definition at line 436 of file trie.cpp.
References edge_occupied, edges_in_node(), FALSE, and TRUE.
00436 { 00437 EDGE_REF edge = node; 00438 00439 if (edge_occupied (dawg, edge + edges_in_node (dawg, node))) { 00440 return (FALSE); 00441 } 00442 else { 00443 return (TRUE); 00444 } 00445 }
INT32 edge_counter = 0 [static] |
INT32 move_counter = 0 [static] |
* (c) Copyright 1987, Hewlett-Packard Company. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at ** http://www.apache.org/licenses/LICENSE-2.0 ** Unless required by applicable law or agreed to in writing, software ** distributed under the License is distributed on an "AS IS" BASIS, ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ** See the License for the specific language governing permissions and ** limitations under the License.
Definition at line 34 of file trie.cpp.
Referenced by move_node(), and new_dawg_node().
INT32 new_counter = 0 [static] |