00001
00020 #ifndef DAWG_H
00021 #define DAWG_H
00022
00023
00024
00025
00026 #include <ctype.h>
00027 #include "general.h"
00028
00207
00208
00209
/*
 * Constants describing the packed edge record.
 *
 * As implied by the masks and shifts used by the accessor macros in this
 * header, an EDGE_RECORD is laid out as:
 *   bits  0-20      next-node index (selected by the mask NO_EDGE)
 *   bits 21-23      flag bits (the *_FLAG values shifted by FLAG_START_BIT)
 *   bits 24 and up  the letter stored on the edge (LETTER_START_BIT)
 *
 * Every replacement list is wrapped in parentheses so that the constants
 * behave as single operands in any expression context (for example under
 * sizeof or next to a postfix operator).
 */

/* NOTE(review): presumably the longest word, in characters, that the DAWG
 * code handles; it is not used inside this header -- confirm at call sites. */
#define MAX_WERD_LENGTH        ((INT32) 40)

/* NOTE(review): presumably an upper bound on the edges of a single node;
 * it is not used inside this header -- confirm at call sites. */
#define MAX_NODE_EDGES         ((INT32) 100)

/* Flag values; each is shifted left by FLAG_START_BIT before being tested
 * against an edge record (see last_edge, forward_edge, end_of_word). */
#define LAST_FLAG              ((INT32) 1)
#define DIRECTION_FLAG         ((INT32) 2)
#define WERD_END_FLAG          ((INT32) 4)

/* Bit position of the lowest flag bit inside an edge record. */
#define FLAG_START_BIT         21

/* Bit position at which the edge letter starts inside an edge record. */
#define LETTER_START_BIT       24

/* Mask of the next-node field (low 21 bits).  A record that is exactly
 * equal to this value is treated as an unoccupied slot. */
#define NO_EDGE                ((INT32) 0x1fffff)
00246
00251 typedef UINT32 EDGE_RECORD;
00256 typedef EDGE_RECORD *EDGE_ARRAY;
00261 typedef INT32 EDGE_REF;
00266 typedef INT32 NODE_REF;
00267
00268
00269
00270
00272 extern INT32 case_sensative;
00273 extern INT32 debug;
00274
00275
00276
00277
/*
 * next_node
 *
 * Evaluates to the next-node field of edge record `e` in `edges`, i.e. the
 * bits of the record selected by the NO_EDGE mask.
 */
#define next_node(edges,e) \
  (NO_EDGE & (edges)[(e)])
00283
/*
 * set_next_edge
 *
 * Stores `value` in the next-node field of edge record `e` of `edges`.
 * The bits above the field (mask 0xffe00000: flags and letter) are kept,
 * and `value` is truncated to the field width with NO_EDGE.
 *
 * `value` is parenthesized so that an argument containing a low-precedence
 * operator (|, ?:, ...) is masked as a whole.  `e` is evaluated twice, so
 * it must not have side effects.
 */
#define set_next_edge(edges,e,value) \
  ( (edges)[e] = ((edges)[e] & (INT32) 0xffe00000) | ((value) & NO_EDGE) )
00289
/*
 * set_empty_edge
 *
 * Marks slot `e` of `edges` as unoccupied by overwriting the whole record
 * with NO_EDGE.  The expression yields the stored value.
 */
#define set_empty_edge(edges,e) \
  ((edges)[(e)] = NO_EDGE)
00295
/*
 * clear_all_edges
 *
 * Marks the first `max_num_edges` slots of `dawg` as unoccupied.
 *   dawg           edge array to clear
 *   edge           caller-supplied integer lvalue used as the loop counter;
 *                  it equals max_num_edges afterwards
 *   max_num_edges  number of slots to clear
 *
 * The counter and the bound are parenthesized so that arbitrary expressions
 * can be passed, and the loop body is braced.  The macro still expands to a
 * complete statement, so a trailing semicolon at the call site is optional.
 * As a consequence it must not be used as the unbraced body of an `if` that
 * has an `else`.
 */
#define clear_all_edges(dawg,edge,max_num_edges) \
  for ((edge) = 0; (edge) < (max_num_edges); (edge)++) { \
    set_empty_edge (dawg, edge); \
  }
00302
/*
 * edge_occupied
 *
 * Non-zero when slot `e` of `edges` holds anything other than the empty
 * marker NO_EDGE (the value written by set_empty_edge).
 */
#define edge_occupied(edges,e) \
  (NO_EDGE != (edges)[(e)])
00308
/*
 * edge_letter
 *
 * Extracts the letter stored on edge `e` of `edges`: the record shifted
 * right by LETTER_START_BIT, which drops the next-node and flag bits.
 */
#define edge_letter(edges,e) \
  (((edges)[(e)]) >> (LETTER_START_BIT))
00314
/*
 * last_edge
 *
 * Non-zero when the LAST_FLAG bit is set in edge record `e` of `edges`.
 * The result is the masked bit itself, not a normalized 0/1 value.
 * NOTE(review): presumably marks the final edge of a node's edge list.
 */
#define last_edge(edges,e) \
  ((LAST_FLAG << FLAG_START_BIT) & (edges)[(e)])
00322
/*
 * end_of_word
 *
 * Non-zero when the WERD_END_FLAG bit is set in edge record `e` of `edges`.
 * The result is the masked bit itself, not a normalized 0/1 value.
 */
#define end_of_word(edges,e) \
  ((WERD_END_FLAG << FLAG_START_BIT) & (edges)[(e)])
00328
/*
 * forward_edge
 *
 * 1 when edge record `e` of `edges` has the DIRECTION_FLAG bit set and the
 * slot is occupied (differs from NO_EDGE); 0 otherwise.  `e` is evaluated
 * up to twice, so it must not have side effects.
 */
#define forward_edge(edges,e) \
  ((((edges)[e] & (DIRECTION_FLAG << FLAG_START_BIT)) != 0) && \
   ((edges)[e] != NO_EDGE))
00335
/*
 * backward_edge
 *
 * 1 when edge record `e` of `edges` has the DIRECTION_FLAG bit clear and
 * the slot is occupied (differs from NO_EDGE); 0 otherwise.  `e` is
 * evaluated up to twice, so it must not have side effects.
 */
#define backward_edge(edges,e) \
  ((((edges)[e] & (DIRECTION_FLAG << FLAG_START_BIT)) == 0) && \
   ((edges)[e] != NO_EDGE))
00342
/*
 * edge_loop
 *
 * Expands to a bare `while (...)` clause whose condition is true as long as
 * the edge currently indexed by `e` does NOT have LAST_FLAG set.  `e` must
 * be a modifiable lvalue: it is post-incremented on every evaluation of the
 * condition, including the one that terminates the loop.
 *
 * The caller supplies the rest of the statement (a loop body after the
 * clause, or a preceding `do { ... }` and a trailing semicolon).
 */
#define edge_loop(edges,e) \
  while (((edges)[e++] & (LAST_FLAG << FLAG_START_BIT)) == 0)
00350
/*
 * case_is_okay
 *
 * Checks the capitalization of word[i] against the characters before it.
 * Evaluates to FALSE when
 *   - word[i] is upper case and word[i-1] is lower case, or
 *   - word[i] is lower case, word[i-1] is upper case, i > 1 and word[i-2]
 *     is alphabetic;
 * and to TRUE otherwise, including for i == 0 (no preceding character).
 *
 * Both arguments are parenthesized so that expressions such as `p + 1` or
 * `c ? a : b` can be passed, and every character is converted to
 * unsigned char before it reaches the <ctype.h> classifiers, which have
 * undefined behavior for negative values other than EOF.  The arguments
 * are evaluated several times, so they must not have side effects.
 */
#define case_is_okay(word,i) \
  ((i) ? \
   ((isupper ((unsigned char) (word)[(i)]) && \
     islower ((unsigned char) (word)[(i) - 1])) ? \
    FALSE : \
    ((islower ((unsigned char) (word)[(i)]) && \
      isupper ((unsigned char) (word)[(i) - 1]) && \
      (i) > 1 && isalpha ((unsigned char) (word)[(i) - 2])) ? \
     FALSE : \
     TRUE)) : \
   TRUE)
00367
/*
 * trailing_punc
 *
 * Non-zero when `ch` is one of the characters
 *     } : ; - ] ! ? ` , . ) " '
 * and zero otherwise.
 *
 * `ch` is parenthesized in every comparison so that an argument containing
 * a low-precedence operator is compared as a whole.  It is evaluated once
 * per comparison, so it must not have side effects.
 */
#define trailing_punc(ch) \
  (((ch) == '}' ) || \
   ((ch) == ':' ) || \
   ((ch) == ';' ) || \
   ((ch) == '-' ) || \
   ((ch) == ']' ) || \
   ((ch) == '!' ) || \
   ((ch) == '?' ) || \
   ((ch) == '`' ) || \
   ((ch) == ',' ) || \
   ((ch) == '.' ) || \
   ((ch) == ')' ) || \
   ((ch) == '\"' ) || \
   ((ch) == '\'' ))
00385
/*
 * leading_punc
 *
 * Non-zero when `ch` is one of the characters
 *     " ( { [ ` '
 * and zero otherwise.
 *
 * `ch` is parenthesized in every comparison so that an argument containing
 * a low-precedence operator is compared as a whole.  It is evaluated once
 * per comparison, so it must not have side effects.
 */
#define leading_punc(ch) \
  (((ch) == '\"' ) || \
   ((ch) == '(' ) || \
   ((ch) == '{' ) || \
   ((ch) == '[' ) || \
   ((ch) == '`' ) || \
   ((ch) == '\'' ))
00396
00397
00398
00399
00400 EDGE_REF edge_char_of(EDGE_ARRAY dawg,
00401 NODE_REF node,
00402 int character,
00403 int word_end);
00404
00405 INT32 edges_in_node(EDGE_ARRAY dawg, NODE_REF node);
00406
00407 INT32 letter_is_okay(EDGE_ARRAY dawg,
00408 NODE_REF *node,
00409 INT32 char_index,
00410 char prevchar,
00411 const char *word,
00412 INT32 word_end);
00413
00414 INT32 num_forward_edges(EDGE_ARRAY dawg, NODE_REF node);
00415
00416 void print_dawg_node(EDGE_ARRAY dawg, NODE_REF node);
00417
00418 void read_squished_dawg(char *filename, EDGE_ARRAY dawg, INT32 max_num_edges);
00419
00420 INT32 verify_trailing_punct(EDGE_ARRAY dawg, char *word, INT32 char_index);
00421
00422 INT32 word_in_dawg(EDGE_ARRAY dawg, const char *string);
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473 #endif