ccutil/scanutils.cpp

Go to the documentation of this file.
00001 
00031 #include <ctype.h>
00032 #include <stdarg.h>
00033 #include <stddef.h>
00034 #include <inttypes.h>
00035 #include <string.h>
00036 #include <limits.h>
00037 #include <stdio.h>
00038 #include <sys/types.h>
00039 #include <sys/stat.h>
00040 #include <fcntl.h>
00041 
00042 #include "scanutils.h"
00043 
/**
 * Bit flags collected while a single '%' conversion specification is parsed.
 * The values are distinct bits so they can be OR-ed together in one int.
 */
enum Flags {
  FL_SPLAT = 1 << 0,  // '*' seen: parse the field but do not store it
  FL_INV   = 1 << 1,  // Inverted ("^") character set
  FL_WIDTH = 1 << 2,  // An explicit field width was given
  FL_MINUS = 1 << 3   // Negative number
};
00054 
/**
 * Integer "ranks" describing the size of the destination object.
 * RANK_INT is zero so that each 'h' length modifier can step the rank down
 * by one and each 'l' can step it up by one.
 */
enum Ranks {
  RANK_CHAR = -2,
  RANK_SHORT,           // -1
  RANK_INT,             //  0
  RANK_LONG,            //  1
  RANK_LONGLONG,        //  2
  RANK_PTR = INT_MAX    // Sentinel rank reserved for pointer conversions
};
00067 
00068 const enum Ranks kMinRank = RANK_CHAR;
00069 const enum Ranks kMaxRank = RANK_LONGLONG;
00070 
00071 const enum Ranks kIntMaxRank = RANK_LONGLONG;
00072 const enum Ranks kSizeTRank = RANK_LONG;
00073 const enum Ranks kPtrDiffRank = RANK_LONG;
00074 
/**
 * Reason for abandoning a scan before the format string is exhausted.
 */
enum Bail {
  BAIL_NONE = 0,  // Keep going: nothing went wrong
  BAIL_EOF  = 1,  // Input ran out
  BAIL_ERR  = 2   // Input did not match the format
};
00084 
/**
 * Number of bits in a long, i.e. the size of one bitmap word used by
 * SetBit() and TestBit().
 */
inline size_t LongBit() {
  const size_t bytes_per_long = sizeof(long);
  return bytes_per_long * CHAR_BIT;
}
00091 
/**
 * Consume whitespace from the stream.
 *
 * @param s stream to read from
 * @return the first non-whitespace character, which is pushed back so the
 *         next read sees it again, or EOF if the stream ran out
 */
static inline int
SkipSpace(FILE *s)
{
  int next;
  do {
    next = fgetc(s);
  } while (isspace(next));
  ungetc(next, s);  // Leave the terminating character for the next reader
  return next;
}
00103 
00107 static inline void
00108 SetBit(unsigned long *bitmap, unsigned int bit)
00109 {
00110   bitmap[bit/LongBit()] |= 1UL << (bit%LongBit());
00111 }
00112 
00116 static inline int
00117 TestBit(unsigned long *bitmap, unsigned int bit)
00118 {
00119   return static_cast<int>(bitmap[bit/LongBit()] >> (bit%LongBit())) & 1;
00120 }
00121 
/**
 * Numeric value of a digit character in bases up to 36.
 *
 * @param ch character code to classify
 * @return 0-9 for '0'-'9', 10-35 for letters of either case,
 *         or -1 if ch is not a digit in any supported base
 */
static inline int DigitValue(int ch)
{
  if ('0' <= ch && ch <= '9')
    return ch - '0';
  if ('A' <= ch && ch <= 'Z')
    return 10 + (ch - 'A');
  if ('a' <= ch && ch <= 'z')
    return 10 + (ch - 'a');
  return -1;  // Not a digit
}
00137 
00145 uintmax_t streamtoumax(FILE* s, int base)
00146 {
00147   int minus = 0;
00148   uintmax_t v = 0;
00149   int d, c = 0;
00150 
00151   for (c = fgetc(s); 
00152     isspace(static_cast<unsigned char>(c)) && (c != EOF); 
00153     c = fgetc(s)) 
00154   
00155   // Single optional + or - 
00156   if (c == '-' || c == '+') {
00157     minus = (c == '-');
00158     c = fgetc(s);
00159   }
00160 
00161   // Assign correct base
00162   if (base == 0) {
00163     if (c == '0') {
00164       c = fgetc(s);
00165       if (c == 'x' || c == 'X') {
00166         base = 16;
00167         c = fgetc(s);
00168       } else {
00169         base = 8;
00170       }
00171     }
00172   } else if (base == 16) {
00173     if (c == '0') {
00174       c = fgetc(s);
00175       if (c == 'x' && c == 'X') c = fgetc(s);
00176     }
00177   }
00178 
00179   // Actual number parsing
00180   for (; (c != EOF) && (d = DigitValue(c)) >= 0 && d < base; c = fgetc(s))
00181     v = v*base + d;
00182 
00183   ungetc(c, s);
00184   return minus ? -v : v;
00185 }
00186 
/**
 * Read a simple decimal floating point number from a stream.
 *
 * Accepted syntax: optional whitespace, an optional single '+' or '-',
 * decimal digits, and optionally a '.' followed by more decimal digits.
 * Exponents are not supported: if the number is directly followed by 'e'
 * or 'E' a warning is printed and the exponent is left unread.
 * The first character that is not part of the number is pushed back.
 *
 * @param s stream to read from
 * @return the parsed value, or 0 if no digits were found
 */
double streamtofloat(FILE* s)
{
  int minus = 0;
  // Accumulate in double so that long digit strings lose precision
  // gracefully instead of overflowing an int.
  double v = 0.0;   // Integer part
  double w = 0.0;   // Fraction digits read as a whole number
  double k = 1.0;   // 10^(number of fraction digits)
  int c;

  // Skip leading whitespace
  for (c = fgetc(s); (c != EOF) && isspace(c); c = fgetc(s)) {
  }

  // Single optional + or -
  if (c == '-' || c == '+') {
    minus = (c == '-');
    c = fgetc(s);
  }

  // Actual number parsing: only '0'..'9' count as digits here
  for (; c >= '0' && c <= '9'; c = fgetc(s))
    v = v*10 + (c - '0');
  if (c == '.') {
    for (c = fgetc(s); c >= '0' && c <= '9'; c = fgetc(s)) {
      w = w*10 + (c - '0');
      k *= 10;
    }
  }
  if (c == 'e' || c == 'E')
    printf("WARNING: Scientific Notation not supported!");

  ungetc(c, s);  // Give back the character that ended the number
  double f = v + w / k;

  return minus ? -f : f;
}
00230 
/**
 * Parse a simple decimal floating point number from a string.
 *
 * Accepted syntax: optional whitespace, an optional single '+' or '-',
 * decimal digits, and optionally a '.' followed by more decimal digits.
 * Exponents are not supported: if the number is directly followed by 'e'
 * or 'E' a warning is printed and the exponent is ignored.
 *
 * @param s NUL-terminated string to parse
 * @return the parsed value, or 0 if no digits were found
 */
double strtofloat(const char* s)
{
  int minus = 0;
  // Accumulate in double so that long digit strings lose precision
  // gracefully instead of overflowing an int.
  double v = 0.0;   // Integer part
  double w = 0.0;   // Fraction digits read as a whole number
  double k = 1.0;   // 10^(number of fraction digits)

  // Skip leading whitespace
  while (*s) {
    const unsigned char uc = *s;  // <ctype.h> needs an unsigned char value
    if (!isspace(uc))
      break;
    s++;
  }

  // Single optional + or -
  if (*s == '-' || *s == '+') {
    minus = (*s == '-');
    s++;
  }

  // Actual number parsing: only '0'..'9' count as digits here
  for (; *s >= '0' && *s <= '9'; s++)
    v = v*10 + (*s - '0');
  if (*s == '.') {
    for (++s; *s >= '0' && *s <= '9'; s++) {
      w = w*10 + (*s - '0');
      k *= 10;
    }
  }
  if (*s == 'e' || *s == 'E')
    printf("WARNING: Scientific Notation not supported!");

  double f = v + w / k;

  return minus ? -f : f;
}
00271 
/**
 * Formatted read from a stream; variadic front end for vfscanf().
 *
 * @param stream stream to read from
 * @param format conversion specification string
 * @param ...    destinations for the converted values
 * @return whatever vfscanf() returns for the same arguments
 */
int fscanf(FILE* stream, const char *format, ...)
{
  va_list args;
  va_start(args, format);
  const int result = vfscanf(stream, format, args);
  va_end(args);
  return result;
}
00290 
00299 int vfscanf(FILE* stream, const char *format, va_list ap)
00300 {
00301   const char *p = format;
00302   char ch;
00303   int q = 0;
00304   uintmax_t val = 0;
00305   int rank = RANK_INT;    // Default rank
00306   unsigned int width = UINT_MAX;
00307   int base;
00308   int flags = 0;
00309   enum {
00310     ST_NORMAL,        // Ground state
00311     ST_FLAGS,         // Special flags
00312     ST_WIDTH,         // Field width
00313     ST_MODIFIERS,     // Length or conversion modifiers
00314     ST_MATCH_INIT,    // Initial state of %[ sequence
00315     ST_MATCH,         // Main state of %[ sequence
00316     ST_MATCH_RANGE,   // After - in a %[ sequence
00317   } state = ST_NORMAL;
00318   char *oarg, *sarg = NULL;    // %s %c or %[ string argument
00319   enum Bail bail = BAIL_NONE;
00320   int sign;
00321   int converted = 0;    // Successful conversions
00322   unsigned long matchmap[((1 << CHAR_BIT)+(LongBit()-1))/LongBit()];
00323   int matchinv = 0;   // Is match map inverted?
00324   unsigned char range_start = 0;
00325   off_t start_off = ftell(stream);
00326 
00327   // Skip leading spaces
00328   SkipSpace(stream);
00329   
00330   while ((ch = *p++) && !bail) {
00331     switch (state) {
00332       case ST_NORMAL:
00333         if (ch == '%') {
00334           state = ST_FLAGS;
00335           flags = 0; rank = RANK_INT; width = UINT_MAX;
00336         } else if (isspace(static_cast<unsigned char>(ch))) {
00337           SkipSpace(stream);
00338         } else {
00339           if (fgetc(stream) != ch) 
00340             bail = BAIL_ERR;  // Match failure 
00341         }
00342         break;
00343 
00344       case ST_FLAGS:
00345         switch (ch) {
00346           case '*':
00347             flags |= FL_SPLAT;
00348           break;
00349 
00350           case '0' ... '9':
00351             width = (ch-'0');
00352             state = ST_WIDTH;
00353             flags |= FL_WIDTH;
00354           break;
00355 
00356           default:
00357             state = ST_MODIFIERS;
00358             p--;      // Process this character again
00359           break;
00360         }
00361       break;
00362 
00363       case ST_WIDTH:
00364         if (ch >= '0' && ch <= '9') {
00365           width = width*10+(ch-'0');
00366         } else {
00367           state = ST_MODIFIERS;
00368           p--;      // Process this character again
00369         }
00370       break;
00371 
00372       case ST_MODIFIERS:
00373         switch (ch) {
00374           // Length modifiers - nonterminal sequences
00375           case 'h':
00376             rank--;     // Shorter rank
00377           break;
00378           case 'l':
00379             rank++;     // Longer rank
00380           break;
00381           case 'j':
00382             rank = kIntMaxRank;
00383           break;
00384           case 'z':
00385             rank = kSizeTRank;
00386           break;
00387           case 't':
00388             rank = kPtrDiffRank;
00389           break;
00390           case 'L':
00391           case 'q':
00392             rank = RANK_LONGLONG; // long double/long long
00393           break;
00394 
00395           default:
00396             // Output modifiers - terminal sequences
00397             state = ST_NORMAL;  // Next state will be normal
00398             if (rank < kMinRank)  // Canonicalize rank
00399               rank = kMinRank;
00400             else if (rank > kMaxRank)
00401               rank = kMaxRank;
00402 
00403           switch (ch) {
00404             case 'P':   // Upper case pointer
00405             case 'p':   // Pointer
00406               rank = RANK_PTR;
00407               base = 0; sign = 0;
00408             goto scan_int;
00409         
00410             case 'i':   // Base-independent integer
00411               base = 0; sign = 1;
00412             goto scan_int;
00413         
00414             case 'd':   // Decimal integer
00415               base = 10; sign = 1;
00416             goto scan_int;
00417         
00418             case 'o':   // Octal integer
00419               base = 8; sign = 0;
00420             goto scan_int;
00421         
00422             case 'u':   // Unsigned decimal integer
00423               base = 10; sign = 0;
00424             goto scan_int;
00425             
00426             case 'x':   // Hexadecimal integer
00427             case 'X':
00428               base = 16; sign = 0;
00429             goto scan_int;
00430         
00431             case 'n':   // Number of characters consumed
00432               val = ftell(stream) - start_off;
00433             goto set_integer;
00434         
00435             scan_int:
00436               q = SkipSpace(stream);
00437               if ( q <= 0 ) {
00438                 bail = BAIL_EOF;
00439                 break;
00440               }
00441               val = streamtoumax(stream, base);
00442               converted++;
00443               // fall through
00444 
00445             set_integer:
00446               if (!(flags & FL_SPLAT)) {
00447                 switch(rank) {
00448                   case RANK_CHAR:
00449                     *va_arg(ap, unsigned char *) 
00450                       = static_cast<unsigned char>(val);
00451                   break;
00452                   case RANK_SHORT:
00453                     *va_arg(ap, unsigned short *) 
00454                       = static_cast<unsigned short>(val);
00455                   break;
00456                   case RANK_INT:
00457                     *va_arg(ap, unsigned int *) 
00458                       = static_cast<unsigned int>(val);
00459                   break;
00460                   case RANK_LONG:
00461                     *va_arg(ap, unsigned long *) 
00462                       = static_cast<unsigned long>(val);
00463                   break;
00464                   case RANK_LONGLONG:
00465                     *va_arg(ap, unsigned long long *) 
00466                       = static_cast<unsigned long long>(val);
00467                   break;
00468                   case RANK_PTR:
00469                     *va_arg(ap, void **) 
00470                       = reinterpret_cast<void *>(static_cast<uintptr_t>(val));
00471                   break;
00472                 }
00473               }
00474             break;
00475 
00476             case 'f':   // Preliminary float value parsing
00477             case 'g':
00478             case 'G':
00479             case 'e':
00480             case 'E':
00481               q = SkipSpace(stream);
00482               if (q <= 0) {
00483                 bail = BAIL_EOF;
00484                 break;
00485               }
00486 
00487               double fval = streamtofloat(stream);
00488               switch(rank) {
00489                 case RANK_INT:
00490                   *va_arg(ap, float *) = static_cast<float>(fval);
00491                 break;
00492                 case RANK_LONG:
00493                   *va_arg(ap, double *) = static_cast<double>(fval);
00494                 break;
00495               }
00496               converted++;
00497             break;
00498 
00499             case 'c':               // Character
00500               width = (flags & FL_WIDTH) ? width : 1; // Default width == 1
00501               sarg = va_arg(ap, char *);
00502               while (width--) {
00503                 if ((q = fgetc(stream)) <= 0) {
00504                   bail = BAIL_EOF;
00505                   break;
00506                 }
00507                 *sarg++ = q;
00508               }
00509               if (!bail)
00510                 converted++;
00511             break;
00512       
00513             case 's':               // String
00514             {
00515               char *sp;
00516               sp = sarg = va_arg(ap, char *);
00517               while (width--) {
00518                 q = fgetc(stream);
00519                 if (isspace(static_cast<unsigned char>(q)) || q <= 0) {
00520                   ungetc(q, stream);
00521                   break;
00522                 }
00523                 *sp++ = q;
00524               }
00525               if (sarg != sp) {
00526                 *sp = '\0'; // Terminate output
00527                 converted++;
00528               } else {
00529                 bail = BAIL_EOF;
00530               }
00531             }
00532             break;
00533           
00534             case '[':   // Character range
00535               sarg = va_arg(ap, char *);
00536               state = ST_MATCH_INIT;
00537               matchinv = 0;
00538               memset(matchmap, 0, sizeof matchmap);
00539             break;
00540       
00541             case '%':   // %% sequence
00542               if (fgetc(stream) != '%' ) 
00543                 bail = BAIL_ERR;
00544             break;
00545       
00546             default:    // Anything else
00547               bail = BAIL_ERR;  // Unknown sequence 
00548             break;
00549           }
00550         }
00551       break;
00552 
00553       case ST_MATCH_INIT:   // Initial state for %[ match
00554         if (ch == '^' && !(flags & FL_INV)) {
00555           matchinv = 1;
00556         } else {
00557           SetBit(matchmap, static_cast<unsigned char>(ch));
00558           state = ST_MATCH;
00559         }
00560       break;
00561   
00562       case ST_MATCH:    // Main state for %[ match
00563         if (ch == ']') {
00564           goto match_run;
00565         } else if (ch == '-') {
00566           range_start = static_cast<unsigned char>(ch);
00567           state = ST_MATCH_RANGE;
00568         } else {
00569           SetBit(matchmap, static_cast<unsigned char>(ch));
00570         }
00571       break;
00572   
00573       case ST_MATCH_RANGE:    // %[ match after -
00574         if (ch == ']') {
00575           SetBit(matchmap, static_cast<unsigned char>('-'));
00576           goto match_run;
00577         } else {
00578           int i;
00579           for (i = range_start ; i < (static_cast<unsigned char>(ch)) ; i++)
00580           SetBit(matchmap, i);
00581           state = ST_MATCH;
00582         }
00583       break;
00584 
00585       match_run:      // Match expression finished
00586         char* oarg = sarg;
00587         while (width) {
00588           q = fgetc(stream);
00589           unsigned char qc = static_cast<unsigned char>(q);
00590           if (q <= 0 || !(TestBit(matchmap, qc)^matchinv)) {
00591             ungetc(q, stream);
00592             break;
00593           }
00594           *sarg++ = q;
00595         }
00596         if (oarg != sarg) {
00597           *sarg = '\0';
00598           converted++;
00599         } else {
00600           bail = (q <= 0) ? BAIL_EOF : BAIL_ERR;
00601         }
00602       break;
00603     }
00604   }
00605 
00606   if (bail == BAIL_EOF && !converted)
00607     converted = -1;   // Return EOF (-1)
00608 
00609   return converted;
00610 }
00611 
/**
 * Create a file, or truncate it if it already exists, and open it for
 * writing only. Thin wrapper around open().
 *
 * @param pathname path of the file to create
 * @param mode     permission bits passed through to open()
 * @return whatever open() returns
 */
int creat(const char *pathname, mode_t mode)
{
  const int open_flags = O_WRONLY | O_CREAT | O_TRUNC;
  return open(pathname, open_flags, mode);
}

Generated on Wed Feb 28 19:49:09 2007 for Tesseract by  doxygen 1.5.1