X-Authentication-Warning: delorie.com: mailnull set sender to djgpp-workers-bounces using -f Date: Tue, 18 Dec 2001 12:01:01 -0500 From: AAganichev AT netscape DOT net (Alexander Aganichev) To: djgpp-workers AT delorie DOT com Subject: Re: regcomp NLS fix Message-ID: <6F32F7D2.12300D8B.09ACFA57@netscape.net> X-Mailer: Atlas Mailer 1.0 Content-Type: multipart/mixed; boundary=-------6f340302123118bb6f340302123118bb Reply-To: djgpp-workers AT delorie DOT com ---------6f340302123118bb6f340302123118bb Content-Type: text/plain; charset=iso-8859-1 Content-Transfer-Encoding: 8bit Content-Disposition: inline Hans-Bernhard Broeker wrote: >> regcomp() sometimes crashes when NLS characters are in use. This patch >> seems to fix this (though I'm not sure whether NLS characters may be a >> problem in other places): >I'm quite sure there are. The real problem is that lots of calls of <ctype.h> >functions inside regcomp are incorrect because they pass char >values into them without casting to unsigned char first. This would only >be correct if our libc assumed char == unsigned char, but unless I'm >totally misremembering things, the opposite is true. I have redone the patch (regex.nls+warning.diff) by casting all ctype.h functions' arguments to unsigned char. Also, I have fixed the warnings caused by missing braces and by computed values that were never used. There are 4 more warnings in pedantic mode about possible use of uninitialized variables, but they seem to be safe to ignore. The second fix (regex.nls2.diff) is intended to make the code work properly in an NLS-capable environment. -- alexander aganichev url: http://aaganichev.narod.ru __________________________________________________________________ Your favorite stores, helpful shopping tools and great gift ideas. Experience the convenience of buying online with Shop AT Netscape! 
http://shopnow.netscape.com/ Get your own FREE, personal Netscape Mail account today at http://webmail.netscape.com/ ---------6f340302123118bb6f340302123118bb Content-Type: text/plain; charset=iso-8859-1; name="regex.nls+warning.diff" Content-Transfer-Encoding: 8bit Content-Disposition: inline; filename="regex.nls+warning.diff" Content-Description: regex.nls+warning.diff diff -ru regex.orig/cclass.h regex/cclass.h --- regex.orig/cclass.h Thu Jul 16 21:20:30 1998 +++ regex/cclass.h Tue Dec 18 19:03:20 2001 @@ -5,28 +5,28 @@ char *chars; char *multis; } cclasses[] = { - "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789", "", - "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", - "", - "blank", " \t", "", - "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ -\25\26\27\30\31\32\33\34\35\36\37\177", "", - "digit", "0123456789", "", - "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ + { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789", "" }, + { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", + "" }, + { "blank", " \t", "" }, + { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ +\25\26\27\30\31\32\33\34\35\36\37\177", "" }, + { "digit", "0123456789", "" }, + { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - "", - "lower", "abcdefghijklmnopqrstuvwxyz", - "", - "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ + "" }, + { "lower", "abcdefghijklmnopqrstuvwxyz", + "" }, + { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", - "", - "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - "", - "space", "\t\n\v\f\r ", "", - "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", - "", - "xdigit", "0123456789ABCDEFabcdef", - "", - NULL, 0, "" + "" }, + { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "" }, + { "space", "\t\n\v\f\r ", "" }, + 
{ "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + "" }, + { "xdigit", "0123456789ABCDEFabcdef", + "" }, + { NULL, 0, "" } }; diff -ru regex.orig/cname.h regex/cname.h --- regex.orig/cname.h Thu Jul 16 21:20:30 1998 +++ regex/cname.h Tue Dec 18 18:09:58 2001 @@ -4,100 +4,101 @@ char *name; char code; } cnames[] = { - "NUL", '\0', - "SOH", '\001', - "STX", '\002', - "ETX", '\003', - "EOT", '\004', - "ENQ", '\005', - "ACK", '\006', - "BEL", '\007', - "alert", '\007', - "BS", '\010', - "backspace", '\b', - "HT", '\011', - "tab", '\t', - "LF", '\012', - "newline", '\n', - "VT", '\013', - "vertical-tab", '\v', - "FF", '\014', - "form-feed", '\f', - "CR", '\015', - "carriage-return", '\r', - "SO", '\016', - "SI", '\017', - "DLE", '\020', - "DC1", '\021', - "DC2", '\022', - "DC3", '\023', - "DC4", '\024', - "NAK", '\025', - "SYN", '\026', - "ETB", '\027', - "CAN", '\030', - "EM", '\031', - "SUB", '\032', - "ESC", '\033', - "IS4", '\034', - "FS", '\034', - "IS3", '\035', - "GS", '\035', - "IS2", '\036', - "RS", '\036', - "IS1", '\037', - "US", '\037', - "space", ' ', - "exclamation-mark", '!', - "quotation-mark", '"', - "number-sign", '#', - "dollar-sign", '$', - "percent-sign", '%', - "ampersand", '&', - "apostrophe", '\'', - "left-parenthesis", '(', - "right-parenthesis", ')', - "asterisk", '*', - "plus-sign", '+', - "comma", ',', - "hyphen", '-', - "hyphen-minus", '-', - "period", '.', - "full-stop", '.', - "slash", '/', - "solidus", '/', - "zero", '0', - "one", '1', - "two", '2', - "three", '3', - "four", '4', - "five", '5', - "six", '6', - "seven", '7', - "eight", '8', - "nine", '9', - "colon", ':', - "semicolon", ';', - "less-than-sign", '<', - "equals-sign", '=', - "greater-than-sign", '>', - "question-mark", '?', - "commercial-at", '@', - "left-square-bracket", '[', - "backslash", '\\', - "reverse-solidus", '\\', - "right-square-bracket", ']', - "circumflex", '^', - "circumflex-accent", '^', - "underscore", '_', - "low-line", '_', - "grave-accent", '`', - "left-brace", 
'{', - "left-curly-bracket", '{', - "vertical-line", '|', - "right-brace", '}', - "right-curly-bracket", '}', - "tilde", '~', - "DEL", '\177', - NULL, 0, + { "NUL", '\0' }, + { "SOH", '\001' }, + { "STX", '\002' }, + { "ETX", '\003' }, + { "EOT", '\004' }, + { "ENQ", '\005' }, + { "ACK", '\006' }, + { "BEL", '\007' }, + { "alert", '\007' }, + { "BS", '\010' }, + { "backspace", '\b' }, + { "HT", '\011' }, + { "tab", '\t' }, + { "LF", '\012' }, + { "newline", '\n' }, + { "VT", '\013' }, + { "vertical-tab", '\v' }, + { "FF", '\014' }, + { "form-feed", '\f' }, + { "CR", '\015' }, + { "carriage-return", '\r' }, + { "SO", '\016' }, + { "SI", '\017' }, + { "DLE", '\020' }, + { "DC1", '\021' }, + { "DC2", '\022' }, + { "DC3", '\023' }, + { "DC4", '\024' }, + { "NAK", '\025' }, + { "SYN", '\026' }, + { "ETB", '\027' }, + { "CAN", '\030' }, + { "EM", '\031' }, + { "SUB", '\032' }, + { "ESC", '\033' }, + { "IS4", '\034' }, + { "FS", '\034' }, + { "IS3", '\035' }, + { "GS", '\035' }, + { "IS2", '\036' }, + { "RS", '\036' }, + { "IS1", '\037' }, + { "US", '\037' }, + { "space", ' ' }, + { "exclamation-mark", '!' }, + { "quotation-mark", '\"' }, + { "number-sign", '#' }, + { "dollar-sign", '$' }, + { "percent-sign", '%' }, + { "ampersand", '&' }, + { "apostrophe", '\'' }, + { "left-parenthesis", '(' }, + { "right-parenthesis", ')' }, + { "asterisk", '*' }, + { "plus-sign", '+' }, + { "comma", ',' }, + { "hyphen", '-' }, + { "hyphen-minus", '-' }, + { "period", '.' }, + { "full-stop", '.' }, + { "slash", '/' }, + { "solidus", '/' }, + { "zero", '0' }, + { "one", '1' }, + { "two", '2' }, + { "three", '3' }, + { "four", '4' }, + { "five", '5' }, + { "six", '6' }, + { "seven", '7' }, + { "eight", '8' }, + { "nine", '9' }, + { "colon", ':' }, + { "semicolon", ';' }, + { "less-than-sign", '<' }, + { "equals-sign", '=' }, + { "greater-than-sign", '>' }, + { "question-mark", '?' 
}, + { "commercial-at", '@' }, + { "left-square-bracket", '[' }, + { "backslash", '\\' }, + { "reverse-solidus", '\\' }, + { "right-square-bracket", ']' }, + { "circumflex", '^' }, + { "circumflex-accent", '^' }, + { "underscore", '_' }, + { "low-line", '_' }, + { "grave-accent", '`' }, + { "left-brace", '{' }, + { "left-curly-bracket", '{' }, + { "vertical-line", '|' }, + { "right-brace", '}' }, + { "right-curly-bracket", '}' }, + { "tilde", '~' }, + { "DEL", '\177' }, + { NULL, 0, } }; + diff -ru regex.orig/debug.c regex/debug.c --- regex.orig/debug.c Thu Jul 16 21:20:30 1998 +++ regex/debug.c Tue Dec 18 18:44:44 2001 @@ -235,7 +235,7 @@ { static char buf[10]; - if (isprint(ch) || ch == ' ') + if (isprint((unsigned char)ch) || ch == ' ') sprintf(buf, "%c", ch); else sprintf(buf, "\\%o", ch); diff -ru regex.orig/engine.c regex/engine.c --- regex.orig/engine.c Thu Jul 16 21:20:30 1998 +++ regex/engine.c Tue Dec 18 18:44:48 2001 @@ -1000,7 +1000,7 @@ { static char pbuf[10]; - if (isprint(ch) || ch == ' ') + if (isprint((unsigned char)ch) || ch == ' ') sprintf(pbuf, "%c", ch); else sprintf(pbuf, "\\%o", ch); diff -ru regex.orig/regcomp.c regex/regcomp.c --- regex.orig/regcomp.c Sat Jun 9 23:50:56 2001 +++ regex/regcomp.c Tue Dec 18 18:51:12 2001 @@ -53,10 +53,10 @@ #define NEXTn(n) (p->next += (n)) #define GETNEXT() (*p->next++) #define SETERROR(e) seterr(p, (e)) -#define REQUIRE(co, e) ((co) || SETERROR(e)) -#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) -#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) -#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) +#define REQUIRE(co, e) { if(!(co)) SETERROR(e); } +#define MUSTSEE(c, e) REQUIRE(MORE() && PEEK() == (c), e) +#define MUSTEAT(c, e) REQUIRE(MORE() && GETNEXT() == (c), e) +#define MUSTNOTSEE(c, e) REQUIRE(!MORE() || PEEK() != (c), e) #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) #define 
AHEAD(pos) dofwd(p, pos, HERE()-(pos)) @@ -312,7 +312,7 @@ ordinary(p, c); break; case '{': /* okay as ordinary except if digit follows */ - REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); + REQUIRE(!MORE() || !isdigit((unsigned char)PEEK()), REG_BADRPT); /* FALLTHROUGH */ default: ordinary(p, c); @@ -324,7 +324,7 @@ c = PEEK(); /* we call { a repetition if followed by a digit */ if (!( c == '*' || c == '+' || c == '?' || - (c == '{' && MORE2() && isdigit(PEEK2())) )) + (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) )) return; /* no repetition, we're done */ NEXT(); @@ -353,7 +353,7 @@ case '{': count = p_count(p); if (EAT(',')) { - if (isdigit(PEEK())) { + if (isdigit((unsigned char)PEEK())) { count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ @@ -374,7 +374,7 @@ return; c = PEEK(); if (!( c == '*' || c == '+' || c == '?' || - (c == '{' && MORE2() && isdigit(PEEK2())) ) ) + (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) ) ) return; SETERROR(REG_BADRPT); } @@ -531,7 +531,7 @@ } else if (EATTWO('\\', '{')) { count = p_count(p); if (EAT(',')) { - if (MORE() && isdigit(PEEK())) { + if (MORE() && isdigit((unsigned char)PEEK())) { count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ @@ -562,7 +562,7 @@ register int count = 0; register int ndigits = 0; - while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { + while (MORE() && isdigit((unsigned char)PEEK()) && count <= DUPMAX) { count = count*10 + (GETNEXT() - '0'); ndigits++; } @@ -617,7 +617,7 @@ register int ci; for (i = p->g->csetsize - 1; i >= 0; i--) - if (CHIN(cs, i) && isalpha(i)) { + if (CHIN(cs, i) && isalpha((unsigned char)i)) { ci = othercase(i); if (ci != i) CHadd(cs, ci); @@ -729,7 +729,7 @@ register char *u; register char c; - while (MORE() && isalpha(PEEK())) + while (MORE() && isalpha((unsigned char)PEEK())) NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++) @@ -822,11 
+822,11 @@ othercase(ch) int ch; { - assert(isalpha(ch)); - if (isupper(ch)) - return(tolower(ch)); - else if (islower(ch)) - return(toupper(ch)); + assert(isalpha((unsigned char)ch)); + if (isupper((unsigned char)ch)) + return(tolower((unsigned char)ch)); + else if (islower((unsigned char)ch)) + return(toupper((unsigned char)ch)); else /* peculiar, but could happen */ return(ch); } @@ -865,11 +865,11 @@ static void ordinary(p, ch) register struct parse *p; -register int ch; { register cat_t *cap = p->g->categories; - if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch) + if ((p->g->cflags&REG_ICASE) && isalpha((unsigned char)ch) && + othercase(ch) != ch) bothcases(p, ch); else { EMIT(OCHAR, (unsigned char)ch); @@ -1169,64 +1169,6 @@ (void) strcpy(cs->multis + oldend - 1, cp); cs->multis[cs->smultis - 1] = '\0'; -} - -/* - - mcsub - subtract a collating element from a cset - == static void mcsub(register cset *cs, register char *cp); - */ -static void -mcsub(cs, cp) -register cset *cs; -register char *cp; -{ - register char *fp = mcfind(cs, cp); - register size_t len = strlen(fp); - - assert(fp != NULL); - (void) memmove(fp, fp + len + 1, - cs->smultis - (fp + len + 1 - cs->multis)); - cs->smultis -= len; - - if (cs->smultis == 0) { - free(cs->multis); - cs->multis = NULL; - return; - } - - cs->multis = realloc(cs->multis, cs->smultis); - assert(cs->multis != NULL); -} - -/* - - mcin - is a collating element in a cset? 
- == static int mcin(register cset *cs, register char *cp); - */ -static int -mcin(cs, cp) -register cset *cs; -register char *cp; -{ - return(mcfind(cs, cp) != NULL); -} - -/* - - mcfind - find a collating element in a cset - == static char *mcfind(register cset *cs, register char *cp); - */ -static char * -mcfind(cs, cp) -register cset *cs; -register char *cp; -{ - register char *p; - - if (cs->multis == NULL) - return(NULL); - for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) - if (strcmp(cp, p) == 0) - return(p); - return(NULL); } /* diff -ru regex.orig/regcomp.ih regex/regcomp.ih --- regex.orig/regcomp.ih Fri Jul 17 23:32:04 1998 +++ regex/regcomp.ih Tue Dec 18 18:00:12 2001 @@ -28,9 +28,6 @@ static int firstch(register struct parse *p, register cset *cs); static int nch(register struct parse *p, register cset *cs); static void mcadd(register struct parse *p, register cset *cs, register char *cp); -static void mcsub(register cset *cs, register char *cp); -static int mcin(register cset *cs, register char *cp); -static char *mcfind(register cset *cs, register char *cp); static void mcinvert(register struct parse *p, register cset *cs); static void mccase(register struct parse *p, register cset *cs); static int isinsets(register struct re_guts *g, int c); diff -ru regex.orig/regerror.c regex/regerror.c --- regex.orig/regerror.c Thu Jul 16 21:20:30 1998 +++ regex/regerror.c Tue Dec 18 17:52:34 2001 @@ -36,24 +36,24 @@ char *name; char *explain; } rerrs[] = { - REG_OKAY, "REG_OKAY", "no errors detected", - REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match", - REG_BADPAT, "REG_BADPAT", "invalid regular expression", - REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element", - REG_ECTYPE, "REG_ECTYPE", "invalid character class", - REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)", - REG_ESUBREG, "REG_ESUBREG", "invalid backreference number", - REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced", - REG_EPAREN, "REG_EPAREN", "parentheses not balanced", 
- REG_EBRACE, "REG_EBRACE", "braces not balanced", - REG_BADBR, "REG_BADBR", "invalid repetition count(s)", - REG_ERANGE, "REG_ERANGE", "invalid character range", - REG_ESPACE, "REG_ESPACE", "out of memory", - REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid", - REG_EMPTY, "REG_EMPTY", "empty (sub)expression", - REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug", - REG_INVARG, "REG_INVARG", "invalid argument to regex routine", - -1, "", "*** unknown regexp error code ***", + { REG_OKAY, "REG_OKAY", "no errors detected" }, + { REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" }, + { REG_BADPAT, "REG_BADPAT", "invalid regular expression" }, + { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, + { REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, + { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" }, + { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, + { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" }, + { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" }, + { REG_EBRACE, "REG_EBRACE", "braces not balanced" }, + { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" }, + { REG_ERANGE, "REG_ERANGE", "invalid character range" }, + { REG_ESPACE, "REG_ESPACE", "out of memory" }, + { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" }, + { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" }, + { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" }, + { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" }, + { -1, "", "*** unknown regexp error code ***" } }; /* diff -ru regex.orig/regex2.h regex/regex2.h --- regex.orig/regex2.h Thu Jul 16 21:20:30 1998 +++ regex/regex2.h Tue Dec 18 18:42:56 2001 @@ -132,4 +132,4 @@ /* misc utilities */ #define OUT (CHAR_MAX+1) /* a non-character value */ -#define ISWORD(c) (isalnum(c) || (c) == '_') +#define ISWORD(c) (isalnum((unsigned char)c) || (c) == '_') diff -ru regex.orig/regexec.c regex/regexec.c --- 
regex.orig/regexec.c Thu Jul 16 21:20:30 1998 +++ regex/regexec.c Tue Dec 18 17:50:12 2001 @@ -17,7 +17,9 @@ #include "utils.h" #include "regex2.h" +#ifndef NDEBUG static int nope = 0; /* for use in asserts; shuts lint up */ +#endif /* macros for manipulating states, small version */ #define states unsigned ---------6f340302123118bb6f340302123118bb Content-Type: text/plain; charset=iso-8859-1; name="regex.nls2.diff" Content-Transfer-Encoding: 8bit Content-Disposition: inline; filename="regex.nls2.diff" Content-Description: regex.nls2.diff --- regex/cclass.orig Tue Dec 18 19:03:20 2001 +++ regex/cclass.h Tue Dec 18 19:07:00 2001 @@ -1,32 +1,26 @@ /* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */ /* character-class table */ +typedef enum { + CC_ALNUM, CC_ALPHA, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, CC_LOWER, + CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT +} cclasstype; + static struct cclass { char *name; - char *chars; + cclasstype type; char *multis; } cclasses[] = { - { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789", "" }, - { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", - "" }, - { "blank", " \t", "" }, - { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ -\25\26\27\30\31\32\33\34\35\36\37\177", "" }, - { "digit", "0123456789", "" }, - { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - "" }, - { "lower", "abcdefghijklmnopqrstuvwxyz", - "" }, - { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", - "" }, - { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - "" }, - { "space", "\t\n\v\f\r ", "" }, - { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", - "" }, - { "xdigit", "0123456789ABCDEFabcdef", - "" }, + { "alnum", CC_ALNUM, "" }, + { "alpha", CC_ALPHA, "" }, + { "blank", CC_BLANK, "" }, + { "cntrl", CC_CNTRL, "" }, + { "digit", CC_DIGIT, "" }, + { "graph", CC_GRAPH, 
"" }, + { "lower", CC_LOWER, "" }, + { "print", CC_PRINT, "" }, + { "punct", CC_PUNCT, "" }, + { "space", CC_SPACE, "" }, + { "upper", CC_UPPER, "" }, + { "xdigit", CC_XDIGIT, "" }, { NULL, 0, "" } }; --- regex/regcomp.orig Tue Dec 18 18:51:12 2001 +++ regex/regcomp.c Tue Dec 18 19:22:26 2001 @@ -727,7 +727,7 @@ register struct cclass *cp; register size_t len; register char *u; - register char c; + register int c; while (MORE() && isalpha((unsigned char)PEEK())) NEXT(); @@ -741,9 +741,73 @@ return; } - u = cp->chars; - while ((c = *u++) != '\0') - CHadd(cs, c); + switch(cp->type) { + case CC_ALNUM: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if(isalnum((unsigned char)c)) + CHadd(cs, c); + break; + case CC_ALPHA: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if(isalpha((unsigned char)c)) + CHadd(cs, c); + break; + case CC_BLANK: + /* + * According to the documentation "blank" is the same as + * "space", but original code defines only space and tab + * characters. Who is right? + */ + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if(isspace((unsigned char)c)) + CHadd(cs, c); + break; + case CC_CNTRL: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if(iscntrl((unsigned char)c)) + CHadd(cs, c); + break; + case CC_DIGIT: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if(isdigit((unsigned char)c)) + CHadd(cs, c); + break; + case CC_GRAPH: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if(isgraph((unsigned char)c)) + CHadd(cs, c); + break; + case CC_LOWER: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if(islower((unsigned char)c)) + CHadd(cs, c); + break; + case CC_PRINT: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if(isprint((unsigned char)c)) + CHadd(cs, c); + break; + case CC_PUNCT: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if(ispunct((unsigned char)c)) + CHadd(cs, c); + break; + case CC_SPACE: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if(isspace((unsigned char)c)) + CHadd(cs, c); + break; + case CC_UPPER: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if(isupper((unsigned char)c)) + CHadd(cs, 
c); + break; + case CC_XDIGIT: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if(isxdigit((unsigned char)c)) + CHadd(cs, c); + break; + } for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) MCadd(p, cs, u); } ---------6f340302123118bb6f340302123118bb--