X-Authentication-Warning: delorie.com: mail set sender to djgpp-workers-bounces using -f X-Recipient: djgpp-workers AT delorie DOT com X-Authenticated: #27081556 X-Provags-ID: V01U2FsdGVkX19b+w54lepb03I5BSOXg6F3Pwzr3cZOlCNv5ocMBm KezFNz7dkpsNvR Message-ID: <50D1FFA4.5060008@gmx.de> Date: Wed, 19 Dec 2012 18:55:48 +0100 From: Juan Manuel Guerrero User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20121025 Thunderbird/16.0.2 MIME-Version: 1.0 To: djgpp-workers AT delorie DOT com Subject: Implementation of %m modifier character for scanf family of functions. Content-Type: text/plain; charset=ISO-8859-15; format=flowed Content-Transfer-Encoding: 7bit X-Y-GMX-Trusted: 0 Reply-To: djgpp-workers AT delorie DOT com To ease porting gnu programs I have implemented the m modifier character. It applies to character/string conversion specifiers and relieves the caller of the need to allocate a corresponding buffer to hold the input. The m modifier character is a GNU glibc extension and is specified in the upcoming revision of the POSIX.1 standard. Regards, Juan M. Guerrero # cvs ci -m"Info about m modifier character added." djgpp/src/docs/kb/wc204.txi # cvs ci -m"Support for m modifier character added." djgpp/src/libc/ansi/stdio/doscan.c # cvs ci -m"Info about m modifier character added." djgpp/src/libc/ansi/stdio/scanf.txh # cvs ci -m"Added new test file tscanf2.c." djgpp/tests/libc/ansi/stdio/makefile # cvs add djgpp/tests/libc/ansi/stdio/tscanf2.c # cvs ci -m"Test for m modifier character." djgpp/tests/libc/ansi/stdio/tscanf2.c 2012-12-16 Juan Manuel Guerrero * djgpp/src/docs/kb/wc204.txi: Info about m modifier character added. * djgpp/src/libc/ansi/stdio/doscan.c: Support for m modifier character added. * djgpp/src/libc/ansi/stdio/scanf.txh: Info about m modifier character added. * djgpp/tests/libc/ansi/stdio/makefile: Added new test file tscanf2.c. * djgpp/tests/libc/ansi/stdio/tscanf2.c: Test for m modifier character. 
diff -aprNU5 djgpp.orig/src/docs/kb/wc204.txi djgpp/src/docs/kb/wc204.txi --- djgpp.orig/src/docs/kb/wc204.txi 2012-12-10 20:29:16 +0000 +++ djgpp/src/docs/kb/wc204.txi 2012-12-16 22:35:22 +0000 @@ -1254,5 +1254,12 @@ and their contents are discarded. @findex _doscan@r{, and C99 conversion specifiers} @findex scanf@r{, and C99 conversion specifiers} The @code{a}, @code{A} and @code{F} conversion specifiers are now supported by @code{_doscan} and the @code{scanf} family of functions. + +@findex _doscan@r{, and m modifier character} +@findex scanf@r{, and m modifier character} +The @code{m} modifier character is now supported by @code{_doscan} +and the @code{scanf} family of functions. This is a @acronym{GNU} +@code{glibc} extension and it is specified in the upcoming revision +of the @acronym{POSIX.1} standard. diff -aprNU5 djgpp.orig/src/libc/ansi/stdio/doscan.c djgpp/src/libc/ansi/stdio/doscan.c --- djgpp.orig/src/libc/ansi/stdio/doscan.c 2012-12-11 23:35:32 +0000 +++ djgpp/src/libc/ansi/stdio/doscan.c 2012-12-17 23:51:48 +0000 @@ -8,31 +8,37 @@ #include #include #include #include #include +#include +#include +#include #include #include -#define SPC 01 -#define STP 02 +#define SPC 01 +#define STP 02 -#define CHAR 0 -#define SHORT 1 -#define REGULAR 2 -#define LONG 4 -#define LONGDOUBLE 8 +#define CHAR 0 +#define SHORT 1 +#define REGULAR 2 +#define LONG 4 +#define LONGDOUBLE 8 + +#define INT 0 +#define FLOAT 1 + +#define DEFAULT_LENGTH 30000 -#define INT 0 -#define FLOAT 1 static int _innum(int *ptr, int type, int len, int size, FILE *iop, int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *), - int *eofptr); + int *eofptr, const bool allocate_char_buffer); static int _instr(char *ptr, int type, int len, FILE *iop, int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *), - int *eofptr); + int *eofptr, const bool allocate_char_buffer); static const char *_getccl(const unsigned char *s); static char _sctab[256] = { 0,0,0,0,0,0,0,0, 
0,SPC,SPC,SPC,SPC,SPC,0,0, @@ -58,27 +64,32 @@ _doscan_low(FILE *iop, int (*scan_getc)( { register int ch; int nmatch, len, ch1; int *ptr, fileended, size; int suppressed; + bool allocate_char_buffer; + int previous_errno = errno; decimal_point = localeconv()->decimal_point[0]; nchars = 0; nmatch = 0; fileended = 0; suppressed = 0; + errno = 0; for (;;) { switch (ch = *fmt++) { case '\0': return nmatch; case '%': if ((ch = *fmt++) == '%') goto def; - ptr = 0; + + allocate_char_buffer = false; + ptr = NULL; if (ch != '*') ptr = va_arg(argp, int *); else ch = *fmt++; len = 0; @@ -87,11 +98,11 @@ _doscan_low(FILE *iop, int (*scan_getc)( { len = len * 10 + ch - '0'; ch = *fmt++; } if (len == 0) - len = 30000; + len = DEFAULT_LENGTH; if (ch == 'l') { size = LONG; ch = *fmt++; @@ -134,10 +145,18 @@ _doscan_low(FILE *iop, int (*scan_getc)( { /* C99 */ size = REGULAR; ch = *fmt++; } + else if (ch == 'm') + { + /* POSIX.1 and GNU glibc extension */ + allocate_char_buffer = true; + ch = *fmt++; + if (ch == '[') + fmt = _getccl((const unsigned char *)fmt); + } else if (ch == '[') fmt = _getccl((const unsigned char *)fmt); if (isupper(ch & 0xff)) { @@ -180,21 +199,23 @@ _doscan_low(FILE *iop, int (*scan_getc)( else *(int *)ptr = nchars; break; } - if (_innum(ptr, ch, len, size, iop, scan_getc, scan_ungetc, &fileended)) + if (_innum(ptr, ch, len, size, iop, scan_getc, scan_ungetc, &fileended, allocate_char_buffer)) { if (ptr) nmatch++; else suppressed = 1; } else { - if (fileended && !nmatch && !suppressed) + if ((fileended && !nmatch && !suppressed) || (allocate_char_buffer && errno == ENOMEM)) return EOF; + + errno = previous_errno; return nmatch; } break; case ' ': case '\n': @@ -215,11 +236,11 @@ _doscan_low(FILE *iop, int (*scan_getc)( def: ch1 = scan_getc(iop); if (ch1 != EOF) nchars++; if (ch1 != ch) { - if (ch1==EOF) + if (ch1 == EOF) return (nmatch || suppressed ? 
nmatch : EOF); scan_ungetc(ch1, iop); nchars--; return nmatch; } @@ -227,22 +248,23 @@ def: } } static int _innum(int *ptr, int type, int len, int size, FILE *iop, - int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *), int *eofptr) + int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *), int *eofptr, + const bool allocate_char_buffer) { register char *np; char numbuf[64]; register int c, base; int expseen, scale, negflg, c1, ndigit; long long lcval; int cpos; if (type == 'c' || type == 's' || type == '[') return (_instr(ptr ? (char *)ptr : (char *)NULL, type, len, - iop, scan_getc, scan_ungetc, eofptr)); + iop, scan_getc, scan_ungetc, eofptr, allocate_char_buffer)); lcval = 0; ndigit = 0; scale = INT; if (type == 'a' || type == 'e' || type == 'f' || type == 'g') scale = FLOAT; @@ -253,12 +275,11 @@ _innum(int *ptr, int type, int len, int base = 16; np = numbuf; expseen = 0; negflg = 0; - while (((nchars++, c = scan_getc(iop)) != EOF) - && (_sctab[c & 0xff] & SPC)) + while (((nchars++, c = scan_getc(iop)) != EOF) && (_sctab[c & 0xff] & SPC)) ; if (c == EOF) nchars--; if (c == '-') { negflg++; @@ -273,11 +294,11 @@ _innum(int *ptr, int type, int len, int c = scan_getc(iop); nchars++; } cpos = 0; - for ( ; --len >= 0; *np++ = c, c = scan_getc(iop), nchars++) + for ( ; --len > -1; *np++ = c, c = scan_getc(iop), nchars++) { cpos++; if (c == '0' && cpos == 1 && type == 'i') base = 8; if ((c == 'x' || c == 'X') && (type == 'a' || type == 'i' || type == 'x') @@ -387,25 +408,48 @@ _innum(int *ptr, int type, int len, int return 1; } static int _instr(char *ptr, int type, int len, FILE *iop, - int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *), int *eofptr) + int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *), int *eofptr, + const bool allocate_char_buffer) { register int ch; + char *arg_ptr, *orig_ptr; + size_t string_length; int ignstp; int matched = 0; + size_t buffer_size = len; *eofptr = 0; - if (type == 'c' && len == 30000) + if (type == 'c' && 
len == DEFAULT_LENGTH) len = 1; - ignstp = 0; + if (allocate_char_buffer) + { + if (!len) + { + errno = ENOMEM; + return 0; + } + else + { + arg_ptr = ptr; + orig_ptr = ptr = malloc(buffer_size); + if (!ptr) + { + errno = ENOMEM; + return 0; + } + } + } + + ignstp = 0; if (type == 's') ignstp = SPC; - while ((nchars++, ch = scan_getc(iop)) != EOF && _sctab[ch & 0xff] & ignstp) + while ((string_length = nchars++, ch = scan_getc(iop)) != EOF && _sctab[ch & 0xff] & ignstp) ; ignstp = SPC; if (type == 'c') ignstp = 0; @@ -415,12 +459,29 @@ _instr(char *ptr, int type, int len, FIL while (ch != EOF && (_sctab[ch & 0xff] & ignstp) == 0) { matched = 1; if (ptr) *ptr++ = ch; - if (--len <= 0) - break; + if (--len < 1) + { + if (allocate_char_buffer && type != 'c') + { + ptrdiff_t offset = ptr - orig_ptr; + char *new_ptr = realloc(orig_ptr, buffer_size += DEFAULT_LENGTH); + if (!new_ptr) + { + free(orig_ptr); + errno = ENOMEM; + return 0; + } + len = DEFAULT_LENGTH; + orig_ptr = new_ptr; + ptr = orig_ptr + offset; + } + else + break; + } ch = scan_getc(iop); nchars++; } if (ch != EOF) @@ -438,12 +499,28 @@ _instr(char *ptr, int type, int len, FIL *eofptr = 1; } if (matched) { + string_length = nchars - string_length; if (ptr && type != 'c') + { *ptr++ = '\0'; + string_length++; + } + if (allocate_char_buffer) + { + *(char **)arg_ptr = realloc(orig_ptr, string_length); + ptr = arg_ptr; + if (!*ptr) + { + free(orig_ptr); + errno = ENOMEM; + return 0; + } + } + return 1; } return 0; } diff -aprNU5 djgpp.orig/src/libc/ansi/stdio/scanf.txh djgpp/src/libc/ansi/stdio/scanf.txh --- djgpp.orig/src/libc/ansi/stdio/scanf.txh 2012-12-10 20:29:14 +0000 +++ djgpp/src/libc/ansi/stdio/scanf.txh 2012-12-16 22:35:24 +0000 @@ -28,10 +28,18 @@ converted according to the conversion sp This allows to describe an input field that is to be skipped. @item A width specifier, which specifies the maximum number of input characters to use in the conversion. +@item A @samp{m} character. 
This is used with string conversions, and +relieves the caller of the need to allocate a corresponding buffer to hold +the input: instead, @code{scanf} allocates a buffer of sufficient size, and +assigns the address of this buffer to the corresponding pointer argument, +which should be a pointer to a @code{char *} variable (this variable does +not need to be initialized before the call). The caller should subsequently +free this buffer when it is no longer required. This is a @acronym{GNU} extension. + @item An optional conversion qualifier, which may be: @table @code @item hh to specify @code{char}; @@ -394,11 +402,12 @@ The number of items successfully matched or if there is any input failure before the first item is converted and assigned, @code{EOF} is returned. Note that literal characters (including whitespace) in the format string which matched input characters count as ``converted items'', so input failure @emph{after} such characters were read and matched will @strong{not} cause @code{EOF} -to be returned. +to be returned. If the @code{m} modifier character has been used and +the memory allocation fails, @code{EOF} is returned and @code{errno} is set to @code{ENOMEM}. @subheading Portability @port-note ansi The conversion specifiers @samp{F}, @samp{D}, @samp{I}, @samp{O}, and @code{U} are DJGPP extensions; they are provided @@ -411,20 +420,44 @@ is a very popular extension to ANSI (whi @port-note ansi-c99 The @code{hh}, @code{j}, @code{t} and @code{z} length modifiers and the @code{a}, @code{A} and @code{F} conversion specifiers first appeared in the ANSI C99 standard. +@port-note posix The @code{m} modifier character is a @acronym{GNU} +extension and is specified in the upcoming revision of the @acronym{POSIX.1} +standard. 
+ @portability ansi, posix @subheading Example @example -int x, y; +int n, x, y; char buf[100]; +char *buf_ptr, *char_ptr; + scanf("%d %d %s", &x, &y, buf); /* read to end-of-line */ scanf("%d %[^\n]\n", &x, buf); /* read letters only */ scanf("%[a-zA-Z]", buf); + +/* read 5 characters and allocate the corresponding buffer */ +scanf("%5mc", &char_ptr); +/* read a whitespace-delimited string and allocate the corresponding buffer */ +scanf("%ms", &buf_ptr); + +/* read lowercase letters only and allocate the corresponding buffer */ +n = scanf("%m[a-z]", &buf_ptr); +if (n == 1) +@{ + printf("read: %s\n", buf_ptr); + free(buf_ptr); +@} +else if (errno != 0) + perror("scanf"); +else + fprintf(stderr, "No matching characters\n"); + @end example diff -aprNU5 djgpp.orig/tests/libc/ansi/stdio/makefile djgpp/tests/libc/ansi/stdio/makefile --- djgpp.orig/tests/libc/ansi/stdio/makefile 2008-05-01 00:45:54 +0000 +++ djgpp/tests/libc/ansi/stdio/makefile 2012-12-16 22:35:24 +0000 @@ -22,9 +22,10 @@ SRC += sscanf.c SRC += sscanf2.c SRC += sscanf3.c SRC += tmpnam.c SRC += tremove.c SRC += tscanf.c +SRC += tscanf2.c SRC += tsnprtf.c SRC += tsnprtf2.c include $(TOP)/../makefile.inc diff -aprNU5 djgpp.orig/tests/libc/ansi/stdio/tscanf2.c djgpp/tests/libc/ansi/stdio/tscanf2.c --- djgpp.orig/tests/libc/ansi/stdio/tscanf2.c 1970-01-01 00:00:00 +0000 +++ djgpp/tests/libc/ansi/stdio/tscanf2.c 2012-12-16 18:42:28 +0000 @@ -0,0 +1,54 @@ +/* Test the GNU C library specific m modifier. 
*/ + +#include +#include + + +int main(void) +{ + const char printf_format[] = "The buffer contains: %d %.5g %s %s\n"; + const char scanf_format1[] = "%*[a-zA-Z: ] %d %lg %12mc %ms"; + const char scanf_format2[] = "%*[a-zA-Z: ] %d %lg %12c %s"; + const char scanf_format3[] = "%*[a-zA-Z: ] %d %lg %m[a-z_] %ms"; + char buffer[128]; + int iv; + double dv; + char cv[sizeof("first_string") - 1], sv[sizeof("second_string")]; + char *cvp, *svp; + char *svp1, *svp2; + + + sprintf(buffer, printf_format, 1, 2.34567, "first_string", "second_string"); + printf("%s\n\n", buffer); + + sscanf(buffer, scanf_format1, &iv, &dv, &cvp, &svp); + printf("Result of scanf using \"%s\":\n" + " arg1: %d\n" + " arg2: %g\n" + " arg3(length = %zd): %s\n" + " arg4(length = %zd): %s\n", scanf_format1, iv, dv, strlen(cvp) + 1, cvp, strlen(svp) + 1, svp); + /* The caller must free the allocated buffers. */ + free(cvp); + free(svp); + + printf("\n"); + sscanf(buffer, scanf_format2, &iv, &dv, cv, sv); + printf("Result of scanf using \"%s\":\n" + " arg1: %d\n" + " arg2: %g\n" + " arg3(length = %zd): %s\n" + " arg4(length = %zd): %s\n", scanf_format2, iv, dv, sizeof("first_string") - 1, cv, sizeof("second_string"), sv); + + printf("\n"); + sscanf(buffer, scanf_format3, &iv, &dv, &svp1, &svp2); + printf("Result of scanf using \"%s\":\n" + " arg1: %d\n" + " arg2: %g\n" + " arg3(length = %zd): %s\n" + " arg4(length = %zd): %s\n", scanf_format3, iv, dv, strlen(svp1) + 1, svp1, strlen(svp2) + 1, svp2); + /* The caller must free the allocated buffers. */ + free(svp1); + free(svp2); + + return 0; +}