Mail Archives: djgpp-workers/2012/12/19/12:55:50
X-Authentication-Warning: | delorie.com: mail set sender to djgpp-workers-bounces using -f
|
X-Recipient: | djgpp-workers AT delorie DOT com
|
X-Authenticated: | #27081556
|
X-Provags-ID: | V01U2FsdGVkX19b+w54lepb03I5BSOXg6F3Pwzr3cZOlCNv5ocMBm
|
| KezFNz7dkpsNvR
|
Message-ID: | <50D1FFA4.5060008@gmx.de>
|
Date: | Wed, 19 Dec 2012 18:55:48 +0100
|
From: | Juan Manuel Guerrero <juan DOT guerrero AT gmx DOT de>
|
User-Agent: | Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20121025 Thunderbird/16.0.2
|
MIME-Version: | 1.0
|
To: | djgpp-workers AT delorie DOT com
|
Subject: | Implementation of %m modifier character for scanf family of functions.
|
X-Y-GMX-Trusted: | 0
|
Reply-To: | djgpp-workers AT delorie DOT com
|
To ease porting GNU programs, I have implemented the m modifier character.
It applies to character/string conversion specifiers and relieves the caller
of the need to allocate a corresponding buffer to hold the input.
The m modifier character is a GNU glibc extension and is specified in the
upcoming revision of the POSIX.1 standard.
Regards,
Juan M. Guerrero
# cvs ci -m"Info about m modifier character added." djgpp/src/docs/kb/wc204.txi
# cvs ci -m"Support for m modifier character added." djgpp/src/libc/ansi/stdio/doscan.c
# cvs ci -m"Info about m modifier character added." djgpp/src/libc/ansi/stdio/scanf.txh
# cvs ci -m"Added new test file tscanf2.c." djgpp/tests/libc/ansi/stdio/makefile
# cvs add djgpp/tests/libc/ansi/stdio/tscanf2.c
# cvs ci -m"Test for m modifier character." djgpp/tests/libc/ansi/stdio/tscanf2.c
2012-12-16 Juan Manuel Guerrero <juan DOT guerrero AT gmx DOT de>
* djgpp/src/docs/kb/wc204.txi: Info about m modifier character added.
* djgpp/src/libc/ansi/stdio/doscan.c: Support for m modifier character added.
* djgpp/src/libc/ansi/stdio/scanf.txh: Info about m modifier character added.
* djgpp/tests/libc/ansi/stdio/makefile: Added new test file tscanf2.c.
* djgpp/tests/libc/ansi/stdio/tscanf2.c: Test for m modifier character.
diff -aprNU5 djgpp.orig/src/docs/kb/wc204.txi djgpp/src/docs/kb/wc204.txi
--- djgpp.orig/src/docs/kb/wc204.txi 2012-12-10 20:29:16 +0000
+++ djgpp/src/docs/kb/wc204.txi 2012-12-16 22:35:22 +0000
@@ -1254,5 +1254,12 @@ and their contents are discarded.
@findex _doscan AT r{, and C99 conversion specifiers}
@findex scanf AT r{, and C99 conversion specifiers}
The @code{a}, @code{A} and @code{F} conversion specifiers
are now supported by @code{_doscan} and the @code{scanf}
family of functions.
+
+@findex _doscan AT r{, and m modifier character}
+@findex scanf AT r{, and m modifier character}
+The @code{m} modifier character is now supported by @code{_doscan}
+and the @code{scanf} family of functions. This is a @acronym{GNU}
+@code{glibc} extension and it is specified in the upcoming revision
+of the @acronym{POSIX.1} standard.
diff -aprNU5 djgpp.orig/src/libc/ansi/stdio/doscan.c djgpp/src/libc/ansi/stdio/doscan.c
--- djgpp.orig/src/libc/ansi/stdio/doscan.c 2012-12-11 23:35:32 +0000
+++ djgpp/src/libc/ansi/stdio/doscan.c 2012-12-17 23:51:48 +0000
@@ -8,31 +8,37 @@
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <locale.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <errno.h>
#include <libc/file.h>
#include <libc/local.h>
-#define SPC 01
-#define STP 02
+#define SPC 01
+#define STP 02
-#define CHAR 0
-#define SHORT 1
-#define REGULAR 2
-#define LONG 4
-#define LONGDOUBLE 8
+#define CHAR 0
+#define SHORT 1
+#define REGULAR 2
+#define LONG 4
+#define LONGDOUBLE 8
+
+#define INT 0
+#define FLOAT 1
+
+#define DEFAULT_LENGTH 30000
-#define INT 0
-#define FLOAT 1
static int _innum(int *ptr, int type, int len, int size, FILE *iop,
int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *),
- int *eofptr);
+ int *eofptr, const bool allocate_char_buffer);
static int _instr(char *ptr, int type, int len, FILE *iop,
int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *),
- int *eofptr);
+ int *eofptr, const bool allocate_char_buffer);
static const char *_getccl(const unsigned char *s);
static char _sctab[256] = {
0,0,0,0,0,0,0,0,
0,SPC,SPC,SPC,SPC,SPC,0,0,
@@ -58,27 +64,32 @@ _doscan_low(FILE *iop, int (*scan_getc)(
{
register int ch;
int nmatch, len, ch1;
int *ptr, fileended, size;
int suppressed;
+ bool allocate_char_buffer;
+ int previous_errno = errno;
decimal_point = localeconv()->decimal_point[0];
nchars = 0;
nmatch = 0;
fileended = 0;
suppressed = 0;
+ errno = 0;
for (;;)
{
switch (ch = *fmt++)
{
case '\0':
return nmatch;
case '%':
if ((ch = *fmt++) == '%')
goto def;
- ptr = 0;
+
+ allocate_char_buffer = false;
+ ptr = NULL;
if (ch != '*')
ptr = va_arg(argp, int *);
else
ch = *fmt++;
len = 0;
@@ -87,11 +98,11 @@ _doscan_low(FILE *iop, int (*scan_getc)(
{
len = len * 10 + ch - '0';
ch = *fmt++;
}
if (len == 0)
- len = 30000;
+ len = DEFAULT_LENGTH;
if (ch == 'l')
{
size = LONG;
ch = *fmt++;
@@ -134,10 +145,18 @@ _doscan_low(FILE *iop, int (*scan_getc)(
{
/* C99 */
size = REGULAR;
ch = *fmt++;
}
+ else if (ch == 'm')
+ {
+ /* POSIX.1 and GNU glibc extension */
+ allocate_char_buffer = true;
+ ch = *fmt++;
+ if (ch == '[')
+ fmt = _getccl((const unsigned char *)fmt);
+ }
else if (ch == '[')
fmt = _getccl((const unsigned char *)fmt);
if (isupper(ch & 0xff))
{
@@ -180,21 +199,23 @@ _doscan_low(FILE *iop, int (*scan_getc)(
else
*(int *)ptr = nchars;
break;
}
- if (_innum(ptr, ch, len, size, iop, scan_getc, scan_ungetc, &fileended))
+ if (_innum(ptr, ch, len, size, iop, scan_getc, scan_ungetc, &fileended, allocate_char_buffer))
{
if (ptr)
nmatch++;
else
suppressed = 1;
}
else
{
- if (fileended && !nmatch && !suppressed)
+ if ((fileended && !nmatch && !suppressed) || (allocate_char_buffer && errno == ENOMEM))
return EOF;
+
+ errno = previous_errno;
return nmatch;
}
break;
case ' ':
case '\n':
@@ -215,11 +236,11 @@ _doscan_low(FILE *iop, int (*scan_getc)(
def:
ch1 = scan_getc(iop);
if (ch1 != EOF) nchars++;
if (ch1 != ch)
{
- if (ch1==EOF)
+ if (ch1 == EOF)
return (nmatch || suppressed ? nmatch : EOF);
scan_ungetc(ch1, iop);
nchars--;
return nmatch;
}
@@ -227,22 +248,23 @@ def:
}
}
static int
_innum(int *ptr, int type, int len, int size, FILE *iop,
- int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *), int *eofptr)
+ int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *), int *eofptr,
+ const bool allocate_char_buffer)
{
register char *np;
char numbuf[64];
register int c, base;
int expseen, scale, negflg, c1, ndigit;
long long lcval;
int cpos;
if (type == 'c' || type == 's' || type == '[')
return (_instr(ptr ? (char *)ptr : (char *)NULL, type, len,
- iop, scan_getc, scan_ungetc, eofptr));
+ iop, scan_getc, scan_ungetc, eofptr, allocate_char_buffer));
lcval = 0;
ndigit = 0;
scale = INT;
if (type == 'a' || type == 'e' || type == 'f' || type == 'g')
scale = FLOAT;
@@ -253,12 +275,11 @@ _innum(int *ptr, int type, int len, int
base = 16;
np = numbuf;
expseen = 0;
negflg = 0;
- while (((nchars++, c = scan_getc(iop)) != EOF)
- && (_sctab[c & 0xff] & SPC))
+ while (((nchars++, c = scan_getc(iop)) != EOF) && (_sctab[c & 0xff] & SPC))
;
if (c == EOF) nchars--;
if (c == '-')
{
negflg++;
@@ -273,11 +294,11 @@ _innum(int *ptr, int type, int len, int
c = scan_getc(iop);
nchars++;
}
cpos = 0;
- for ( ; --len >= 0; *np++ = c, c = scan_getc(iop), nchars++)
+ for ( ; --len > -1; *np++ = c, c = scan_getc(iop), nchars++)
{
cpos++;
if (c == '0' && cpos == 1 && type == 'i')
base = 8;
if ((c == 'x' || c == 'X') && (type == 'a' || type == 'i' || type == 'x')
@@ -387,25 +408,48 @@ _innum(int *ptr, int type, int len, int
return 1;
}
static int
_instr(char *ptr, int type, int len, FILE *iop,
- int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *), int *eofptr)
+ int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *), int *eofptr,
+ const bool allocate_char_buffer)
{
register int ch;
+ char *arg_ptr, *orig_ptr;
+ size_t string_length;
int ignstp;
int matched = 0;
+ size_t buffer_size = len;
*eofptr = 0;
- if (type == 'c' && len == 30000)
+ if (type == 'c' && len == DEFAULT_LENGTH)
len = 1;
- ignstp = 0;
+ if (allocate_char_buffer)
+ {
+ if (!len)
+ {
+ errno = ENOMEM;
+ return 0;
+ }
+ else
+ {
+ arg_ptr = ptr;
+ orig_ptr = ptr = malloc(buffer_size);
+ if (!ptr)
+ {
+ errno = ENOMEM;
+ return 0;
+ }
+ }
+ }
+
+ ignstp = 0;
if (type == 's')
ignstp = SPC;
- while ((nchars++, ch = scan_getc(iop)) != EOF && _sctab[ch & 0xff] & ignstp)
+ while ((string_length = nchars++, ch = scan_getc(iop)) != EOF && _sctab[ch & 0xff] & ignstp)
;
ignstp = SPC;
if (type == 'c')
ignstp = 0;
@@ -415,12 +459,29 @@ _instr(char *ptr, int type, int len, FIL
while (ch != EOF && (_sctab[ch & 0xff] & ignstp) == 0)
{
matched = 1;
if (ptr)
*ptr++ = ch;
- if (--len <= 0)
- break;
+ if (--len < 1)
+ {
+ if (allocate_char_buffer && type != 'c')
+ {
+ ptrdiff_t offset = ptr - orig_ptr;
+ char *new_ptr = realloc(orig_ptr, buffer_size += DEFAULT_LENGTH);
+ if (!new_ptr)
+ {
+ free(orig_ptr);
+ errno = ENOMEM;
+ return 0;
+ }
+ len = DEFAULT_LENGTH;
+ orig_ptr = new_ptr;
+ ptr = orig_ptr + offset;
+ }
+ else
+ break;
+ }
ch = scan_getc(iop);
nchars++;
}
if (ch != EOF)
@@ -438,12 +499,28 @@ _instr(char *ptr, int type, int len, FIL
*eofptr = 1;
}
if (matched)
{
+ string_length = nchars - string_length;
if (ptr && type != 'c')
+ {
*ptr++ = '\0';
+ string_length++;
+ }
+ if (allocate_char_buffer)
+ {
+ *(char **)arg_ptr = realloc(orig_ptr, string_length);
+ ptr = arg_ptr;
+ if (!*ptr)
+ {
+ free(orig_ptr);
+ errno = ENOMEM;
+ return 0;
+ }
+ }
+
return 1;
}
return 0;
}
diff -aprNU5 djgpp.orig/src/libc/ansi/stdio/scanf.txh djgpp/src/libc/ansi/stdio/scanf.txh
--- djgpp.orig/src/libc/ansi/stdio/scanf.txh 2012-12-10 20:29:14 +0000
+++ djgpp/src/libc/ansi/stdio/scanf.txh 2012-12-16 22:35:24 +0000
@@ -28,10 +28,18 @@ converted according to the conversion sp
This allows to describe an input field that is to be skipped.
@item A width specifier, which specifies the maximum number of input
characters to use in the conversion.
+@item An @samp{m} character.  This is used with string conversions, and
+relieves the caller of the need to allocate a corresponding buffer to hold
+the input: instead, @code{scanf} allocates a buffer of sufficient size, and
+assigns the address of this buffer to the corresponding pointer argument,
+which should be a pointer to a @var{char *} variable (this variable does
+not need to be initialized before the call). The caller should subsequently
+free this buffer when it is no longer required. This is a @acronym{GNU} extension.
+
@item An optional conversion qualifier, which may be:
@table @code
@item hh
to specify @code{char};
@@ -394,11 +402,12 @@ The number of items successfully matched
or if there is any input failure before the first item is converted and
assigned, @code{EOF} is returned. Note that literal characters
(including whitespace) in the format string which matched input
characters count as ``converted items'', so input failure @emph{after}
such characters were read and matched will @strong{not} cause @code{EOF}
-to be returned.
+to be returned. If the @code{m} modifier character has been used and
+the memory allocation failed, then @var{errno} will be set to @code{ENOMEM}.
@subheading Portability
@port-note ansi The conversion specifiers @samp{F}, @samp{D}, @samp{I},
@samp{O}, and @code{U} are DJGPP extensions; they are provided
@@ -411,20 +420,44 @@ is a very popular extension to ANSI (whi
@port-note ansi-c99 The @code{hh}, @code{j}, @code{t}
and @code{z} length modifiers and the @code{a}, @code{A}
and @code{F} conversion specifiers first appeared
in the ANSI C99 standard.
+@port-note posix The @code{m} modifier character is a @acronym{GNU}
+extension and is specified in the upcoming revision of the @acronym{POSIX.1}
+standard.
+
@portability ansi, posix
@subheading Example
@example
-int x, y;
+int n, x, y;
char buf[100];
+char *buf_ptr, *char_ptr;
+
scanf("%d %d %s", &x, &y, buf);
/* read to end-of-line */
scanf("%d %[^\n]\n", &x, buf);
/* read letters only */
scanf("%[a-zA-Z]", buf);
+
+/* read 5 characters and allocate the corresponding buffer */
+scanf("%5mc", &char_ptr);
+/* read a whitespace-delimited string and allocate the corresponding buffer */
+scanf("%ms", &buf_ptr);
+
+/* read letters only and allocate the corresponding buffer */
+n = scanf("%m[a-z]", &buf_ptr);
+if (n == 1)
+@{
+ printf("read: %s\n", buf_ptr);
+ free(buf_ptr);
+@}
+else if (errno != 0)
+ perror("scanf");
+else
+ fprintf(stderr, "No matching characters\n");
+
@end example
diff -aprNU5 djgpp.orig/tests/libc/ansi/stdio/makefile djgpp/tests/libc/ansi/stdio/makefile
--- djgpp.orig/tests/libc/ansi/stdio/makefile 2008-05-01 00:45:54 +0000
+++ djgpp/tests/libc/ansi/stdio/makefile 2012-12-16 22:35:24 +0000
@@ -22,9 +22,10 @@ SRC += sscanf.c
SRC += sscanf2.c
SRC += sscanf3.c
SRC += tmpnam.c
SRC += tremove.c
SRC += tscanf.c
+SRC += tscanf2.c
SRC += tsnprtf.c
SRC += tsnprtf2.c
include $(TOP)/../makefile.inc
diff -aprNU5 djgpp.orig/tests/libc/ansi/stdio/tscanf2.c djgpp/tests/libc/ansi/stdio/tscanf2.c
--- djgpp.orig/tests/libc/ansi/stdio/tscanf2.c 1970-01-01 00:00:00 +0000
+++ djgpp/tests/libc/ansi/stdio/tscanf2.c 2012-12-16 18:42:28 +0000
@@ -0,0 +1,54 @@
+/* Test the GNU C library specific m modifier. */
+
+#include <stdio.h>
+#include <string.h>
+
+
+int main(void)
+{
+ const char printf_format[] = "The buffer contains: %d %.5g %s %s\n";
+ const char scanf_format1[] = "%*[a-zA-Z: ] %d %lg %12mc %ms";
+ const char scanf_format2[] = "%*[a-zA-Z: ] %d %lg %12c %s";
+ const char scanf_format3[] = "%*[a-zA-Z: ] %d %lg %m[a-z_] %ms";
+ char buffer[128];
+ int iv;
+ double dv;
+ char cv[sizeof("first_string") - 1], sv[sizeof("second_string")];
+ char *cvp, *svp;
+ char *svp1, *svp2;
+
+
+ sprintf(buffer, printf_format, 1, 2.34567, "first_string", "second_string");
+ printf("%s\n\n", buffer);
+
+ sscanf(buffer, scanf_format1, &iv, &dv, &cvp, &svp);
+ printf("Result of scanf using \"%s\":\n"
+ " arg1: %d\n"
+ " arg2: %g\n"
+ " arg3(length = %zd): %s\n"
+ " arg4(length = %zd): %s\n", scanf_format1, iv, dv, strlen(cvp) + 1, cvp, strlen(svp) + 1, svp);
+ /* The caller must free the allocated buffers. */
+ free(cvp);
+ free(svp);
+
+ printf("\n");
+ sscanf(buffer, scanf_format2, &iv, &dv, cv, sv);
+ printf("Result of scanf using \"%s\":\n"
+ " arg1: %d\n"
+ " arg2: %g\n"
+ " arg3(length = %zd): %s\n"
+ " arg4(length = %zd): %s\n", scanf_format2, iv, dv, sizeof("first_string") - 1, cv, sizeof("second_string"), sv);
+
+ printf("\n");
+ sscanf(buffer, scanf_format3, &iv, &dv, &svp1, &svp2);
+ printf("Result of scanf using \"%s\":\n"
+ " arg1: %d\n"
+ " arg2: %g\n"
+ " arg3(length = %zd): %s\n"
+ " arg4(length = %zd): %s\n", scanf_format3, iv, dv, strlen(svp1) + 1, svp1, strlen(svp2) + 1, svp2);
+ /* The caller must free the allocated buffers. */
+ free(svp1);
+ free(svp2);
+
+ return 0;
+}
- Raw text -