delorie.com/archives/browse.cgi   search  
Mail Archives: djgpp-workers/2012/12/19/12:55:50

X-Authentication-Warning: delorie.com: mail set sender to djgpp-workers-bounces using -f
X-Recipient: djgpp-workers AT delorie DOT com
X-Authenticated: #27081556
X-Provags-ID: V01U2FsdGVkX19b+w54lepb03I5BSOXg6F3Pwzr3cZOlCNv5ocMBm
KezFNz7dkpsNvR
Message-ID: <50D1FFA4.5060008@gmx.de>
Date: Wed, 19 Dec 2012 18:55:48 +0100
From: Juan Manuel Guerrero <juan DOT guerrero AT gmx DOT de>
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20121025 Thunderbird/16.0.2
MIME-Version: 1.0
To: djgpp-workers AT delorie DOT com
Subject: Implementation of %m modifier character for scanf family of functions.
X-Y-GMX-Trusted: 0
Reply-To: djgpp-workers AT delorie DOT com

To ease porting gnu programs I have implemented the m modifier character.
It applies to character/string conversion specifiers and relieves the caller
of the need to allocate a corresponding buffer to hold the input.
The m modifier character is a GNU glibc extension and is specified in the
upcoming revision of the POSIX.1 standard.

Regards,
Juan M. Guerrero



# cvs ci -m"Info about m modifier character added." djgpp/src/docs/kb/wc204.txi
# cvs ci -m"Support for m modifier character added." djgpp/src/libc/ansi/stdio/doscan.c
# cvs ci -m"Info about m modifier character added." djgpp/src/libc/ansi/stdio/scanf.txh
# cvs ci -m"Added new test file tscanf2.c." djgpp/tests/libc/ansi/stdio/makefile
# cvs add djgpp/tests/libc/ansi/stdio/tscanf2.c
# cvs ci -m"Test for m modifier character." djgpp/tests/libc/ansi/stdio/tscanf2.c

2012-12-16  Juan Manuel Guerrero <juan DOT guerrero AT gmx DOT de>

     * djgpp/src/docs/kb/wc204.txi:  Info about m modifier character added.

     * djgpp/src/libc/ansi/stdio/doscan.c:  Support for m modifier character added.

     * djgpp/src/libc/ansi/stdio/scanf.txh:  Info about m modifier character added.

     * djgpp/tests/libc/ansi/stdio/makefile:  Added new test file tscanf2.c.

     * djgpp/tests/libc/ansi/stdio/tscanf2.c:  Test for m modifier character.






diff -aprNU5 djgpp.orig/src/docs/kb/wc204.txi djgpp/src/docs/kb/wc204.txi
--- djgpp.orig/src/docs/kb/wc204.txi    2012-12-10 20:29:16 +0000
+++ djgpp/src/docs/kb/wc204.txi    2012-12-16 22:35:22 +0000
@@ -1254,5 +1254,12 @@ and their contents are discarded.
  @findex _doscan AT r{, and C99 conversion specifiers}
  @findex scanf AT r{, and C99 conversion specifiers}
  The @code{a}, @code{A} and @code{F} conversion specifiers
  are now supported by @code{_doscan} and the @code{scanf}
  family of functions.
+
+@findex _doscan AT r{, and m modifier character}
+@findex scanf AT r{, and m modifier character}
+The @code{m} modifier character is now supported by @code{_doscan}
+and the @code{scanf} family of functions.  This is a @acronym{GNU}
+@code{glibc} extension and it is specified in the upcoming revision
+of the @acronym{POSIX.1} standard.
diff -aprNU5 djgpp.orig/src/libc/ansi/stdio/doscan.c djgpp/src/libc/ansi/stdio/doscan.c
--- djgpp.orig/src/libc/ansi/stdio/doscan.c    2012-12-11 23:35:32 +0000
+++ djgpp/src/libc/ansi/stdio/doscan.c    2012-12-17 23:51:48 +0000
@@ -8,31 +8,37 @@
  #include <stdarg.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <ctype.h>
  #include <locale.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <errno.h>
  #include <libc/file.h>
  #include <libc/local.h>

-#define SPC         01
-#define STP         02
+#define SPC             01
+#define STP             02

-#define CHAR        0
-#define SHORT       1
-#define REGULAR     2
-#define LONG        4
-#define LONGDOUBLE  8
+#define CHAR            0
+#define SHORT           1
+#define REGULAR         2
+#define LONG            4
+#define LONGDOUBLE      8
+
+#define INT             0
+#define FLOAT           1
+
+#define DEFAULT_LENGTH  30000

-#define INT         0
-#define FLOAT       1

  static int _innum(int *ptr, int type, int len, int size, FILE *iop,
                    int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *),
-                  int *eofptr);
+                  int *eofptr, const bool allocate_char_buffer);
  static int _instr(char *ptr, int type, int len, FILE *iop,
                    int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *),
-                  int *eofptr);
+                  int *eofptr, const bool allocate_char_buffer);
  static const char *_getccl(const unsigned char *s);

  static char _sctab[256] = {
    0,0,0,0,0,0,0,0,
    0,SPC,SPC,SPC,SPC,SPC,0,0,
@@ -58,27 +64,32 @@ _doscan_low(FILE *iop, int (*scan_getc)(
  {
    register int ch;
    int nmatch, len, ch1;
    int *ptr, fileended, size;
    int suppressed;
+  bool allocate_char_buffer;
+  int previous_errno = errno;

    decimal_point = localeconv()->decimal_point[0];
    nchars = 0;
    nmatch = 0;
    fileended = 0;
    suppressed = 0;
+  errno = 0;

    for (;;)
    {
      switch (ch = *fmt++)
      {
      case '\0':
        return nmatch;
      case '%':
        if ((ch = *fmt++) == '%')
          goto def;
-      ptr = 0;
+
+      allocate_char_buffer = false;
+      ptr = NULL;
        if (ch != '*')
          ptr = va_arg(argp, int *);
        else
          ch = *fmt++;
        len = 0;
@@ -87,11 +98,11 @@ _doscan_low(FILE *iop, int (*scan_getc)(
        {
          len = len * 10 + ch - '0';
          ch = *fmt++;
        }
        if (len == 0)
-        len = 30000;
+        len = DEFAULT_LENGTH;

        if (ch == 'l')
        {
          size = LONG;
          ch = *fmt++;
@@ -134,10 +145,18 @@ _doscan_low(FILE *iop, int (*scan_getc)(
        {
          /* C99 */
          size = REGULAR;
          ch = *fmt++;
        }
+      else if (ch == 'm')
+      {
+        /* POSIX.1 and GNU glibc extension */
+        allocate_char_buffer = true;
+        ch = *fmt++;
+        if (ch == '[')
+          fmt = _getccl((const unsigned char *)fmt);
+      }
        else if (ch == '[')
          fmt = _getccl((const unsigned char *)fmt);

        if (isupper(ch & 0xff))
        {
@@ -180,21 +199,23 @@ _doscan_low(FILE *iop, int (*scan_getc)(
          else
            *(int *)ptr = nchars;
          break;
        }

-      if (_innum(ptr, ch, len, size, iop, scan_getc, scan_ungetc, &fileended))
+      if (_innum(ptr, ch, len, size, iop, scan_getc, scan_ungetc, &fileended, allocate_char_buffer))
        {
          if (ptr)
            nmatch++;
          else
            suppressed = 1;
        }
        else
        {
-        if (fileended && !nmatch && !suppressed)
+        if ((fileended && !nmatch && !suppressed) || (allocate_char_buffer && errno == ENOMEM))
            return EOF;
+
+        errno = previous_errno;
          return nmatch;
        }
        break;
      case ' ':
      case '\n':
@@ -215,11 +236,11 @@ _doscan_low(FILE *iop, int (*scan_getc)(
  def:
        ch1 = scan_getc(iop);
        if (ch1 != EOF) nchars++;
        if (ch1 != ch)
        {
-        if (ch1==EOF)
+        if (ch1 == EOF)
            return (nmatch || suppressed ? nmatch : EOF);
          scan_ungetc(ch1, iop);
          nchars--;
          return nmatch;
        }
@@ -227,22 +248,23 @@ def:
    }
  }

  static int
  _innum(int *ptr, int type, int len, int size, FILE *iop,
-       int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *), int *eofptr)
+       int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *), int *eofptr,
+       const bool allocate_char_buffer)
  {
    register char *np;
    char numbuf[64];
    register int c, base;
    int expseen, scale, negflg, c1, ndigit;
    long long lcval;
    int cpos;

    if (type == 'c' || type == 's' || type == '[')
      return (_instr(ptr ? (char *)ptr : (char *)NULL, type, len,
-                   iop, scan_getc, scan_ungetc, eofptr));
+                   iop, scan_getc, scan_ungetc, eofptr, allocate_char_buffer));
    lcval = 0;
    ndigit = 0;
    scale = INT;
    if (type == 'a' || type == 'e' || type == 'f' || type == 'g')
      scale = FLOAT;
@@ -253,12 +275,11 @@ _innum(int *ptr, int type, int len, int
      base = 16;
    np = numbuf;

    expseen = 0;
    negflg = 0;
-  while (((nchars++, c = scan_getc(iop)) != EOF)
-         && (_sctab[c & 0xff] & SPC))
+  while (((nchars++, c = scan_getc(iop)) != EOF) && (_sctab[c & 0xff] & SPC))
      ;
    if (c == EOF) nchars--;
    if (c == '-')
    {
      negflg++;
@@ -273,11 +294,11 @@ _innum(int *ptr, int type, int len, int
      c = scan_getc(iop);
      nchars++;
    }

    cpos = 0;
-  for ( ; --len >= 0; *np++ = c, c = scan_getc(iop), nchars++)
+  for ( ; --len > -1; *np++ = c, c = scan_getc(iop), nchars++)
    {
      cpos++;
      if (c == '0' && cpos == 1 && type == 'i')
        base = 8;
      if ((c == 'x' || c == 'X') && (type == 'a' || type == 'i' || type == 'x')
@@ -387,25 +408,48 @@ _innum(int *ptr, int type, int len, int
    return 1;
  }

  static int
  _instr(char *ptr, int type, int len, FILE *iop,
-       int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *), int *eofptr)
+       int (*scan_getc)(FILE *), int (*scan_ungetc)(int, FILE *), int *eofptr,
+       const bool allocate_char_buffer)
  {
    register int ch;
+  char *arg_ptr, *orig_ptr;
+  size_t string_length;
    int ignstp;
    int matched = 0;
+  size_t buffer_size = len;

    *eofptr = 0;
-  if (type == 'c' && len == 30000)
+  if (type == 'c' && len == DEFAULT_LENGTH)
      len = 1;
-  ignstp = 0;

+  if (allocate_char_buffer)
+  {
+    if (!len)
+    {
+      errno = ENOMEM;
+      return 0;
+    }
+    else
+    {
+      arg_ptr = ptr;
+      orig_ptr = ptr = malloc(buffer_size);
+      if (!ptr)
+      {
+        errno = ENOMEM;
+        return 0;
+      }
+    }
+  }
+
+  ignstp = 0;
    if (type == 's')
      ignstp = SPC;

-  while ((nchars++, ch = scan_getc(iop)) != EOF && _sctab[ch & 0xff] & ignstp)
+  while ((string_length = nchars++, ch = scan_getc(iop)) != EOF && _sctab[ch & 0xff] & ignstp)
      ;

    ignstp = SPC;
    if (type == 'c')
      ignstp = 0;
@@ -415,12 +459,29 @@ _instr(char *ptr, int type, int len, FIL
    while (ch != EOF && (_sctab[ch & 0xff] & ignstp) == 0)
    {
      matched = 1;
      if (ptr)
        *ptr++ = ch;
-    if (--len <= 0)
-      break;
+    if (--len < 1)
+    {
+      if (allocate_char_buffer && type != 'c')
+      {
+        ptrdiff_t offset = ptr - orig_ptr;
+        char *new_ptr = realloc(orig_ptr, buffer_size += DEFAULT_LENGTH);
+        if (!new_ptr)
+        {
+          free(orig_ptr);
+          errno = ENOMEM;
+          return 0;
+        }
+        len = DEFAULT_LENGTH;
+        orig_ptr = new_ptr;
+        ptr = orig_ptr + offset;
+      }
+      else
+        break;
+    }
      ch = scan_getc(iop);
      nchars++;
    }

    if (ch != EOF)
@@ -438,12 +499,28 @@ _instr(char *ptr, int type, int len, FIL
      *eofptr = 1;
    }

    if (matched)
    {
+    string_length = nchars - string_length;
      if (ptr && type != 'c')
+    {
        *ptr++ = '\0';
+      string_length++;
+    }
+    if (allocate_char_buffer)
+    {
+      *(char **)arg_ptr = realloc(orig_ptr, string_length);
+      ptr = arg_ptr;
+      if (!*ptr)
+      {
+        free(orig_ptr);
+        errno = ENOMEM;
+        return 0;
+      }
+    }
+
      return 1;
    }

    return 0;
  }
diff -aprNU5 djgpp.orig/src/libc/ansi/stdio/scanf.txh djgpp/src/libc/ansi/stdio/scanf.txh
--- djgpp.orig/src/libc/ansi/stdio/scanf.txh    2012-12-10 20:29:14 +0000
+++ djgpp/src/libc/ansi/stdio/scanf.txh    2012-12-16 22:35:24 +0000
@@ -28,10 +28,18 @@ converted according to the conversion sp
  This allows to describe an input field that is to be skipped.

  @item A width specifier, which specifies the maximum number of input
  characters to use in the conversion.

+@item An @samp{m} character.  This is used with string conversions, and
+relieves the caller of the need to allocate a corresponding buffer to hold
+the input: instead, @code{scanf} allocates a buffer of sufficient size, and
+assigns the address of this buffer to the corresponding pointer argument,
+which should be a pointer to a @var{char *} variable (this variable does
+not need to be initialized before the call).  The caller should subsequently
+free this buffer when it is no longer required.  This is a @acronym{GNU} extension.
+
  @item An optional conversion qualifier, which may be:

  @table @code
  @item hh
  to specify @code{char};
@@ -394,11 +402,12 @@ The number of items successfully matched
  or if there is any input failure before the first item is converted and
  assigned, @code{EOF} is returned.  Note that literal characters
  (including whitespace) in the format string which matched input
  characters count as ``converted items'', so input failure @emph{after}
  such characters were read and matched will @strong{not} cause @code{EOF}
-to be returned.
+to be returned.  If the @code{m} modifier character is used and the memory
+allocation fails, @code{EOF} is returned and @var{errno} is set to @code{ENOMEM}.

  @subheading Portability

  @port-note ansi The conversion specifiers @samp{F}, @samp{D}, @samp{I},
  @samp{O}, and @code{U} are DJGPP extensions; they are provided
@@ -411,20 +420,44 @@ is a very popular extension to ANSI (whi
  @port-note ansi-c99 The @code{hh}, @code{j}, @code{t}
  and @code{z} length modifiers and the @code{a}, @code{A}
  and @code{F} conversion specifiers first appeared
  in the ANSI C99 standard.

+@port-note posix The @code{m} modifier character is a @acronym{GNU}
+extension and is specified in the upcoming revision of the @acronym{POSIX.1}
+standard.
+
  @portability ansi, posix

  @subheading Example

  @example
-int x, y;
+int n, x, y;
  char buf[100];
+char *buf_ptr, *char_ptr;
+
  scanf("%d %d %s", &x, &y, buf);

  /* read to end-of-line */
  scanf("%d %[^\n]\n", &x, buf);
  /* read letters only */
  scanf("%[a-zA-Z]", buf);
+
+/* read 5 characters and allocate the corresponding buffer */
+scanf("%5mc", &char_ptr);
+/* read a whitespace-delimited string and allocate the corresponding buffer */
+scanf("%ms", &buf_ptr);
+
+/* read lowercase letters only and allocate the corresponding buffer */
+n = scanf("%m[a-z]", &buf_ptr);
+if (n == 1)
+@{
+  printf("read: %s\n", buf_ptr);
+  free(buf_ptr);
+@}
+else if (errno != 0)
+  perror("scanf");
+else
+  fprintf(stderr, "No matching characters\n");
+
  @end example

diff -aprNU5 djgpp.orig/tests/libc/ansi/stdio/makefile djgpp/tests/libc/ansi/stdio/makefile
--- djgpp.orig/tests/libc/ansi/stdio/makefile    2008-05-01 00:45:54 +0000
+++ djgpp/tests/libc/ansi/stdio/makefile    2012-12-16 22:35:24 +0000
@@ -22,9 +22,10 @@ SRC += sscanf.c
  SRC += sscanf2.c
  SRC += sscanf3.c
  SRC += tmpnam.c
  SRC += tremove.c
  SRC += tscanf.c
+SRC += tscanf2.c
  SRC += tsnprtf.c
  SRC += tsnprtf2.c

  include $(TOP)/../makefile.inc
diff -aprNU5 djgpp.orig/tests/libc/ansi/stdio/tscanf2.c djgpp/tests/libc/ansi/stdio/tscanf2.c
--- djgpp.orig/tests/libc/ansi/stdio/tscanf2.c    1970-01-01 00:00:00 +0000
+++ djgpp/tests/libc/ansi/stdio/tscanf2.c    2012-12-16 18:42:28 +0000
@@ -0,0 +1,54 @@
+/* Test the GNU C library specific m modifier.  */
+
+#include <stdio.h>
+#include <string.h>
+
+
+int main(void)
+{
+  const char printf_format[] = "The buffer contains: %d   %.5g %s   %s\n";
+  const char scanf_format1[] = "%*[a-zA-Z: ] %d   %lg   %12mc %ms";
+  const char scanf_format2[] = "%*[a-zA-Z: ] %d   %lg   %12c   %s";
+  const char scanf_format3[] = "%*[a-zA-Z: ] %d   %lg   %m[a-z_] %ms";
+  char buffer[128];
+  int iv;
+  double dv;
+  char cv[sizeof("first_string") - 1], sv[sizeof("second_string")];
+  char *cvp, *svp;
+  char *svp1, *svp2;
+
+
+  sprintf(buffer, printf_format, 1, 2.34567, "first_string", "second_string");
+  printf("%s\n\n", buffer);
+
+  sscanf(buffer, scanf_format1, &iv, &dv, &cvp, &svp);
+  printf("Result of scanf using \"%s\":\n"
+         "  arg1:  %d\n"
+         "  arg2:  %g\n"
+         "  arg3(length = %zd):  %s\n"
+         "  arg4(length = %zd):  %s\n", scanf_format1, iv, dv, strlen(cvp) + 1, cvp, strlen(svp) + 1, svp);
+  /*  The caller must free the allocated buffers.  */
+  free(cvp);
+  free(svp);
+
+  printf("\n");
+  sscanf(buffer, scanf_format2, &iv, &dv, cv, sv);
+  printf("Result of scanf using \"%s\":\n"
+         "  arg1:  %d\n"
+         "  arg2:  %g\n"
+         "  arg3(length = %zd):  %s\n"
+         "  arg4(length = %zd):  %s\n", scanf_format2, iv, dv, sizeof("first_string") - 1, cv, sizeof("second_string"), sv);
+
+  printf("\n");
+  sscanf(buffer, scanf_format3, &iv, &dv, &svp1, &svp2);
+  printf("Result of scanf using \"%s\":\n"
+         "  arg1:  %d\n"
+         "  arg2:  %g\n"
+         "  arg3(length = %zd):  %s\n"
+         "  arg4(length = %zd):  %s\n", scanf_format3, iv, dv, strlen(svp1) + 1, svp1, strlen(svp2) + 1, svp2);
+  /*  The caller must free the allocated buffers.  */
+  free(svp1);
+  free(svp2);
+
+  return 0;
+}

- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019