From: Message-Id: <200304201804.h3KI4eIA013584@speedy.ludd.luth.se> Subject: Yet another try on nan in strto{f,d,ld} To: DJGPP-WORKERS Date: Sun, 20 Apr 2003 20:04:40 +0200 (CEST) X-Mailer: ELM [version 2.4ME+ PL78 (25)] MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset=US-ASCII X-MailScanner: Found to be clean Reply-To: djgpp-workers AT delorie DOT com Hello. Ok. This is what I have. All three functions are now implemented. I hope I haven't forgotten to incorporate any comments. 1. Note that the final paragraph in Return value for strtold is missing, in case you notice. It's because that isn't implemented. That's how it was. 2. You might want to look at the end of the changes of strtold.txh: "next most significant bit"? 3. To do the right thing for strtold(), I need to know if the integer bit may be clear for a NaN representation. Otherwise I need to set it. 4. I haven't changed _strtold()'s documentation. Should we really have the exact same documentation twice (except the portability notes)? In the same file too?! There must be a better way to do this. 5. Still no tests for strtold(). Right, MartinS Index: djgpp/src/docs/kb/wc204.txi =================================================================== RCS file: /cvs/djgpp/djgpp/src/docs/kb/wc204.txi,v retrieving revision 1.151 diff -p -u -r1.151 wc204.txi --- djgpp/src/docs/kb/wc204.txi 26 Mar 2003 19:54:46 -0000 1.151 +++ djgpp/src/docs/kb/wc204.txi 20 Apr 2003 17:47:40 -0000 @@ -944,3 +944,12 @@ to the POSIX functions @code{open}, @cod @code{fsync} and @code{fdopen} and the ANSI functions @code{fopen}, @code{freopen}, @code{fclose}, @code{ftell}, @code{fseek} and @code{rewind}, to make them aware of file descriptors for directories. 
+ +@findex strtod AT r{, @code{Inf} and @code{NaN} in input} +@findex strtof AT r{, @code{Inf} and @code{NaN} in input} +@findex strtold AT r{, @code{Inf} and @code{NaN} in input} +@findex _strtold AT r{, @code{Inf} and @code{NaN} in input} +The functions @code{strtod}, @code{strtof}, @code{strtold} and +@code{_strtold} now understand ``Inf'', ``Infinity'', ``NaN'', +``NaN()'', ``NaN(@var{hex-number})'' and any variations of case in the +input string. Index: djgpp/src/libc/ansi/stdlib/strtod.c =================================================================== RCS file: /cvs/djgpp/djgpp/src/libc/ansi/stdlib/strtod.c,v retrieving revision 1.5 diff -p -u -r1.5 strtod.c --- djgpp/src/libc/ansi/stdlib/strtod.c 17 Oct 2002 23:00:24 -0000 1.5 +++ djgpp/src/libc/ansi/stdlib/strtod.c 20 Apr 2003 17:47:40 -0000 @@ -1,14 +1,18 @@ +/* Copyright (C) 2003 DJ Delorie, see COPYING.DJ for details */ /* Copyright (C) 2001 DJ Delorie, see COPYING.DJ for details */ /* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */ /* Copyright (C) 1997 DJ Delorie, see COPYING.DJ for details */ /* Copyright (C) 1996 DJ Delorie, see COPYING.DJ for details */ /* Copyright (C) 1994 DJ Delorie, see COPYING.DJ for details */ +#include #include #include #include #include #include +#include #include +#include double strtod(const char *s, char **sret) @@ -32,6 +36,7 @@ strtod(const char *s, char **sret) while (isspace((unsigned char) *s)) s++; + /* Handle leading sign. */ if (*s == '+') s++; else if (*s == '-') @@ -40,6 +45,75 @@ strtod(const char *s, char **sret) s++; } + /* Handle INF and INFINITY. */ + if ( ! strnicmp( "INF", s, 3 ) ) + { + if( sret ) + { + if ( ! strnicmp( "INITY", &s[3], 5 ) ) + { + *sret = unconst((&s[8]), char *); + } + else + { + *sret = unconst((&s[3]), char *); + } + } + + if( 0 <= sign ) + { + return INFINITY; + } + else + { + return -INFINITY; + } + } + + /* Handle NAN and NAN(). */ + if ( ! 
strnicmp( "NAN", s, 3 ) ) + { + double tmp_d = NAN; + double_t n = *(double_t *)(&tmp_d); + + if ( sign < 0 ) + { + n.sign = 1; + } + + if( s[3] == '(' ) + { + unsigned long long mantissa_bits = 0; + char *endptr = unconst((&s[4]), char *); + + mantissa_bits = strtoull(&s[4], &endptr, 16); + if ( *endptr == ')' ) + { + mantissa_bits = mantissa_bits & 0xfffffffffffff; + if( mantissa_bits ) + { + n.mantissal = mantissa_bits & 0xffffffff; + n.mantissah = (mantissa_bits >> 32) & 0xfffff; + } + if( sret ) + { + *sret = endptr+1; + } + return *(double *)(&n); + } + + /* The subject sequence didn't match NAN(), so match + only NAN. */ + } + + if( sret ) + { + *sret = unconst((&s[3]), char *); + } + return *(double *)(&n); + } + + /* Handle ordinary numbers. */ while ((*s >= '0') && (*s <= '9')) { flags |= 1; Index: djgpp/src/libc/ansi/stdlib/strtod.txh =================================================================== RCS file: /cvs/djgpp/djgpp/src/libc/ansi/stdlib/strtod.txh,v retrieving revision 1.3 diff -p -u -r1.3 strtod.txh --- djgpp/src/libc/ansi/stdlib/strtod.txh 29 Jan 2003 12:30:21 -0000 1.3 +++ djgpp/src/libc/ansi/stdlib/strtod.txh 20 Apr 2003 17:47:40 -0000 @@ -11,13 +11,38 @@ double strtod(const char *s, char **endp @subheading Description This function converts as many characters of @var{s} as look like a -floating point number into that number. If @var{endp} is not a null -pointer, @code{*endp} is set to point to the first unconverted +floating point number into that number. It also recognises +(case-insensitively) ``Inf'', ``Infinity'', ``NaN'' and +``NaN(@var{optional hex-number})''. If @var{endp} is not a null +pointer, @code{*@var{endp}} is set to point to the first unconverted character. @subheading Return Value -The value the represented by @var{s}. +The value represented by @var{s}. 
+ +If @var{s} is ``Inf'' or ``Infinity'', with any variations of +case and optionally prefixed with ``+'' or ``-'', the return value is +@code{INFINITY} (if no prefix or a ``+'' prefix) or @code{-INFINITY} +(if the prefix is ``-''). + +If @var{s} is ``NaN'' or ``NaN()'', with any variations of case +and optionally prefixed with ``+'' or ``-'', the return value is +@code{(double)NAN}. If the prefix is ``-'' the sign bit in the NaN +will be set to 1. + +If @var{s} is ``NaN(@var{hex-number})'', with any variations of +case and optionally prefixed with ``+'' or ``-'', the return value is +a NaN with the mantissa bits set to +@code{@var{hex-number}&0xfffffffffffff} (the mantissa for doubles +consists of 52 bits). Use at most 16 hexadecimal digits in +@var{hex-number} or the internal conversion will overflow, which +results in a mantissa of 0xfffffffffffff. If +@code{@var{hex-number}&0xfffffffffffff} is 0 (which won't work as a +representation of a NaN) @code{(double)NAN} will be returned. If the +prefix is ``-'' the sign bit in the NaN will be set to 1. Testing +shows that SNaNs might be converted into QNaNs (most significant bit +will be set in the mantissa). If a number represented by @var{s} doesn't fit into the range of values representable by the type @code{double}, the function returns either @@ -25,6 +50,9 @@ representable by the type @code{double}, @code{+HUGE_VAL}, and sets @code{errno} to @code{ERANGE}. @subheading Portability + +@port-note ansi-c99 Support for ``Inf'', ``Infinity'', ``NaN'' and +``NaN(@dots{})'' was standardised in ANSI C99. 
@portability ansi, posix Index: djgpp/src/libc/ansi/stdlib/strtold.c =================================================================== RCS file: /cvs/djgpp/djgpp/src/libc/ansi/stdlib/strtold.c,v retrieving revision 1.4 diff -p -u -r1.4 strtold.c --- djgpp/src/libc/ansi/stdlib/strtold.c 6 Dec 2002 09:32:23 -0000 1.4 +++ djgpp/src/libc/ansi/stdlib/strtold.c 20 Apr 2003 17:47:40 -0000 @@ -1,10 +1,14 @@ +/* Copyright (C) 2003 DJ Delorie, see COPYING.DJ for details */ /* Copyright (C) 2002 DJ Delorie, see COPYING.DJ for details */ /* Copyright (C) 1999 DJ Delorie, see COPYING.DJ for details */ /* Copyright (C) 1994 DJ Delorie, see COPYING.DJ for details */ #include #include #include +#include +#include #include +#include static long double powten[] = { @@ -38,6 +42,74 @@ strtold(const char *s, char **sret) s++; } + /* Handle INF and INFINITY. */ + if ( ! strnicmp( "INF", s, 3 ) ) + { + if( sret ) + { + if ( ! strnicmp( "INITY", &s[3], 5 ) ) + { + *sret = unconst((&s[8]), char *); + } + else + { + *sret = unconst((&s[3]), char *); + } + } + + if( 0 <= sign ) + { + return INFINITY; + } + else + { + return -INFINITY; + } + } + + /* Handle NAN and NAN(). */ + if ( ! strnicmp( "NAN", s, 3 ) ) + { + long double ld = NAN; + long_double_t n = *(long_double_t *)(&ld); + + if ( sign < 0 ) + { + n.sign = 1; + } + + if( s[3] == '(' ) + { + unsigned long long mantissa_bits = 0; + char *endptr = unconst((&s[4]), char *); + + mantissa_bits = strtoull(&s[4], &endptr, 16); + if ( *endptr == ')' ) + { + if( mantissa_bits ) + { + n.mantissal = mantissa_bits & 0xffffffff; + n.mantissah = (mantissa_bits >> 32) & 0xffffffff; + } + if( sret ) + { + *sret = endptr+1; + } + return *(long double *)(&n); + } + + /* The subject sequence didn't match NAN(), so match + only NAN. */ + } + + if( sret ) + { + *sret = unconst((&s[3]), char *); + } + return *(long double *)(&n);; + } + + /* Handle ordinary numbers. 
*/ while ((*s >= '0') && (*s <= '9')) { flags |= 1; Index: djgpp/src/libc/ansi/stdlib/strtold.txh =================================================================== RCS file: /cvs/djgpp/djgpp/src/libc/ansi/stdlib/strtold.txh,v retrieving revision 1.4 diff -p -u -r1.4 strtold.txh --- djgpp/src/libc/ansi/stdlib/strtold.txh 29 Jan 2003 12:30:21 -0000 1.4 +++ djgpp/src/libc/ansi/stdlib/strtold.txh 20 Apr 2003 17:47:40 -0000 @@ -46,14 +46,43 @@ long double strtold(const char *s, char @subheading Description This function converts as many characters of @var{s} that look like a -floating point number into one, and sets @var{*endp} to point to the -first unused character. +floating point number into that number. It also recognises +(case-insensitively) ``Inf'', ``Infinity'', ``NaN'' and +``NaN(@var{optional hex-number})''. If @var{endp} is not a null +pointer, @code{*@var{endp}} is set to point to the first unconverted +character. @subheading Return Value -The value the string represented. +The value represented by @var{s}. + +If @var{s} is ``Inf'' or ``Infinity'', with any variations of case and +optionally prefixed with ``+'' or ``-'', the return value is +@code{INFINITY} (if no prefix or a ``+'' prefix) or @code{-INFINITY} +(if the prefix is ``-''). + +If @var{s} is ``NaN'' or ``NaN()'', with any variations of case and +optionally prefixed with ``+'' or ``-'', the return value is +@code{(long double)NAN}. If the prefix is ``-'' the sign bit in the +NaN will be set to 1. + +If @var{s} is ``NaN(@var{hex-number})'', with any variations of case +and optionally prefixed with ``+'' or ``-'', the return value is a NaN +with the mantissa bits set to +@code{@var{hex-number}&0xffffffffffffffff} (the mantissa for long +doubles consists of 64 bits). Use at most 16 hexadecimal digits in +@var{hex-number} or the internal conversion will overflow, which +results in a mantissa of 0xffffffffffffffff. 
If +@code{@var{hex-number}&0xffffffffffffffff} is 0 (which won't work as a +representation of a NaN) @code{(long double)NAN} will be returned. If +the prefix is ``-'' the sign bit in the NaN will be set to 1. Testing +shows that SNaNs might be converted into QNaNs (the next most +significant bit will be set in the mantissa). @subheading Portability + +@port-note ansi-c99 Support for ``Inf'', ``Infinity'', ``NaN'' and +``NaN(@dots{})'' was standardised in ANSI C99. @portability !ansi-c89, ansi-c99, !posix-1003.2-1992, posix-1003.1-2001 Index: djgpp/src/libc/c99/stdlib/strtof.c =================================================================== RCS file: /cvs/djgpp/djgpp/src/libc/c99/stdlib/strtof.c,v retrieving revision 1.2 diff -p -u -r1.2 strtof.c --- djgpp/src/libc/c99/stdlib/strtof.c 23 Jan 2003 19:53:02 -0000 1.2 +++ djgpp/src/libc/c99/stdlib/strtof.c 20 Apr 2003 17:47:40 -0000 @@ -5,12 +5,15 @@ /* Copyright (C) 1997 DJ Delorie, see COPYING.DJ for details */ /* Copyright (C) 1996 DJ Delorie, see COPYING.DJ for details */ /* Copyright (C) 1994 DJ Delorie, see COPYING.DJ for details */ +#include #include #include #include #include #include +#include #include +#include float strtof(const char *s, char **sret) @@ -43,6 +46,74 @@ strtof(const char *s, char **sret) s++; } + /* Handle INF and INFINITY. */ + if ( ! strnicmp( "INF", s, 3 ) ) + { + if( sret ) + { + if ( ! strnicmp( "INITY", &s[3], 5 ) ) + { + *sret = unconst((&s[8]), char *); + } + else + { + *sret = unconst((&s[3]), char *); + } + } + + if( 0 <= sign ) + { + return INFINITY; + } + else + { + return -INFINITY; + } + } + + /* Handle NAN and NAN(). */ + if( ! 
strnicmp( "NAN", s, 3 ) ) + { + float f = NAN; + float_t n = *(float_t *)(&f); + + if( sign < 0 ) + { + n.sign = 1; + } + + if( s[3] == '(' ) + { + unsigned long mantissa_bits = 0; + char *endptr = unconst((&s[4]), char *); + + mantissa_bits = strtoul(&s[4], &endptr, 16); + if( *endptr == ')' ) + { + mantissa_bits = mantissa_bits & 0x7fffff; + if( mantissa_bits ) + { + n.mantissa = mantissa_bits; + } + if( sret ) + { + *sret = endptr+1; + } + return *(float *)(&n); + } + + /* The subject sequence didn't match NAN(), so match + only NAN. */ + } + + if( sret ) + { + *sret = unconst((&s[3]), char *); + } + return *(float *)(&n); + } + + /* Handle ordinary numbers. */ while ((*s >= '0') && (*s <= '9')) { flags |= 1; Index: djgpp/src/libc/c99/stdlib/strtof.txh =================================================================== RCS file: /cvs/djgpp/djgpp/src/libc/c99/stdlib/strtof.txh,v retrieving revision 1.4 diff -p -u -r1.4 strtof.txh --- djgpp/src/libc/c99/stdlib/strtof.txh 29 Jan 2003 12:34:24 -0000 1.4 +++ djgpp/src/libc/c99/stdlib/strtof.txh 20 Apr 2003 17:47:40 -0000 @@ -11,13 +11,37 @@ float strtof(const char *s, char **endp) @subheading Description This function converts as many characters of @var{s} as look like a -floating point number into that number. If @var{endp} is not a null -pointer, @code{*endp} is set to point to the first unconverted +floating point number into that number. It also recognises +(case-insensitively) ``Inf'', ``Infinity'', ``NaN'' and +``NaN(@var{optional hex-number})''. If @var{endp} is not a null +pointer, @code{*@var{endp}} is set to point to the first unconverted character. @subheading Return Value -The value the represented by @var{s}. +The value represented by @var{s}. + +If @var{s} is ``Inf'' or ``Infinity'', with any variations of case and +optionally prefixed with ``+'' or ``-'', the return value is +@code{INFINITY} (if no prefix or a ``+'' prefix) or @code{-INFINITY} +(if the prefix is ``-''). 
+ +If @var{s} is ``NaN'' or ``NaN()'', with any variations of case and +optionally prefixed with ``+'' or ``-'', the return value is +@code{NAN}. If the prefix is ``-'' the sign bit in the NaN will be +set to 1. + +If @var{s} is ``NaN(@var{hex-number})'', with any variations of case +and optionally prefixed with ``+'' or ``-'', the return value is a NaN +with the mantissa bits set to @code{@var{hex-number}&0x7fffff} (the +mantissa for floats consists of 23 bits). Use at most 8 hexadecimal +digits in @var{hex-number} or the internal conversion will overflow, +which results in a mantissa of 0x7fffff. If +@code{@var{hex-number}&0x7fffff} is 0 (which won't work as a +representation of a NaN) @code{NAN} will be returned. If the prefix +is ``-'' the sign bit in the NaN will be set to 1. Testing shows that +SNaNs might be converted into QNaNs (most significant bit will be set +in the mantissa). If a number represented by @var{s} doesn't fit into the range of values representable by the type @code{float}, the function returns either @@ -25,6 +49,9 @@ representable by the type @code{float}, @code{+HUGE_VALF}, and sets @code{errno} to @code{ERANGE}. @subheading Portability + +@port-note ansi-c99 Support for ``Inf'', ``Infinity'', ``NaN'' and +``NaN(@dots{})'' was standardised in ANSI C99. 
@portability !ansi-c89, ansi-c99, !posix-1003.2-1992, posix-1003.1-2001 Index: djgpp/tests/libc/ansi/stdlib/strtod.c =================================================================== RCS file: /cvs/djgpp/djgpp/tests/libc/ansi/stdlib/strtod.c,v retrieving revision 1.1 diff -p -u -r1.1 strtod.c --- djgpp/tests/libc/ansi/stdlib/strtod.c 1 Jan 1998 21:45:46 -0000 1.1 +++ djgpp/tests/libc/ansi/stdlib/strtod.c 20 Apr 2003 17:47:41 -0000 @@ -9,6 +9,16 @@ static const char *testnum[] = { "1e6000000000", /* overflow */ "1e400", /* ditto */ "1e-400", /* underflow */ + "InF", /* infinity */ + "-inf", /* infinity */ + "infinity", /* infinity */ + "-inFinitY", /* infinity */ + "nAn", /* nan */ + "-nan", /* nan */ + "Nan()", /* nan */ + "nan(0)", /* nan */ + "Nan(1)", /* nan */ + "-NaN(0xfffff)", /* nan */ 0 }; Index: djgpp/tests/libc/c99/stdlib/t-strtof.c =================================================================== RCS file: /cvs/djgpp/djgpp/tests/libc/c99/stdlib/t-strtof.c,v retrieving revision 1.2 diff -p -u -r1.2 t-strtof.c --- djgpp/tests/libc/c99/stdlib/t-strtof.c 23 Jan 2003 19:53:57 -0000 1.2 +++ djgpp/tests/libc/c99/stdlib/t-strtof.c 20 Apr 2003 17:47:41 -0000 @@ -51,6 +51,68 @@ static const test3_t tests3[] = { static const size_t n_tests3 = sizeof(tests3) / sizeof(tests3[0]); +typedef struct { + const char * const str; /* String to run strtof() on. */ + const int diff; /* For endptr tests. How many characters from string start + endptr should be offset. */ +} test4_t; + +static const test4_t tests4[] = { + { "inF", 3 }, + { "-INf", 4 }, + { "infi", 3 }, + { "-infi", 4 }, + { "infinit", 3 }, + { "-infinit", 4 }, + { "INfINITY", 8 }, + { "-InfInIty", 9 }, + { "infinity0", 8 }, + { "-infinity5", 9 }, + { "infinity-1", 8 }, + { "-infinity-6", 9 }, +}; + +static const size_t n_tests4 = sizeof(tests4) / sizeof(tests4[0]); + +typedef struct { + const char * const str; /* String to run strtof() on. */ + const int diff; /* For endptr tests. 
How many characters from string start + endptr should be offset. */ + const int sign; /* Sign bit. */ + const int mantissa; /* What mantissa should be set to after + conversion. 0 -> don't care as long as it's + non-zero. */ +} test5_t; + +static const test5_t tests5[] = { + { "nAn", 3, 0, 0 }, + { "-nAn", 4, 1, 0 }, + { "nanny", 3, 0, 0 }, + { "-nanny", 4, 1, 0 }, + { "NAN()", 5, 0, 0 }, + { "NAN( )", 3, 0, 0 }, + { "-Nan()", 6, 1, 0 }, + { "nAN(0)", 6, 0, 0 }, + { "-nan(0)", 7, 1, 0 }, + { "nan(0x401234)", 13, 0, 0x401234 }, /* QNaN */ + { "-nan(0x400088)", 14, 1, 0x400088 }, /* QNaN */ + { "nan(0x1234)", 11, 0, 0x1234 }, /* SNaN -> QNaN? */ + { "-nan(0x88)", 10, 1, 0x88 }, /* SNaN -> QNaN? */ + { "-nan(0xaa7d7aa74)", 17, 1, 0x7fffff }, /* Overflow. */ + { "nan(0x12345678123456781)", 24, 0, 0x7fffff }, /* Overflow. */ + { "-nan(0x12345678123456781)", 25, 1, 0x7fffff }, /* Overflow. */ + { "naN(something)", 3, 0, 0 }, + { "-nAn(smurf)", 4, 1, 0 }, + { "-nan(-nan)", 4, 1, 0 }, + { "nan(nan(nan()))", 3, 0, 0 }, + { "NaN(0x1234oops)", 3, 0, 0 }, + { "nan()()", 5, 0, 0 }, + { "NAN()nan", 5, 0, 0 }, +}; + +static const size_t n_tests5 = sizeof(tests5) / sizeof(tests5[0]); + + static void inline result (const size_t n, const float f_in, const float f_out) { @@ -106,6 +168,64 @@ main (void) puts("No - OK"); } } - + + puts("Infinity tests:"); + for (i = 0; i < n_tests4; i++) { + char *endptr; + float_t float_bits; + + f_res = strtof(tests4[i].str, &endptr); + + printf("strtof(\"%s\", &endptr) -> %f, %ld - ", tests4[i].str, + f_res, endptr-tests4[i].str); + + /* Need to do the Inf detection ourselves. 
*/ + float_bits = *(float_t *)(&f_res); + if (float_bits.exponent != 0xff) { + puts("exponent != 0xff - FAIL"); + } else if (float_bits.mantissa != 0) { + puts("mantissa != 0 - FAIL"); + } else if ( (float_bits.sign && 0 < f_res ) || + (!float_bits.sign && f_res < 0) ) { + puts("Wrong sign - FAIL"); + } else if ( endptr-tests4[i].str != tests4[i].diff) { + printf("endptr-(start_of_string) == %ld != %d - FAIL\n", + endptr-tests4[i].str, tests4[i].diff); + } else { + puts("OK"); + } + } + + puts("Nan tests:"); + for (i = 0; i < n_tests5; i++) { + char *endptr; + float_t float_bits; + + f_res = strtof(tests5[i].str, &endptr); + + printf("strtof(\"%s\", &endptr) -> %f, %ld - ", tests5[i].str, + f_res, endptr-tests5[i].str); + + /* Need to to the NaN detection ourselves. */ + float_bits = *(float_t *)(&f_res); + if (float_bits.exponent != 0xff ) { + puts("exponent != 0xff - FAIL"); + } else if (float_bits.mantissa == 0) { + puts("mantissa == 0 - FAIL"); + } else if (tests5[i].sign != float_bits.sign) { + printf("sign == %d != %d - FAIL\n", float_bits.sign, + tests5[i].sign); + } else if ( endptr-tests5[i].str != tests5[i].diff) { + printf("endptr-(start_of_string) == %ld != %d - FAIL\n", + endptr-tests5[i].str, tests5[i].diff); + } else if (tests5[i].mantissa && + tests5[i].mantissa != float_bits.mantissa) { + printf("(note: mantissa == 0x%x != 0x%x) - OK\n", + float_bits.mantissa, tests5[i].mantissa); + } else { + puts("OK"); + } + } + return(EXIT_SUCCESS); }