Message-Id: <200003161729.MAA18808@delorie.com> From: "Dieter Buerssner" To: djgpp-workers AT delorie DOT com Date: Thu, 16 Mar 2000 18:28:51 +0100 MIME-Version: 1.0 Content-type: text/plain; charset=US-ASCII Content-transfer-encoding: 7BIT Subject: Re: Unnormals??? References: In-reply-to: X-mailer: Pegasus Mail for Win32 (v3.12b) Reply-To: djgpp-workers AT delorie DOT com On 16 Mar 00, Hans-Bernhard Broeker wrote: > And just to bring that into the discussion, again: *printf() is not > the only place we'ld have to have to do something about, here. > "[-]nan(optional_string)" is supposed to be usable by other functions, > too, including *scanf(), strtod and the new nan*() functions. I have written code quite some time ago, that does this. It is rather long (about 100k for strtod[l] and support functions for scanf printf. It should guarantee accurate bin <-> dec conversion. (errors of 0.50x ulps in difficult cases are possible) If somebody wants to look at this, I can send the whole source by email. If you look at the piece of code, that I append, you may judge, whether this source code could be acceptable for the djgpp project. It is not to well commented. At the heart of it is the function __strtold_err, which is similar to strtold, but also returns an error- term (The difference between the long double returned, and what would be returned by higher precision math.) This term is needed to avoid subtle errors, when rounding long double to double, which are explained at the bottom. I haven't tested it with newer versions of gcc yet. Your comments are welcome, Dieter /* Copyright Dieter Buerssner */ #include #include #include #ifdef __GNUC__ #define unconst(__v, __t) __extension__ ({union { const __t __cp; __t __p; } __q; __q.__cp = __v; __q.__p;}) #else #define unconst(__v, __t) ((__t)__v) #endif long double __strtold_err(const char *, char **, long double *); long double __l10expl_c_err(long double, long double, int, long double *); long double __l10expl(long double, int); /* 10.0L * MAXITENTH + 9.0L will yield an exact representable number */ static const long double MAXITENTH = 1.844674407370955160e18L; static const long double tenl = 10.0L; static const long double pten[] = { 1e0L, 1e1L, 1e2L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L }; /* result = r*10^ne + f*10^(ne-nf) */ static long double scale_rf(long double r, long double f, int ne, int nf, long double *eterm) { long double sum; if (ne == 0 && nf == 0) { *eterm = 0.0L; return r; } if (ne) { if (nf) { if ((nf-=ne)!=0) { if (nf > 4800) { /* possible underflow of f*10^(ne-nf); calculate (r+f*10^(-nf))*10^ne */ nf += ne; return __l10expl_c_err(r, __l10expl(f, -nf), ne, eterm); } else { /* this way some difficult rounding cases for integer values will be caught */ r = __l10expl_c_err(r, 0.0L, ne, eterm); f = __l10expl(f, -nf) + *eterm; } } else { /* ne == nf */ r = __l10expl_c_err(r, 0.0L, ne, eterm); f += *eterm; } } else /* nf == 0 */ return __l10expl_c_err(r, 0.0L, ne, eterm); } else { /* ne == 0 */ /* f = f*10^-nf */ f = __l10expl(f, -nf); } /* calulate error */ sum = r+f; *eterm = (r-sum)+f; return sum; } static long double conv_numbuf(char *np, int sign, int ne, long double *eterm) { long double r, f; int c, nr, nf; long ipart; if (*np == '\0') { r = 0.0L; *eterm = 0.0L; /* Cannot return here because gcc 2.73 optimizes a negative zero away. */ goto done; } f = 0.0L; nf = 0; /* first 9 digits */ nr = 0; ipart = 0; do { if ((c = *np++) == '\0') { r=ipart; goto scale_result; } ipart = 10*ipart + (c - '0'); } while (++nr < 9); r=ipart; /* second 9 digits */ ipart=0; do { if ((c = *np++) == '\0') { r=r*pten[nr-9]+ipart; goto scale_result; } ipart = 10*ipart + (c - '0'); } while (++nr < 18); /* merge first and second 9 digits */ r=r*pten[9]+ipart; /* do 19th digit */ if ((c = *np++) == '\0') goto scale_result; r = tenl*r + (c - '0'); nr++; if ((c = *np) == '\0') goto scale_result; /* 20 digits might fit in integer */ if (r <= MAXITENTH) { r = tenl*r + (c - '0'); nr++; np++; } /* first 9 digits of fraction */ ipart=0; do { if ((c = *np++) == '\0') { f=ipart; goto scale_result; } ipart = 10*ipart + (c - '0'); } while (++nf < 9); f=ipart; /* second 9 digits of fraction */ ipart=0; do { if ((c = *np++) == '\0') break; ipart = 10*ipart + (c - '0'); } while (++nf < 18); /* merge digits */ f=f*pten[nf-9]+ipart; scale_result: nr--; /* one digit means exponent of zero */ ne-=nr; /* result = r*10^ne + f*10^(ne-nf) */ if (ne == 0 && nf == 0) *eterm = 0.0L; else r = scale_rf(r,f,ne,nf,eterm); done: if (sign) { r = -r; *eterm = -(*eterm); } return r; } /* [#3] The expected form of the subject sequence is an optional plus or minus sign, then one of the following: [...] - one of INF or INFINITY, ignoring case - one of NAN or NAN(n-char-sequence-opt), ignoring case in the NAN part, where: n-char-sequence: digit nondigit n-char-sequence digit n-char-sequence nondigit */ #define ld(x) ((long double)(*(long double *)(x))) static const unsigned short infshort[] = {0,0,0,0,0x7fff,0}; #define INFINITY ld(infshort) static const unsigned short nanshort[] = {0,0,0,1,0x7fff,0}; #define NOTANUMBER ld(nanshort) static const char *spccmp(const char *ls, const char *s) { char c; do { c = *s++; if (ls[0] != c && ls[1] != c) return NULL; ls+=2; } while (*ls != '\0'); return s; } static long double special(const char *s, char **endptr, int sign) { long double r; const char *ep, *ep2; int c; ep = spccmp("IiNnFf", s); if (ep != NULL) { ep2 = spccmp("IiNnIiTtYy", ep); if (ep2 != NULL) ep = ep2; if (endptr) *endptr = unconst(ep, char *); r = INFINITY; if (sign) r = -r; return r; } ep = spccmp("NnAaNn", s); if (ep != NULL) { ep2 = ep; if (*ep2 == '(') { while ((c = *++ep2) != '\0') if (!isalnum(c) && c != '_') break; if (c == ')') ep = ++ep2; } if (endptr) *endptr = unconst(ep, char *); r = NOTANUMBER; if (sign) r = -r; return r; } return 0.0L; } #define MAXDIGS 38 /* Don't change! */ long double __strtold_err(const char *s, char **endptr, long double *eterm) { int ne, c, digseen, sign, e, esign; char numbuf[MAXDIGS+1]; char *np, *ep; const char *os; /* default to beginning of string (in case of error) */ if (endptr) *endptr = unconst(s, char *); /* strip leading spaces */ while(isspace(*s)) s++; sign = 0; c = *s; if (c == '+') c = *++s; else if (c == '-') { sign=1; c = *++s; } digseen = 0; np = numbuf; ep = np + MAXDIGS; /* end of numbuf */ ne = -1; if (isdigit(c)) { digseen = 1; /* strip leading zeroes */ while (c == '0') c = *++s; while (isdigit(c)) { if (np >= ep) { /* got enough precision, discard all following digits */ do { ne++; c = *++s; } while (isdigit(c)); break; } *np++ = c; c = *++s; ne++; } } if (c == '.') { c = *++s; if (isdigit(c)) { if (np == numbuf) { /* only fractional part: strip leading zeroes */ digseen = 1; while (c == '0') { c = *++s; ne--; } } while (isdigit(c)) { if (np >= ep) { /* got enough precision */ do c = *++s; while (isdigit(c)); break; } *np++ = c; c = *++s; } } } if (digseen == 0) /* Test for nan and inf */ return special(s,endptr,sign); /* strip trailing zeroes */ while (np > numbuf) if (*--np != '0') { ++np; break; } *np = '\0'; /* mark end of numbuf */ /* Test for zero result */ /* read exponent */ if ((c == 'e') || (c == 'E')) { os = s; esign=0; c = *++s; if (c == '+') c = *++s; else if (c == '-') { esign=1; c = *++s; } if (isdigit(c)) { e = (c - '0'); c = *++s; while (isdigit(c)) { e = 10*e + (c - '0'); c = *++s; } ne = esign ? ne - e : ne + e; } else s=os; /* there really was no exponent */ } if (endptr) *endptr = unconst(s, char *);; return conv_numbuf(numbuf, sign, ne, eterm); } /* strtod replacement, different source*/ /* Copyright Dieter Buerssner */ #include #include long double __strtold_err(const char *s, char **sret, long double *eterm); /* some code snipped */ /* TODO, think about rounding control */ double _strtod(const char *s, char **sret) { long double r, errterm; volatile unsigned short oldcw, dblcw; /* must be memory operands */ volatile double rd; /* don't optimize this away! */ r = __strtold_err(s,sret,&errterm); /* round r to double, avoid "double" (twice) rounding */ /* To show why this is tricky assume extended precision (long double) to have 3 (signifcand) decimal digits and double precision to have 2 decimal digits. The exact result shall be 17.47, so we have r=17.5 and errterm=-0.03; if we round 17.5 to double precision we get 18.0 which is wrong by 0.53 ulps. The following algorithm works as follows: round r to double rd = 18.0; add rounding error of previous operation to errterm; errterm = -0.53; now add using double precision! rd + (double)errterm = 17.0; (Addition using long double precision would of course have the same rounding problem because: (double)((long double)err + (long double)errterm) = (double)(17.5) = 18.0 It is important that the addition is really done in double precision. ) A fancier approach would be to use the inexact flag and set rounding mode to chop. */ /* * Intel x87 control word * * Interrupt Exceptions, set bits will _not_ be trapped * Invalid operation 0x0001 * Denormalized operand 0x0002 * Zero devide 0x0004 * Overflow 0x0008 * underflow 0x0010 * inexact 0x0020 * * Precision Control * 24 bits 0x0000 * 53 bits 0x0200 * 64 bits precision 0x0300 * * Rounding Control: * round nearest 0x0000 * down 0x0400 * up 0x0800 * chop 0x0c00 * * Infinity Control: (Only pre 80387, ignored later) * affine 0x1000 * project 0x0000 * */ rd = (double)r; /* round r to double (don't optimize!) */ /* do this only if rd is not subnormal */ if (((*(((unsigned short *)&rd)+3)) & 0x7ff0) != 0) { errterm += (r-(long double)rd); /* add error of last operation */ __asm__ volatile ("fnstcw %0" : "=m" (oldcw)); /* get x87 control word */ dblcw = (oldcw & 0xfcff) | 0x0200; /* precision 53 bits */ __asm__ volatile ("fldcw %0" :: "m" (dblcw)); rd += errterm; /* Do this in 53 bit precision! */ __asm__ volatile ("fldcw %0" :: "m" (oldcw)); /* restore x87 cw */ } return rd; }