delorie.com/archives/browse.cgi   search  
Mail Archives: djgpp-workers/2000/03/16/13:17:36

Message-Id: <200003161729.MAA18808@delorie.com>
From: "Dieter Buerssner" <buers AT gmx DOT de>
To: djgpp-workers AT delorie DOT com
Date: Thu, 16 Mar 2000 18:28:51 +0100
MIME-Version: 1.0
Subject: Re: Unnormals???
References: <Pine DOT SUN DOT 3 DOT 91 DOT 1000316114116 DOT 3117B-100000 AT is>
In-reply-to: <Pine.LNX.4.10.10003161442441.22148-100000@acp3bf>
X-mailer: Pegasus Mail for Win32 (v3.12b)
Reply-To: djgpp-workers AT delorie DOT com

On 16 Mar 00,  Hans-Bernhard Broeker wrote:

> And just to bring that into the discussion, again: *printf() is not
> the only place we'ld have to have to do something about, here.
> "[-]nan(optional_string)" is supposed to be usable by other functions,
> too, including *scanf(), strtod and the new nan*() functions.

I have written code quite some time ago, that does this.
It is rather long (about 100k for strtod[l] and support functions
for scanf printf. It should guarantee accurate bin <-> dec 
conversion. (errors of 0.50x ulps in difficult cases are possible)
If somebody wants to look at this, I can send the whole source by 
email. 

If you look at the piece of code, that I append, you may judge, 
whether this source code could be acceptable for the djgpp project. 
It is not to well commented. At the heart of it is the function 
__strtold_err, which is similar to strtold, but also returns an error-
term (The difference between the long double
returned, and what would be returned by higher precision math.)
This term is needed to avoid subtle errors, when rounding long double
to double, which are explained at the bottom.
I haven't tested it with newer versions of gcc yet. 

Your comments are welcome,
Dieter

/* Copyright Dieter Buerssner */
#include <stdio.h>
#include <stddef.h>
#include <ctype.h>

#ifdef __GNUC__
#define unconst(__v, __t) __extension__ ({union { const __t __cp; __t __p; } __q; __q.__cp = __v; __q.__p;})
#else
#define unconst(__v, __t) ((__t)__v)
#endif


long double __strtold_err(const char *, char **, long double *);
long double __l10expl_c_err(long double, long double, int, long double *);
long double __l10expl(long double, int);

/* 10.0L * MAXITENTH + 9.0L will yield an exact representable number */
static const long double MAXITENTH = 1.844674407370955160e18L;
static const long double tenl = 10.0L;
static const long double pten[] = {
  1e0L,  1e1L,  1e2L,  1e3L,  1e4L,  1e5L,  1e6L,  1e7L,  1e8L,  1e9L
};

/* result = r*10^ne + f*10^(ne-nf) */

static long double
scale_rf(long double r, long double f, int ne, int nf, long double *eterm)
{
  long double sum;
  if (ne == 0 && nf == 0)
  {
    *eterm = 0.0L;
    return r;
  }
  if (ne)
  {
    if (nf)
    {
      if ((nf-=ne)!=0)
      {

	if (nf > 4800)
	{
	  /* possible underflow of f*10^(ne-nf); calculate
	     (r+f*10^(-nf))*10^ne */
	  nf += ne;
	  return __l10expl_c_err(r, __l10expl(f, -nf), ne, eterm);

	}
	else
	{
	  /* this way some difficult rounding cases for
	     integer values will be caught */
	  r = __l10expl_c_err(r, 0.0L, ne, eterm);
          f = __l10expl(f, -nf) + *eterm;
	}
      }
      else
      {
	/* ne == nf */
	r = __l10expl_c_err(r, 0.0L, ne, eterm);
	f += *eterm;
      }
    }
    else
      /* nf == 0 */
      return __l10expl_c_err(r, 0.0L, ne, eterm);
  }
  else
  {
    /* ne == 0 */
    /* f = f*10^-nf */
    f = __l10expl(f, -nf);
  }
  /* calulate error */
  sum = r+f;
  *eterm = (r-sum)+f;
  return sum;
}


static long double
conv_numbuf(char *np, int sign, int ne, long double *eterm)
{
  long double r, f;
  int c, nr, nf;
  long ipart;

  if (*np == '\0')
  {
    r = 0.0L;
    *eterm = 0.0L;
    /* Cannot return here because gcc 2.73 optimizes a negative zero
       away. */
    goto done;
  }

  f = 0.0L;
  nf = 0;

  /* first 9 digits */
  nr = 0;
  ipart = 0;
  do
  {
    if ((c = *np++) == '\0')
    {
      r=ipart;
      goto scale_result;
    }
    ipart = 10*ipart + (c - '0');
  }
  while (++nr < 9);
  r=ipart;

  /* second 9 digits */
  ipart=0;
  do
  {
    if ((c = *np++) == '\0')
    {
      r=r*pten[nr-9]+ipart;
      goto scale_result;
    }
    ipart = 10*ipart + (c - '0');
  }
  while (++nr < 18);

  /* merge first and second 9 digits */
  r=r*pten[9]+ipart;

  /* do 19th digit */
  if ((c = *np++) == '\0')
    goto scale_result;
  r = tenl*r + (c - '0');
  nr++;

  if ((c = *np) == '\0')
    goto scale_result;
  /* 20 digits might fit in integer */
  if (r <= MAXITENTH)
  {
    r = tenl*r + (c - '0');
    nr++;
    np++;
  }

  /* first 9 digits of fraction */
  ipart=0;
  do
  {
    if ((c = *np++) == '\0')
    {
      f=ipart;
      goto scale_result;
    }
    ipart = 10*ipart + (c - '0');
  }
  while (++nf < 9);
  f=ipart;

  /* second 9 digits of fraction */
  ipart=0;
  do
  {
    if ((c = *np++) == '\0')
      break;
    ipart = 10*ipart + (c - '0');
  }
  while (++nf < 18);
  /* merge digits */
  f=f*pten[nf-9]+ipart;

scale_result:
  nr--; /* one digit means exponent of zero */
  ne-=nr;
  /* result = r*10^ne + f*10^(ne-nf) */
  if (ne == 0 && nf == 0)
    *eterm = 0.0L;
  else
    r = scale_rf(r,f,ne,nf,eterm);

done:
  if (sign)
  {
    r = -r;
    *eterm = -(*eterm);
  }
  return r;
}

/*
       [#3] The  expected  form  of  the  subject  sequence  is  an
       optional plus or minus sign, then one of the following:

          [...]

          - one of INF or INFINITY, ignoring case

          - one of NAN or NAN(n-char-sequence-opt),  ignoring  case
            in the NAN part, where:

                    n-char-sequence:
                            digit
                            nondigit
                            n-char-sequence digit
                            n-char-sequence nondigit
*/

#define ld(x) ((long double)(*(long double *)(x)))
static const unsigned short infshort[] = {0,0,0,0,0x7fff,0};
#define INFINITY ld(infshort)
static const unsigned short nanshort[] = {0,0,0,1,0x7fff,0};
#define NOTANUMBER ld(nanshort)

static const char *spccmp(const char *ls, const char *s)
{
  char c;
  do
  {
    c = *s++;
    if (ls[0] != c && ls[1] != c)
      return NULL;
    ls+=2;
  }
  while (*ls != '\0');
  return s;
}

static long double special(const char *s, char **endptr, int sign)
{
  long double r;
  const char *ep, *ep2;
  int c;
  ep = spccmp("IiNnFf", s);
  if (ep != NULL)
  {
    ep2 = spccmp("IiNnIiTtYy", ep);
    if (ep2 != NULL)
      ep = ep2;
    if (endptr)
      *endptr = unconst(ep, char *);
    r = INFINITY;
    if (sign)
      r = -r;
    return r;
  }
  ep = spccmp("NnAaNn", s);
  if (ep != NULL)
  {
    ep2 = ep;
    if (*ep2 == '(')
    {
      while ((c = *++ep2) != '\0')
        if (!isalnum(c) && c != '_')
          break;
      if (c == ')')
        ep = ++ep2;
    }
    if (endptr)
      *endptr = unconst(ep, char *);
    r = NOTANUMBER;
    if (sign)
      r = -r;
    return r;
  }
  return 0.0L;
}

#define MAXDIGS 38 /* Don't change! */
long double __strtold_err(const char *s, char **endptr, long double *eterm)
{
  int ne, c, digseen, sign, e, esign;
  char numbuf[MAXDIGS+1];
  char *np, *ep;
  const char *os;

  /* default to beginning of string (in case of error) */
  if (endptr)
    *endptr = unconst(s, char *);
  /* strip leading spaces */
  while(isspace(*s))
    s++;
  sign = 0;
  c = *s;
  if (c == '+')
    c = *++s;
  else if (c == '-')
  {
    sign=1;
    c = *++s;
  }
  digseen = 0;
  np = numbuf;
  ep = np + MAXDIGS; /* end of numbuf */
  ne = -1;
  if (isdigit(c))
  {
    digseen = 1;
    /* strip leading zeroes */
    while (c == '0')
      c = *++s;
    while (isdigit(c))
    {
      if (np >= ep)
      {
        /* got enough precision, discard all following digits */
        do
        {
          ne++;
          c = *++s;
        }
        while (isdigit(c));
        break;
      }
      *np++ = c;
      c = *++s;
      ne++;
    }
  }
  if (c == '.')
  {
    c = *++s;
    if (isdigit(c))
    {
      if (np == numbuf)
      {
        /* only fractional part: strip leading zeroes */
        digseen = 1;
        while (c == '0')
        {
          c = *++s;
          ne--;
        }
      }
      while (isdigit(c))
      {
        if (np >= ep)
        {
          /* got enough precision */
          do
            c = *++s;
          while (isdigit(c));
          break;
        }
        *np++ = c;
        c = *++s;
      }
    }
  }
  if (digseen == 0)
    /* Test for nan and inf */
    return special(s,endptr,sign);
  /* strip trailing zeroes */
  while (np > numbuf)
    if (*--np != '0')
    {
      ++np;
      break;
    }
  *np = '\0';  /* mark end of numbuf */
  /* Test for zero result */
  /* read exponent */
  if ((c == 'e') || (c == 'E'))
  {
    os = s;
    esign=0;
    c = *++s;
    if (c == '+')
      c = *++s;
    else if (c == '-')
    {
      esign=1;
      c = *++s;
    }
    if (isdigit(c))
    {
      e = (c - '0');
      c = *++s;
      while (isdigit(c))
      {
        e = 10*e + (c - '0');
        c = *++s;
      }
      ne = esign ? ne - e : ne + e;
    }
    else
      s=os; /* there really was no exponent */
  }
  if (endptr)
    *endptr = unconst(s, char *);;
  return conv_numbuf(numbuf, sign, ne, eterm);
}

/* strtod replacement, different source*/
/* Copyright Dieter Buerssner */
#include <stdlib.h>
#include <ctype.h>

long double __strtold_err(const char *s, char **sret, long double *eterm);

/* some code snipped */
/* TODO, think about rounding control */
double _strtod(const char *s, char **sret)
{
  long double r, errterm;
  volatile unsigned short oldcw, dblcw; /* must be memory operands */
  volatile double rd; /* don't optimize this away! */
  r = __strtold_err(s,sret,&errterm);
  /* round r to double, avoid "double" (twice) rounding */
  /* To show why this is tricky assume extended precision (long double)
     to have 3 (signifcand) decimal digits and double precision to have
     2 decimal digits.
     The exact result shall be 17.47, so we have
      r=17.5 and errterm=-0.03;
     if we round 17.5 to double precision we get 18.0 which is
     wrong by 0.53 ulps.
     The following algorithm works as follows:
       round r to double
	 rd = 18.0;
       add rounding error of previous operation to errterm;
	 errterm = -0.53;
       now add using double precision!
	 rd + (double)errterm = 17.0;
       (Addition using long double precision would of course
	have the same rounding problem because:
	    (double)((long double)err + (long double)errterm)
	  = (double)(17.5) = 18.0
	It is important that the addition is really done in
	double precision. )

     A fancier approach would be to use the inexact flag and
     set rounding mode to chop.
   */
/*
 * Intel x87 control word
 *
 * Interrupt Exceptions, set bits will _not_ be trapped
 *    	Invalid operation    0x0001
 *	Denormalized operand 0x0002
 *	Zero devide          0x0004
 *	Overflow             0x0008
 *	underflow            0x0010
 *	inexact              0x0020
 *
 * Precision Control
 *	24 bits              0x0000
 *	53 bits              0x0200
 *	64 bits precision    0x0300
 *
 * Rounding Control:
 *	round nearest        0x0000
 *	down                 0x0400
 *	up                   0x0800
 *	chop                 0x0c00
 *
 * Infinity Control:  (Only pre 80387, ignored later)
 *	affine               0x1000
 *	project              0x0000
 *
 */

  rd = (double)r; /* round r to double (don't optimize!) */
  /* do this only if rd is not subnormal */
  if (((*(((unsigned short *)&rd)+3)) & 0x7ff0) != 0)
  {
    errterm += (r-(long double)rd); /* add error of last operation */
    __asm__ volatile ("fnstcw %0" : "=m" (oldcw)); /* get x87 control word */
    dblcw = (oldcw & 0xfcff) | 0x0200;  /* precision 53 bits */
    __asm__ volatile ("fldcw %0" :: "m" (dblcw));
    rd += errterm;                      /* Do this in 53 bit precision! */
    __asm__ volatile ("fldcw %0" :: "m" (oldcw)); /* restore x87 cw */
  }
  return rd;
}


- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019