delorie.com/archives/browse.cgi   search  
Mail Archives: djgpp-workers/1999/04/26/12:47:42

Date: Mon, 26 Apr 1999 12:47:57 -0400
Message-Id: <199904261647.MAA06392@envy.delorie.com>
From: DJ Delorie <dj AT delorie DOT com>
To: djgpp-workers AT delorie DOT com
Subject: [dma AT hpesdma DOT fc DOT hp DOT com: Performance Observation]
Reply-To: djgpp-workers AT delorie DOT com

Any ideas?

------- Start of forwarded message -------
From: David Anderson <dma AT hpesdma DOT fc DOT hp DOT com>
Subject: Performance Observation
To: dj AT delorie DOT com
Date: Sun, 25 Apr 1999 17:09:29 MDT

Hello,

	I really like djgpp. Especially the snappy performance.
	And then I upgraded...

	The following is an example that uses my 1k complex fft
	CPU benchmark-type program:

C:\Dave\fft\v1>gcc -O3 -ffast-math -o fft.exe fft.c -lm

C:\Dave\fft\v1>fft.exe
average fft time = 0.001456 (seconds)

C:\Dave\fft\v1>strip fft.exe

C:\Dave\fft\v1>gcc --version
2.7.2.1

C:\Dave\fft\v1>dir fft.exe

 Volume in drive C is DAVID
 Volume Serial Number is 232D-9D8D
 Directory of C:\Dave\fft\v1

FFT      EXE        37,376  04-25-99  4:54p fft.exe
         1 file(s)         37,376 bytes

C:\Dave\fft\v1>

	Small, fast (faster than any other compiler on any other
operating system given the same hardware). I am running on an AMD 
K6II @ 350 Mhz (by the way). My program uses no real memory to speak
of (should fit in a CPU's cache - hence the handy CPU benchmark status)

	And then using the latest & greatest:

C:\Dave\fft\v2>gcc -O3 -ffast-math -o fft.exe fft.c -lm
fft.c: In function `main':
fft.c:80: warning: return type of `main' is not `int'

C:\Dave\fft\v2>fft.exe
average fft time = 0.001797 (seconds)

C:\Dave\fft\v2>strip fft.exe

C:\Dave\fft\v2>gcc --version
2.81

C:\Dave\fft\v2>dir fft.exe

 Volume in drive C is DAVID
 Volume Serial Number is 232D-9D8D
 Directory of C:\Dave\fft\v2

FFT      EXE        49,152  04-25-99  5:01p fft.exe
         1 file(s)         49,152 bytes


	More than 20% slower! Almost 30% larger!

	So what's up?

- -Dave

==================== fft.c ====================

/* start of fft.c */
/* fft.c
 * This program was written to perform a complex fft on a 1024 point
 * data set.
 */

#include <math.h>
#include <stdio.h>
#include <time.h>
#ifndef M_PI
#   define M_PI 3.141592653589793
#endif

double co[1024],si[1024];
unsigned int rev[1024];
   double a_real[1024],a_imag[1024];
   double src_r[1024],src_i[1024];

void fft(order,s_r,s_i,a_real,a_imag)
unsigned int order;
double *s_r,*s_i,*a_real,*a_imag;
{
   unsigned int i,j;
   unsigned int offset,mult;
   unsigned int raw_mask,mask;
   double t_real,t_imag;
   double *p_r,*p_i; /* pointers used in accessing real, imag */
   /* this section bit reverses the input data */
   p_r=s_r;
   p_i=s_i;
   for (i=0;i<(1<<order);i++)
      {
      a_real[rev[i]]=*p_r++;
      a_imag[rev[i]]=*p_i++;
      }
   /* this section does the fft algorithm */
   raw_mask=0;
   for (i=0;i<(order-1);i++)
      raw_mask |= 1<<i;
   for (i=0;i<order;i++)
      {
      offset=1<<i;
      mask=raw_mask >> (order-i-1);
      mult= 1 << (order-i-1);
      for (j=0;j<(1<<order);j++)
         {
         if ((j&offset)==0)
            {
            t_real = a_real[j+offset] * co[mult * (j & mask)] -
               a_imag[j+offset] * si[mult * (j & mask)];
            t_imag = a_real[j+offset] * si[mult * (j & mask)] +
               a_imag[j+offset] * co[mult * (j & mask)];
            a_real[j+offset] = a_real[j] - t_real;
            a_imag[j+offset] = a_imag[j] - t_imag;
            a_real[j] +=  t_real;
            a_imag[j] +=  t_imag;
            }
         }
      }
}

unsigned int reverse(index,order)
unsigned int index,order;
{
   /* this routine performs a bit reversal on the address */
   unsigned int i,result;
   result=0;
   for (i=0;i<order;i++)
      {
      result <<= 1;
      result |= (index & 0x0001);
      index >>= 1;
      }
   return result;
}

#define loopCount		10000

void main()
{
   unsigned int i,order;
#ifdef DOS
   time_t 
#else
   clock_t
#endif
   			start, finish;
   /*
    * This loop fills up the sine and cosine arrays.  The bit
reversal
    * array is also filled up.  The source array is created, in
normal
    * order.  The fft subroutine is expected to bit reverse the data
    * when it is copied from src to a[].
    */
   order=10;
   for (i=0;i < (1<<order);i++)
      {
      src_i[i]=0.0;
      if (i < 512)
         src_r[i]=1.0 - (double)i/256.0;
      else
         src_r[i]= -3.0 + (double)i/256.0;
/*      a_real[reverse(i,order)]=sin(2.0 * M_PI * (double)i/1024.0);
*/
      rev[i]=reverse(i,order);
      co[i]=cos(M_PI*(double)i*2.0/1024.0);
      si[i]= sin(M_PI*(double)i*2.0/1024.0);
      }
#ifdef DOS
	  start = time( NULL );
#else
	start = clock();
#endif
   for (i=0;i<loopCount;i++)
      fft(order,src_r,src_i,a_real,a_imag);
#ifdef DOS
	  finish = time( NULL );
   printf("average fft time = %f (seconds)\n", 
   		( (float)finish - (float)start ) / ( (float)loopCount )  );
#else
   finish=clock();
   printf("average fft time = %f (seconds)\n", 
   		( (float)finish - (float)start ) / 
		( (float)loopCount * (float)CLOCKS_PER_SEC ) );
#endif
   /*
    * Now we print out the result, which will be in the proper order.
    */
#ifdef DEBUG
   for (i=0;i<(1<<order);i++)
      {
      if ((a_real[i] == 0.0)   && (a_imag[i] == 0.0))
         printf ("%d , %lg , %lg\n",i,(double)0.0,(double)0.0);
      else
         printf ("%d , %lg , %lg\n",i,
            sqrt(a_real[i]*a_real[i] + a_imag[i]*a_imag[i]),
            atan2(a_imag[i],a_real[i]));
      }
#endif
}

- --
------- End of forwarded message -------

- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019