Mail Archives: djgpp-workers/1999/04/26/12:47:42
Any ideas?
------- Start of forwarded message -------
From: David Anderson <dma AT hpesdma DOT fc DOT hp DOT com>
Subject: Performance Observation
To: dj AT delorie DOT com
Date: Sun, 25 Apr 1999 17:09:29 MDT
Hello,
I really like djgpp. Especially the snappy performance.
And then I upgraded...
The following is an example that uses my 1k complex fft
CPU benchmark-type program:
C:\Dave\fft\v1>gcc -O3 -ffast-math -o fft.exe fft.c -lm
C:\Dave\fft\v1>fft.exe
average fft time = 0.001456 (seconds)
C:\Dave\fft\v1>strip fft.exe
C:\Dave\fft\v1>gcc --version
2.7.2.1
C:\Dave\fft\v1>dir fft.exe
Volume in drive C is DAVID
Volume Serial Number is 232D-9D8D
Directory of C:\Dave\fft\v1
FFT EXE 37,376 04-25-99 4:54p fft.exe
1 file(s) 37,376 bytes
C:\Dave\fft\v1>
Small, fast (faster than any other compiler on any other
operating system given the same hardware). I am running on an AMD
K6II @ 350 Mhz (by the way). My program uses no real memory to speak
of (should fit in a CPU's cache - hence the handy CPU benchmark status)
And then using the latest & greatest:
C:\Dave\fft\v2>gcc -O3 -ffast-math -o fft.exe fft.c -lm
fft.c: In function `main':
fft.c:80: warning: return type of `main' is not `int'
C:\Dave\fft\v2>fft.exe
average fft time = 0.001797 (seconds)
C:\Dave\fft\v2>strip fft.exe
C:\Dave\fft\v2>gcc --version
2.81
C:\Dave\fft\v2>dir fft.exe
Volume in drive C is DAVID
Volume Serial Number is 232D-9D8D
Directory of C:\Dave\fft\v2
FFT EXE 49,152 04-25-99 5:01p fft.exe
1 file(s) 49,152 bytes
More than 20% slower! Almost 30% larger!
So what's up?
- -Dave
==================== fft.c ====================
/* start of fft.c */
/* fft.c
* This program was written to perform a complex fft on a 1024 point
* data set.
*/
#include <math.h>
#include <stdio.h>
#include <time.h>
#ifndef M_PI
# define M_PI 3.141592653589793
#endif
double co[1024],si[1024];
unsigned int rev[1024];
double a_real[1024],a_imag[1024];
double src_r[1024],src_i[1024];
void fft(order,s_r,s_i,a_real,a_imag)
unsigned int order;
double *s_r,*s_i,*a_real,*a_imag;
{
unsigned int i,j;
unsigned int offset,mult;
unsigned int raw_mask,mask;
double t_real,t_imag;
double *p_r,*p_i; /* pointers used in accessing real, imag */
/* this section bit reverses the input data */
p_r=s_r;
p_i=s_i;
for (i=0;i<(1<<order);i++)
{
a_real[rev[i]]=*p_r++;
a_imag[rev[i]]=*p_i++;
}
/* this section does the fft algorithm */
raw_mask=0;
for (i=0;i<(order-1);i++)
raw_mask |= 1<<i;
for (i=0;i<order;i++)
{
offset=1<<i;
mask=raw_mask >> (order-i-1);
mult= 1 << (order-i-1);
for (j=0;j<(1<<order);j++)
{
if ((j&offset)==0)
{
t_real = a_real[j+offset] * co[mult * (j & mask)] -
a_imag[j+offset] * si[mult * (j & mask)];
t_imag = a_real[j+offset] * si[mult * (j & mask)] +
a_imag[j+offset] * co[mult * (j & mask)];
a_real[j+offset] = a_real[j] - t_real;
a_imag[j+offset] = a_imag[j] - t_imag;
a_real[j] += t_real;
a_imag[j] += t_imag;
}
}
}
}
unsigned int reverse(index,order)
unsigned int index,order;
{
/* this routine performs a bit reversal on the address */
unsigned int i,result;
result=0;
for (i=0;i<order;i++)
{
result <<= 1;
result |= (index & 0x0001);
index >>= 1;
}
return result;
}
#define loopCount 10000
void main()
{
unsigned int i,order;
#ifdef DOS
time_t
#else
clock_t
#endif
start, finish;
/*
* This loop fills up the sine and cosine arrays. The bit
reversal
* array is also filled up. The source array is created, in
normal
* order. The fft subroutine is expected to bit reverse the data
* when it is copied from src to a[].
*/
order=10;
for (i=0;i < (1<<order);i++)
{
src_i[i]=0.0;
if (i < 512)
src_r[i]=1.0 - (double)i/256.0;
else
src_r[i]= -3.0 + (double)i/256.0;
/* a_real[reverse(i,order)]=sin(2.0 * M_PI * (double)i/1024.0);
*/
rev[i]=reverse(i,order);
co[i]=cos(M_PI*(double)i*2.0/1024.0);
si[i]= sin(M_PI*(double)i*2.0/1024.0);
}
#ifdef DOS
start = time( NULL );
#else
start = clock();
#endif
for (i=0;i<loopCount;i++)
fft(order,src_r,src_i,a_real,a_imag);
#ifdef DOS
finish = time( NULL );
printf("average fft time = %f (seconds)\n",
( (float)finish - (float)start ) / ( (float)loopCount ) );
#else
finish=clock();
printf("average fft time = %f (seconds)\n",
( (float)finish - (float)start ) /
( (float)loopCount * (float)CLOCKS_PER_SEC ) );
#endif
/*
* Now we print out the result, which will be in the proper order.
*/
#ifdef DEBUG
for (i=0;i<(1<<order);i++)
{
if ((a_real[i] == 0.0) && (a_imag[i] == 0.0))
printf ("%d , %lg , %lg\n",i,(double)0.0,(double)0.0);
else
printf ("%d , %lg , %lg\n",i,
sqrt(a_real[i]*a_real[i] + a_imag[i]*a_imag[i]),
atan2(a_imag[i],a_real[i]));
}
#endif
}
- --
------- End of forwarded message -------
- Raw text -