Mail Archives: djgpp/1992/05/28/20:38:01
I am still perplexed by the unexpectedly slow execution time of a two-
dimensional convolution program when compiled with Gnu C compared to
Microsoft C 5.1 compiled with the /Ox option and small (default) model.
A portion of the code has been extracted and converted into a benchmark
program with the hope that someone may be able to tell me how to decrease the
execution time. On a 386/33 machine with 8 Meg of memory the execution time
for Gnu C is about 54 seconds versus 38 seconds for Microsoft C. The Gnu C
program was compiled as
gcc -O2 -finline-functions -fomit-frame-pointer -funroll-loops benchcnv.c
as recommended by Eric Backus, which did reduce the execution time by about 5
seconds. Thanks in advance for any help provided.
The benchmark code follows:
+++++++++++++++++++++++++++++++++++++ snip here ++++++++++++++++++++++++++++
/*
Convolution test program for Gnu C by J. W. V. Miller
Center for Applied Machine Vision and Intelligent Systems
The University of Michigan-Dearborn
*/
#include <stdio.h>
#include <stdlib.h>
#define buf_size 16384 /* Size in bytes of the dummy line buffer below */
#define MAXKERN 21 /* Capacity of the line-pointer tables (a[], lineptr[]) */
#define MAXCOEF 10 /* Intended upper bound on distinct coefficient LUTs */
#define NB 512 /* Bytes (pixels) processed per image line */
#define NL 512 /* Number of image lines processed */
#define ITER 9 /* Kernel width and height used by the benchmark */
unsigned char *a[MAXKERN]; /* Temporary storage: one heap buffer per kernel line */
short *coeftab; /* coefficient array: per-tap offsets into lx[] */
short *lx; /* Up to MAXCOEF unique coef luts, 256 entries each */
unsigned char buffer[buf_size]; /* Dummy buffer standing in for image input/output */
/*
 * Convolution benchmark driver.
 *
 * Runs an ITER x ITER lookup-table convolution over NL lines of NB bytes.
 * No real image I/O is done: every "input" and "output" line is the global
 * dummy `buffer`, so only the arithmetic and memory traffic are exercised.
 *
 * The last `ny` input lines are kept in the heap buffers a[0..ny-1];
 * lineptr[] holds the order in which those buffers are visited and is
 * rotated by one after each output line.
 *
 * All counters and the accumulator are plain `int` rather than `short`:
 * the values involved are small, and the result is truncated to
 * `unsigned char` on store, so the output is the same, while a 32-bit
 * compiler no longer has to emit 16-bit arithmetic in the inner loop.
 *
 * Command-line arguments are ignored.
 * Returns 0 on success; calls exit(10) if any allocation fails.
 */
int main(int argc, char *argv[])
{
    int lineptr[MAXKERN];   /* visiting order of the line buffers in a[] */
    int i, j, t;
    int nx, ny, nxh, nyh;
    int ky, kx, nc, nxny, offset;
    short *coefptr;
    unsigned char *iptr, *optr, *aptr;
    float rcoef, xdiv = 1.0f;

    (void) argc;
    (void) argv;

    offset = 128;           /* value added to every sum and used for borders */
    nx = ITER;
    ny = ITER;
    nxny = nx * ny;
    nc = 2;                 /* number of 256-entry lookup tables in lx[] */

    if ((coeftab = malloc(nxny * sizeof *coeftab)) == NULL)
    {
        fprintf(stderr, "Fatal error: Failure to allocate memory for coeftab!\n");
        exit(10);
    }
    /* One NB-byte line buffer per kernel row (elements are unsigned char). */
    for (i = 0; i < ny; i++)
        if ((a[i] = malloc(NB * sizeof *a[i])) == NULL)
        {
            fprintf(stderr, "Fatal error: Failure to allocate memory for temporary image storage!\n");
            exit(10);
        }
    if ((lx = malloc(256 * nc * sizeof *lx)) == NULL)
    {
        fprintf(stderr, "Fatal error: Failure to allocate memory for lut!\n");
        exit(10);
    }

    /*
     * Build the lookup tables: lx[256 * i + j] = round(j * rcoef).
     * rcoef is fixed at 0 here, so every entry ends up 0.
     */
    for (i = 0; i < nc; i++)
    {
        int base = i * 256;

        rcoef = 0.0f;
        rcoef = rcoef / xdiv;
        for (j = 0; j < 256; j++)
        {
            if (rcoef < 0)
                lx[base + j] = (short) (((float) j * rcoef) - .5);
            else
                lx[base + j] = (short) (((float) j * rcoef) + .5);
        }
    }

    /* Every kernel tap uses table 1; pre-scale so it is a direct lx[] offset. */
    for (i = 0; i < nxny; i++)
    {
        coeftab[i] = 1;
        coeftab[i] *= 256;  /* Fix up offset for lx[] */
    }

    nxh = nx / 2;
    nyh = ny / 2;

    /* Initialize input buffer: preload the first ny - 1 line buffers. */
    for (ky = -nyh; ky < nyh; ky++)
    {
        lineptr[ky + nyh] = ky + nyh;
        iptr = buffer;
        for (j = 0; j < NB; j++)
        {
            a[lineptr[ky + nyh]][j] = iptr[j];
        }
    }
    /* The last slot is filled line by line inside the main loop. */
    lineptr[ny - 1] = ny - 1;

    /* Top border: nyh output lines set to the offset value. */
    for (ky = -nyh; ky < 0; ky++)
    {
        optr = buffer;
        for (j = 0; j < NB; j++)
            optr[j] = (unsigned char) offset;
    }

    for (i = nyh; i < NL - nyh; i++)
    {
        int lastlineptr = lineptr[ny - 1];

        iptr = buffer;
        optr = buffer;

        /* Prime the newest line and set the left/right output borders. */
        for (kx = 0; kx <= nx - 1; kx++)
        {
            a[lastlineptr][kx] = iptr[kx];
            optr[kx] = (unsigned char) offset;
            optr[NB - 1 - kx] = (unsigned char) offset;
        }

        for (j = nxh; j <= NB - nxh - 1; j++)
        {
            /* Pull in the next input byte just before it is first needed. */
            a[lastlineptr][nxh + j] = iptr[nxh + j];
            t = offset;
            coefptr = coeftab;
            for (ky = 0; ky < ny;)
            {
                aptr = a[lineptr[ky++]];
                for (kx = j - nxh; kx <= j + nxh;)
                {
                    t += lx[*(coefptr++) + aptr[kx++]];
                }
            }
            optr[j] = (unsigned char) t;
        } /* j loop */

        /* Rotate the line order so the oldest buffer becomes the newest. */
        for (ky = -nyh; ky <= nyh; ky++)
            lineptr[ky + nyh] = (lineptr[ky + nyh] + 1) % ny;
    } /* i loop */

    /* Bottom border: nyh output lines set to the offset value. */
    for (ky = 0; ky <= nyh - 1; ky++)
    {
        optr = buffer;
        for (j = 0; j < NB; j++)
            optr[j] = (unsigned char) offset;
    }

    free(lx);
    for (i = 0; i < ny; i++)
        free(a[i]);
    free(coeftab);
    return 0;
}
- Raw text -