Date: 28 May 1992 20:23:41 -0400 (EDT)
From: JMILLER AT CHESS DOT EISC DOT UTOLEDO DOT edu
Subject: How fast?? part 2
To: djgpp AT sun DOT soe DOT clarkson DOT edu
Organization: Edison Industrial Systems Center, Machine Vision Group
Status: O

I am still perplexed by the unexpectedly slow execution time of a two-dimensional convolution program when compiled with Gnu C compared to Microsoft C 5.1 compiled with the /Ox option and small (default) model. A portion of the code has been extracted and converted into a benchmark program with the hope that someone may be able to tell me how to decrease the execution time. On a 386/33 machine with 8 Meg of memory the execution time for Gnu C is about 54 seconds versus 38 seconds for Microsoft C. The Gnu C program was compiled as

gcc -O2 -finline-functions -fomit-frame-pointer -funroll-loops benchcnv.c

as recommended by Eric Backus, and this did reduce the execution time by about 5 seconds.

Thanks in advance for any help provided. The benchmark code follows:

+++++++++++++++++++++++++++++++++++++ snip here ++++++++++++++++++++++++++++
/* Convolution test program for Gnu C by J. W. V. 
Miller Center for Applied Machine Vision and Intelligent Systems The Univeristy of Michigan-Dearborn */ #include #include #define buf_size 16384 #define MAXKERN 21 #define MAXCOEF 10 #define NB 512 #define NL 512 #define ITER 9 unsigned char *a[MAXKERN]; /* Temporary storage */ short *coeftab; /* coefficient array */ short *lx; /* Up to MAXCOEF unique coef luts */ unsigned char buffer[16384]; /* Dummy buffer */ void main(int argc, char *argv[]) { short lineptr[MAXKERN]; short i, j, t; short ix, nx, ny, nxh, nyh, np, *coefptr; short ky, kx, nc, nxny, offset; unsigned char *iptr, *optr, *aptr; float rcoef, xdiv = (float) 1.0; xdiv = 1.0; offset = 128; nx = ITER; ny = ITER; nxny = nx * ny; nc = 2; if ((coeftab = (short *) malloc(nxny * sizeof(short))) == NULL) { fprintf(stderr, "Fatal error: Failure to allocate memory for coeftab!\n"); exit(10); } for (i = 0; i < ny; i++) if ((a[i] = (unsigned char *) malloc(NB * sizeof(short))) == NULL) { fprintf(stderr, "Fatal error: Failure to allocate memory for temporary image storage!\n"); exit(10); } if ((lx = (short *) malloc(256 * nc * sizeof(short))) == NULL) { fprintf(stderr, "Fatal error: Failure to allocate memory for lut!\n"); exit(10); } for (i = 0; i < nc; i++) { short ix = i * 256; rcoef = 0.0; rcoef = rcoef / xdiv; for (j = 0; j < 256; j++) { if (rcoef < 0) lx[ix + j] = (short) (((float) j * rcoef) - .5); else lx[ix + j] = (short) (((float) j * rcoef) + .5); } } for (i = 0; i < nxny; i++) { coeftab[i] = 1; coeftab[i] *= 256; /* Fix up offset for lx[] */ } nxh = nx / 2; nyh = ny / 2; for (ky = -nyh; ky < nyh; ky++) /* Initialize input buffer */ { lineptr[ky + nyh] = ky + nyh; iptr = buffer; for (j = 0; j < NB; j++) { a[lineptr[ky + nyh]][j] = iptr[j]; } } /* initialize line buffer */ lineptr[ny - 1] = ny - 1; for (ky = -nyh; ky < 0; ky++) { optr = buffer; for (j = 0; j < NB; j++) optr[j] = (unsigned char) offset; } /* Fix up output buffer */ for (i = nyh; i < NL - nyh; i++) { short lastlineptr = lineptr[ny - 1]; iptr 
= buffer; optr = buffer; for (kx = 0; kx <= nx - 1; kx++) { a[lastlineptr][kx] = iptr[kx]; optr[kx] = (unsigned char) offset; optr[NB - 1 - kx] = (unsigned char) offset; } for (j = nxh; j <= NB - nxh - 1; j++) { a[lastlineptr][nxh + j] = iptr[nxh + j]; t = offset; coefptr = coeftab; for (ky = 0; ky < ny;) { aptr = a[lineptr[ky++]]; for (kx = j -nxh; kx <= j + nxh;) { t += lx[*(coefptr++) + aptr[kx++]]; } } optr[j] = (unsigned char) t; } /* j loop */ for (ky = -nyh; ky <= nyh; ky++) lineptr[ky + nyh] = (lineptr[ky + nyh] + 1) % ny; } /* i loop */ for (ky = 0; ky <= nyh - 1; ky++) { optr = buffer; for (j = 0; j < NB; j++) optr[j] = (unsigned char) offset; } exit(0); }