delorie.com/archives/browse.cgi   search  
Mail Archives: djgpp/1992/05/28/20:38:01

Date: 28 May 1992 20:23:41 -0400 (EDT)
From: JMILLER AT CHESS DOT EISC DOT UTOLEDO DOT edu
Subject: How fast?? part 2
To: djgpp AT sun DOT soe DOT clarkson DOT edu
Organization: Edison Industrial Systems Center, Machine Vision Group
Status: O

I am still perplexed by the unexpectedly slow execution time of a two-
dimensional convolution program when compiled with Gnu C compared to
Microsoft C 5.1 compiled with the /Ox option and small (default) model.
A portion of the code has been extracted and converted into a benchmark
program with the hope that someone may be able to tell me how to decrease the
execution time. On a 386/33 machine with 8 Meg of memory the execution time
for Gnu C is about 54 seconds versus 38 seconds for Microsoft C. The Gnu C
program was compiled as

gcc -O2 -finline-functions -fomit-frame-pointer -funroll-loops benchcnv.c

as recommended by Eric Backus, which did reduce the execution time by about 5
seconds. Thanks in advance for any help provided.

The benchmark code follows:

+++++++++++++++++++++++++++++++++++++ snip here ++++++++++++++++++++++++++++
/*
Convolution test program for Gnu C by J. W. V. Miller
Center for Applied Machine Vision and Intelligent Systems
The University of Michigan-Dearborn
*/

#include <stdio.h>
#include <stdlib.h>

#define buf_size     16384       /* Size in bytes of the dummy image buffer */
#define MAXKERN      21          /* Capacity of the per-kernel-row arrays */
#define MAXCOEF      10          /* Stated upper bound on distinct coefficient LUTs */
#define NB           512         /* Pixels processed per image line */
#define NL           512         /* Image lines processed */
#define ITER         9           /* Kernel width and height used by the benchmark */

/* One heap-allocated line buffer per kernel row; main() allocates the first ny. */
unsigned char   *a[MAXKERN];
/* One entry per kernel tap: offset of that tap's 256-entry table inside lx[]. */
short           *coeftab;
/* Concatenated 256-entry look-up tables, one per unique coefficient (up to MAXCOEF). */
short           *lx;
/* Dummy buffer standing in for both the input and the output image line. */
unsigned char   buffer[buf_size];

/*
 * Allocate `size` bytes, or report on stderr which buffer (`what`) could not
 * be allocated and terminate the program with exit status 10.
 */
static void *alloc_or_die(size_t size, const char *what)
{
	void *p = malloc(size);

	if (p == NULL)
	{
		fprintf(stderr, "Fatal error: Failure to allocate memory for %s!\n", what);
		exit(10);
	}
	return p;
}

/*
 * Benchmark driver: runs an ITER x ITER look-up-table convolution over
 * NL lines of NB pixels, using buffer[] as both the dummy input and the
 * dummy output line.
 *
 * The command-line arguments are not used.  Returns 0 on completion;
 * exits with status 10 if any allocation fails.
 *
 * Locals are plain int rather than short: this avoids implicit narrowing
 * of int arithmetic (nx * ny, i * 256) and the values all fit comfortably.
 * NOTE(review): int locals are probably also faster on 32-bit targets
 * than 16-bit ones in the inner loop -- measure before relying on it.
 */
int main(int argc, char *argv[])
{
	int             lineptr[MAXKERN];   /* lineptr[k]: index in a[] of kernel row k */
	int             i, j, t;
	int             nx, ny, nxh, nyh, nxny, nc;
	int             ky, kx, offset;
	const short     *coefptr;
	unsigned char   *iptr, *optr, *aptr;
	float           rcoef, xdiv = 1.0f;

	(void) argc;
	(void) argv;

	offset = 128;       /* value added to every sum and used to fill borders */
	nx = ITER;
	ny = ITER;
	nxny = nx * ny;
	nc = 2;             /* number of 256-entry tables built in lx[] */

	coeftab = alloc_or_die((size_t) nxny * sizeof *coeftab, "coeftab");
	for (i = 0; i < ny; i++)
	{
		/* Rows hold NB unsigned chars; no access below goes past index NB - 1. */
		a[i] = alloc_or_die(NB * sizeof *a[i], "temporary image storage");
	}
	lx = alloc_or_die((size_t) 256 * nc * sizeof *lx, "lut");

	/*
	 * Build the look-up tables: entry j of table i is j * rcoef rounded to
	 * nearest (half away from zero).  The benchmark uses a zero coefficient,
	 * so every entry ends up 0.
	 */
	for (i = 0; i < nc; i++)
	{
		int base = i * 256;

		rcoef = 0.0f;
		rcoef = rcoef / xdiv;
		for (j = 0; j < 256; j++)
		{
			if (rcoef < 0)
				lx[base + j] = (short) (((float) j * rcoef) - .5);
			else
				lx[base + j] = (short) (((float) j * rcoef) + .5);
		}
	}

	/* Every tap uses table 1; store it pre-scaled as an offset into lx[]. */
	for (i = 0; i < nxny; i++)
	{
		coeftab[i] = 1 * 256;
	}

	nxh = nx / 2;
	nyh = ny / 2;

	/* Kernel row k initially maps to line buffer k. */
	for (ky = 0; ky < ny; ky++)
	{
		lineptr[ky] = ky;
	}

	/* Preload all but the last line buffer from the dummy input. */
	for (ky = 0; ky < ny - 1; ky++)
	{
		iptr = buffer;
		aptr = a[lineptr[ky]];
		for (j = 0; j < NB; j++)
		{
			aptr[j] = iptr[j];
		}
	}

	/* Top border: nyh output lines filled with the offset value. */
	for (ky = 0; ky < nyh; ky++)
	{
		optr = buffer;
		for (j = 0; j < NB; j++)
		{
			optr[j] = (unsigned char) offset;
		}
	}

	for (i = nyh; i < NL - nyh; i++)
	{
		int newest = lineptr[ny - 1];   /* line buffer receiving this line's input */

		iptr = buffer;
		optr = buffer;

		/* Load the first nx input pixels and fill both side borders. */
		for (kx = 0; kx <= nx - 1; kx++)
		{
			a[newest][kx] = iptr[kx];
			optr[kx] = (unsigned char) offset;
			optr[NB - 1 - kx] = (unsigned char) offset;
		}

		for (j = nxh; j <= NB - nxh - 1; j++)
		{
			/* Fetch the pixel the window is about to need (index nxh + j). */
			a[newest][nxh + j] = iptr[nxh + j];

			t = offset;
			coefptr = coeftab;
			for (ky = 0; ky < ny; ky++)
			{
				aptr = a[lineptr[ky]];
				for (kx = j - nxh; kx <= j + nxh; kx++)
				{
					t += lx[*coefptr++ + aptr[kx]];
				}
			}
			/* Only the low 8 bits of the sum are stored. */
			optr[j] = (unsigned char) t;
		}   /* j loop */

		/* Rotate the ring so the oldest line buffer is reused next. */
		for (ky = 0; ky < ny; ky++)
		{
			lineptr[ky] = (lineptr[ky] + 1) % ny;
		}
	}   /* i loop */

	/* Bottom border: nyh output lines filled with the offset value. */
	for (ky = 0; ky <= nyh - 1; ky++)
	{
		optr = buffer;
		for (j = 0; j < NB; j++)
		{
			optr[j] = (unsigned char) offset;
		}
	}

	/* Release everything allocated above. */
	for (i = 0; i < ny; i++)
	{
		free(a[i]);
		a[i] = NULL;
	}
	free(lx);
	lx = NULL;
	free(coeftab);
	coeftab = NULL;

	return 0;
}


- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019