delorie.com/archives/browse.cgi   search  
Mail Archives: pgcc/2001/02/20/15:16:24

Date: Tue, 20 Feb 2001 22:15:42 +0200 (EET)
From: Tuukka Toivonen <tuukkat AT s-inf-pc24 DOT oulu DOT fi>
To: Nick Kurshev <nickols_k AT mail DOT ru>
cc: "pgcc AT delorie DOT com" <pgcc AT delorie DOT com>
Subject: Re: Re: Probably pgcc-2.95.2.1 does not optimized propertly?
In-Reply-To: <Pine.LNX.4.21.0102202141440.3407-100000@s-inf-pc24.oulu.fi>
Message-ID: <Pine.LNX.4.21.0102202205140.3407-100000@s-inf-pc24.oulu.fi>
MIME-Version: 1.0
Reply-To: pgcc AT delorie DOT com
Errors-To: nobody AT delorie DOT com
X-Mailing-List: pgcc AT delorie DOT com
X-Unsubscribes-To: listserv AT delorie DOT com

On Tue, 20 Feb 2001, Tuukka Toivonen wrote:

> /* 1 clock */
> int x1,x2,x3,x4,x5;
> void benchtest(void) {
> 	x1 = 0;		/* Generates:	movl $0,x1 */
> 	x2 = 0; 	/*		movl $0,x2 */
> 	x3 = 0; 	/*		movl $0,x3 */
> 	x4 = 0; 	/*		movl $0,x4 */
> 	x5 = 0; 	/*		movl $0,x5 */
> }
> 
> /* 1 clock, equally fast to above one */
> int x1,x2,x3,x4,x5;
> void benchtest(void) {
> 	asm volatile(
> 	"xorl	%eax, %eax\n"
> 	"movl	%eax, x1\n"
> 	"movl	%eax, x2\n"
> 	"movl	%eax, x3\n"
> 	"movl	%eax, x4\n"
> 	"movl	%eax, x5\n"
> 	);
> }

As you can see, no difference between the two. But what if there are more
variables?

And indeed, AthlonGCC (as well as PentiumGCC with the pentium or pentiumpro
optimizer) generates "movl $0,var" instead of "xorl %eax,%eax; movl
%eax,var", i.e. it generates suboptimal code when many variables are set to
zero.

(5-clock overhead subtracted:)

/*
 * Benchmark case: zero twenty global ints with plain C assignments.
 *
 * Reported cost: 14 clocks, with the 5-clock measurement overhead already
 * subtracted.  The per-line comments record the instruction the author
 * observed the compiler emitting for each store.
 */
/* The twenty file-scope ints that the benchmark stores to. */
int
x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16,x17,x18,x19,x20;
/* Stores 0 to each of x1..x20, one assignment statement per variable. */
void benchtest(void) {
	x1 = 0;		/* Generates:	movl $0,x1 */
	x2 = 0; 	/*		movl $0,x2 */
	x3 = 0; 	/*		movl $0,x3 */
	x4 = 0; 	/*		movl $0,x4 */
	x5 = 0; 	/*		movl $0,x5 */
	x6 = 0; 	/* etc... (same store-immediate pattern for the rest) */
	x7 = 0;
	x8 = 0;
	x9 = 0;
	x10= 0;
	x11= 0;
	x12= 0;
	x13= 0;
	x14= 0;
	x15= 0;
	x16= 0;
	x17= 0;
	x18= 0;
	x19= 0;
	x20= 0;
}

/*
 * 12 clocks -- faster!
 *
 * Benchmark case: zero twenty global ints by clearing one register with
 * xorl and then storing that register to every variable, instead of
 * storing an immediate 0 twenty times.
 *
 * The sequence is written as extended asm so the compiler is told exactly
 * what it touches:
 *   - %0 is a scratch register chosen by the compiler (early-clobber, so it
 *     can never coincide with a register used to address x1..x20);
 *   - %1..%20 are the memory locations written ("=m" outputs), which also
 *     lets the compiler produce the correct symbol name and addressing mode
 *     for the target instead of relying on the raw assembler name;
 *   - "cc" records that xorl modifies the flags.
 * __asm__/__volatile__ are used so the code also builds in strict ISO modes
 * where the plain `asm` keyword is unavailable.
 *
 * NOTE(review): the 12-clock figure was reported for the hand-written
 * %eax form of this same instruction sequence; re-measure if it matters.
 */
int
x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16,x17,x18,x19,x20;
void benchtest(void) {
	int zero;	/* receives 0; exists only to reserve a register */

	__asm__ __volatile__(
	"xorl	%0, %0\n\t"	/* clear the scratch register once */
	"movl	%0, %1\n\t"
	"movl	%0, %2\n\t"
	"movl	%0, %3\n\t"
	"movl	%0, %4\n\t"
	"movl	%0, %5\n\t"
	"movl	%0, %6\n\t"
	"movl	%0, %7\n\t"
	"movl	%0, %8\n\t"
	"movl	%0, %9\n\t"
	"movl	%0, %10\n\t"
	"movl	%0, %11\n\t"
	"movl	%0, %12\n\t"
	"movl	%0, %13\n\t"
	"movl	%0, %14\n\t"
	"movl	%0, %15\n\t"
	"movl	%0, %16\n\t"
	"movl	%0, %17\n\t"
	"movl	%0, %18\n\t"
	"movl	%0, %19\n\t"
	"movl	%0, %20"
	: "=&r" (zero),
	  "=m" (x1),  "=m" (x2),  "=m" (x3),  "=m" (x4),  "=m" (x5),
	  "=m" (x6),  "=m" (x7),  "=m" (x8),  "=m" (x9),  "=m" (x10),
	  "=m" (x11), "=m" (x12), "=m" (x13), "=m" (x14), "=m" (x15),
	  "=m" (x16), "=m" (x17), "=m" (x18), "=m" (x19), "=m" (x20)
	: /* no inputs */
	: "cc"
	);
	(void)zero;	/* value is not needed after the asm */
}

- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019