Date: Tue, 20 Feb 2001 22:15:42 +0200 (EET) From: Tuukka Toivonen To: Nick Kurshev cc: "pgcc AT delorie DOT com" Subject: Re: Re: Probably pgcc-2.95.2.1 does not optimized propertly? In-Reply-To: Message-ID: MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Reply-To: pgcc AT delorie DOT com Errors-To: nobody AT delorie DOT com X-Mailing-List: pgcc AT delorie DOT com X-Unsubscribes-To: listserv AT delorie DOT com Precedence: bulk On Tue, 20 Feb 2001, Tuukka Toivonen wrote: > /* 1 clock */ > int x1,x2,x3,x4,x5; > void benchtest(void) { > x1 = 0; /* Generates: movl $0,x1 */ > x2 = 0; /* movl $0,x2 */ > x3 = 0; /* movl $0,x3 */ > x4 = 0; /* movl $0,x4 */ > x5 = 0; /* movl $0,x5 */ > } > > /* 1 clock, equally fast to above one */ > int x1,x2,x3,x4,x5; > void benchtest(void) { > asm volatile( > "xorl %eax, %eax\n" > "movl %eax, x1\n" > "movl %eax, x2\n" > "movl %eax, x3\n" > "movl %eax, x4\n" > "movl %eax, x5\n" > ); > } As you can see, there is no difference between the two. But what if there are more variables? And indeed, AthlonGCC (as well as PentiumGCC with pentium or pentiumpro optimizer) generates "movl $0,var" instead of "xorl %eax,%eax; movl %eax,var", i.e. it generates suboptimal code when many variables are set to zero. (5 clocks of overhead subtracted:) /* 14 clocks */ int x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16,x17,x18,x19,x20; void benchtest(void) { x1 = 0; /* Generates: movl $0,x1 */ x2 = 0; /* movl $0,x2 */ x3 = 0; /* movl $0,x3 */ x4 = 0; /* movl $0,x4 */ x5 = 0; /* movl $0,x5 */ x6 = 0; /* etc... */ x7 = 0; x8 = 0; x9 = 0; x10= 0; x11= 0; x12= 0; x13= 0; x14= 0; x15= 0; x16= 0; x17= 0; x18= 0; x19= 0; x20= 0; } /* 12 clocks -- faster! 
*/ int x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16,x17,x18,x19,x20; void benchtest(void) { asm volatile( "xorl %eax, %eax\n" "movl %eax, x1\n" "movl %eax, x2\n" "movl %eax, x3\n" "movl %eax, x4\n" "movl %eax, x5\n" "movl %eax, x6\n" "movl %eax, x7\n" "movl %eax, x8\n" "movl %eax, x9\n" "movl %eax, x10\n" "movl %eax, x11\n" "movl %eax, x12\n" "movl %eax, x13\n" "movl %eax, x14\n" "movl %eax, x15\n" "movl %eax, x16\n" "movl %eax, x17\n" "movl %eax, x18\n" "movl %eax, x19\n" "movl %eax, x20\n" ); }