Mail Archives: pgcc/2001/02/20/15:16:24
On Tue, 20 Feb 2001, Tuukka Toivonen wrote:
> /* 1 clock */
> int x1,x2,x3,x4,x5;
> void benchtest(void) {
> x1 = 0; /* Generates: movl $0,x1 */
> x2 = 0; /* movl $0,x2 */
> x3 = 0; /* movl $0,x3 */
> x4 = 0; /* movl $0,x4 */
> x5 = 0; /* movl $0,x5 */
> }
>
> /* 1 clock, equally fast to above one */
> int x1,x2,x3,x4,x5;
> void benchtest(void) {
> asm volatile(
> "xorl %eax, %eax\n"
> "movl %eax, x1\n"
> "movl %eax, x2\n"
> "movl %eax, x3\n"
> "movl %eax, x4\n"
> "movl %eax, x5\n"
> );
> }
As you can see, no difference between the two. But what if there are more
variables?
And indeed, AthlonGCC (as well as PentiumGCC with the pentium or pentiumpro
optimizer) generates "movl $0,var" instead of "xorl %eax,%eax; movl
%eax,var", i.e. it generates suboptimal code when many variables are set to
zero. (5 clock overhead subtracted:)
/* 14 clocks */
/*
 * Baseline benchmark body: clears twenty global ints using ordinary C
 * assignments, one statement per variable.  The clock figure above is the
 * timing reported in the accompanying message, with the 5-clock measurement
 * overhead already subtracted.
 *
 * The statements are deliberately left as individual `xN = 0;` stores: the
 * point of the test is the code the compiler emits for exactly this form
 * (a store of an immediate zero per variable, as annotated below).
 */
int
x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16,x17,x18,x19,x20;
void benchtest(void) {
x1 = 0; /* Generates: movl $0,x1 */
x2 = 0; /* movl $0,x2 */
x3 = 0; /* movl $0,x3 */
x4 = 0; /* movl $0,x4 */
x5 = 0; /* movl $0,x5 */
x6 = 0; /* etc... (same pattern for every remaining variable) */
x7 = 0;
x8 = 0;
x9 = 0;
x10= 0;
x11= 0;
x12= 0;
x13= 0;
x14= 0;
x15= 0;
x16= 0;
x17= 0;
x18= 0;
x19= 0;
x20= 0;
}
/* 12 clocks -- faster! */
/*
 * Hand-written benchmark body: zeroes one register once, then stores that
 * register into each of the twenty global ints.  The clock figure above is
 * the timing reported in the accompanying message for this instruction
 * sequence (xorl %eax,%eax followed by twenty movl %eax,xN).
 *
 * Written as GNU extended asm rather than basic asm so that the compiler is
 * told the truth about the statement:
 *   - every xN is an "=m" output, so the stores are visible to the optimizer
 *     and the operands are addressed correctly in PIC/PIE and 64-bit builds;
 *   - %eax and the condition codes (modified by xorl) are listed as clobbers,
 *     so no live value is kept there if this function is inlined.
 * __asm__/__volatile__ are used because the bare `asm` keyword is not
 * available in strict ISO modes (-std=c99, -std=c11).
 *
 * On non-x86 targets the function falls back to plain C stores so that the
 * translation unit still builds; the timing comparison is only meaningful
 * on x86.
 */
int
x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,x16,x17,x18,x19,x20;
void benchtest(void) {
#if defined(__i386__) || defined(__x86_64__)
    __asm__ __volatile__(
        "xorl %%eax, %%eax\n\t"
        "movl %%eax, %0\n\t"
        "movl %%eax, %1\n\t"
        "movl %%eax, %2\n\t"
        "movl %%eax, %3\n\t"
        "movl %%eax, %4\n\t"
        "movl %%eax, %5\n\t"
        "movl %%eax, %6\n\t"
        "movl %%eax, %7\n\t"
        "movl %%eax, %8\n\t"
        "movl %%eax, %9\n\t"
        "movl %%eax, %10\n\t"
        "movl %%eax, %11\n\t"
        "movl %%eax, %12\n\t"
        "movl %%eax, %13\n\t"
        "movl %%eax, %14\n\t"
        "movl %%eax, %15\n\t"
        "movl %%eax, %16\n\t"
        "movl %%eax, %17\n\t"
        "movl %%eax, %18\n\t"
        "movl %%eax, %19"
        : "=m"(x1),  "=m"(x2),  "=m"(x3),  "=m"(x4),  "=m"(x5),
          "=m"(x6),  "=m"(x7),  "=m"(x8),  "=m"(x9),  "=m"(x10),
          "=m"(x11), "=m"(x12), "=m"(x13), "=m"(x14), "=m"(x15),
          "=m"(x16), "=m"(x17), "=m"(x18), "=m"(x19), "=m"(x20)
        : /* no inputs */
        : "eax", "cc"
    );
#else
    /* Portable fallback: same observable effect, no timing significance. */
    x1 = 0;  x2 = 0;  x3 = 0;  x4 = 0;  x5 = 0;
    x6 = 0;  x7 = 0;  x8 = 0;  x9 = 0;  x10 = 0;
    x11 = 0; x12 = 0; x13 = 0; x14 = 0; x15 = 0;
    x16 = 0; x17 = 0; x18 = 0; x19 = 0; x20 = 0;
#endif
}
- Raw text -