X-pop3-spooler: POP3MAIL 2.1.0 b 4 980420 -bs- Message-Id: <199806091550.KAA05661@mail.mankato.msus.edu> Date: Tue, 9 Jun 1998 10:50:17 -0500 (CDT) From: Jeffrey Hundstad Subject: gcc-2.8.1, pgcc-2.90.29 980515, and cmovb To: beastium-list AT Desk DOT nl MIME-Version: 1.0 Content-Type: TEXT/plain; CHARSET=US-ASCII Sender: Marc Lehmann Status: RO X-Status: A Content-Length: 3607 Lines: 107 I'm just curious... I've taken a look at the ternary operator with trivial selector and trivial conditional operations, and dumped the assembly code from both 2.8.1 and pgcc-980515. pgcc does: cmpl %eax,%edx jbe .L2 movl %edx,%eax .L2: while 2.8.1 does: cmpl %edx,%eax cmovb %edx,%eax It seems like 2.8.1 would cause less pain and anguish to the cpu... aren't jumps bad... and avoided at all cost? I wrapped a clock counter around the ternary op. and it appears to cost the same either way... I used: CFLAGS=-O20 --fast-math -fomit-frame-pointer -mcpu=i686 -march=i686 A pentium II CPU EDO RAM The source code and a parallel difference output of the assembly code follow: #include <stdio.h> static unsigned int t1, t2; int main (void) { unsigned int a, b, c; scanf ("%d %d", &a, &b); c = (a > b) ? 
a : b; printf ("\n%d\n", c); return 0; } .file "s.c" .file "s.c" .version "01.01" .version "01.01" > / GNU C version pgcc-2.90.29 980515 (egcs-1.0.3 release) (i68 > / options passed: -mno-ieee-fp -mcpu=i686 -march=i686 -O20 - > / -fomit-frame-pointer > / options enabled: -fdefer-pop -fomit-frame-pointer -fcse-fo > / -fcse-skip-blocks -fexpensive-optimizations -fthread-jumps > / -fstrength-reduce -fpeephole -fforce-mem -ffunction-cse > / -finline-functions -finline -fkeep-static-consts -fcaller-s > / -fpcc-struct-return -frerun-cse-after-loop -frerun-loop-opt > / -fschedule-insns2 -fsched-interblock -fsched-spec -ffast-ma > / -fverbose-asm -fgnu-linker -fregmove -falias-check -fargume > / -fcompare-elim -fsign-extension-elim -fjump-back -fopt-reg- > / -fall-mem-givs -freduce-index-givs -fpeep-spills -fsoftware > / -fcopy-prop -flift-stores -fruntime-lift-stores -fdo-offloa > / -fcorrect-cse-mistakes -fopt-jumps-out -freplace-mem -frepl > / -finterleave-stack-non-stack -fschedule-stack-reg-insns > / -freg-reg-copy-opt -fpush-load-into-loop -fswap-for-agi -fr > / -frisc-const -freplace-reload-regs -frisc-mem-dest -m80387 > / -mno-soft-float -mfp-ret-in-387 -mschedule-prologue -mstack > / -mcpu=i686 -march=i686 > gcc2_compiled.: gcc2_compiled.: .section .rodata .section .rodata .LC0: .LC0: .string "%d %d" .string "%d %d" .LC1: .LC1: .string "\n%d\n" .string "\n%d\n" .local t1 .local t1 .comm t1,4,4 .comm t1,4,4 .local t2 .local t2 .comm t2,4,4 .comm t2,4,4 .text .text .align 4 | .align 16 .globl main .globl main .type main,@function .type main,@function main: main: subl $8,%esp subl $8,%esp leal 4(%esp),%eax < pushl %eax pushl %eax leal 4(%esp),%eax leal 4(%esp),%eax pushl %eax pushl %eax > leal 12(%esp),%eax > pushl %eax pushl $.LC0 pushl $.LC0 call scanf call scanf movl 16(%esp),%eax | addl $16,%esp movl 12(%esp),%edx | movl 4(%esp),%edx addl $12,%esp | movl (%esp),%eax cmpl %edx,%eax | cmpl %eax,%edx cmovb %edx,%eax | jbe .L2 > movl %edx,%eax > .L2: pushl %eax 
pushl %eax pushl $.LC1 pushl $.LC1 call printf call printf xorl %eax,%eax xorl %eax,%eax addl $8,%esp addl $8,%esp addl $8,%esp addl $8,%esp ret ret .Lfe1: .Lfe1: .size main,.Lfe1-main .size main,.Lfe1-main .ident "GCC: (GNU) 2.8. | .ident "GCC: (GNU) pgcc-2.90.29 980515 (egcs-1.0.3 r