X-pop3-spooler: POP3MAIL 2.1.0 b 4 980420 -bs-
Message-Id: <199806091550.KAA05661@mail.mankato.msus.edu>
Date: Tue, 9 Jun 1998 10:50:17 -0500 (CDT)
From: Jeffrey Hundstad <jeffrey DOT hundstad AT mankato DOT msus DOT edu>
Subject: gcc-2.8.1, pgcc-2.90.29 980515, and cmovb
To: beastium-list AT Desk DOT nl
MIME-Version: 1.0
Content-Type: TEXT/plain; CHARSET=US-ASCII
Sender: Marc Lehmann <pcg AT goof DOT com>
Status: RO
X-Status: A
Content-Length: 3607
Lines: 107

I'm just curious... I've taken a look at the ternary operator with
trivial selector and trivial conditional operations, and dumped the
assembly code from both 2.8.1 and pgcc-980515.

pgcc does:
        cmpl %eax,%edx
        jbe .L2
        movl %edx,%eax
.L2:

while 2.8.1 does:
        cmpl %edx,%eax
        cmovb %edx,%eax
 
It seems like 2.8.1 would cause less pain and anguish to the cpu...
aren't jumps bad... and avoided at all cost?

I wrapped a clock counter around the ternary op. and it appears to cost
the same either way...

I used:

CFLAGS=-O20 --fast-math -fomit-frame-pointer -mcpu=i686 -march=i686
A pentium II CPU
EDO RAM

The source code and a parallel difference output of the assembly code
follows:

#include <stdio.h> 
static unsigned int t1, t2;

/*
 * Minimal test case for the conditional (ternary) operator: reads two
 * values into a and b with scanf, stores the larger of the two in c, and
 * prints c.  Always returns 0.
 *
 * NOTE(review): a, b and c are unsigned int, but both format strings use
 * %d (the signed int conversion); %u would match the declared types.
 */
int
main (void)
{
  unsigned int a, b, c;

  /* The scanf return value is not checked here. */
  scanf ("%d %d", &a, &b);
  /* Unsigned maximum of a and b -- the expression whose generated code
     is being compared between the two compilers. */
  c = (a > b) ? a : b;
  printf ("\n%d\n", c);
  return 0;
}

	.file	"s.c"		     	.file	"s.c"
	.version	"01.01"	     	.version	"01.01"
				   >	/ GNU C version pgcc-2.90.29 980515 (egcs-1.0.3 release) (i68
				   >	/ options passed:  -mno-ieee-fp -mcpu=i686 -march=i686 -O20 -
				   >	/ -fomit-frame-pointer
				   >	/ options enabled:  -fdefer-pop -fomit-frame-pointer -fcse-fo
				   >	/ -fcse-skip-blocks -fexpensive-optimizations -fthread-jumps
				   >	/ -fstrength-reduce -fpeephole -fforce-mem -ffunction-cse
				   >	/ -finline-functions -finline -fkeep-static-consts -fcaller-s
				   >	/ -fpcc-struct-return -frerun-cse-after-loop -frerun-loop-opt
				   >	/ -fschedule-insns2 -fsched-interblock -fsched-spec -ffast-ma
				   >	/ -fverbose-asm -fgnu-linker -fregmove -falias-check -fargume
				   >	/ -fcompare-elim -fsign-extension-elim -fjump-back -fopt-reg-
				   >	/ -fall-mem-givs -freduce-index-givs -fpeep-spills -fsoftware
				   >	/ -fcopy-prop -flift-stores -fruntime-lift-stores -fdo-offloa
				   >	/ -fcorrect-cse-mistakes -fopt-jumps-out -freplace-mem -frepl
				   >	/ -finterleave-stack-non-stack -fschedule-stack-reg-insns
				   >	/ -freg-reg-copy-opt -fpush-load-into-loop -fswap-for-agi -fr
				   >	/ -frisc-const -freplace-reload-regs -frisc-mem-dest -m80387
				   >	/ -mno-soft-float -mfp-ret-in-387 -mschedule-prologue -mstack
				   >	/ -mcpu=i686 -march=i686
				   >
gcc2_compiled.:			     gcc2_compiled.:
.section	.rodata		     .section	.rodata
.LC0:				     .LC0:
	.string	"%d %d"		     	.string	"%d %d"
.LC1:				     .LC1:
	.string	"\n%d\n"	     	.string	"\n%d\n"
	.local	t1		     	.local	t1
	.comm	t1,4,4		     	.comm	t1,4,4
	.local	t2		     	.local	t2
	.comm	t2,4,4		     	.comm	t2,4,4
.text				     .text
	.align 4		   |		.align 16
.globl main			     .globl main
	.type	 main,@function	     	.type	 main,@function
main:				     main:
	subl $8,%esp		     	subl $8,%esp
	leal 4(%esp),%eax	   <
	pushl %eax		     	pushl %eax
	leal 4(%esp),%eax	     	leal 4(%esp),%eax
	pushl %eax		     	pushl %eax
				   >		leal 12(%esp),%eax
				   >		pushl %eax
	pushl $.LC0		     	pushl $.LC0
	call scanf		     	call scanf
	movl 16(%esp),%eax	   |		addl $16,%esp
	movl 12(%esp),%edx	   |		movl 4(%esp),%edx
	addl $12,%esp		   |		movl (%esp),%eax
	cmpl %edx,%eax		   |		cmpl %eax,%edx
	cmovb %edx,%eax		   |		jbe .L2
				   >		movl %edx,%eax
				   >	.L2:
	pushl %eax		     	pushl %eax
	pushl $.LC1		     	pushl $.LC1
	call printf		     	call printf
	xorl %eax,%eax		     	xorl %eax,%eax
	addl $8,%esp		     	addl $8,%esp
	addl $8,%esp		     	addl $8,%esp
	ret			     	ret
.Lfe1:				     .Lfe1:
	.size	 main,.Lfe1-main     	.size	 main,.Lfe1-main
	.ident	"GCC: (GNU) 2.8.   |		.ident	"GCC: (GNU) pgcc-2.90.29 980515 (egcs-1.0.3 r