delorie.com/archives/browse.cgi   search  
Mail Archives: pgcc/1998/09/07/12:24:02

X-pop3-spooler: POP3MAIL 2.1.0 b 4 980420 -bs-
Message-Id: <m0zG02K-0000FVC@chkw386.ch.pwr.wroc.pl>
Date: Mon, 7 Sep 98 12:04
From: strasbur AT chkw386 DOT ch DOT pwr DOT wroc DOT pl (Krzysztof Strasburger)
To: beastium-list AT Desk DOT nl, strasbur AT chkw386 DOT ch DOT pwr DOT wroc DOT pl
Subject: Re: Bug on P6 identified... not good...
Cc: pcg AT goof DOT com
Sender: Marc Lehmann <pcg AT goof DOT com>
Status: RO
X-Status: A
Lines: 357

>I just found the option, which corrupted the code on PPro.
>Unfortunately, it is -ffast-math combined with any non-zero 
>level of optimization. The code corruption is very rare,
I just found the subroutine. Preprocessed source and assembler outputs
are appended. This is f2c-translated code, so I "preprocessed" it
again by hand to make it more readable, but the error can still be
reproduced.
Compiling with -mpentiumpro -malign-double -malign-jumps=0 -malign-loops=0
-malign-functions=0 -traditional -O1 -ffast-math -fno-exceptions gives
bad code; without -ffast-math it works well at any level of optimization.
That's all.
Krzysztof


***** C code
/* Global data block of the f2c-translated program (presumably a Fortran
   COMMON block -- confirm against the original Fortran source).
   The layout must stay exactly as is: the generated assembly addresses the
   members by fixed byte offsets from the symbol eopt_. */
struct {
    double gran[6]	 ;	/* parlim_ reads gran[k-1] and gran[k+2] for k in 1..3 */
    long int ipp1, ipt1, ipw;	/* only ipt1 is referenced by parlim_ */
} eopt_;

/* Array of short integers from which parlim_ fetches the type code k of
   each parameter.  It is declared with one element, yet parlim_ indexes it
   at eopt_.ipt1 + i - 2; presumably the real storage is sized elsewhere in
   the program -- TODO confirm. */
struct {
    short int imem[1];
} mem_;
 
/*
 * parlim_ -- compute a step interval [*hmin, *hmax] for a set of parameters.
 *
 * This is the reproducer for the reported miscompile; its statements are
 * kept exactly as posted, only comments were added.
 *
 *   param  in      parameter values, used with 1-based indices 1..*npar
 *   pkier  in/out  per-parameter values used as divisors; an entry is
 *                  overwritten with 0. when it is skipped (see the loop)
 *   npar   in      number of parameters
 *   hmin   out     set to 0., then to the largest h1 seen
 *   hmax   out     set to 0., then to the smallest h2 seen
 *
 * Always returns 0.  If every parameter is skipped, *hmin and *hmax stay 0.
 */
int parlim_(param, pkier, npar, hmin, hmax)
double *param, *pkier;
long int *npar;
double *hmin, *hmax;
{
     
    long int i__1;
    double d__1;
     
    extern   int blad_();
    long int brak;	/* 1 until the first parameter has contributed to hmin/hmax */
    double h;
    long int i, k;
    double h1, h2;
     
    /* Shift both array pointers down by one element so that they can be
       indexed from 1 in the loop below. */
    --pkier;
    --param;
     
    brak = (1) ;
    *hmin = 0.;
    *hmax = 0.;
    i__1 = *npar;
    for (i = 1; i <= i__1; ++i) {
	/* Type code of parameter i; only the values 1..3 are accepted. */
	k = mem_ .imem[eopt_ .ipt1 + i - 2];
	if (k < 1 || k > 3) {
	    /* Reports the bad type code.  Control continues below if blad_
	       returns; presumably it aborts the program -- confirm, because
	       gran[] would otherwise be indexed out of range. */
	    blad_("PARLIM: TYP PARAMETRU", 21L);
	}
	/* Distances from the current value to the two gran[] entries selected
	   by k (presumably a lower and an upper limit -- confirm). */
	h1 = eopt_ .gran[k - 1] - param[i];
	h2 = eopt_ .gran[k + 2] - param[i];
 
	/* Skip this parameter, zeroing pkier[i], when |pkier[i]| < 1e-10 or
	   when h1 > -1e-10 or h2 < 1e-10.  This three-way test is the code
	   that comes out wrong in the -ffast-math assembly. */
	if ((d__1 = pkier[i], (( d__1 ) >= 0 ? ( d__1 ) : -( d__1 )) ) < 1e-10 || h1 > -1e-10 || h2 < 1e-10)
		 {
	    pkier[i] = 0.;
	    goto L10;
	}
	/* Convert both distances to multiples of pkier[i] and order them so
	   that h1 <= h2. */
	h1 /= pkier[i];
	h2 /= pkier[i];
	if (h1 > h2) {
	    h = h1;
	    h1 = h2;
	    h2 = h;
	}
	if (brak) {
	    /* First contributing parameter: take its interval as is. */
	    *hmin = h1;
	    *hmax = h2;
	    brak = (0) ;
	} else {
	    /* Later parameters: intersect, i.e. hmin = max(hmin, h1) and
	       hmax = min(hmax, h2). */
	    *hmin = (( *hmin ) >= ( h1 ) ? ( *hmin ) : ( h1 )) ;
	    *hmax = (( *hmax ) <= ( h2 ) ? ( *hmax ) : ( h2 )) ;
	}
L10:
	;
    }
    return 0;
}

**** Assembler output with -ffast-math

	.file	"parlim-p.c"
	.version	"01.01"
gcc2_compiled.:
..data
..LC0:
	.string	"PARLIM: TYP PARAMETRU"
..text
..globl parlim_
	.type	 parlim_,@function
parlim_:
	pushl %ebp
	movl %esp,%ebp
	subl $24,%esp
	pushl %edi
	pushl %esi
	pushl %ebx
	movl 12(%ebp),%edi
	addl $-8,%edi
	movl 8(%ebp),%eax
	addl $-8,%eax
	movl %eax,-4(%ebp)
	movl $1,-12(%ebp)
	movl 20(%ebp),%eax
	movl $0,(%eax)
	movl $0,4(%eax)
	movl 24(%ebp),%eax
	movl $0,(%eax)
	movl $0,4(%eax)
	movl 16(%ebp),%eax
	movl (%eax),%eax
	movl %eax,-8(%ebp)
	movl $1,%esi
	cmpl %eax,%esi
	jg .L3
..L5:
	movl %esi,%eax
	addl eopt_+52,%eax
	movswl mem_-4(,%eax,2),%ebx
	leal -1(%ebx),%eax
	cmpl $2,%eax
	jbe .L6
	pushl $21
	pushl $.LC0
	call blad_
	addl $8,%esp
..L6:
	fldl eopt_-8(,%ebx,8)
	movl -4(%ebp),%eax
	fsubl (%eax,%esi,8)
	fldl eopt_+16(,%ebx,8)
	fsubl (%eax,%esi,8)
	fldl (%edi,%esi,8)
	fabs
	fldl .LC1
	fcom %st(1)
	fnstsw %ax
	fstp %st(1)
	testl $16640,%eax
	je .L16
	fldl .LC2
	fcomp %st(3)
	fnstsw %ax
	setb %al
	movzbl %al,%edx
	fcomp %st(1)
	fnstsw %ax
	seta %al
	movb %al,-24(%ebp)
	movzbl -24(%ebp),%eax
	orl %eax,%edx
	je .L7
	jmp .L19
..L16:
	fstp %st(0)
..L19:
	fstp %st(0)
	fstp %st(0)
	movl $0,(%edi,%esi,8)
	movl $0,4(%edi,%esi,8)
	jmp .L4
..L7:
	fxch %st(1)
	fdivl (%edi,%esi,8)
	fxch %st(1)
	fdivl (%edi,%esi,8)
	fcom %st(1)
	fnstsw %ax
	testl $256,%eax
	je .L10
	fxch %st(1)
..L10:
	cmpl $0,-12(%ebp)
	je .L11
	fxch %st(1)
	movl 20(%ebp),%eax
	fstpl (%eax)
	movl 24(%ebp),%eax
	fstpl (%eax)
	movl $0,-12(%ebp)
	jmp .L4
..L11:
	movl 20(%ebp),%eax
	fldl (%eax)
	fcom %st(2)
	fnstsw %ax
	testl $256,%eax
	je .L17
	fstp %st(0)
	jmp .L13
..L17:
	fstp %st(2)
..L13:
	fxch %st(1)
	movl 20(%ebp),%eax
	fstpl (%eax)
	movl 24(%ebp),%eax
	fldl (%eax)
	fcom %st(1)
	fnstsw %ax
	testl $16640,%eax
	jne .L18
	fstp %st(0)
	jmp .L14
..L18:
	fstp %st(1)
..L14:
	movl 24(%ebp),%eax
	fstpl (%eax)
..L4:
	incl %esi
	cmpl -8(%ebp),%esi
	jle .L5
..L3:
	xorl %eax,%eax
	leal -36(%ebp),%esp
	popl %ebx
	popl %esi
	popl %edi
	movl %ebp,%esp
	popl %ebp
	ret
..Lfe1:
	.size	 parlim_,.Lfe1-parlim_
..section	.rodata
	.align 8
..LC1:
	.long 0xd9d7bdbb,0x3ddb7cdf
	.align 8
..LC2:
	.long 0xd9d7bdbb,0xbddb7cdf
..text
	.comm	eopt_,64,32
	.comm	mem_,2,2
	.ident	"GCC: (GNU) pgcc-2.91.57 19980901 (egcs-1.1 release)"

**** Output of diff -u parlim.s.fast-math parlim.s.no-fast-math

--- parlim.s.fm	Mon Sep  7 11:30:15 1998
+++ parlim.s.nfm	Mon Sep  7 11:30:07 1998
@@ -50,29 +50,37 @@
 	fldl eopt_+16(,%ebx,8)
 	fsubl (%eax,%esi,8)
 	fldl (%edi,%esi,8)
-	fabs
+	fldz
+	fcomp %st(1)
+	fnstsw %ax
+	andb $69,%ah
+	decb %ah
+	cmpb $64,%ah
+	jb .L24
+	fchs
+.L24:
 	fldl .LC1
-	fcom %st(1)
+	fcompp
 	fnstsw %ax
-	fstp %st(1)
-	testl $16640,%eax
-	je .L16
+	andb $69,%ah
+	je .L21
 	fldl .LC2
-	fcomp %st(3)
+	fcomp %st(2)
 	fnstsw %ax
-	setb %al
+	andb $69,%ah
+	cmpb $1,%ah
+	sete %al
 	movzbl %al,%edx
+	fldl .LC1
 	fcomp %st(1)
 	fnstsw %ax
-	seta %al
+	andb $69,%ah
+	sete %al
 	movb %al,-24(%ebp)
 	movzbl -24(%ebp),%eax
 	orl %eax,%edx
 	je .L7
-	jmp .L19
-.L16:
-	fstp %st(0)
-.L19:
+.L21:
 	fstp %st(0)
 	fstp %st(0)
 	movl $0,(%edi,%esi,8)
@@ -85,12 +93,13 @@
 	fdivl (%edi,%esi,8)
 	fcom %st(1)
 	fnstsw %ax
-	testl $256,%eax
-	je .L10
+	andb $69,%ah
+	cmpb $1,%ah
+	jne .L12
 	fxch %st(1)
-.L10:
+.L12:
 	cmpl $0,-12(%ebp)
-	je .L11
+	je .L13
 	fxch %st(1)
 	movl 20(%ebp),%eax
 	fstpl (%eax)
@@ -98,18 +107,18 @@
 	fstpl (%eax)
 	movl $0,-12(%ebp)
 	jmp .L4
-.L11:
+.L13:
 	movl 20(%ebp),%eax
 	fldl (%eax)
 	fcom %st(2)
 	fnstsw %ax
-	testl $256,%eax
-	je .L17
-	fstp %st(0)
-	jmp .L13
-.L17:
+	andb $5,%ah
+	jne .L22
 	fstp %st(2)
-.L13:
+	jmp .L15
+.L22:
+	fstp %st(0)
+.L15:
 	fxch %st(1)
 	movl 20(%ebp),%eax
 	fstpl (%eax)
@@ -117,13 +126,15 @@
 	fldl (%eax)
 	fcom %st(1)
 	fnstsw %ax
-	testl $16640,%eax
-	jne .L18
-	fstp %st(0)
-	jmp .L14
-.L18:
+	andb $69,%ah
+	decb %ah
+	cmpb $64,%ah
+	jae .L23
 	fstp %st(1)
-.L14:
+	jmp .L17
+.L23:
+	fstp %st(0)
+.L17:
 	movl 24(%ebp),%eax
 	fstpl (%eax)
 .L4:

- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019