Mail Archives: pgcc/1998/09/07/12:24:02
>I just found the option, which corrupted the code on PPro.
>Unfortunately, it is -ffast-math combined with any non-zero
>level of optimization. The code corruption is very rare,
I just found the subroutine. Preprocessed source and assembler outputs
are appended. This is f2c-translated code, so I "preprocessed" it
again by hand to make it more readable, but the error can still be
reproduced.
Compiled with -mpentiumpro -malign-double -malign-jumps=0 -malign-loops=0
-malign-functions=0 -traditional -O1 -ffast-math -fno-exceptions gives
bad code, without -ffast-math - works well with any level of optimization.
That's all.
Krzysztof
***** C code
/* Global data block read by parlim_(). */
struct {
double gran[6] ; /* parlim_() reads gran[k - 1] and gran[k + 2] for the per-parameter value k */
long int ipp1, ipt1, ipw; /* only ipt1 is referenced in parlim_(): base offset into mem_.imem */
} eopt_;
/*
 * Global array of short integers. parlim_() indexes it at
 * eopt_.ipt1 + i - 2, i.e. beyond the single element declared here;
 * presumably the real storage is larger and defined elsewhere
 * (f2c-style COMMON block) -- TODO confirm.
 */
struct {
short int imem[1];
} mem_;
/*
 * parlim_ -- for each i in 1..*npar, derive a pair of step limits from
 * eopt_.gran[] and param[i], scaled by pkier[i], and combine them into
 * *hmin / *hmax.
 *
 * param, pkier : arrays of doubles, indexed 1..*npar inside the body
 *                (both pointers are decremented on entry).
 *                pkier[i] is overwritten with 0 when entry i is rejected.
 * npar         : pointer to the number of entries to process.
 * hmin, hmax   : outputs; both are set to 0 first, then to h1/h2 of the
 *                first accepted entry, then tightened to
 *                max(*hmin, h1) and min(*hmax, h2) for later entries.
 *
 * Always returns 0.
 *
 * The code is kept exactly as posted: it is the reproducer for the
 * reported -ffast-math miscompilation.
 */
int parlim_(param, pkier, npar, hmin, hmax)
double *param, *pkier;
long int *npar;
double *hmin, *hmax;
{
long int i__1;
double d__1;
extern int blad_();
long int brak;
double h;
long int i, k;
double h1, h2;
/* Shift both array pointers down by one element so they can be indexed from 1. */
--pkier;
--param;
/* brak is non-zero until the first entry has been accepted. */
brak = (1) ;
*hmin = 0.;
*hmax = 0.;
i__1 = *npar;
for (i = 1; i <= i__1; ++i) {
/* k selects which pair of gran[] entries applies to entry i. */
k = mem_ .imem[eopt_ .ipt1 + i - 2];
if (k < 1 || k > 3) {
/* Report an out-of-range k; execution continues afterwards if blad_() returns. */
blad_("PARLIM: TYP PARAMETRU", 21L);
}
h1 = eopt_ .gran[k - 1] - param[i];
h2 = eopt_ .gran[k + 2] - param[i];
/* Reject the entry when |pkier[i]| < 1e-10, or h1 > -1e-10, or h2 < 1e-10. */
if ((d__1 = pkier[i], (( d__1 ) >= 0 ? ( d__1 ) : -( d__1 )) ) < 1e-10 || h1 > -1e-10 || h2 < 1e-10)
{
pkier[i] = 0.;
goto L10;
}
h1 /= pkier[i];
h2 /= pkier[i];
/* Order the pair so that h1 <= h2. */
if (h1 > h2) {
h = h1;
h1 = h2;
h2 = h;
}
if (brak) {
/* First accepted entry: take its limits as they are. */
*hmin = h1;
*hmax = h2;
brak = (0) ;
} else {
/* Later entries: *hmin = max(*hmin, h1), *hmax = min(*hmax, h2). */
*hmin = (( *hmin ) >= ( h1 ) ? ( *hmin ) : ( h1 )) ;
*hmax = (( *hmax ) <= ( h2 ) ? ( *hmax ) : ( h2 )) ;
}
L10:
;
}
return 0;
}
**** Assembler output with -ffast-math
# parlim_ as compiled with -ffast-math (GNU as, AT&T syntax, i386).
#
# Stack arguments: 8(%ebp)=param  12(%ebp)=pkier  16(%ebp)=npar
#                  20(%ebp)=hmin  24(%ebp)=hmax
# Locals:          -4(%ebp)=param minus one element, -8(%ebp)=i__1,
#                  -12(%ebp)=brak, -24(%ebp)=one-byte scratch
# Registers:       %esi=i, %edi=pkier minus one element, %ebx=k
#
# Column-0 directives and local labels carry a single leading dot
# (".text", ".L5:") so that they match the ".L5"-style jump targets.
        .file "parlim-p.c"
        .version "01.01"
gcc2_compiled.:
.data
.LC0:
        .string "PARLIM: TYP PARAMETRU"
.text
.globl parlim_
        .type parlim_,@function
parlim_:
        pushl %ebp
        movl %esp,%ebp
        subl $24,%esp
        pushl %edi
        pushl %esi
        pushl %ebx
        movl 12(%ebp),%edi
        addl $-8,%edi                   # --pkier (one double)
        movl 8(%ebp),%eax
        addl $-8,%eax                   # --param (one double)
        movl %eax,-4(%ebp)
        movl $1,-12(%ebp)               # brak = 1
        movl 20(%ebp),%eax
        movl $0,(%eax)
        movl $0,4(%eax)                 # *hmin = 0.
        movl 24(%ebp),%eax
        movl $0,(%eax)
        movl $0,4(%eax)                 # *hmax = 0.
        movl 16(%ebp),%eax
        movl (%eax),%eax
        movl %eax,-8(%ebp)              # i__1 = *npar
        movl $1,%esi                    # i = 1
        cmpl %eax,%esi
        jg .L3                          # loop runs zero times when 1 > i__1

.L5:
        movl %esi,%eax
        addl eopt_+52,%eax              # i + eopt_.ipt1 (ipt1 is at offset 52)
        movswl mem_-4(,%eax,2),%ebx     # k = mem_.imem[ipt1 + i - 2], sign-extended
        leal -1(%ebx),%eax
        cmpl $2,%eax
        jbe .L6                         # unsigned (k - 1) <= 2, i.e. 1 <= k <= 3
        pushl $21
        pushl $.LC0
        call blad_
        addl $8,%esp

.L6:
        fldl eopt_-8(,%ebx,8)           # gran[k - 1]
        movl -4(%ebp),%eax
        fsubl (%eax,%esi,8)             # h1 = gran[k - 1] - param[i]
        fldl eopt_+16(,%ebx,8)          # gran[k + 2]
        fsubl (%eax,%esi,8)             # h2 = gran[k + 2] - param[i]
        fldl (%edi,%esi,8)              # pkier[i]
        fabs
        fldl .LC1                       # 1e-10
        fcom %st(1)                     # compare 1e-10 with |pkier[i]|
        fnstsw %ax
        fstp %st(1)                     # drop |pkier[i]|; stack: 1e-10, h2, h1
        testl $16640,%eax               # 0x4100 = C3|C0 of the FPU status word
        je .L16                         # both clear: 1e-10 > |pkier[i]| -> reject
        fldl .LC2                       # -1e-10
        fcomp %st(3)                    # compare -1e-10 with h1
        fnstsw %ax
        # NOTE(review): setb/seta below test EFLAGS, but nothing (no sahf)
        # copies the FPU status word into EFLAGS after fnstsw, so they see
        # the flags left by the testl above (CF=0, ZF=0 on this path).
        # The non-fast-math output decodes %ah with andb/cmpb instead.
        # This looks like the miscompilation being reported -- confirm.
        setb %al
        movzbl %al,%edx
        fcomp %st(1)                    # compare 1e-10 with h2; stack: h2, h1
        fnstsw %ax
        seta %al
        movb %al,-24(%ebp)
        movzbl -24(%ebp),%eax
        orl %eax,%edx
        je .L7                          # neither condition flagged: accept entry
        jmp .L19

.L16:
        fstp %st(0)                     # pop the extra value left on this path
.L19:
        fstp %st(0)
        fstp %st(0)                     # FPU stack now empty
        movl $0,(%edi,%esi,8)
        movl $0,4(%edi,%esi,8)          # pkier[i] = 0.
        jmp .L4

.L7:
        fxch %st(1)
        fdivl (%edi,%esi,8)             # h1 /= pkier[i]
        fxch %st(1)
        fdivl (%edi,%esi,8)             # h2 /= pkier[i]
        fcom %st(1)                     # compare h2 with h1
        fnstsw %ax
        testl $256,%eax                 # 0x100 = C0: set when h2 < h1
        je .L10
        fxch %st(1)                     # swap so the larger value is on top

.L10:
        cmpl $0,-12(%ebp)
        je .L11                         # brak == 0: merge with existing limits
        fxch %st(1)
        movl 20(%ebp),%eax
        fstpl (%eax)                    # *hmin = h1
        movl 24(%ebp),%eax
        fstpl (%eax)                    # *hmax = h2
        movl $0,-12(%ebp)               # brak = 0
        jmp .L4

.L11:
        movl 20(%ebp),%eax
        fldl (%eax)                     # *hmin
        fcom %st(2)                     # compare *hmin with h1
        fnstsw %ax
        testl $256,%eax
        je .L17                         # *hmin >= h1: keep *hmin
        fstp %st(0)                     # *hmin < h1: discard *hmin, keep h1
        jmp .L13
.L17:
        fstp %st(2)                     # overwrite h1 with *hmin
.L13:
        fxch %st(1)
        movl 20(%ebp),%eax
        fstpl (%eax)                    # store the larger value to *hmin
        movl 24(%ebp),%eax
        fldl (%eax)                     # *hmax
        fcom %st(1)                     # compare *hmax with h2
        fnstsw %ax
        testl $16640,%eax
        jne .L18                        # *hmax <= h2: keep *hmax
        fstp %st(0)                     # *hmax > h2: discard *hmax, keep h2
        jmp .L14
.L18:
        fstp %st(1)                     # overwrite h2 with *hmax
.L14:
        movl 24(%ebp),%eax
        fstpl (%eax)                    # store the smaller value to *hmax

.L4:
        incl %esi                       # ++i
        cmpl -8(%ebp),%esi
        jle .L5

.L3:
        xorl %eax,%eax                  # return 0
        leal -36(%ebp),%esp             # 24 bytes of locals + 3 saved registers
        popl %ebx
        popl %esi
        popl %edi
        movl %ebp,%esp
        popl %ebp
        ret
.Lfe1:
        .size parlim_,.Lfe1-parlim_
.section .rodata
        .align 8
.LC1:
        .long 0xd9d7bdbb,0x3ddb7cdf     # double 1e-10
        .align 8
.LC2:
        .long 0xd9d7bdbb,0xbddb7cdf     # double -1e-10
.text
        .comm eopt_,64,32
        .comm mem_,2,2
        .ident "GCC: (GNU) pgcc-2.91.57 19980901 (egcs-1.1 release)"
**** Output of diff -u parlim.s.fast-math parlim.s.no-fast-math
--- parlim.s.fm Mon Sep 7 11:30:15 1998
+++ parlim.s.nfm Mon Sep 7 11:30:07 1998
@@ -50,29 +50,37 @@
fldl eopt_+16(,%ebx,8)
fsubl (%eax,%esi,8)
fldl (%edi,%esi,8)
- fabs
+ fldz
+ fcomp %st(1)
+ fnstsw %ax
+ andb $69,%ah
+ decb %ah
+ cmpb $64,%ah
+ jb .L24
+ fchs
+.L24:
fldl .LC1
- fcom %st(1)
+ fcompp
fnstsw %ax
- fstp %st(1)
- testl $16640,%eax
- je .L16
+ andb $69,%ah
+ je .L21
fldl .LC2
- fcomp %st(3)
+ fcomp %st(2)
fnstsw %ax
- setb %al
+ andb $69,%ah
+ cmpb $1,%ah
+ sete %al
movzbl %al,%edx
+ fldl .LC1
fcomp %st(1)
fnstsw %ax
- seta %al
+ andb $69,%ah
+ sete %al
movb %al,-24(%ebp)
movzbl -24(%ebp),%eax
orl %eax,%edx
je .L7
- jmp .L19
-.L16:
- fstp %st(0)
-.L19:
+.L21:
fstp %st(0)
fstp %st(0)
movl $0,(%edi,%esi,8)
@@ -85,12 +93,13 @@
fdivl (%edi,%esi,8)
fcom %st(1)
fnstsw %ax
- testl $256,%eax
- je .L10
+ andb $69,%ah
+ cmpb $1,%ah
+ jne .L12
fxch %st(1)
-.L10:
+.L12:
cmpl $0,-12(%ebp)
- je .L11
+ je .L13
fxch %st(1)
movl 20(%ebp),%eax
fstpl (%eax)
@@ -98,18 +107,18 @@
fstpl (%eax)
movl $0,-12(%ebp)
jmp .L4
-.L11:
+.L13:
movl 20(%ebp),%eax
fldl (%eax)
fcom %st(2)
fnstsw %ax
- testl $256,%eax
- je .L17
- fstp %st(0)
- jmp .L13
-.L17:
+ andb $5,%ah
+ jne .L22
fstp %st(2)
-.L13:
+ jmp .L15
+.L22:
+ fstp %st(0)
+.L15:
fxch %st(1)
movl 20(%ebp),%eax
fstpl (%eax)
@@ -117,13 +126,15 @@
fldl (%eax)
fcom %st(1)
fnstsw %ax
- testl $16640,%eax
- jne .L18
- fstp %st(0)
- jmp .L14
-.L18:
+ andb $69,%ah
+ decb %ah
+ cmpb $64,%ah
+ jae .L23
fstp %st(1)
-.L14:
+ jmp .L17
+.L23:
+ fstp %st(0)
+.L17:
movl 24(%ebp),%eax
fstpl (%eax)
.L4:
- Raw text -