X-pop3-spooler: POP3MAIL 2.1.0 b 4 980420 -bs- Message-Id: Date: Mon, 7 Sep 98 12:04 From: strasbur AT chkw386 DOT ch DOT pwr DOT wroc DOT pl (Krzysztof Strasburger) To: beastium-list AT Desk DOT nl, strasbur AT chkw386 DOT ch DOT pwr DOT wroc DOT pl Subject: Re: Bug on P6 identified... not good... Cc: pcg AT goof DOT com Sender: Marc Lehmann Status: RO X-Status: A Content-Length: 5799 Lines: 357 >I just found the option, which corrupted the code on PPro. >Unfortunately, it is -ffast-math combined with any non-zero >level of optimization. The code corruption is very rare; I just found the subroutine. Preprocessed source and assembler outputs appended. This is f2c-translated code, so I "preprocessed" it again by hand to make it more readable, but the error can still be reproduced. Compiling with -mpentiumpro -malign-double -malign-jumps=0 -malign-loops=0 -malign-functions=0 -traditional -O1 -ffast-math -fno-exceptions gives bad code; without -ffast-math it works well with any level of optimization. That's all. Krzysztof ***** C code struct { double gran[6] ; long int ipp1, ipt1, ipw; } eopt_; struct { short int imem[1]; } mem_; int parlim_(param, pkier, npar, hmin, hmax) double *param, *pkier; long int *npar; double *hmin, *hmax; { long int i__1; double d__1; extern int blad_(); long int brak; double h; long int i, k; double h1, h2; --pkier; --param; brak = (1) ; *hmin = 0.; *hmax = 0.; i__1 = *npar; for (i = 1; i <= i__1; ++i) { k = mem_ .imem[eopt_ .ipt1 + i - 2]; if (k < 1 || k > 3) { blad_("PARLIM: TYP PARAMETRU", 21L); } h1 = eopt_ .gran[k - 1] - param[i]; h2 = eopt_ .gran[k + 2] - param[i]; if ((d__1 = pkier[i], (( d__1 ) >= 0 ? ( d__1 ) : -( d__1 )) ) < 1e-10 || h1 > -1e-10 || h2 < 1e-10) { pkier[i] = 0.; goto L10; } h1 /= pkier[i]; h2 /= pkier[i]; if (h1 > h2) { h = h1; h1 = h2; h2 = h; } if (brak) { *hmin = h1; *hmax = h2; brak = (0) ; } else { *hmin = (( *hmin ) >= ( h1 ) ? ( *hmin ) : ( h1 )) ; *hmax = (( *hmax ) <= ( h2 ) ? 
( *hmax ) : ( h2 )) ; } L10: ; } return 0; } **** Assembler output with -ffast-math .file "parlim-p.c" .version "01.01" gcc2_compiled.: .data .LC0: .string "PARLIM: TYP PARAMETRU" .text .globl parlim_ .type parlim_,@function parlim_: pushl %ebp movl %esp,%ebp subl $24,%esp pushl %edi pushl %esi pushl %ebx movl 12(%ebp),%edi addl $-8,%edi movl 8(%ebp),%eax addl $-8,%eax movl %eax,-4(%ebp) movl $1,-12(%ebp) movl 20(%ebp),%eax movl $0,(%eax) movl $0,4(%eax) movl 24(%ebp),%eax movl $0,(%eax) movl $0,4(%eax) movl 16(%ebp),%eax movl (%eax),%eax movl %eax,-8(%ebp) movl $1,%esi cmpl %eax,%esi jg .L3 .L5: movl %esi,%eax addl eopt_+52,%eax movswl mem_-4(,%eax,2),%ebx leal -1(%ebx),%eax cmpl $2,%eax jbe .L6 pushl $21 pushl $.LC0 call blad_ addl $8,%esp .L6: fldl eopt_-8(,%ebx,8) movl -4(%ebp),%eax fsubl (%eax,%esi,8) fldl eopt_+16(,%ebx,8) fsubl (%eax,%esi,8) fldl (%edi,%esi,8) fabs fldl .LC1 fcom %st(1) fnstsw %ax fstp %st(1) testl $16640,%eax je .L16 fldl .LC2 fcomp %st(3) fnstsw %ax setb %al movzbl %al,%edx fcomp %st(1) fnstsw %ax seta %al movb %al,-24(%ebp) movzbl -24(%ebp),%eax orl %eax,%edx je .L7 jmp .L19 .L16: fstp %st(0) .L19: fstp %st(0) fstp %st(0) movl $0,(%edi,%esi,8) movl $0,4(%edi,%esi,8) jmp .L4 .L7: fxch %st(1) fdivl (%edi,%esi,8) fxch %st(1) fdivl (%edi,%esi,8) fcom %st(1) fnstsw %ax testl $256,%eax je .L10 fxch %st(1) .L10: cmpl $0,-12(%ebp) je .L11 fxch %st(1) movl 20(%ebp),%eax fstpl (%eax) movl 24(%ebp),%eax fstpl (%eax) movl $0,-12(%ebp) jmp .L4 .L11: movl 20(%ebp),%eax fldl (%eax) fcom %st(2) fnstsw %ax testl $256,%eax je .L17 fstp %st(0) jmp .L13 .L17: fstp %st(2) .L13: fxch %st(1) movl 20(%ebp),%eax fstpl (%eax) movl 24(%ebp),%eax fldl (%eax) fcom %st(1) fnstsw %ax testl $16640,%eax jne .L18 fstp %st(0) jmp .L14 .L18: fstp %st(1) .L14: movl 24(%ebp),%eax fstpl (%eax) .L4: incl %esi cmpl -8(%ebp),%esi jle .L5 .L3: xorl %eax,%eax leal -36(%ebp),%esp popl %ebx popl %esi popl %edi movl %ebp,%esp popl %ebp ret .Lfe1: .size 
parlim_,.Lfe1-parlim_ .section .rodata .align 8 .LC1: .long 0xd9d7bdbb,0x3ddb7cdf .align 8 .LC2: .long 0xd9d7bdbb,0xbddb7cdf .text .comm eopt_,64,32 .comm mem_,2,2 .ident "GCC: (GNU) pgcc-2.91.57 19980901 (egcs-1.1 release)" **** Output of diff -u parlim.s.fast-math parlim.s.no-fast-math --- parlim.s.fm Mon Sep 7 11:30:15 1998 +++ parlim.s.nfm Mon Sep 7 11:30:07 1998 @@ -50,29 +50,37 @@ fldl eopt_+16(,%ebx,8) fsubl (%eax,%esi,8) fldl (%edi,%esi,8) - fabs + fldz + fcomp %st(1) + fnstsw %ax + andb $69,%ah + decb %ah + cmpb $64,%ah + jb .L24 + fchs +.L24: fldl .LC1 - fcom %st(1) + fcompp fnstsw %ax - fstp %st(1) - testl $16640,%eax - je .L16 + andb $69,%ah + je .L21 fldl .LC2 - fcomp %st(3) + fcomp %st(2) fnstsw %ax - setb %al + andb $69,%ah + cmpb $1,%ah + sete %al movzbl %al,%edx + fldl .LC1 fcomp %st(1) fnstsw %ax - seta %al + andb $69,%ah + sete %al movb %al,-24(%ebp) movzbl -24(%ebp),%eax orl %eax,%edx je .L7 - jmp .L19 -.L16: - fstp %st(0) -.L19: +.L21: fstp %st(0) fstp %st(0) movl $0,(%edi,%esi,8) @@ -85,12 +93,13 @@ fdivl (%edi,%esi,8) fcom %st(1) fnstsw %ax - testl $256,%eax - je .L10 + andb $69,%ah + cmpb $1,%ah + jne .L12 fxch %st(1) -.L10: +.L12: cmpl $0,-12(%ebp) - je .L11 + je .L13 fxch %st(1) movl 20(%ebp),%eax fstpl (%eax) @@ -98,18 +107,18 @@ fstpl (%eax) movl $0,-12(%ebp) jmp .L4 -.L11: +.L13: movl 20(%ebp),%eax fldl (%eax) fcom %st(2) fnstsw %ax - testl $256,%eax - je .L17 - fstp %st(0) - jmp .L13 -.L17: + andb $5,%ah + jne .L22 fstp %st(2) -.L13: + jmp .L15 +.L22: + fstp %st(0) +.L15: fxch %st(1) movl 20(%ebp),%eax fstpl (%eax) @@ -117,13 +126,15 @@ fldl (%eax) fcom %st(1) fnstsw %ax - testl $16640,%eax - jne .L18 - fstp %st(0) - jmp .L14 -.L18: + andb $69,%ah + decb %ah + cmpb $64,%ah + jae .L23 fstp %st(1) -.L14: + jmp .L17 +.L23: + fstp %st(0) +.L17: movl 24(%ebp),%eax fstpl (%eax) .L4: