delorie.com/djgpp/bugs/show.cgi   search  
Bug 000193

When Created: 12/07/1997 07:20:22
Against DJGPP version: 2.01
By whom: aliasx@geocities.com
Abstract: possible inline asm optimisation error
I came across the problem today when using optimisations with the following
C code with inline asm.

/*
 * tilescreen - fill a 320x200 byte buffer by stepping through a tile
 * along two direction vectors derived from a rotation and two scales.
 *
 *   screen  destination buffer; the asm stores 200 rows of 320 bytes
 *           (64000 bytes) to it, sequentially, via movsb
 *   tile    source texture; indexed with a 10-bit offset (0..1023), so
 *           it is presumably a 32x32 byte tile -- confirm with caller
 *   x, y    starting coordinates in the fixed-point format implied by
 *           the bit extraction below (y is pre-multiplied by 32)
 *   rot     index into CosTable/SinTable; rot+64 is used for the
 *           per-row step (presumably a quarter turn of a 256-entry
 *           table -- table size is not visible here)
 *   xscale, yscale  multipliers applied to the table values
 *
 * CosTable, SinTable and the byte/lword types are declared elsewhere.
 *
 * NOTE(review): the asm statement below has constraint problems that
 * match the miscompile shown in this report's -O3 listing:
 *   - operands %0 (i) and %1 (j) are written by the asm
 *     ("addl %%eax,%0" / "addl %%eax,%1") but are declared as inputs;
 *     there are no output operands at all.
 *   - every input uses "g", which permits a register.  In the -O3
 *     listing operand %6 (d2y) was substituted with %esi, a register
 *     the loop body itself overwrites (movl %%ebx,%%esi; movsb).
 *   - movsb stores to memory and the arithmetic changes the flags, but
 *     neither "memory" nor "cc" appears in the clobber list.
 *   - push/pop move %esp inside the asm; a "g" operand addressed
 *     relative to %esp would be off by 4 between them (both listings
 *     in this report happen to use %ebp-relative addresses).
 */
void tilescreen(byte *screen,byte *tile,lword x, lword y, byte rot, byte xscale, byte yscale)
{
   /* ddx/ddy: step per pixel along a row; d2x/d2y: step per row;
      i/j: running start position of the current row. */
   lword ddx, ddy, d2x, d2y, i, j;

   /* Per-pixel step.  Note the x component is shifted by 5 and the y
      component by 8; together with "y*32" below this lines the two
      coordinates up with the bit fields the asm extracts. */
   ddx  = (CosTable[rot] * xscale) >>5;
   ddy  = (SinTable[rot] * yscale) >>8;

   /* Advance the table index by 64 for the per-row step.  If byte is
      an unsigned 8-bit type this wraps modulo 256 -- TODO confirm. */
   rot+=64;

   d2x  = (CosTable[rot] * xscale) >>5;
   d2y  = (SinTable[rot] * yscale) >>8;

   /* Back up 160 pixel steps and 100 row steps from (x, y*32), i.e.
      half of the 320x200 loop counts used below, so that (x, y) ends
      up at the middle of the output. */
	i = x - ddx * 160 - d2x * 100;
	j = y*32 - ddy * 160 - d2y * 100;

   /*
    * Operand map: %0=i  %1=j  %2=ddx  %3=ddy  %4=tile
    *              %5=d2x  %6=d2y  %7=screen
    *
    * Register roles inside the asm:
    *   edi  destination pointer (screen), advanced by movsb (DF cleared)
    *   ecx  loop counter: 200 rows outer (saved on the stack), 320 inner
    *   eax  running x position, edx running y position for this row
    *   ebx  tile offset: bl=ah, bh=dh, then bx>>3 masked to 10 bits,
    *        giving (5 bits taken from edx) * 32 + (5 bits from eax)
    *   esi  source pointer = tile + offset, consumed by movsb
    *
    * After each row, d2x is added to i and d2y to j in place (eax is
    * used as the temporary for those two adds).
    */
   __asm__ __volatile__ ("   cld
          
   movl %7,%%edi
          
   movl $200,%%ecx
          

          
 0: /* vertical loop */
          
  
          
   push %%ecx
          
   movl %0,%%eax
          
   movl %1,%%edx
          
   movl $320,%%ecx
          

          
 1: /* horizontal loop */
          

          
   addl %2,%%eax
          
   addl %3,%%edx
          
   movb %%ah,%%bl
          
   movb %%dh,%%bh
          
   shrl $3,%%bx
          
   andl $0x03ff,%%ebx
          
   addl %4,%%ebx
          
   movl %%ebx,%%esi
          
   movsb
          
   decl %%ecx
          
   jnz  1b
          

          
   movl %5,%%eax
          
   addl %%eax,%0
          
   movl %6,%%eax
          
   addl %%eax,%1
          
   pop  %%ecx
          
   decl %%ecx
          
   jnz  0b
           "

          /* No outputs are declared even though %0 and %1 are modified. */
          /*Outputs*/ :
          /* All eight operands are inputs with the "g" constraint
             (any general register, memory or immediate). */
          /*Inputs*/  : "g" (i), "g" (j), "g" (ddx), "g" (ddy), "g" (tile),
                        "g" (d2x), "g" (d2y), "g" (screen)
          /* Registers the asm overwrites; "memory" and "cc" are absent. */
          /*Reg's*/   : "eax","ebx","ecx","edx","esi","edi"
   );  
}

------------------------------------------------------------------------------

Here's a dump of the assembler output compiled without optimisations.
It works as expected.

   cld
   movl 8(%ebp),%edi
   movl $200,%ecx
          
 0: /* vertical loop */
          
   push %ecx
   movl -24(%ebp),%eax
   movl -28(%ebp),%edx
   movl $320,%ecx
          
 1: /* horizontal loop */
          
   addl -8(%ebp),%eax
   addl -12(%ebp),%edx
   movb %ah,%bl
   movb %dh,%bh
   shrl $3,%bx
   andl $0x03ff,%ebx
   addl 12(%ebp),%ebx
   movl %ebx,%esi
   movsb
   decl %ecx
   jnz  1b

   movl -16(%ebp),%eax
   addl %eax,-24(%ebp)
   movl -20(%ebp),%eax
   addl %eax,-28(%ebp)
   pop  %ecx
   decl %ecx
   jnz  0b

------------------------------------------------------------------------------

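Here's the same code compiled with -O3 (-O1 & -O2 also produce incorrect code).
It clobbers eax after each horizontal loop: operand %6 (d2y) is now read from
%esi, which the horizontal loop has already overwritten (movl %ebx,%esi / movsb),
so the value loaded into eax is no longer d2y.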

   cld
   movl 8(%ebp),%edi
   movl $200,%ecx

 0: /* vertical loop */
  
   push %ecx
   movl -20(%ebp),%eax
   movl -24(%ebp),%edx
   movl $320,%ecx

 1: /* horizontal loop */

   addl -4(%ebp),%eax
   addl -8(%ebp),%edx
   movb %ah,%bl
   movb %dh,%bh
   shrl $3,%bx
   andl $0x03ff,%ebx
   addl 12(%ebp),%ebx
   movl %ebx,%esi
   movsb
   decl %ecx
   jnz  1b

   movl -12(%ebp),%eax
   addl %eax,-20(%ebp)
   movl %esi,%eax         <--- clobbers eax
   addl %eax,-24(%ebp)
   pop  %ecx
   decl %ecx
   jnz  0b

Note added: 04/13/1999 07:00:49
By whom: eliz@is.elta.co.il
See bug 194.

Closed on 04/13/1999 07:00:09: Inline assembly is not a DJGPP-specific feature.
By whom: eliz@is.elta.co.il



  webmaster     delorie software   privacy  
  Copyright © 2010   by DJ Delorie     Updated Jul 2010