Mail Archives: djgpp/2003/08/30/08:45:16
Hello,
I have coded a very simple C program.
It's very simple and it could be coded in a different way, but I just
want to illustrate the point.
#include <stdio.h>
#include <stdlib.h>
#define MAXBUF 16
/*
 * Fold a byte buffer into an 8-bit checksum.
 *
 * The function runs size / 8 rounds.  Each round reads the six bytes
 * ptr[0]..ptr[5], clamps every one of them to at most 64, and adds
 * ((a^b) & (c^d)) | (e^f) to the result (arithmetic is modulo 256 because
 * the accumulator is an unsigned char).  The pointer then advances by 4,
 * so consecutive rounds overlap by two bytes.
 *
 * ptr  - buffer to read; must hold at least `size` bytes.
 * size - length of the buffer in bytes; anything below 8 yields 0.
 *
 * Returns the accumulated checksum.
 */
unsigned char funct(unsigned char *ptr,int size)
{
    unsigned char res = 0;
    unsigned char a,b,c,d,e,f;

    size >>= 3;

    /*
     * Test the counter before the first round: a do/while with --size
     * would start with a zero (or negative) counter when size < 8 and
     * read far beyond the end of the buffer.
     */
    for (; size > 0; --size) {
        a = ptr[0];
        b = ptr[1];
        c = ptr[2];
        d = ptr[3];
        e = ptr[4];
        f = ptr[5];

        if (a>64) a=64;
        if (b>64) b=64;
        if (c>64) c=64;
        if (d>64) d=64;
        /*
         * Clamp e and f themselves.  Note: the GCC listing that accompanies
         * this code was produced from the earlier text, in which these two
         * tests assigned 64 to c and d instead.
         */
        if (e>64) e=64;
        if (f>64) f=64;

        res += ((a^b) & (c^d)) | (e^f);
        ptr += 4;
    }
    return res;
}
/*
 * Demo driver: fill a MAXBUF-byte buffer with pseudo-random bytes, run
 * funct() over it and print the resulting checksum as a decimal number.
 *
 * Returns 0 to the host environment.
 */
int main(void)
{
    unsigned char buffer[MAXBUF];
    int x;

    for (x = 0; x < MAXBUF; x++) {
        /* rand() is the ISO C generator declared in <stdlib.h>; only the
         * low byte of each value is kept. */
        buffer[x] = (unsigned char)rand();
    }
    printf("%d\n", funct(buffer, MAXBUF));
    return 0;
}
I have used GCC 3.2.3 and I got this assembly output for funct():
# funct(ptr, size) as emitted by the compiler.
# Register / stack-slot use inside the loop:
#   %edx     = ptr              %ebp     = loop counter (size >> 3)
#   %cl      = a                %edi     = b (zero-extended)
#   %al      = c                3(%esp)  = d (kept on the stack)
#   %esi     = e (zero-ext.)    6(%esp)  = f (kept on the stack)
#   7(%esp)  = res              %ebx     = scratch
_funct:
pushl %ebp
pushl %edi
pushl %esi
pushl %ebx
# The two extra pushes only reserve 8 bytes of stack scratch space;
# they are popped into %edx/%ecx and discarded before returning.
pushl %ebx
pushl %ebx
# After six pushes plus the return address, the first argument (ptr) sits
# at 28(%esp) and the second (size) at 32(%esp).
movl 32(%esp), %ebp
movl 28(%esp), %edx
movb $0, 7(%esp)        # res = 0
sarl $3, %ebp           # size >>= 3
.p2align 4,,7
L2:
# Load the six bytes; d and f pass through %bl on their way to the stack.
movb 3(%edx), %bl
movb (%edx), %cl
movzbl 1(%edx), %edi
movb 2(%edx), %al
cmpb $64, %cl           # a > 64 ?  (the moves below leave the flags alone)
movb %bl, 3(%esp)
movb 5(%edx), %bl
movzbl 4(%edx), %esi
movb %bl, 6(%esp)
jbe L5
movb $64, %cl           # a = 64
L5:
movl %edi, %ebx         # copy b to %ebx so its low byte can be compared
cmpb $64, %bl
jbe L6
movl $64, %edi          # b = 64
L6:
cmpb $64, %al
jbe L7
movb $64, %al           # c = 64
L7:
cmpb $64, 3(%esp)
jbe L8
movb $64, 3(%esp)       # d = 64
L8:
movl %esi, %ebx         # copy e to %ebx so its low byte can be compared
cmpb $64, %bl
jbe L9
movb $64, %al           # e > 64: the store goes to c (%al), e is unchanged
L9:
cmpb $64, 6(%esp)
jbe L10
movb $64, 3(%esp)       # f > 64: the store goes to d (3(%esp)), f is unchanged
L10:
xorb 3(%esp), %al       # %al = c ^ d
xorl %edi, %ecx         # %cl = a ^ b
addl $4, %edx           # ptr += 4
andl %eax, %ecx         # %cl = (a^b) & (c^d)
movb 6(%esp), %al       # reload f
xorl %eax, %esi         # low byte of %esi = e ^ f
orl %esi, %ecx          # %cl = ((a^b) & (c^d)) | (e^f)
addb %cl, 7(%esp)       # res += %cl
decl %ebp
jne L2                  # loop while the counter is non-zero
xorl %eax, %eax
movb 7(%esp), %al       # return value = res, zero-extended
popl %edx               # drop the two scratch slots
popl %ecx
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
It has been compiled with:
gcc demo.c -S -O2 -fomit-frame-pointer
I have also compiled with -O9 option instead of -O2, but the only true
difference was that the function was expanded inline, since it's so
simple.
In my opinion, this is better code:
# Hand-written version of funct(ptr, size) that keeps all six bytes in the
# 8-bit halves of %ebx, %ecx and %edx instead of spilling to the stack.
# ARG0 / ARG1 are placeholders for the stack operands holding ptr and size.
#   %esi = ptr          %edi = loop counter (size >> 3)
#   %bl  = a            %bh  = b
#   %cl  = c            %ch  = d
#   %dl  = e            %dh  = f
#   %al  = res
# Like the C do/while loop, the body runs before the counter is tested.
_funct:
pushl %edi
pushl %esi
pushl %ebx
movl ARG1, %edi
movl ARG0, %esi
xorl %eax, %eax         # res = 0
sarl $3, %edi           # size >>= 3
.p2align 4,,7
L2:
# All loads are based on %esi: that is the register the pointer was loaded
# into and the one advanced below.  %edx cannot serve as the base because
# %dl/%dh are overwritten here with e and f.
movb (%esi), %bl
movb 1(%esi), %bh
movb 2(%esi), %cl
movb 3(%esi), %ch
movb 4(%esi), %dl
movb 5(%esi), %dh
cmpb $64, %cl
jbe L5
movb $64, %cl           # c = 64
L5:
cmpb $64, %ch
jbe L6
movb $64, %ch           # d = 64
L6:
cmpb $64, %bl
jbe L7
movb $64, %bl           # a = 64
L7:
cmpb $64, %bh
jbe L8
movb $64, %bh           # b = 64
L8:
cmpb $64, %dl
jbe L9
movb $64, %dl           # e = 64
L9:
cmpb $64, %dh
jbe L10
movb $64, %dh           # f = 64
L10:
xorb %bh, %bl           # %bl = a ^ b
xorb %ch, %cl           # %cl = c ^ d
xorb %dh, %dl           # %dl = e ^ f
andb %cl, %bl           # %bl = (a^b) & (c^d)
addl $4, %esi           # ptr += 4
orb %dl, %bl            # %bl = ((a^b) & (c^d)) | (e^f)
# The add must come before the decrement: jne has to see the zero flag
# produced by decl, and addb would overwrite it.
addb %bl, %al           # res += %bl
decl %edi
jne L2
popl %ebx
andl $0xFF, %eax        # return value = res, zero-extended
popl %esi
popl %edi
ret
I know there are many things to examine, like memory access speed (GCC
compiled version could be fast too).
However, I just wonder if there is a way of telling GCC: "use the upper
byte registers (such as %bh, %ch and %dh) too".
As I wrote previously, I tried the local register variable trick,
but it doesn't work.
Maybe the only way is to code the interesting parts with inline
assembly (when that is possible) or to write the whole function
in a separate assembly file.
I look forward to your opinions on this.
Sincerely,
Carlo
- Raw text -