delorie.com/archives/browse.cgi   search  
Mail Archives: djgpp/2003/08/30/08:45:16

From: cbramix AT libero DOT it (Carlo)
Newsgroups: comp.os.msdos.djgpp
Subject: Re: Optimizing 8 bit variables?
Date: 30 Aug 2003 05:41:29 -0700
Organization: http://groups.google.com/
Lines: 194
Message-ID: <d2ad330a.0308300441.2acd05ad@posting.google.com>
References: <d2ad330a DOT 0308260427 DOT 49b6ab37 AT posting DOT google DOT com> <bifne4$6c9$1 AT antares DOT lu DOT erisoft DOT se>
NNTP-Posting-Host: 151.25.221.152
X-Trace: posting.google.com 1062247289 12342 127.0.0.1 (30 Aug 2003 12:41:29 GMT)
X-Complaints-To: groups-abuse AT google DOT com
NNTP-Posting-Date: 30 Aug 2003 12:41:29 GMT
To: djgpp AT delorie DOT com
DJ-Gateway: from newsgroup comp.os.msdos.djgpp
Reply-To: djgpp AT delorie DOT com

Hello,
I have written a very simple C program.
It could certainly be coded in a different way, but I just
want to illustrate the point.

#include <stdio.h>
#include <stdlib.h>

#define MAXBUF  16

/*
 * Fold a byte buffer into a single 8-bit value.
 *
 * The buffer is processed in size/8 steps. Each step reads the six bytes
 * ptr[0..5], saturates every one of them at 64, combines them as
 * ((a^b) & (c^d)) | (e^f) and adds that to the running total (modulo 256).
 * The pointer then advances by 4, so consecutive six-byte windows overlap
 * by two bytes.
 *
 * ptr  - buffer to read; step i reads bytes 4*i .. 4*i+5.
 * size - buffer length in bytes; size/8 steps are performed, and a size
 *        below 8 (or a negative size) performs none.
 *
 * Returns the accumulated total truncated to 8 bits, or 0 when no step runs.
 */
unsigned char funct(unsigned char *ptr,int size)
{
    unsigned char res = 0;
    int steps = size / 8;

    /* Counted loop rather than do/while(--size): a step count of 0 must
       not enter the body, otherwise the counter wraps below zero and the
       loop walks far past the end of the buffer. */
    for (; steps > 0; --steps) {
        unsigned char a = ptr[0];
        unsigned char b = ptr[1];
        unsigned char c = ptr[2];
        unsigned char d = ptr[3];
        unsigned char e = ptr[4];
        unsigned char f = ptr[5];

        /* Saturate each byte at 64. e and f clamp themselves; clamping
           c and d in their place would leave e and f unbounded. */
        if (a > 64) a = 64;
        if (b > 64) b = 64;
        if (c > 64) c = 64;
        if (d > 64) d = 64;
        if (e > 64) e = 64;
        if (f > 64) f = 64;

        res += ((a ^ b) & (c ^ d)) | (e ^ f);
        ptr += 4;
    }

    return res;
}

/*
 * Demo driver: fills a MAXBUF-byte buffer with pseudo-random bytes and
 * prints the value funct() computes for it.
 *
 * Returns 0.
 */
int main(void)
{
    unsigned char buffer[MAXBUF];
    int x;

    /* rand() is the ISO C generator from <stdlib.h>; each value is
       truncated to its low byte when stored. */
    for (x = 0; x < MAXBUF; x++)
        buffer[x] = (unsigned char)rand();

    printf("%d\n",funct(buffer,MAXBUF));
    return 0;
}

I used GCC 3.2.3 and got this assembly output for funct():

/* funct as emitted by the compiler (AT&T syntax).
   Where each value lives, as visible in the instructions below:
     %edx    = ptr              %ebp    = loop counter (size >> 3)
     %cl     = a                %edi    = b (zero-extended to 32 bits)
     %al     = c                3(%esp) = d
     %esi    = e (zero-extended to 32 bits)
     6(%esp) = f                7(%esp) = res
   b and e are compared through %bl after a copy into %ebx; d, f and res
   are kept in stack bytes instead of the high 8-bit registers. */
_funct:
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx		/* four registers saved; restored before ret */
	pushl	%ebx		/* two extra pushes: they only reserve 8 bytes of */
	pushl	%ebx		/* stack scratch space (discarded at the end) */
	movl	32(%esp), %ebp	/* second argument: size */
	movl	28(%esp), %edx	/* first argument: ptr (24 bytes of pushes + return address) */
	movb	$0, 7(%esp)	/* res = 0 */
	sarl	$3, %ebp	/* size >>= 3 */
	.p2align 4,,7
L2:
	movb	3(%edx), %bl	/* d = ptr[3], staged in %bl */
	movb	(%edx), %cl	/* a = ptr[0] */
	movzbl	1(%edx), %edi	/* b = ptr[1] */
	movb	2(%edx), %al	/* c = ptr[2] */
	cmpb	$64, %cl	/* a > 64 ?  (result consumed by the jbe below) */
	movb	%bl, 3(%esp)	/* spill d to the stack */
	movb	5(%edx), %bl	/* f = ptr[5], staged in %bl */
	movzbl	4(%edx), %esi	/* e = ptr[4] */
	movb	%bl, 6(%esp)	/* spill f to the stack */
	jbe	L5		/* flags still come from the cmpb on %cl; the movs leave them untouched */
	movb	$64, %cl	/* a = 64 */
L5:
	movl	%edi, %ebx	/* copy b so its low byte can be compared as %bl */
	cmpb	$64, %bl
	jbe	L6
	movl	$64, %edi	/* b = 64 */
L6:
	cmpb	$64, %al
	jbe	L7
	movb	$64, %al	/* c = 64 */
L7:
	cmpb	$64, 3(%esp)
	jbe	L8
	movb	$64, 3(%esp)	/* d = 64 */
L8:
	movl	%esi, %ebx	/* copy e so its low byte can be compared as %bl */
	cmpb	$64, %bl
	jbe	L9
	movb	$64, %al	/* e > 64 stores 64 into c (%al); e itself is left unchanged */
L9:
	cmpb	$64, 6(%esp)
	jbe	L10
	movb	$64, 3(%esp)	/* f > 64 stores 64 into d (3(%esp)); f itself is left unchanged */
L10:
	xorb	3(%esp), %al	/* %al = c ^ d */
	xorl	%edi, %ecx	/* %cl = a ^ b (only the low byte is used later) */
	addl	$4, %edx	/* ptr += 4 */
	andl	%eax, %ecx	/* %cl = (a ^ b) & (c ^ d) */
	movb	6(%esp), %al	/* reload f */
	xorl	%eax, %esi	/* low byte of %esi = e ^ f */
	orl	%esi, %ecx	/* %cl = ((a ^ b) & (c ^ d)) | (e ^ f) */
	addb	%cl, 7(%esp)	/* res += %cl, 8-bit add directly on the stack byte */
	decl	%ebp		/* --size */
	jne	L2		/* loop while the counter is non-zero */
	xorl	%eax, %eax
	movb	7(%esp), %al	/* return value: res zero-extended in %eax */
	popl	%edx		/* drop the two scratch slots */
	popl	%ecx
	popl	%ebx		/* restore the saved registers in reverse push order */
	popl	%esi
	popl	%edi
	popl	%ebp
	ret

It has been compiled with:

gcc demo.c -S -O2 -fomit-frame-pointer

I also compiled with the -O9 option instead of -O2, but the only real
difference was that the function was expanded inline, since it's so
simple.
In my opinion this is better code:

/* Hand-written funct that keeps all six bytes in 8-bit registers
   (AT&T syntax):
     %bl = a   %bh = b   %cl = c   %ch = d   %dl = e   %dh = f
     %esi = ptr          %edi = loop counter (size >> 3)
     %al  = res (returned zero-extended in %eax)
   ARG0 / ARG1 are placeholders for the stack slots of ptr and size.
   With both arguments passed on the stack they are 16(%esp) and
   20(%esp) here: three pushes plus the return address come first. */
_funct:
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	movl	ARG1, %edi	/* size */
	movl	ARG0, %esi	/* ptr */
	xorl	%eax, %eax	/* res = 0 (also clears the upper 24 bits of %eax) */
	sarl	$3, %edi	/* size >>= 3 */
	.p2align 4,,7
L2:
	/* All loads go through %esi, the register that holds and advances
	   ptr. %edx cannot be the base: it is never initialised, and the
	   load into %dl would change it before the load into %dh. */
	movb	 (%esi), %bl	/* a = ptr[0] */
	movb	1(%esi), %bh	/* b = ptr[1] */
	movb	2(%esi), %cl	/* c = ptr[2] */
	movb	3(%esi), %ch	/* d = ptr[3] */
	movb	4(%esi), %dl	/* e = ptr[4] */
	movb	5(%esi), %dh	/* f = ptr[5] */

	/* Saturate each of the six bytes at 64. */
	cmpb	$64, %cl
	jbe	L5
	movb	$64, %cl
L5:
	cmpb	$64, %ch
	jbe	L6
	movb	$64, %ch
L6:
	cmpb	$64, %bl
	jbe	L7
	movb	$64, %bl
L7:
	cmpb	$64, %bh
	jbe	L8
	movb	$64, %bh
L8:
	cmpb	$64, %dl
	jbe	L9
	movb	$64, %dl
L9:
	cmpb	$64, %dh
	jbe	L10
	movb	$64, %dh
L10:
	xorb	%bh, %bl	/* a ^ b */
	xorb	%ch, %cl	/* c ^ d */
	xorb	%dh, %dl	/* e ^ f */
	andb	%cl, %bl	/* (a ^ b) & (c ^ d) */
	addl	$4, %esi	/* ptr += 4 */
	orb	%dl, %bl	/* ... | (e ^ f) */
	addb	%bl, %al	/* res += result; done before decl because addb rewrites ZF */
	decl	%edi		/* --size: sets ZF for the branch below */
	jne	L2		/* loop while the counter is non-zero */
	popl	%ebx		/* pops leave the flags and %eax alone */
	andl	$0xFF, %eax	/* upper bits are already zero (only %al was written); kept as a guard */
	popl	%esi
	popl	%edi
	ret

I know there are many things to consider, such as memory access speed (the
GCC-compiled version could be fast too).
However, I just wonder whether there is a way of telling the compiler: "use
the upper 8-bit registers too".
As I wrote previously, I tried the trick of local register variables,
but it doesn't work.
Maybe the only way is to code the interesting parts as inline
assembly (when that is possible) or to write the whole function
in assembly.
I look forward to your opinions on this.

Sincerely,

Carlo

- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019