Mail Archives: djgpp-workers/1998/03/23/03:40:37
This is a multi-part message in MIME format.
--------------410C4B192C5
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Hi workers,
I have written a faster memmove routine in asm after seeing some reports
on slow performance of that function.
The code was tested with all possible alignment cases and all possible
sizes (actually a representative subset of sizes), and I found no bugs.
However, I would feel more comfortable if someone less biased than I am
tested it again.
The patchfile also contains some nano-opt's for some related functions.
The patchfile also contains an unrelated fix for a problem that prevents
the djlsr202 sources from compiling successfully with gcc-2.8.0, due to a
new warning message.
The patch should be applied from the base directory with -p1.
ref. PATCH: lsr-alpha-980101, memmove, memcpy, overlapping regions
handled correctly.
--
\ Vik /-_-_-_-_-_-_/
\___/ Heyndrickx /
\ /-_-_-_-_-_-_/
--------------410C4B192C5
Content-Type: text/plain; charset=us-ascii; name="memmove.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="memmove.diff"
diff -r -c3 orig/src/libc/ansi/string/memcpy.S mem.diff/src/libc/ansi/string/memcpy.S
*** orig/src/libc/ansi/string/memcpy.S Sat Mar 11 08:38:38 1995
--- mem.diff/src/libc/ansi/string/memcpy.S Sun Mar 22 22:40:46 1998
***************
*** 1,4 ****
! /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */
.file "memcpy.s"
.text
.align 4
--- 1,4 ----
! /* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */
.file "memcpy.s"
.text
.align 4
***************
*** 11,17 ****
movl 8(%ebp),%edi
movl 12(%ebp),%esi
movl 16(%ebp),%ecx
! call ___dj_movedata
popl %edi
popl %esi
movl 8(%ebp),%eax
--- 11,17 ----
movl 8(%ebp),%edi
movl 12(%ebp),%esi
movl 16(%ebp),%ecx
! call ___dj_copydata
popl %edi
popl %esi
movl 8(%ebp),%eax
diff -r -c3 orig/src/libc/ansi/string/memmove.S mem.diff/src/libc/ansi/string/memmove.S
*** orig/src/libc/ansi/string/memmove.S Tue Mar 28 09:14:46 1995
--- mem.diff/src/libc/ansi/string/memmove.S Sun Mar 22 22:42:40 1998
***************
*** 1,33 ****
! /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */
! .file "memmove.s"
! .globl _memmove
_memmove:
! pushl %ebp
! movl %esp,%ebp
! pushl %esi
! pushl %edi
! movl 8(%ebp),%edi
! movl 12(%ebp),%esi
! movl 16(%ebp),%ecx
! jecxz L2
! cld
! cmpl %esi,%edi
! jb L3
!
! std
! addl %ecx,%esi
! addl %ecx,%edi
! decl %esi
! decl %edi
! L3:
! rep
! movsb
!
! L2:
! cld
! popl %edi
! popl %esi
! movl 8(%ebp),%eax
! leave
! ret
--- 1,22 ----
! /* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */
! .file "memmove.s"
! .text
! .align 4
! /* memmove(dst, src, n): stack args at 8/12/16(%ebp); returns dst in %eax */
! .globl _memmove
_memmove:
! pushl %ebp
! movl %esp,%ebp
! pushl %esi /* %esi and %edi are used below, so save them */
! pushl %edi
! movl 8(%ebp),%edi /* %edi = dst */
! movl 12(%ebp),%esi /* %esi = src */
! movl 16(%ebp),%ecx /* %ecx = byte count */
! call ___dj_movedata /* overlap-safe copy; picks the direction itself */
! cld /* backward path of ___dj_movedata returns with DF = 1; restore DF = 0 */
! popl %edi
! popl %esi
! movl 8(%ebp),%eax /* return value = dst */
! leave
! ret
diff -r -c3 orig/src/libc/pc_hw/mem/djmd.S mem.diff/src/libc/pc_hw/mem/djmd.S
*** orig/src/libc/pc_hw/mem/djmd.S Tue Mar 21 10:44:56 1995
--- mem.diff/src/libc/pc_hw/mem/djmd.S Sun Mar 22 23:08:46 1998
***************
*** 1,39 ****
! /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */
! # This routine moves %ecx bytes from %ds:%esi to %es:%edi. It clobbers
! # %eax, %ecx, %esi, %edi, and eflags. The memory ranges must not overlap,
! # unless %esi >= %edi.
!
! .file "djmd.s"
! .text
! .align 4
! .globl ___dj_movedata
! ___dj_movedata:
! cld
! cmpl $15,%ecx
! jle small_move
! jmp mod_4_check
!
! # Transfer bytes until either %esi or %edi is aligned % 4
! align_mod_4:
! movsb
! decl %ecx
! mod_4_check:
! testl $3,%esi
! jz big_move
! testl $3,%edi
! jnz align_mod_4
!
! big_move:
! movb %cl,%al # We will store leftover count in %al
! shrl $2,%ecx
! andb $3,%al
! rep
! movsl
! # %ecx known to be zero here, so insert the leftover count in %al
! movb %al,%cl
small_move:
! rep
! movsb
! ret
--- 1,93 ----
! /* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */
! /*
! * Copyright 1998 Vik Heyndrickx
! *
! * Written by Vik Heyndrickx <vheyndri AT rug DOT ac DOT be> for DJGPP
! * Distribution and usage restriction: this file is part of DJGPP
! *
! * This software was written in the hope that it will be useful, but
! * WITHOUT ANY WARRANTY, as well expressed as implied.
! * The author disclaims all responsabilities on damage to data that might
! * arise, directly or indirectly, from the intended usage or abuse of this
! * software */
!
! /* These routines copy %ecx bytes from %ds:%esi to %es:%edi.
! They clobber %eax, ((%ebx)), %ecx, %esi, %edi, and eflags (including DF: the backward path returns with DF = 1).
!
! These routines are not to be called by a HLL application program directly,
! but through interface functions.
!
! ___dj_copydata assumes the src and dst regions do not overlap
! ___dj_movedata always works, including for overlapping regions
! ___dj_movedata_fw also works for overlapping regions if %edi <= %esi
! ___dj_movedata_bw also works for overlapping regions if %esi <= %edi
! ANY value in %ecx is valid as count parameter.
! These functions are also optimized for a fast text-mode video write (they
! pre-align their moves based upon the destination address only)
!
! BUG (feature): when two memory regions overlap, but src and dst are in
! different segments, the overlap will go undetected, and therefore
! the move will fail. It is difficult (nearly impossible) to do otherwise.
! That situation will probably never occur in a DJGPP program.
! */
!
! .file "djmd.s"
! .text
! .globl ___dj_copydata
! .globl ___dj_movedata
! //.globl ___dj_movedata_fw
! //.globl ___dj_movedata_bw
! .align 4
! ___dj_copydata:
! ___dj_movedata_fw:
! movl %edi,%eax
! negl %eax
! andl $3,%eax // calculate amount of pos's to align the dst
! cld // reset direction flag (DF = 0)
! subl %eax,%ecx // we cannot copy more bytes than necessary
! jbe small_move // so jump to small count mover
! xchgl %ecx,%eax
! rep
! movsb // copy bytes until %edi gets lword aligned
! shldl $30,%eax,%ecx // put number of lwords to copy in %ecx, TRICKY
! andl $3,%eax // left over count (max. 3 bytes)
! rep
! movsl // big copy
small_move:
! addl %eax,%ecx // copy the remaining bytes
! rep
! movsb
! ret
!
! .align 4
! ___dj_movedata:
! //movl %ds,%ax
! //movl %es,%bx
! //cmpw %ax,%bx // test if same segment
! //jne ___dj_movedata_fw // by default assume not-overlapping
! cmpl %edi,%esi
! jnb ___dj_movedata_fw
! ___dj_movedata_bw:
! addl %ecx,%esi // move to one position beyond the end
! addl %ecx,%edi
! decl %esi // Oops, little big-endian (= blame Intel)
! movl %edi,%eax
! std // set direction flag (DF = 1)
! decl %edi // Oops, little big-endian (= blame Intel)
! andl $3,%eax // calculate amount of pos's to align the dst
! subl %eax,%ecx // we cannot copy more bytes than necessary
! jbe small_move // so jump to small count mover
! xchgl %ecx,%eax
! rep
! movsb // copy bytes until %edi gets lword aligned
! subl $3,%esi // Oops, little big-endian (= blame Intel)
! subl $3,%edi
! shldl $30,%eax,%ecx // put number of lwords to copy in %ecx, TRICKY
! andl $3,%eax // left over count (max. 3 bytes)
! rep
! movsl
! addl $3,%esi // Oops, little big-endian (= blame Intel)
! addl $3,%edi
! jmp small_move
diff -r -c3 orig/src/libc/pc_hw/mem/md.S mem.diff/src/libc/pc_hw/mem/md.S
*** orig/src/libc/pc_hw/mem/md.S Sat Mar 11 08:38:46 1995
--- mem.diff/src/libc/pc_hw/mem/md.S Sun Mar 22 22:51:30 1998
***************
*** 1,4 ****
! /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */
#define USE_ESI
#define USE_EDI
#include <libc/asmdefs.h>
--- 1,4 ----
! /* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */
#define USE_ESI
#define USE_EDI
#include <libc/asmdefs.h>
***************
*** 6,25 ****
FUNC(___movedata) /* src_sel, src_ofs, dest_sel, dest_ofs, len */
ENTER
! pushw %ds
! pushw %es
!
! movw ARG1,%ds
! movw ARG3,%es
movl ARG2,%esi
movl ARG4,%edi
movl ARG5,%ecx
! call ___dj_movedata
! popw %es
! popw %ds
LEAVE
--- 6,26 ----
FUNC(___movedata) /* src_sel, src_ofs, dest_sel, dest_ofs, len */
ENTER
! pushl %ds
! pushl %es
+ movl ARG1,%ds
movl ARG2,%esi
+
+ movl ARG3,%es
movl ARG4,%edi
+
movl ARG5,%ecx
! call ___dj_copydata
! popl %es
! popl %ds
LEAVE
diff -r -c3 orig/src/libc/pc_hw/mem/mdb.S mem.diff/src/libc/pc_hw/mem/mdb.S
*** orig/src/libc/pc_hw/mem/mdb.S Tue Mar 21 10:45:12 1995
--- mem.diff/src/libc/pc_hw/mem/mdb.S Sun Mar 22 22:51:42 1998
***************
*** 6,20 ****
FUNC(__movedatab) /* src_sel, src_ofs, dest_sel, dest_ofs, len */
ENTER
! pushw %ds
! pushw %es
! movl ARG1,%eax
! movw %ax,%ds
movl ARG2,%esi
! movl ARG3,%eax
! movw %ax,%es
movl ARG4,%edi
movl ARG5,%ecx
--- 6,18 ----
FUNC(__movedatab) /* src_sel, src_ofs, dest_sel, dest_ofs, len */
ENTER
! pushl %ds
! pushl %es
! movl ARG1,%ds
movl ARG2,%esi
! movl ARG3,%es
movl ARG4,%edi
movl ARG5,%ecx
***************
*** 22,28 ****
rep
movsb
! popw %es
! popw %ds
LEAVE
--- 20,26 ----
rep
movsb
! popl %es
! popl %ds
LEAVE
diff -r -c3 orig/src/libc/pc_hw/mem/mdl.S mem.diff/src/libc/pc_hw/mem/mdl.S
*** orig/src/libc/pc_hw/mem/mdl.S Tue Mar 21 10:45:08 1995
--- mem.diff/src/libc/pc_hw/mem/mdl.S Sun Mar 22 22:49:22 1998
***************
*** 6,20 ****
FUNC(__movedatal) /* src_sel, src_ofs, dest_sel, dest_ofs, len */
ENTER
! pushw %ds
! pushw %es
! movl ARG1,%eax
! movw %ax,%ds
movl ARG2,%esi
! movl ARG3,%eax
! movw %ax,%es
movl ARG4,%edi
movl ARG5,%ecx
--- 6,18 ----
FUNC(__movedatal) /* src_sel, src_ofs, dest_sel, dest_ofs, len */
ENTER
! pushl %ds
! pushl %es
! movl ARG1,%ds
movl ARG2,%esi
! movl ARG3,%es
movl ARG4,%edi
movl ARG5,%ecx
***************
*** 22,28 ****
rep
movsl
! popw %es
! popw %ds
LEAVE
--- 20,26 ----
rep
movsl
! popl %es
! popl %ds
LEAVE
diff -r -c3 orig/src/libc/pc_hw/mem/mdw.S mem.diff/src/libc/pc_hw/mem/mdw.S
*** orig/src/libc/pc_hw/mem/mdw.S Tue Mar 21 10:45:16 1995
--- mem.diff/src/libc/pc_hw/mem/mdw.S Sun Mar 22 22:49:56 1998
***************
*** 6,20 ****
FUNC(__movedataw) /* src_sel, src_ofs, dest_sel, dest_ofs, len */
ENTER
! pushw %ds
! pushw %es
! movl ARG1,%eax
! movw %ax,%ds
movl ARG2,%esi
! movl ARG3,%eax
! movw %ax,%es
movl ARG4,%edi
movl ARG5,%ecx
--- 6,18 ----
FUNC(__movedataw) /* src_sel, src_ofs, dest_sel, dest_ofs, len */
ENTER
! pushl %ds
! pushl %es
! movl ARG1,%ds
movl ARG2,%esi
! movl ARG3,%es
movl ARG4,%edi
movl ARG5,%ecx
***************
*** 22,28 ****
rep
movsw
! popw %es
! popw %ds
LEAVE
--- 20,26 ----
rep
movsw
! popl %es
! popl %ds
LEAVE
diff -r -c3 orig/src/libc/posix/fnmatch/fnmatch.c mem.diff/src/libc/posix/fnmatch/fnmatch.c
*** orig/src/libc/posix/fnmatch/fnmatch.c Mon Mar 27 12:25:40 1995
--- mem.diff/src/libc/posix/fnmatch/fnmatch.c Sun Mar 22 20:26:44 1998
***************
*** 78,87 ****
--- 78,89 ----
/* optimize for pattern with * at end or before / */
if (c == 0)
+ {
if (flags & FNM_PATHNAME)
return find_slash(string) ? FNM_NOMATCH : 0;
else
return 0;
+ }
else if (isslash(c) && flags & FNM_PATHNAME)
{
if ((string = find_slash(string)) == NULL)
--------------410C4B192C5--
- Raw text -