Sender: vheyndri AT rug DOT ac DOT be Message-Id: <35161F31.2854@rug.ac.be> Date: Mon, 23 Mar 1998 09:37:05 +0100 From: Vik Heyndrickx Mime-Version: 1.0 To: DJGPP workers Subject: a faster memmove: patch for alpha-980101 Content-Type: multipart/mixed; boundary="------------410C4B192C5" Precedence: bulk This is a multi-part message in MIME format. --------------410C4B192C5 Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Hi workers, I have written a faster memmove routine in asm after seeing some reports of slow performance of that function. The code was tested with all possible alignment cases and all possible sizes (actually a representative subset of sizes), and I found no bugs. However, I would feel more comfortable if someone less biased than I am tested it again. The patchfile also contains some nano-optimizations for some related functions. The patchfile also contains an unrelated fix for a problem that causes the djlsr202 sources to fail to compile with gcc-2.8.0 due to a new warning message. The patch should be applied from the base directory with -p1. ref. PATCH: lsr-alpha-980101, memmove, memcpy, overlapping regions handled correctly. -- \ Vik /-_-_-_-_-_-_/ \___/ Heyndrickx / \ /-_-_-_-_-_-_/ --------------410C4B192C5 Content-Type: text/plain; charset=us-ascii; name="memmove.diff" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="memmove.diff" diff -r -c3 orig/src/libc/ansi/string/memcpy.S mem.diff/src/libc/ansi/string/memcpy.S *** orig/src/libc/ansi/string/memcpy.S Sat Mar 11 08:38:38 1995 --- mem.diff/src/libc/ansi/string/memcpy.S Sun Mar 22 22:40:46 1998 *************** *** 1,4 **** ! /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ .file "memcpy.s" .text .align 4 --- 1,4 ---- ! /* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */ .file "memcpy.s" .text .align 4 *************** *** 11,17 **** movl 8(%ebp),%edi movl 12(%ebp),%esi movl 16(%ebp),%ecx ! 
call ___dj_movedata popl %edi popl %esi movl 8(%ebp),%eax --- 11,17 ---- movl 8(%ebp),%edi movl 12(%ebp),%esi movl 16(%ebp),%ecx ! call ___dj_copydata popl %edi popl %esi movl 8(%ebp),%eax diff -r -c3 orig/src/libc/ansi/string/memmove.S mem.diff/src/libc/ansi/string/memmove.S *** orig/src/libc/ansi/string/memmove.S Tue Mar 28 09:14:46 1995 --- mem.diff/src/libc/ansi/string/memmove.S Sun Mar 22 22:42:40 1998 *************** *** 1,33 **** ! /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ ! .file "memmove.s" ! .globl _memmove _memmove: ! pushl %ebp ! movl %esp,%ebp ! pushl %esi ! pushl %edi ! movl 8(%ebp),%edi ! movl 12(%ebp),%esi ! movl 16(%ebp),%ecx ! jecxz L2 ! cld ! cmpl %esi,%edi ! jb L3 ! ! std ! addl %ecx,%esi ! addl %ecx,%edi ! decl %esi ! decl %edi ! L3: ! rep ! movsb ! ! L2: ! cld ! popl %edi ! popl %esi ! movl 8(%ebp),%eax ! leave ! ret --- 1,20 ---- ! /* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */ ! .file "memmove.s" ! .text ! .align 4 ! .globl _memmove _memmove: ! pushl %ebp ! movl %esp,%ebp ! pushl %esi ! pushl %edi ! movl 8(%ebp),%edi ! movl 12(%ebp),%esi ! movl 16(%ebp),%ecx ! call ___dj_movedata ! popl %edi ! popl %esi ! movl 8(%ebp),%eax ! leave ! ret diff -r -c3 orig/src/libc/pc_hw/mem/djmd.S mem.diff/src/libc/pc_hw/mem/djmd.S *** orig/src/libc/pc_hw/mem/djmd.S Tue Mar 21 10:44:56 1995 --- mem.diff/src/libc/pc_hw/mem/djmd.S Sun Mar 22 23:08:46 1998 *************** *** 1,39 **** ! /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ ! # This routine moves %ecx bytes from %ds:%esi to %es:%edi. It clobbers ! # %eax, %ecx, %esi, %edi, and eflags. The memory ranges must not overlap, ! # unless %esi >= %edi. ! ! .file "djmd.s" ! .text ! .align 4 ! .globl ___dj_movedata ! ___dj_movedata: ! cld ! cmpl $15,%ecx ! jle small_move ! jmp mod_4_check ! ! # Transfer bytes until either %esi or %edi is aligned % 4 ! align_mod_4: ! movsb ! decl %ecx ! mod_4_check: ! testl $3,%esi ! jz big_move ! testl $3,%edi ! 
jnz align_mod_4 ! ! big_move: ! movb %cl,%al # We will store leftover count in %al ! shrl $2,%ecx ! andb $3,%al ! rep ! movsl ! # %ecx known to be zero here, so insert the leftover count in %al ! movb %al,%cl small_move: ! rep ! movsb ! ret --- 1,93 ---- ! /* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */ ! /* ! * Copyright 1998 Vik Heyndrickx ! * ! * Written by Vik Heyndrickx for DJGPP ! * Distribution and usage restriction: this file is part of DJGPP ! * ! * This software was written in the hope that it will be useful, but ! * WITHOUT ANY WARRANTY, as well expressed as implied. ! * The author disclaims all responsabilities on damage to data that might ! * arise, directly or indirectly, from the intended usage or abuse of this ! * software */ ! ! /* These routines copy %ecx bytes from %ds:%esi to %es:%edi ! They clobber %eax, ((%ebx)), %ecx, %esi, %edi, and eflags (DF is left set after a backward move). ! ! These routines are not to be called by a HLL application program directly, ! but through interface functions. ! ! ___dj_copydata assumes the src and dst regions do not overlap ! ___dj_movedata always works, including for overlapping regions ! ___dj_movedata_fw also works if %edi <= %esi ! ___dj_movedata_bw also works if %esi <= %edi ! ANY value in %ecx is valid as the count parameter. ! These functions are also optimized for a fast text-mode video write (they ! pre-align their moves based upon the destination address only) ! ! BUG (feature): when two memory regions overlap, but src and dst are in ! different segments, the overlap will go undetected, and therefore ! the move will fail. It is difficult (nearly impossible) to do otherwise. ! That situation will probably never occur in a DJGPP program. ! */ ! ! .file "djmd.s" ! .text ! .globl ___dj_copydata ! .globl ___dj_movedata ! //.globl ___dj_movedata_fw ! //.globl ___dj_movedata_bw ! .align 4 ! ___dj_copydata: ! ___dj_movedata_fw: ! movl %edi,%eax ! negl %eax ! andl $3,%eax // calculate amount of pos's to align the dst ! 
cld // reset direction flag (DF = 0) ! subl %eax,%ecx // we cannot copy more bytes than necessary ! jbe small_move // so jump to small count mover ! xchgl %ecx,%eax ! rep ! movsb // copy bytes until %edi gets lword aligned ! shldl $30,%eax,%ecx // put number of lwords to copy in %ecx, TRICKY ! andl $3,%eax // left over count (max. 3 bytes) ! rep ! movsl // big copy small_move: ! addl %eax,%ecx // copy the remaining bytes ! rep ! movsb ! ret ! ! .align 4 ! ___dj_movedata: ! //movl %ds,%ax ! //movl %es,%bx ! //cmpw %ax,%bx // test if same segment ! //jne ___dj_movedata_fw // by default assume not-overlapping ! cmpl %edi,%esi ! jnb ___dj_movedata_fw ! ___dj_movedata_bw: ! addl %ecx,%esi // move to one position beyond the end ! addl %ecx,%edi ! decl %esi // Oops, little big-endian (= blame Intel) ! movl %edi,%eax ! std // set direction flag (DF = 1) ! decl %edi // Oops, little big-endian (= blame Intel) ! andl $3,%eax // calculate amount of pos's to align the dst ! subl %eax,%ecx // we cannot copy more bytes than necessary ! jbe small_move // so jump to small count mover ! xchgl %ecx,%eax ! rep ! movsb // copy bytes until %edi gets lword aligned ! subl $3,%esi // Oops, little big-endian (= blame Intel) ! subl $3,%edi ! shldl $30,%eax,%ecx // put number of lwords to copy in %ecx, TRICKY ! andl $3,%eax // left over count (max. 3 bytes) ! rep ! movsl ! addl $3,%esi // Oops, little big-endian (= blame Intel) ! addl $3,%edi ! jmp small_move diff -r -c3 orig/src/libc/pc_hw/mem/md.S mem.diff/src/libc/pc_hw/mem/md.S *** orig/src/libc/pc_hw/mem/md.S Sat Mar 11 08:38:46 1995 --- mem.diff/src/libc/pc_hw/mem/md.S Sun Mar 22 22:51:30 1998 *************** *** 1,4 **** ! /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ #define USE_ESI #define USE_EDI #include --- 1,4 ---- ! 
/* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */ #define USE_ESI #define USE_EDI #include *************** *** 6,25 **** FUNC(___movedata) /* src_sel, src_ofs, dest_sel, dest_ofs, len */ ENTER ! pushw %ds ! pushw %es ! ! movw ARG1,%ds ! movw ARG3,%es movl ARG2,%esi movl ARG4,%edi movl ARG5,%ecx ! call ___dj_movedata ! popw %es ! popw %ds LEAVE --- 6,26 ---- FUNC(___movedata) /* src_sel, src_ofs, dest_sel, dest_ofs, len */ ENTER ! pushl %ds ! pushl %es + movl ARG1,%ds movl ARG2,%esi + + movl ARG3,%es movl ARG4,%edi + movl ARG5,%ecx ! call ___dj_copydata ! popl %es ! popl %ds LEAVE diff -r -c3 orig/src/libc/pc_hw/mem/mdb.S mem.diff/src/libc/pc_hw/mem/mdb.S *** orig/src/libc/pc_hw/mem/mdb.S Tue Mar 21 10:45:12 1995 --- mem.diff/src/libc/pc_hw/mem/mdb.S Sun Mar 22 22:51:42 1998 *************** *** 6,20 **** FUNC(__movedatab) /* src_sel, src_ofs, dest_sel, dest_ofs, len */ ENTER ! pushw %ds ! pushw %es ! movl ARG1,%eax ! movw %ax,%ds movl ARG2,%esi ! movl ARG3,%eax ! movw %ax,%es movl ARG4,%edi movl ARG5,%ecx --- 6,18 ---- FUNC(__movedatab) /* src_sel, src_ofs, dest_sel, dest_ofs, len */ ENTER ! pushl %ds ! pushl %es ! movl ARG1,%ds movl ARG2,%esi ! movl ARG3,%es movl ARG4,%edi movl ARG5,%ecx *************** *** 22,28 **** rep movsb ! popw %es ! popw %ds LEAVE --- 20,26 ---- rep movsb ! popl %es ! popl %ds LEAVE diff -r -c3 orig/src/libc/pc_hw/mem/mdl.S mem.diff/src/libc/pc_hw/mem/mdl.S *** orig/src/libc/pc_hw/mem/mdl.S Tue Mar 21 10:45:08 1995 --- mem.diff/src/libc/pc_hw/mem/mdl.S Sun Mar 22 22:49:22 1998 *************** *** 6,20 **** FUNC(__movedatal) /* src_sel, src_ofs, dest_sel, dest_ofs, len */ ENTER ! pushw %ds ! pushw %es ! movl ARG1,%eax ! movw %ax,%ds movl ARG2,%esi ! movl ARG3,%eax ! movw %ax,%es movl ARG4,%edi movl ARG5,%ecx --- 6,18 ---- FUNC(__movedatal) /* src_sel, src_ofs, dest_sel, dest_ofs, len */ ENTER ! pushl %ds ! pushl %es ! movl ARG1,%ds movl ARG2,%esi ! 
movl ARG3,%es movl ARG4,%edi movl ARG5,%ecx *************** *** 22,28 **** rep movsl ! popw %es ! popw %ds LEAVE --- 20,26 ---- rep movsl ! popl %es ! popl %ds LEAVE diff -r -c3 orig/src/libc/pc_hw/mem/mdw.S mem.diff/src/libc/pc_hw/mem/mdw.S *** orig/src/libc/pc_hw/mem/mdw.S Tue Mar 21 10:45:16 1995 --- mem.diff/src/libc/pc_hw/mem/mdw.S Sun Mar 22 22:49:56 1998 *************** *** 6,20 **** FUNC(__movedataw) /* src_sel, src_ofs, dest_sel, dest_ofs, len */ ENTER ! pushw %ds ! pushw %es ! movl ARG1,%eax ! movw %ax,%ds movl ARG2,%esi ! movl ARG3,%eax ! movw %ax,%es movl ARG4,%edi movl ARG5,%ecx --- 6,18 ---- FUNC(__movedataw) /* src_sel, src_ofs, dest_sel, dest_ofs, len */ ENTER ! pushl %ds ! pushl %es ! movl ARG1,%ds movl ARG2,%esi ! movl ARG3,%es movl ARG4,%edi movl ARG5,%ecx *************** *** 22,28 **** rep movsw ! popw %es ! popw %ds LEAVE --- 20,26 ---- rep movsw ! popl %es ! popl %ds LEAVE diff -r -c3 orig/src/libc/posix/fnmatch/fnmatch.c mem.diff/src/libc/posix/fnmatch/fnmatch.c *** orig/src/libc/posix/fnmatch/fnmatch.c Mon Mar 27 12:25:40 1995 --- mem.diff/src/libc/posix/fnmatch/fnmatch.c Sun Mar 22 20:26:44 1998 *************** *** 78,87 **** --- 78,89 ---- /* optimize for pattern with * at end or before / */ if (c == 0) + { if (flags & FNM_PATHNAME) return find_slash(string) ? FNM_NOMATCH : 0; else return 0; + } else if (isslash(c) && flags & FNM_PATHNAME) { if ((string = find_slash(string)) == NULL) --------------410C4B192C5--