delorie.com/archives/browse.cgi   search  
Mail Archives: djgpp-workers/1998/03/23/03:40:37

Sender: vheyndri AT rug DOT ac DOT be
Message-Id: <35161F31.2854@rug.ac.be>
Date: Mon, 23 Mar 1998 09:37:05 +0100
From: Vik Heyndrickx <Vik DOT Heyndrickx AT rug DOT ac DOT be>
Mime-Version: 1.0
To: DJGPP workers <djgpp-workers AT delorie DOT com>
Subject: a faster memmove: patch for alpha-980101

This is a multi-part message in MIME format.

--------------410C4B192C5
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Hi workers,

I have written a faster memmove routine in asm after seeing some reports
on slow performance of that function.
The code was tested with all possible alignment cases and all possible
sizes (actually a representative subset of sizes), and I found no bugs.
However, if someone less prejudiced than I am were to test it again, I
would feel more comfortable.
The patchfile also contains some nano-opt's for some related functions.

The patchfile also contains an unrelated fix for a problem that prevents
the djlsr202 sources from compiling successfully with gcc-2.8.0, due to a
new warning message.

The patch should be applied from the base directory with -p1.

ref. PATCH: lsr-alpha-980101, memmove, memcpy, overlapping regions
handled correctly.

-- 
 \ Vik /-_-_-_-_-_-_/   
  \___/ Heyndrickx /          
   \ /-_-_-_-_-_-_/

--------------410C4B192C5
Content-Type: text/plain; charset=us-ascii; name="memmove.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="memmove.diff"

diff -r -c3 orig/src/libc/ansi/string/memcpy.S mem.diff/src/libc/ansi/string/memcpy.S
*** orig/src/libc/ansi/string/memcpy.S	Sat Mar 11 08:38:38 1995
--- mem.diff/src/libc/ansi/string/memcpy.S	Sun Mar 22 22:40:46 1998
***************
*** 1,4 ****
! /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */
  	.file "memcpy.s"
  	.text
  	.align	4
--- 1,4 ----
! /* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */
  	.file "memcpy.s"
  	.text
  	.align	4
***************
*** 11,17 ****
  	movl	8(%ebp),%edi
  	movl	12(%ebp),%esi
  	movl	16(%ebp),%ecx
! 	call	___dj_movedata
  	popl	%edi
  	popl	%esi
  	movl	8(%ebp),%eax
--- 11,17 ----
  	movl	8(%ebp),%edi
  	movl	12(%ebp),%esi
  	movl	16(%ebp),%ecx
! 	call	___dj_copydata
  	popl	%edi
  	popl	%esi
  	movl	8(%ebp),%eax
diff -r -c3 orig/src/libc/ansi/string/memmove.S mem.diff/src/libc/ansi/string/memmove.S
*** orig/src/libc/ansi/string/memmove.S	Tue Mar 28 09:14:46 1995
--- mem.diff/src/libc/ansi/string/memmove.S	Sun Mar 22 22:42:40 1998
***************
*** 1,33 ****
! /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */
! 	.file "memmove.s"
! 	.globl	_memmove
  _memmove:
! 	pushl	%ebp
! 	movl	%esp,%ebp
! 	pushl	%esi
! 	pushl	%edi
! 	movl	8(%ebp),%edi
! 	movl	12(%ebp),%esi
! 	movl	16(%ebp),%ecx
! 	jecxz	L2
! 	cld
! 	cmpl	%esi,%edi
! 	jb	L3
! 
! 	std
! 	addl	%ecx,%esi
! 	addl	%ecx,%edi
! 	decl	%esi
! 	decl	%edi
! L3:
! 	rep
! 	movsb
! 
! L2:
! 	cld
! 	popl	%edi
! 	popl	%esi
! 	movl	8(%ebp),%eax
! 	leave
! 	ret
  
--- 1,20 ----
! /* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */
!         .file "memmove.s"
!         .text
!         .align  4
!         .globl  _memmove
  _memmove:
!         pushl   %ebp                    /* standard frame: args start at 8(%ebp) */
!         movl    %esp,%ebp
!         pushl   %esi                    /* save %esi and %edi, both are overwritten below */
!         pushl   %edi
!         movl    8(%ebp),%edi            /* first argument  -> %edi (copy destination) */
!         movl    12(%ebp),%esi           /* second argument -> %esi (copy source) */
!         movl    16(%ebp),%ecx           /* third argument  -> %ecx (byte count) */
!         call    ___dj_movedata          /* overlap-aware copy; picks the direction itself */
!         popl    %edi                    /* restore the saved registers */
!         popl    %esi
!         movl    8(%ebp),%eax            /* return value: the first argument, reloaded */
!         leave
!         ret
  
diff -r -c3 orig/src/libc/pc_hw/mem/djmd.S mem.diff/src/libc/pc_hw/mem/djmd.S
*** orig/src/libc/pc_hw/mem/djmd.S	Tue Mar 21 10:44:56 1995
--- mem.diff/src/libc/pc_hw/mem/djmd.S	Sun Mar 22 23:08:46 1998
***************
*** 1,39 ****
! /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */
! # This routine moves %ecx bytes from %ds:%esi to %es:%edi.  It clobbers
! # %eax, %ecx, %esi, %edi, and eflags.  The memory ranges must not overlap,
! # unless %esi >= %edi.
! 
! 	.file "djmd.s"
! 	.text
! 	.align 4
! 	.globl ___dj_movedata
! ___dj_movedata:
! 	cld
! 	cmpl	$15,%ecx
! 	jle	small_move
! 	jmp	mod_4_check
! 	
! 	# Transfer bytes until either %esi or %edi is aligned % 4
! align_mod_4:	
! 	movsb
! 	decl	%ecx
! mod_4_check:
! 	testl	$3,%esi
! 	jz big_move
! 	testl	$3,%edi
! 	jnz	align_mod_4
! 
! big_move:
! 	movb	%cl,%al	 # We will store leftover count in %al
! 	shrl	$2,%ecx
! 	andb	$3,%al
! 	rep
! 	movsl
  
! 	# %ecx known to be zero here, so insert the leftover count in %al
! 	movb	%al,%cl
  small_move:
! 	rep
! 	movsb
! 	ret
  
--- 1,93 ----
! /* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */
! /*
!  * Copyright 1998 Vik Heyndrickx
!  *
!  * Written by Vik Heyndrickx <vheyndri AT rug DOT ac DOT be> for DJGPP
!  * Distribution and usage restriction: this file is part of DJGPP
!  *
!  * This software was written in the hope that it will be useful, but
!  * WITHOUT ANY WARRANTY, as well expressed as implied.
!  * The author disclaims all responsabilities on damage to data that might
!  * arise, directly or indirectly, from the intended usage or abuse of this
!  * software */
! 
! /* These routines copy %ecx bytes from %ds:%esi to %es:%edi.
!    They clobber %eax, ((%ebx)), %ecx, %esi, %edi, and eflags.
! 
!    These routines are not to be called by a HLL application program directly,
!    but through interface functions.
! 
!    ___dj_copydata       assumes the src and dst regions do not overlap
!    ___dj_movedata       always works, including for overlapping regions
!    ___dj_movedata_fw    also works for overlapping regions if %edi <= %esi
!    ___dj_movedata_bw    also works for overlapping regions if %esi <= %edi
  
!    ANY value in %ecx is valid as count parameter.
!    These functions are also optimized for a fast text-mode video write (they
!    pre-align their moves based upon the destination address only)
! 
!    BUG (feature): when two memory regions overlap, but src and dst are in
!    different segments, the overlap will go undetected, and therefore
!    the move will fail. It is difficult (nearly impossible) to do otherwise.
!    That situation will probably never occur in a DJGPP program.
!    */
! 
!         .file "djmd.s"
!         .text
!         .globl ___dj_copydata
!         .globl ___dj_movedata
!         //.globl ___dj_movedata_fw
!         //.globl ___dj_movedata_bw
!         .align 4
! ___dj_copydata:
! ___dj_movedata_fw:
!         movl    %edi,%eax
!         negl    %eax
!         andl    $3,%eax         // %eax = (-dst) & 3: bytes needed to lword-align %edi
!         cld                     // copy upward (DF = 0)
!         subl    %eax,%ecx       // %ecx = count minus the alignment bytes
!         jbe     small_move      // count <= alignment bytes: plain byte copy
!         xchgl   %ecx,%eax       // %ecx = alignment bytes, %eax = remaining count
!         rep
!         movsb                   // byte copy until %edi is lword aligned; %ecx ends at 0
!         shldl   $30,%eax,%ecx   // %ecx was 0, so this leaves %ecx = %eax >> 2 (lwords)
!         andl    $3,%eax         // left over count (max. 3 bytes)
!         rep
!         movsl                   // big copy, one lword at a time
  small_move:
!         addl    %eax,%ecx       // bytes still to copy (undoes the subl on the jbe path)
!         rep; movsb              // tail copy, in the direction chosen by the entry point
!         cld                     // ___dj_movedata_bw gets here with DF = 1: always return with DF = 0
!         ret
! 
!         .align 4
! ___dj_movedata:
!         //movl    %ds,%ax
!         //movl    %es,%bx
!         //cmpw    %ax,%bx       // test if same segment
!         //jne     ___dj_movedata_fw // by default assume not-overlapping
!         cmpl    %edi,%esi       // unsigned compare of src offset against dst offset
!         jnb     ___dj_movedata_fw // src >= dst: an upward copy is safe
! ___dj_movedata_bw:
!         addl    %ecx,%esi       // move both pointers to one position beyond the end
!         addl    %ecx,%edi
!         decl    %esi            // with DF = 1 movsb starts at the last byte itself
!         movl    %edi,%eax       // keep the one-past-the-end dst address for the alignment math
!         std                     // copy downward (DF = 1)
!         decl    %edi            // with DF = 1 movsb starts at the last byte itself
!         andl    $3,%eax         // bytes lying above the last lword boundary of the dst
!         subl    %eax,%ecx       // %ecx = count minus the alignment bytes
!         jbe     small_move      // count <= alignment bytes: plain byte copy
!         xchgl   %ecx,%eax       // %ecx = alignment bytes, %eax = remaining count
!         rep
!         movsb                   // byte copy until the dst end is lword aligned; %ecx ends at 0
!         subl    $3,%esi         // movsl needs the lowest address of each lword
!         subl    $3,%edi
!         shldl   $30,%eax,%ecx   // %ecx was 0, so this leaves %ecx = %eax >> 2 (lwords)
!         andl    $3,%eax         // left over count (max. 3 bytes)
!         rep
!         movsl                   // big copy, one lword at a time, downward
!         addl    $3,%esi         // back to byte addressing: point at the next byte down
!         addl    $3,%edi
!         jmp     small_move      // shared tail; DF is still 1, so the tail bytes also go downward
  
diff -r -c3 orig/src/libc/pc_hw/mem/md.S mem.diff/src/libc/pc_hw/mem/md.S
*** orig/src/libc/pc_hw/mem/md.S	Sat Mar 11 08:38:46 1995
--- mem.diff/src/libc/pc_hw/mem/md.S	Sun Mar 22 22:51:30 1998
***************
*** 1,4 ****
! /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */
  #define USE_ESI
  #define USE_EDI
  #include <libc/asmdefs.h>
--- 1,4 ----
! /* Copyright (C) 1998 DJ Delorie, see COPYING.DJ for details */
  #define USE_ESI
  #define USE_EDI
  #include <libc/asmdefs.h>
***************
*** 6,25 ****
  	FUNC(___movedata)	/* src_sel, src_ofs, dest_sel, dest_ofs, len */
  	ENTER
  
! 	pushw	%ds
! 	pushw	%es
! 
! 	movw	ARG1,%ds
! 	movw	ARG3,%es
  
  	movl	ARG2,%esi
  	movl	ARG4,%edi
  	movl	ARG5,%ecx
  
! 	call	___dj_movedata
  
! 	popw	%es
! 	popw	%ds
  
  	LEAVE
  
--- 6,26 ----
  	FUNC(___movedata)	/* src_sel, src_ofs, dest_sel, dest_ofs, len */
  	ENTER
  
! 	pushl	%ds
! 	pushl	%es
  
+ 	movl	ARG1,%ds
  	movl	ARG2,%esi
+ 
+ 	movl	ARG3,%es
  	movl	ARG4,%edi
+ 
  	movl	ARG5,%ecx
  
! 	call	___dj_copydata
  
! 	popl	%es
! 	popl	%ds
  
  	LEAVE
  
diff -r -c3 orig/src/libc/pc_hw/mem/mdb.S mem.diff/src/libc/pc_hw/mem/mdb.S
*** orig/src/libc/pc_hw/mem/mdb.S	Tue Mar 21 10:45:12 1995
--- mem.diff/src/libc/pc_hw/mem/mdb.S	Sun Mar 22 22:51:42 1998
***************
*** 6,20 ****
  	FUNC(__movedatab)	/* src_sel, src_ofs, dest_sel, dest_ofs, len */
  	ENTER
  
! 	pushw	%ds
! 	pushw	%es
  
! 	movl	ARG1,%eax
! 	movw	%ax,%ds
  	movl	ARG2,%esi
  
! 	movl	ARG3,%eax
! 	movw	%ax,%es
  	movl	ARG4,%edi
  
  	movl	ARG5,%ecx
--- 6,18 ----
  	FUNC(__movedatab)	/* src_sel, src_ofs, dest_sel, dest_ofs, len */
  	ENTER
  
! 	pushl	%ds
! 	pushl	%es
  
! 	movl	ARG1,%ds
  	movl	ARG2,%esi
  
! 	movl	ARG3,%es
  	movl	ARG4,%edi
  
  	movl	ARG5,%ecx
***************
*** 22,28 ****
  	rep
  	movsb
  
! 	popw	%es
! 	popw	%ds
  
  	LEAVE
--- 20,26 ----
  	rep
  	movsb
  
! 	popl	%es
! 	popl	%ds
  
  	LEAVE
diff -r -c3 orig/src/libc/pc_hw/mem/mdl.S mem.diff/src/libc/pc_hw/mem/mdl.S
*** orig/src/libc/pc_hw/mem/mdl.S	Tue Mar 21 10:45:08 1995
--- mem.diff/src/libc/pc_hw/mem/mdl.S	Sun Mar 22 22:49:22 1998
***************
*** 6,20 ****
  	FUNC(__movedatal)	/* src_sel, src_ofs, dest_sel, dest_ofs, len */
  	ENTER
  
! 	pushw	%ds
! 	pushw	%es
  
! 	movl	ARG1,%eax
! 	movw	%ax,%ds
  	movl	ARG2,%esi
  
! 	movl	ARG3,%eax
! 	movw	%ax,%es
  	movl	ARG4,%edi
  
  	movl	ARG5,%ecx
--- 6,18 ----
  	FUNC(__movedatal)	/* src_sel, src_ofs, dest_sel, dest_ofs, len */
  	ENTER
  
! 	pushl	%ds
! 	pushl	%es
  
! 	movl	ARG1,%ds
  	movl	ARG2,%esi
  
! 	movl	ARG3,%es
  	movl	ARG4,%edi
  
  	movl	ARG5,%ecx
***************
*** 22,28 ****
  	rep
  	movsl
  
! 	popw	%es
! 	popw	%ds
  
  	LEAVE
--- 20,26 ----
  	rep
  	movsl
  
! 	popl	%es
! 	popl	%ds
  
  	LEAVE
diff -r -c3 orig/src/libc/pc_hw/mem/mdw.S mem.diff/src/libc/pc_hw/mem/mdw.S
*** orig/src/libc/pc_hw/mem/mdw.S	Tue Mar 21 10:45:16 1995
--- mem.diff/src/libc/pc_hw/mem/mdw.S	Sun Mar 22 22:49:56 1998
***************
*** 6,20 ****
  	FUNC(__movedataw)	/* src_sel, src_ofs, dest_sel, dest_ofs, len */
  	ENTER
  
! 	pushw	%ds
! 	pushw	%es
  
! 	movl	ARG1,%eax
! 	movw	%ax,%ds
  	movl	ARG2,%esi
  
! 	movl	ARG3,%eax
! 	movw	%ax,%es
  	movl	ARG4,%edi
  
  	movl	ARG5,%ecx
--- 6,18 ----
  	FUNC(__movedataw)	/* src_sel, src_ofs, dest_sel, dest_ofs, len */
  	ENTER
  
! 	pushl	%ds
! 	pushl	%es
  
! 	movl	ARG1,%ds
  	movl	ARG2,%esi
  
! 	movl	ARG3,%es
  	movl	ARG4,%edi
  
  	movl	ARG5,%ecx
***************
*** 22,28 ****
  	rep
  	movsw
  
! 	popw	%es
! 	popw	%ds
  
  	LEAVE
--- 20,26 ----
  	rep
  	movsw
  
! 	popl	%es
! 	popl	%ds
  
  	LEAVE
diff -r -c3 orig/src/libc/posix/fnmatch/fnmatch.c mem.diff/src/libc/posix/fnmatch/fnmatch.c
*** orig/src/libc/posix/fnmatch/fnmatch.c	Mon Mar 27 12:25:40 1995
--- mem.diff/src/libc/posix/fnmatch/fnmatch.c	Sun Mar 22 20:26:44 1998
***************
*** 78,87 ****
--- 78,89 ----
  
        /* optimize for pattern with * at end or before / */
        if (c == 0)
+       {
  	if (flags & FNM_PATHNAME)
  	  return find_slash(string) ? FNM_NOMATCH : 0;
  	else
  	  return 0;
+       }
        else if (isslash(c) && flags & FNM_PATHNAME)
        {
  	if ((string = find_slash(string)) == NULL)

--------------410C4B192C5--


- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019