Sender: rich AT phekda DOT freeserve DOT co DOT uk Message-ID: <3DAD4230.B4F990C4@phekda.freeserve.co.uk> Date: Wed, 16 Oct 2002 11:40:48 +0100 From: Richard Dawe X-Mailer: Mozilla 4.77 [en] (X11; U; Linux 2.2.19 i586) X-Accept-Language: de,fr MIME-Version: 1.0 To: djgpp-workers AT delorie DOT com Subject: Re: libc' getenv optimization (patch2) References: <10210150631 DOT AA20605 AT clio DOT rice DOT edu> <2 DOT 7 DOT 9 DOT 1C51H DOT H41539 AT pauzner DOT dnttm DOT ru> Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Reply-To: djgpp-workers AT delorie DOT com Hello. Leonid Pauzner wrote: [snip] > This is the revised patch, I choose 16 bucket. > > diff -u -p old/getenv.c ./getenv.c > --- old/getenv.c Fri Nov 24 22:21:18 1995 > +++ ./getenv.c Tue Oct 15 19:23:14 2002 > @@ -1,20 +1,41 @@ > /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ > #include > #include > +#include > > extern char **environ; > > +/* > + getenv optimization: we copy environ pointers into 16 (or 32) lists, > + associated with the mask of the first letter of the text string. > + This will speedup getenv by the factor of 4, or like. > + Reset hash_env[] if environ was changed since the previous getenv call. > +*/ > +#define BUCKETS 16 > + > +char **hash_env[BUCKETS] = {}; /* init with NULLs*/ > +void set_hash_env(void); > + These should be declared as static, to avoid polluting the namespace when people use getenv. > char * > getenv(const char *name) > { > - int i; > + static unsigned last_env_changed = 0; > + char **envi; > + > + if (last_env_changed != __environ_changed) > + { > + last_env_changed = __environ_changed; > + set_hash_env(); > + } > > - if (environ == 0) > + envi = hash_env[(unsigned char)*name % BUCKETS]; > + if (envi == 0) > return 0; > > - for (i=0; environ[i]; i++) > + envi++; /* envi[0] is reserved */ > + for ( ; *envi; envi++) > { > - char *ep = environ[i]; > + char *ep = *envi; > const char *np = name; > while (*ep && *np && *ep == *np && *np != '=') > ep++, np++; > @@ -22,4 +43,61 @@ getenv(const char *name) > return ep+1; > } > return 0; > +} > + > + > +void > +set_hash_env(void) > +{ > + char **env = environ; > + int size[BUCKETS]; > + int i; > + int alloc; > + > + for (i=0; i < BUCKETS; i++) > + size[i] = 0; > + while (*env) > + { > + i = (unsigned char)**env % BUCKETS; > + size[i]++; > + env++; > + } > + > + /* (re)allocation: to avoid too many small realloc's > + * we reserve hash_env[i][0] for allocated size info, using a cast. > + * Do not forget it in this file to avoid off-by-one error. > + */ Aargh. I hate it when people use pointers to store ints and vice-versa. What's wrong with having a separate array to hold the size information? It would make the code more readable too. > + for (i=0; i < BUCKETS; i++) > + { > + if (size[i]) > + { > + if (!hash_env[i] || (int)hash_env[i][0] < size[i] + 2) > + { > + alloc = 2 * size[i] + 2; /* x2 step */ > + hash_env[i] = (char **)realloc(hash_env[i], (alloc) * sizeof(char *)); You shouldn't cast the return value of malloc, calloc and realloc. That can hide errors, if you forget to include . > + hash_env[i][0] = (char *)alloc; > + hash_env[i][size[i]+1] = 0; > + } > + else > + hash_env[i][size[i]+1] = 0; > + } > + else > + { > + if (hash_env[i]) > + { > + free(hash_env[i]); > + hash_env[i] = 0; > + } > + } > + } > + /* (re)allocation done */ > + > + env = environ; > + while (*env) > + { > + i = (unsigned char)**env % BUCKETS; > + hash_env[i][size[i]] = *env; > + size[i]--; > + env++; > + } > } Bye, Rich =] -- Richard Dawe [ http://www.phekda.freeserve.co.uk/richdawe/ ]