To: djgpp-workers AT delorie DOT com References: <10210150631 DOT AA20605 AT clio DOT rice DOT edu> Message-Id: <2.7.9.1C51H.H41539@pauzner.dnttm.ru> From: "Leonid Pauzner" Date: Tue, 15 Oct 2002 19:45:09 +0400 (MSD) X-Mailer: dMail [Demos Mail for DOS v2.7.9] Subject: Re: libc' getenv optimization (patch2) MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Reply-To: djgpp-workers AT delorie DOT com 15-Oct-2002 01:28 Charles Sandmann wrote: > With the very sparse distribution of letters, I think it would be much > more space efficient to mask the lower 4 or 5 bits for the buckets > (16 or 32 should be fine, 256 is overkill). This also would speed up > the initial setup. This is the revised patch, I choose 16 bucket. diff -u -p old/getenv.c ./getenv.c --- old/getenv.c Fri Nov 24 22:21:18 1995 +++ ./getenv.c Tue Oct 15 19:23:14 2002 @@ -1,20 +1,41 @@ /* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ #include #include +#include extern char **environ; +/* + getenv optimization: we copy environ pointers into 16 (or 32) lists, + associated with the mask of the first letter of the text string. + This will speedup getenv by the factor of 4, or like. + Reset hash_env[] if environ was changed since the previous getenv call. +*/ +#define BUCKETS 16 + +char **hash_env[BUCKETS] = {}; /* init with NULLs*/ +void set_hash_env(void); + char * getenv(const char *name) { - int i; + static unsigned last_env_changed = 0; + char **envi; + + if (last_env_changed != __environ_changed) + { + last_env_changed = __environ_changed; + set_hash_env(); + } - if (environ == 0) + envi = hash_env[(unsigned char)*name % BUCKETS]; + if (envi == 0) return 0; - for (i=0; environ[i]; i++) + envi++; /* envi[0] is reserved */ + for ( ; *envi; envi++) { - char *ep = environ[i]; + char *ep = *envi; const char *np = name; while (*ep && *np && *ep == *np && *np != '=') ep++, np++; @@ -22,4 +43,61 @@ getenv(const char *name) return ep+1; } return 0; +} + + +void +set_hash_env(void) +{ + char **env = environ; + int size[BUCKETS]; + int i; + int alloc; + + for (i=0; i < BUCKETS; i++) + size[i] = 0; + while (*env) + { + i = (unsigned char)**env % BUCKETS; + size[i]++; + env++; + } + + /* (re)allocation: to avoid too many small realloc's + * we reserve hash_env[i][0] for allocated size info, using a cast. + * Do not forget it in this file to avoid off-by-one error. + */ + for (i=0; i < BUCKETS; i++) + { + if (size[i]) + { + if (!hash_env[i] || (int)hash_env[i][0] < size[i] + 2) + { + alloc = 2 * size[i] + 2; /* x2 step */ + hash_env[i] = (char **)realloc(hash_env[i], (alloc) * sizeof(char *)); + hash_env[i][0] = (char *)alloc; + hash_env[i][size[i]+1] = 0; + } + else + hash_env[i][size[i]+1] = 0; + } + else + { + if (hash_env[i]) + { + free(hash_env[i]); + hash_env[i] = 0; + } + } + } + /* (re)allocation done */ + + env = environ; + while (*env) + { + i = (unsigned char)**env % BUCKETS; + hash_env[i][size[i]] = *env; + size[i]--; + env++; + } }