delorie.com/archives/browse.cgi   search  
Mail Archives: djgpp/1997/01/23/12:09:31

Date: Thu, 23 Jan 1997 18:58:13 +0200 (IST)
From: Eli Zaretskii <eliz AT is DOT elta DOT co DOT il>
To: j DOT potman AT student DOT utwente DOT nl
cc: djgpp AT delorie DOT com
Subject: Re: program with profiling info runs slower with DJGPP 2.01
In-Reply-To: <73048.s9404198@student.utwente.nl>
Message-ID: <Pine.SUN.3.91.970123184724.1293A-100000@is>
MIME-Version: 1.0

On Thu, 23 Jan 1997, Jordy Potman wrote:

> be compared at all and that version 2.01 seems to be faster. But I'm 
> still sure that the v2.01 version is a lot slower, because you notice 
> it when running both versions. The v2.01 version responds slower to the 
> keyboard commands and updates the screen at a lower frame rate.
> This only happens when I use the -pg switch with compiling and linking, 
> without profiling the v2.01 version runs fine.

There might be a reason for this which is totally unrelated to your 
program.  Recently, a bug was discovered in a library function which 
is only linked in when you compile with -pg.  This bug causes a call to 
another library function (`_mono_printf') which tries to write to the 
(probably absent) extra mono display.  On some configurations, this will 
just crash the program being profiled, but if you run on a machine where 
the relevant addresses aren't remapped by the memory manager (to make 
UMBs available), then the program might run.  However, I can imagine a 
configuration where this bug slows down the program instead of crashing 
it.  Since this bug was introduced with v2.01, it might be the cause of 
what you see.

I attach below a corrected source for the library function in question.  
Compile it, put it into your libc.a and see if that helps.  Here's what 
you should do:

	gcc -Wall -c -O3 mcount.c
	ar rvs c:/djgpp/lib/libc.a mcount.o

(change the pathname of libc.a as appropriate for your installation).  
Then relink your program with -pg and try again.

----------------------------- mcount.c ---------------------------------
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */
#include <libc/stubs.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <signal.h>
#include <setjmp.h>
#include <sys/time.h>
#include <sys/exceptn.h>
#include <sys/mono.h>

/* header of a GPROF type file
**
** One instance (the file-scope `h') is filled in by _mcount_init() and
** written as the first record of "gmon.out" by mcount_write().
*/
typedef struct {
  /* lowest profiled address; set to START */
  long low;
  /* upper bound of the profiled range; set to the address of etext */
  long high;
  /* sizeof(header) plus the size of the histogram in bytes */
  long nbytes;
} header;

/* entry of a GPROF type file
**
** One call arc as recorded by mcount().  Entries are written to
** "gmon.out" exactly as they are laid out in memory, so the field
** order and types are part of the file format.
*/
typedef struct {
    /* address in the caller; 0 marks an unused slot (end of a table) */
    unsigned long from;
    /* address computed by mcount() for the called function */
    unsigned long to;
    /* number of times this from/to pair has been seen */
    unsigned long count;
} MTABE;

/* internal form - sizeof(MTAB) is 4096 for efficiency
**
** A page of call-arc entries.  Pages are chained through `prev', with
** the most recently allocated page at the head (the file-scope `mtab').
** 341 entries of 12 bytes plus one 4-byte pointer gives 4096; this
** assumes 4-byte longs and pointers, as on the 32-bit DJGPP target.
*/
typedef struct MTAB {
  /* slots are filled from index 0 upwards; the first slot whose
     `from' is 0 marks the end of the used part */
  MTABE calls[341];
  /* previously allocated (full) page, or 0 for the oldest page */
  struct MTAB *prev;
} MTAB;

/* gmon.out header; filled in by _mcount_init(), written by mcount_write() */
static header h;
/* PC-sampling histogram: one short counter per 4 bytes of code
   between START and etext; allocated by _mcount_init() */
static short *histogram;
/* Non-zero while the SIGPROF handler must not record samples (before
   initialization, inside mcount() and while writing the output file).
   It is written by normal code and read asynchronously from the signal
   handler, so it must be volatile: otherwise an optimizing compiler may
   drop the "mcount_skip = 1" store in mcount() as dead, since it is
   always followed by "mcount_skip = 0" on every return path. */
static volatile int mcount_skip = 1;
/* size of the histogram in bytes */
static int histlen;
/* head of the chain of call-arc pages (most recent page first) */
static MTAB *mtab=0;

/* linker-provided symbol whose address marks the end of the text section */
extern int etext;

/* called by functions.  Use the pointer it provides to cache
** the last used MTABE, so that repeated calls to/from the same
** pair works quickly - no lookup.
**
** Records one call arc (from -> to) in the chain of MTAB pages,
** creating a new entry with count 1 or bumping an existing entry.
**
** The declared argument `_to' is never read as a value; only its
** address is used, as a fixed point from which neighbouring stack
** words are read.  The function additionally expects %edx to hold a
** pointer to an MTABE* cache slot on entry (presumably one slot per
** instrumented function, set up by the -pg prologue - TODO confirm).
**
** mcount_skip is set for the duration of the call so that
** mcount_tick() does not record samples taken inside this function.
**
** NOTE(review): this code depends on the exact i386 stack frame
** layout and on %edx not being clobbered before the asm statement;
** do not reorder statements here without checking the generated code.
*/
void mcount(int _to);
void mcount(int _to)
{
  MTAB *m;
  int i;
  int to;
  int ebp;
  int from;
  int mtabi;
  MTABE **cache;

  /* sanity check: the argument lives on the stack, so its address
     should never be below the end of the text section; if it is,
     deliberately write through an invalid pointer to force a crash */
  if (&_to < &etext)
    *(int *)(-1) = 0; /* fault! */

  mcount_skip = 1;
  asm("movl %%edx,%0" : "=g" (cache)); /* obtain the cached pointer */
  /* the word just below the argument is the return address of the call
     to mcount, i.e. an address inside the profiled function; 12 is
     subtracted to get back towards that function's start (presumably
     the length of the profiling prologue - TODO confirm) */
  to = *((&_to)-1) - 12;
  ebp = *((&_to)-2); /* glean the caller's return address from the stack */
  /* word 1 of the frame that `ebp' points to: the return address
     stored there, used as the `from' address of the arc */
  from = ((int *)ebp)[1];
  if (*cache && ((*cache)->from == from) && ((*cache)->to == to))
  {
    /* cache paid off - works quickly */
    (*cache)->count++;
    mcount_skip = 0;
    return;
  }

  /* no cache hit - search all mtab tables for a match, or an empty slot */
  /* Only the head page can contain an empty slot (older pages were full
     when the next one was allocated), so mtabi, when set, is an index
     into `mtab' itself.  The search still walks the older pages because
     the matching entry may live in one of them. */
  mtabi = -1;
  for (m=mtab; m; m=m->prev)
  {
    for (i=0; i<341; i++)
    {
      if (m->calls[i].from == 0)
      {
        /* empty slot - end of table */
        mtabi = i;
        break;
      }
      if ((m->calls[i].from == from) &&
          (m->calls[i].to == to))
        {
          /* found a match - bump count and return */
          m->calls[i].count ++;
          *cache = m->calls + i;
          mcount_skip = 0;
          return;
        }
    }
  }
  if (mtabi != -1)
  {
    /* found an empty - fill it in */
    mtab->calls[mtabi].from = from;
    mtab->calls[mtabi].to = to;
    mtab->calls[mtabi].count = 1;
    *cache = mtab->calls + mtabi;
    mcount_skip = 0;
    return;
  }
  /* lob off another page of memory and initialize the new table */
  /* NOTE(review): the result of sbrk() is not checked; if it can fail
     here (returning -1), the memset below writes through an invalid
     pointer - confirm whether that is acceptable */
  m = (MTAB *)sbrk(sizeof(MTAB));
  memset(m, 0, sizeof(MTAB));
  m->prev = mtab;
  mtab = m;
  m->calls[0].from = from;
  m->calls[0].to = to;
  m->calls[0].count = 1;
  *cache = m->calls;
  mcount_skip = 0;
}

/* this is called during program exit (installed by atexit). */
static void
mcount_write(void)
{
  MTAB *m;
  int i, f;
  struct itimerval new_values;

  mcount_skip = 1;

  /* disable timer */
  new_values.it_value.tv_usec = new_values.it_interval.tv_usec = 0;   
  new_values.it_value.tv_sec = new_values.it_interval.tv_sec = 0;
  setitimer(ITIMER_PROF, &new_values, NULL);

  f = open("gmon.out", O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0666);
  write(f, &h, sizeof(header));
  write(f, histogram, histlen);
  for (m=mtab; m; m=m->prev)
  {
    for (i=0; i<341; i++)
      if (m->calls[i].from == 0)
        break;
    write(f, m->calls, i*12);
  }
  close(f);
}

/* `start' is bound to the assembler-level symbol "start"; its address is
   used as the lowest profiled address (presumably the program's entry
   point at the beginning of the text section - TODO confirm). */
extern unsigned start __asm__ ("start");
/* Address of `start' as an unsigned integer.  The expansion is fully
   parenthesized so it is safe inside any larger expression. */
#define START ((unsigned)&start)
/* linker-provided symbol whose address marks the end of the text section */
extern int etext;

/* ARGSUSED */
static void
mcount_tick(int _x)
{
  unsigned bin;
  
  if(!mcount_skip) {
    bin = __djgpp_exception_state->__eip;
    if(bin >= START && bin <= (unsigned)&etext) {
      bin = (bin - START) / 4;	/* 4 EIP's per bin */
      histogram[bin]++;
    }
  }
}

/* this is called to initialize profiling before the program starts
**
** Fills in the gmon.out header, allocates and clears the PC histogram
** (one short per 4 bytes of code between START and etext), registers
** mcount_write() to run at exit, installs mcount_tick() as the SIGPROF
** handler, starts the profiling interval timer and finally enables
** sampling by clearing mcount_skip.
**
** If the histogram cannot be allocated, profiling is left disabled:
** no exit handler, signal handler or timer is installed.
*/
void _mcount_init(void);
void
_mcount_init(void)
{
  struct itimerval new_values;

  h.low = START;
  h.high = (int)&etext;
  histlen = (h.high-h.low)/4*sizeof(short);
  h.nbytes = sizeof(header) + histlen;
  histogram = (short *)sbrk(histlen);
  if (histogram == (short *)-1)
  {
    /* sbrk reports failure with -1, not a null pointer; don't let
       memset or the tick handler write through it */
    histogram = 0;
    histlen = 0;
    return;
  }
  memset(histogram, 0, histlen);
  atexit(mcount_write);

  /* here, do whatever it takes to initialize the timer interrupt */
  signal(SIGPROF, mcount_tick);

  /* 18.2 tics per second */
  /* NOTE(review): 5494 usec is roughly 182 ticks per second; a period
     of 1/18.2 s would be 54945 usec.  The value is kept as is - confirm
     which rate is intended and how setitimer rounds it. */
  new_values.it_value.tv_usec = new_values.it_interval.tv_usec = 5494;
  new_values.it_value.tv_sec = new_values.it_interval.tv_sec = 0;

  setitimer(ITIMER_PROF, &new_values, NULL);

  /* everything is set up - let mcount_tick() start recording */
  mcount_skip = 0;
}

- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019