Date: Tue, 27 Apr 1999 12:49:09 -0700 Message-Id: <199904271949.MAA13292@hermes.astro.washington.edu> From: Tom Quinn To: pgcc AT delorie DOT com Subject: stack misalignment in pgcc-1.1.3 Reply-To: pgcc AT delorie DOT com I've reported this before for earlier versions of pgcc, but it is still a problem with pgcc-1.1.3: The following program, when compiled with the double stack alignment flags, prints out the address of a double on the stack which is misaligned. gcc -v gives: Reading specs from /usr/local/lib/gcc-lib/i686-pc-linux-gnu/pgcc-2.91.66/specs gcc version pgcc-2.91.66 19990314 (egcs-1.1.2 release) This is on a Xeon running Redhat 5.2, Linux 2.2.3 and glibc 2.0.7. The compilation line is: gcc -v -Wall -mpentium -malign-double -mstack-align-double -O3 -g main.c -o stack -lm The result of running the program is: % ./stack msrInit: bffff7e4 showing that although the stack starts out aligned in main(), the double "tst" in msrInitialize() is not 8-byte aligned. This misalignment causes a factor of 2 (!) performance drop in our floating-point loops. 
-- Tom Quinn Astronomy, University of Washington Internet: trq AT astro DOT washington DOT edu Phone: 206-685-9009 Here is the program run through "gcc -E": ------------------------------------------------------------------------ typedef unsigned char __u_char; typedef unsigned short __u_short; typedef unsigned int __u_int; typedef unsigned long __u_long; typedef unsigned long long int __u_quad_t; typedef long long int __quad_t; typedef __quad_t *__qaddr_t; typedef __u_quad_t __dev_t; typedef __u_int __uid_t; typedef __u_int __gid_t; typedef __u_long __ino_t; typedef __u_int __mode_t; typedef __u_int __nlink_t; typedef long int __off_t; typedef __quad_t __loff_t; typedef int __pid_t; typedef int __ssize_t; typedef struct { int __val[2]; } __fsid_t; typedef int __daddr_t; typedef char *__caddr_t; typedef long int __time_t; typedef long int __swblk_t; typedef long int __clock_t; typedef unsigned long int __fd_mask; typedef struct { __fd_mask fds_bits[1024 / (8 * sizeof (__fd_mask)) ]; } __fd_set; typedef int __key_t; typedef unsigned short int __ipc_pid_t; typedef unsigned int size_t; typedef unsigned int wint_t; typedef int _G_int16_t __attribute__ ((__mode__ (__HI__))); typedef int _G_int32_t __attribute__ ((__mode__ (__SI__))); typedef unsigned int _G_uint16_t __attribute__ ((__mode__ (__HI__))); typedef unsigned int _G_uint32_t __attribute__ ((__mode__ (__SI__))); typedef void *__gnuc_va_list; struct _IO_jump_t; struct _IO_FILE; typedef void _IO_lock_t; struct _IO_marker { struct _IO_marker *_next; struct _IO_FILE *_sbuf; int _pos; }; struct _IO_FILE { int _flags; char* _IO_read_ptr; char* _IO_read_end; char* _IO_read_base; char* _IO_write_base; char* _IO_write_ptr; char* _IO_write_end; char* _IO_buf_base; char* _IO_buf_end; char *_IO_save_base; char *_IO_backup_base; char *_IO_save_end; struct _IO_marker *_markers; struct _IO_FILE *_chain; int _fileno; int _blksize; __off_t _offset; unsigned short _cur_column; char _unused; char _shortbuf[1]; _IO_lock_t *_lock; 
}; typedef struct _IO_FILE _IO_FILE; struct _IO_FILE_plus; extern struct _IO_FILE_plus _IO_stdin_, _IO_stdout_, _IO_stderr_; typedef struct { __ssize_t (*read) (struct _IO_FILE *, void *, __ssize_t ) ; __ssize_t (*write) (struct _IO_FILE *, const void *, __ssize_t ) ; __off_t (*seek) (struct _IO_FILE *, __off_t , int) ; int (*close) (struct _IO_FILE *) ; } _IO_cookie_io_functions_t; struct _IO_cookie_file { struct _IO_FILE file; const void *vtable; void *cookie; _IO_cookie_io_functions_t io_functions; }; extern int __underflow (_IO_FILE*) ; extern int __uflow (_IO_FILE*) ; extern int __overflow (_IO_FILE*, int) ; extern int _IO_getc (_IO_FILE *__fp) ; extern int _IO_putc (int __c, _IO_FILE *__fp) ; extern int _IO_feof (_IO_FILE *__fp) ; extern int _IO_ferror (_IO_FILE *__fp) ; extern int _IO_peekc_locked (_IO_FILE *__fp) ; extern void _IO_flockfile (_IO_FILE *) ; extern void _IO_funlockfile (_IO_FILE *) ; extern int _IO_ftrylockfile (_IO_FILE *) ; extern int _IO_vfscanf (_IO_FILE*, const char*, __gnuc_va_list , int*) ; extern int _IO_vfprintf (_IO_FILE*, const char*, __gnuc_va_list ) ; extern __ssize_t _IO_padn (_IO_FILE *, int, __ssize_t ) ; extern size_t _IO_sgetn (_IO_FILE *, void*, size_t ) ; extern __off_t _IO_seekoff (_IO_FILE*, __off_t , int, int) ; extern __off_t _IO_seekpos (_IO_FILE*, __off_t , int) ; extern void _IO_free_backup_area (_IO_FILE*) ; typedef struct _IO_FILE FILE; typedef __off_t fpos_t; extern FILE *stdin, *stdout, *stderr; extern void clearerr (FILE*) ; extern int fclose (FILE*) ; extern int feof (FILE*) ; extern int ferror (FILE*) ; extern int fflush (FILE*) ; extern int fgetc (FILE *) ; extern int fgetpos (FILE* fp, fpos_t *__pos) ; extern char* fgets (char*, int, FILE*) ; extern FILE* fopen (__const char*, __const char*) ; extern FILE* fopencookie (void *__cookie, __const char *__mode, _IO_cookie_io_functions_t __io_functions) ; extern int fprintf (FILE*, __const char* __format, ...) 
; extern int fputc (int, FILE*) ; extern int fputs (__const char *__str, FILE *__fp) ; extern size_t fread (void*, size_t, size_t, FILE*) ; extern FILE* freopen (__const char*, __const char*, FILE*) ; extern int fscanf (FILE *__fp, __const char* __format, ...) ; extern int fseek (FILE* __fp, long int __offset, int __whence) ; extern int fsetpos (FILE* __fp, __const fpos_t *__pos) ; extern long int ftell (FILE* __fp) ; extern size_t fwrite (__const void*, size_t, size_t, FILE*) ; extern int getc (FILE *) ; extern int getchar (void) ; extern char* gets (char*) ; extern void perror (__const char *) ; extern int printf (__const char* __format, ...) ; extern int putc (int, FILE *) ; extern int putchar (int) ; extern int puts (__const char *__str) ; extern int remove (__const char*) ; extern int rename (__const char* __old, __const char* __new) ; extern void rewind (FILE*) ; extern int scanf (__const char* format, ...) ; extern void setbuf (FILE*, char*) ; extern void setlinebuf (FILE*) ; extern void setbuffer (FILE*, char*, int) ; extern int setvbuf (FILE*, char*, int __mode, size_t __size) ; extern int sprintf (char*, __const char* __format, ...) ; extern int sscanf (__const char* string, __const char* __format, ...) ; extern FILE* tmpfile (void) ; extern char* tmpnam (char*) ; extern char* tmpnam_r (char*) ; extern char *tempnam (__const char *__dir, __const char *__pfx) ; extern char *__stdio_gen_tempname (char *__buf, size_t bufsize, __const char *dir, __const char *pfx, int dir_search, size_t *lenptr, FILE **streamptr) ; extern int ungetc (int c, FILE* fp) ; extern int vfprintf (FILE *fp, char __const *fmt0, __gnuc_va_list ) ; extern int vprintf (char __const *fmt, __gnuc_va_list ) ; extern int vsprintf (char* string, __const char* format, __gnuc_va_list ) ; extern void __libc_fatal (__const char *__message) __attribute__ ((__noreturn__)); extern int dprintf (int, __const char *, ...) 
; extern int vdprintf (int, __const char *, __gnuc_va_list ) ; extern int vfscanf (FILE*, __const char *, __gnuc_va_list ) ; extern int __vfscanf (FILE*, __const char *, __gnuc_va_list ) ; extern int vscanf (__const char *, __gnuc_va_list ) ; extern int vsscanf (__const char *, __const char *, __gnuc_va_list ) ; extern int __vsscanf (__const char *, __const char *, __gnuc_va_list ) ; extern FILE *fdopen (int, __const char *) ; extern int fileno (FILE*) ; extern FILE* popen (__const char*, __const char*) ; extern int pclose (FILE*) ; extern char *ctermid (char *__buf) ; extern char *cuserid (char * __buf) ; extern int snprintf (char *, size_t, __const char *, ...) ; extern int __snprintf (char *, size_t, __const char *, ...) ; extern int vsnprintf (char *, size_t, __const char *, __gnuc_va_list ) ; extern int __vsnprintf (char *, size_t, __const char *, __gnuc_va_list ) ; extern int __underflow (struct _IO_FILE*) ; extern int __overflow (struct _IO_FILE*, int) ; extern int sys_nerr; extern const char *const sys_errlist[]; extern void clearerr_locked (FILE *) ; extern void clearerr_unlocked (FILE *) ; extern int feof_locked (FILE *) ; extern int feof_unlocked (FILE *) ; extern int ferror_locked (FILE*) ; extern int ferror_unlocked (FILE*) ; extern int fileno_locked (FILE *) ; extern int fileno_unlocked (FILE *) ; extern int fclose_unlocked (FILE *) ; extern int fflush_locked (FILE *) ; extern int fflush_unlocked (FILE *) ; extern size_t fread_unlocked (void *, size_t, size_t, FILE *) ; extern size_t fwrite_unlocked (const void *, size_t, size_t, FILE *) ; extern int fputc_locked (int, FILE*) ; extern int fputc_unlocked (int, FILE*) ; extern int getc_locked (FILE *) ; extern int getchar_locked (void) ; extern int putc_locked (int, FILE *) ; extern int putchar_locked (int) ; extern void flockfile (FILE *) ; extern void funlockfile (FILE *) ; extern int ftrylockfile (FILE *) ; extern int getc_unlocked (FILE *) ; extern int getchar_unlocked (void) ; extern int 
putc_unlocked (int, FILE *) ; extern int putchar_unlocked (int) ; extern int signgam; typedef enum { _IEEE_ = -1, _SVID_, _XOPEN_, _POSIX_ } _LIB_VERSION_TYPE; extern _LIB_VERSION_TYPE _LIB_VERSION; struct exception { int type; char *name; double arg1; double arg2; double retval; }; extern int __matherr (struct exception *) ; extern int matherr (struct exception *) ; union __convert_long_double { unsigned __convert_long_double_i[4]; long double __convert_long_double_d; }; typedef long int wchar_t; typedef struct { int quot; int rem; } div_t; typedef struct { long int quot; long int rem; } ldiv_t; extern int __ctype_get_mb_cur_max (void) ; extern double atof (__const char *__nptr) ; extern int atoi (__const char *__nptr) ; extern long int atol (__const char *__nptr) ; extern long long int atoll (__const char *__nptr) ; extern double strtod (__const char *__nptr, char **__endptr) ; extern long int strtol (__const char *__nptr, char **__endptr, int __base) ; extern unsigned long int strtoul (__const char *__nptr, char **__endptr, int __base) ; extern long long int strtoq (__const char *__nptr, char **__endptr, int __base) ; extern unsigned long long int strtouq (__const char *__nptr, char **__endptr, int __base) ; extern long long int strtoll (__const char *__nptr, char **__endptr, int __base) ; extern unsigned long long int strtoull (__const char *__nptr, char **__endptr, int __base) ; extern double __strtod_internal (__const char *__nptr, char **__endptr, int __group) ; extern long double __strtold_internal (__const char *__nptr, char **__endptr, int __group) ; extern long int __strtol_internal (__const char *__nptr, char **__endptr, int __base, int __group) ; extern unsigned long int __strtoul_internal (__const char *__nptr, char **__endptr, int __base, int __group) ; extern long long int __strtoq_internal (__const char *__nptr, char **__endptr, int __base, int __group) ; extern unsigned long long int __strtouq_internal (__const char *__nptr, char **__endptr, int 
__base, int __group) ; extern char *l64a (long int __n) ; extern long int a64l (__const char *__s) ; typedef __u_char u_char; typedef __u_short u_short; typedef __u_int u_int; typedef __u_long u_long; typedef __quad_t quad_t; typedef __u_quad_t u_quad_t; typedef __fsid_t fsid_t; typedef __dev_t dev_t; typedef __gid_t gid_t; typedef __ino_t ino_t; typedef __mode_t mode_t; typedef __nlink_t nlink_t; typedef __off_t off_t; typedef __loff_t loff_t; typedef __pid_t pid_t; typedef __uid_t uid_t; typedef __ssize_t ssize_t; typedef __daddr_t daddr_t; typedef __caddr_t caddr_t; typedef __key_t key_t; typedef __time_t time_t; typedef unsigned long int ulong; typedef unsigned short int ushort; typedef unsigned int uint; typedef int int8_t __attribute__ ((__mode__ ( __QI__ ))) ; typedef unsigned int u_int8_t __attribute__ ((__mode__ ( __QI__ ))) ; typedef int int16_t __attribute__ ((__mode__ ( __HI__ ))) ; typedef unsigned int u_int16_t __attribute__ ((__mode__ ( __HI__ ))) ; typedef int int32_t __attribute__ ((__mode__ ( __SI__ ))) ; typedef unsigned int u_int32_t __attribute__ ((__mode__ ( __SI__ ))) ; typedef int int64_t __attribute__ ((__mode__ ( __DI__ ))) ; typedef unsigned int u_int64_t __attribute__ ((__mode__ ( __DI__ ))) ; typedef int register_t __attribute__ ((__mode__ (__word__))); struct timespec { long int tv_sec; long int tv_nsec; }; struct timeval; typedef __fd_mask fd_mask; typedef __fd_set fd_set; extern int __select (int __nfds, __fd_set *__readfds, __fd_set *__writefds, __fd_set *__exceptfds, struct timeval *__timeout) ; extern int select (int __nfds, __fd_set *__readfds, __fd_set *__writefds, __fd_set *__exceptfds, struct timeval *__timeout) ; extern int __pselect (int __nfds, __fd_set *__readfds, __fd_set *__writefds, __fd_set *__exceptfds, struct timespec *__timeout) ; extern int pselect (int __nfds, __fd_set *__readfds, __fd_set *__writefds, __fd_set *__exceptfds, struct timespec *__timeout) ; extern void * malloc (size_t __size) ; extern void * realloc 
(void * __ptr, size_t __size) ; extern void * calloc (size_t __nmemb, size_t __size) ; extern void free (void * __ptr) ; extern void cfree (void * __ptr) ; extern void * __alloca (size_t __size) ; extern void * alloca (size_t __size) ; extern void * valloc (size_t __size) ; extern void abort (void) __attribute__ ((__noreturn__)); extern int atexit (void (*__func) (void)) ; extern int __on_exit (void (*__func) (int __status, void * __arg), void * __arg) ; extern int on_exit (void (*__func) (int __status, void * __arg), void * __arg) ; extern void exit (int __status) __attribute__ ((__noreturn__)); extern char *getenv (__const char *__name) ; extern char *__secure_getenv (__const char *__name) ; extern int putenv (__const char *__string) ; extern int setenv (__const char *__name, __const char *__value, int __replace) ; extern void unsetenv (__const char *__name) ; extern int __clearenv (void) ; extern int clearenv (void) ; extern char *mktemp (char *__template) ; extern int mkstemp (char *__template) ; extern int system (__const char *__command) ; extern char *realpath (__const char *__name, char *__resolved) ; typedef int (*__compar_fn_t) (__const void * , __const void * ) ; extern void * bsearch (__const void * __key, __const void * __base, size_t __nmemb, size_t __size, __compar_fn_t __compar) ; extern void qsort (void * __base, size_t __nmemb, size_t __size, __compar_fn_t __compar) ; extern int abs (int __x) __attribute__ ((__const__)); extern long int labs (long int __x) __attribute__ ((__const__)); extern div_t div (int __numer, int __denom) __attribute__ ((__const__)); extern ldiv_t ldiv (long int __numer, long int __denom) __attribute__ ((__const__)); extern char *ecvt (double __value, int __ndigit, int *__decpt, int *__sign) ; extern char *fcvt (double __value, int __ndigit, int *__decpt, int *__sign) ; extern char *gcvt (double __value, int __ndigit, char *__buf) ; extern char *qecvt (long double __value, int __ndigit, int *__decpt, int *__sign) ; extern 
char *qfcvt (long double __value, int __ndigit, int *__decpt, int *__sign) ; extern char *qgcvt (long double __value, int __ndigit, char *__buf) ; extern int ecvt_r (double __value, int __ndigit, int *__decpt, int *__sign, char *__buf, size_t __len) ; extern int fcvt_r (double __value, int __ndigit, int *__decpt, int *__sign, char *__buf, size_t __len) ; extern int qecvt_r (long double __value, int __ndigit, int *__decpt, int *__sign, char *__buf, size_t __len) ; extern int qfcvt_r (long double __value, int __ndigit, int *__decpt, int *__sign, char *__buf, size_t __len) ; extern int mblen (__const char *__s, size_t __n) ; extern int mbtowc (wchar_t *__pwc, __const char *__s, size_t __n) ; extern int wctomb (char *__s, wchar_t __wchar) ; extern size_t mbstowcs (wchar_t *__pwcs, __const char *__s, size_t __n) ; extern size_t wcstombs (char *__s, __const wchar_t *__pwcs, size_t __n) ; extern int rpmatch (__const char *__response) ; extern void * memcpy (void * __dest, __const void * __src, size_t __n) ; extern void * memmove (void * __dest, __const void * __src, size_t __n) ; extern void * __memccpy (void * __dest, __const void * __src, int __c, size_t __n) ; extern void * memccpy (void * __dest, __const void * __src, int __c, size_t __n) ; extern void * memset (void * __s, int __c, size_t __n) ; extern int memcmp (__const void * __s1, __const void * __s2, size_t __n) ; extern void * memchr (__const void * __s, int __c, size_t __n) ; extern char *strcpy (char *__dest, __const char *__src) ; extern char *strncpy (char *__dest, __const char *__src, size_t __n) ; extern char *strcat (char *__dest, __const char *__src) ; extern char *strncat (char *__dest, __const char *__src, size_t __n) ; extern int strcmp (__const char *__s1, __const char *__s2) ; extern int strncmp (__const char *__s1, __const char *__s2, size_t __n) ; extern int strcoll (__const char *__s1, __const char *__s2) ; extern size_t strxfrm (char *__dest, __const char *__src, size_t __n) ; extern char 
*__strdup (__const char *__s) ; extern char *strdup (__const char *__s) ; extern char *__strndup (__const char *__string, size_t __n) ; extern char *strchr (__const char *__s, int __c) ; extern char *strrchr (__const char *__s, int __c) ; extern size_t strcspn (__const char *__s, __const char *__reject) ; extern size_t strspn (__const char *__s, __const char *__accept) ; extern char *strpbrk (__const char *__s, __const char *__accept) ; extern char *strstr (__const char *__haystack, __const char *__needle) ; extern char *strtok (char *__s, __const char *__delim) ; extern char *strtok_r (char *__s, __const char *__delim, char **__save_ptr) ; extern size_t strlen (__const char *__s) ; extern char *strerror (int __errnum) ; extern char *__strerror_r (int __errnum, char *__buf, size_t __buflen) ; extern char *strerror_r (int __errnum, char *__buf, size_t __buflen) ; extern void bcopy (__const void * __src, void * __dest, size_t __n) ; extern void bzero (void * __s, size_t __n) ; extern int bcmp (__const void * __s1, __const void * __s2, size_t __n) ; extern char *index (__const char *__s, int __c) ; extern char *rindex (__const char *__s, int __c) ; extern int ffs (int __i) ; extern int __strcasecmp (__const char *__s1, __const char *__s2) ; extern int strcasecmp (__const char *__s1, __const char *__s2) ; extern int __strncasecmp (__const char *__s1, __const char *__s2, size_t __n) ; extern int strncasecmp (__const char *__s1, __const char *__s2, size_t __n) ; extern char *__strsep (char **__stringp, __const char *__delim) ; extern char *strsep (char **__stringp, __const char *__delim) ; extern char *basename (__const char *__filename) ; typedef __clock_t clock_t; struct tm { int tm_sec; int tm_min; int tm_hour; int tm_mday; int tm_mon; int tm_year; int tm_wday; int tm_yday; int tm_isdst; long int tm_gmtoff; __const char *tm_zone; }; extern clock_t clock (void) ; extern time_t time (time_t *__timer) ; extern double difftime (time_t __time1, time_t __time0) 
__attribute__ ((__const__)); extern time_t mktime (struct tm *__tp) ; extern time_t __mktime_internal (struct tm *__tp, struct tm *(*__func) (const time_t *, struct tm *), time_t *__offset) ; extern size_t strftime (char *__s, size_t __maxsize, __const char *__format, __const struct tm *__tp) ; extern struct tm *gmtime (__const time_t *__timer) ; extern struct tm *localtime (__const time_t *__timer) ; extern struct tm *__gmtime_r (__const time_t *__timer, struct tm *__tp) ; extern struct tm *gmtime_r (__const time_t *__timer, struct tm *__tp) ; extern struct tm *__localtime_r (__const time_t *__timer, struct tm *__tp) ; extern struct tm *localtime_r (__const time_t *__timer, struct tm *__tp) ; extern int __offtime (__const time_t *__timer, long int __offset, struct tm *__TP) ; extern char *asctime (__const struct tm *__tp) ; extern char *ctime (__const time_t *__timer) ; extern char *__asctime_r (__const struct tm *__tp, char *__buf) ; extern char *asctime_r (__const struct tm *__tp, char *__buf) ; extern char *ctime_r (__const time_t *__timer, char *__buf) ; extern char *__tzname[2]; extern int __daylight; extern long int __timezone; extern void __tzset (void) ; extern char *tzname[2]; extern long int __tzname_max (void) ; extern void tzset (void) ; extern int daylight; extern long int timezone; extern int stime (__const time_t *__when) ; extern time_t timegm (struct tm *__tp) ; extern time_t timelocal (struct tm *__tp) ; extern int dysize (int __year) ; extern int __nanosleep (__const struct timespec *__requested_time, struct timespec *__remaining) ; extern int nanosleep (__const struct timespec *__requested_time, struct timespec *__remaining) ; extern void __assert_fail (__const char *__assertion, __const char *__file, unsigned int __line, __const char *__function) __attribute__ ((__noreturn__)); extern void __assert_perror_fail (int __errnum, __const char *__file, unsigned int __line, __const char *__function) __attribute__ ((__noreturn__)); typedef struct 
mdlContext { int nThreads; } * MDL; /* completes the `typedef struct` begun at the end of the previous line: MDL is a pointer to struct mdlContext */
/* Prototype for mdlInitialize(), which is defined at the end of this listing. */
int mdlInitialize(MDL *,char **,void (*)(MDL));
/* MSR is a pointer to struct msrContext, whose only member is an MDL handle. */
typedef struct msrContext { MDL mdl; } * MSR;
/* Prototype for msrInitialize(), which is defined after main(). */
void msrInitialize(MSR *,MDL,int,char **);
/* Empty callback. main() passes it to mdlInitialize(), which never calls it. */
void main_ch(MDL mdl) { }
/*
 * Test driver for the stack-alignment report.
 *
 * Prints "main: Stack misaligned" to stderr when the address of the local
 * double dTime is not a multiple of 8, then calls mdlInitialize() and
 * msrInitialize() and returns 0.
 *
 * NOTE(review): this looks like a deliberately reduced reproduction; the
 * set and order of locals (including the otherwise unused dTime and lStart)
 * presumably influence the stack frame being demonstrated, so they should
 * not be tidied up.
 */
int main(int argc,char **argv) {
    MDL mdl;
    MSR msr;
    double dTime; /* never assigned; only its address is inspected */
    long lStart; /* written below, never read */
    /* Low three bits of the address are zero iff dTime is 8-byte aligned.
     * NOTE(review): the (int) cast of a pointer assumes pointers fit in an
     * int, which holds for the 32-bit target in the report. */
    if((((int) &dTime) & 7) != 0) {
        fprintf(((_IO_FILE*)(&_IO_stderr_)) , "main: Stack misaligned\n");
    }
    /* A null buffer argument makes stdout unbuffered. */
    setbuf(((_IO_FILE*)(&_IO_stdout_)) ,(char *) ((void *)0) );
    lStart=time(0);
    mdlInitialize(&mdl,argv,main_ch);
    msrInitialize(&msr,mdl,argc,argv);
    return 0;
}
/*
 * Prints the address of the local double tst to stderr as "msrInit: <hex>",
 * then allocates a struct msrContext, asserts that the allocation succeeded,
 * and stores the pointer in *pmsr.
 *
 * pmsr  receives the newly allocated context.
 * mdl, argc, argv  are accepted but not used in this function.
 *
 * The allocated context's mdl member is left uninitialized.
 *
 * NOTE(review): "%x" is given a double * argument, which does not match the
 * conversion; "%p" with a (void *) cast would be the portable form. It is
 * left as is because the output quoted in the report ("msrInit: bffff7e4")
 * comes from this exact format.
 */
void msrInitialize(MSR *pmsr,MDL mdl,int argc,char **argv) {
    MSR msr;
    double tst; /* the stack double whose alignment is being reported */
    fprintf(((_IO_FILE*)(&_IO_stderr_)) , "msrInit: %x\n", &tst);
    msr = (MSR)malloc(sizeof(struct msrContext));
    /* Expanded assert(): calls __assert_fail() when msr is null. */
    ((void) (( msr != ((void *)0) ) ? 0 : (__assert_fail ("msr != ((void *)0)" , "save.c", 50, __PRETTY_FUNCTION__ ), 0))) ;
    *pmsr = msr;
}
/*
 * Sets *pmdl to a null pointer, allocates a struct mdlContext, asserts that
 * the allocation succeeded, and returns 1.
 *
 * pmdl  is set to null and not written again.
 * argv, fcnChild  are accepted but not used in this function.
 *
 * NOTE(review): the allocated context is never stored in *pmdl or freed, so
 * the caller's handle stays null; presumably an artifact of reducing the
 * original program to a minimal test case -- confirm before reusing.
 */
int mdlInitialize(MDL *pmdl,char **argv,void (*fcnChild)(MDL)) {
    MDL mdl;
    *pmdl = ((void *)0) ;
    mdl = malloc(sizeof(struct mdlContext));
    /* Expanded assert(): calls __assert_fail() when mdl is null. */
    ((void) (( mdl != ((void *)0) ) ? 0 : (__assert_fail ("mdl != ((void *)0)" , "save.c", 60, __PRETTY_FUNCTION__ ), 0))) ;
    return(1);
}