/*
 * Archived mailing-list post; the original message headers follow.
 *
 * To: mlist-djgpp AT nntp-server DOT caltech DOT edu
 * Path: rollins
 * From: rollins AT grue DOT caltech DOT edu (hr)
 * Newsgroups: mlist.djgpp
 * Subject: [bug] ebp optimization problem
 * Date: 30 Apr 1995 19:31:56 GMT
 * Organization: California Institute of Technology, Pasadena
 * Lines: 119
 * Nntp-Posting-Host: grue.caltech.edu
 */

#if 0

hi,

I think I found an optimization bug in djgpp.  I compiled with:

    gcc -m486 -O6 -ffast-math -fstrength-reduce -funroll-loops
        -fomit-frame-pointer -finline-functions
        -fexpensive-optimizations -Wall

Using -fomit-frame-pointer caused ebp to be used for indexing an array
in the data segment; however, no ds: override was produced:

        .data
_shifts2:
        .byte 0
        .byte 0
        .byte 1
        .byte 1
        .byte 1
        .byte 1
        .byte 1
        .byte 1
        .byte 0
        .byte 1
        .byte 1
        .byte 1
        .byte 1
        .byte 1
        .byte 1
        .byte 0
.text
        .align 4
.globl _des_set_key
_des_set_key:
        ...
L5:
        cmpb $0,_shifts2(%ebp)  ; should have ds: override!!!!
        je L6
        ...

This causes a stack fault under DPMI but does not crash otherwise.
The C code to produce this follows.

-hr

#endif

/*
 * NOTE(review): the header name of this #include was lost in the archived
 * copy (the angle-bracket text was stripped).  <stdio.h> is a placeholder;
 * nothing below depends on any declaration from it.
 */
#include <stdio.h>

/* Lookup tables used by the key schedule; defined in another file. */
extern unsigned long skb[8][64];

/*
 * PERM_OP: exchange the bits selected by mask m between (a >> n) and b,
 * using t as scratch.  a, b and t must be modifiable lvalues; each argument
 * is evaluated more than once.
 */
#define PERM_OP(a,b,t,n,m) ((t)=((((a)>>(n))^(b))&(m)),\
    (b)^=(t),\
    (a)^=((t)<<(n)))

/*
 * HPERM_OP: exchange the bits of a selected by mask m with the bits
 * (16-n) positions below them, using t as scratch.  a and t must be
 * modifiable lvalues; each argument is evaluated more than once.
 */
#define HPERM_OP(a,t,n,m) ((t)=((((a)<<(16-(n)))^(a))&(m)),\
    (a)=(a)^(t)^((t)>>(16-(n))))

/*
 * Per-round rotation selector: non-zero means rotate by 2, zero by 1.
 * Deliberately left non-const so that it is placed in .data, which is
 * where the reported mis-addressed access (_shifts2(%ebp)) occurs.
 */
static char shifts2[16]={0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0};

/*
 * des_set_key - reduced test case for the compiler bug described above.
 *
 * key:      stored into a local pointer but never dereferenced here.
 * schedule: bytes schedule[0] and schedule[1] seed the two working
 *           registers; the buffer is then overwritten, through an
 *           unsigned long pointer, with 32 words (2 per round, 16 rounds).
 *           It must therefore be large enough for 32 unsigned longs and
 *           suitably aligned for that type.
 *
 * Always returns 0.
 */
int des_set_key(char *key, char *schedule)
{
    register unsigned long c,d,t,s;
    register unsigned char *in;
    register unsigned long *k;
    register int i;

    k=(unsigned long *)schedule;
    in=(unsigned char *)key;
    (void)in;   /* assigned but not read in this reduced test case */

    /* Seeds are read before the loop starts storing through k. */
    c = schedule[0];
    d = schedule[1];

    /* I now do it in 47 simple operations :-)
     * [one or more lines of this comment were lost in the archived copy]
     * for the inspiration. :-) */
    PERM_OP (d,c,t,4,0x0f0f0f0f);
    HPERM_OP(c,t,-2,0xcccc0000);    /* n = -2, so the shift count is 18 */
    HPERM_OP(d,t,-2,0xcccc0000);
    PERM_OP (d,c,t,1,0x55555555);
    PERM_OP (c,d,t,8,0x00ff00ff);
    PERM_OP (d,c,t,1,0x55555555);
    d=  (((d&0x000000ff)<<16)| (d&0x0000ff00) |
         ((d&0x00ff0000)>>16)|((c&0xf0000000)>>4));
    c&=0x0fffffff;

    for (i=0; i<16; i++)
    {
        /* Rotate the two 28-bit halves right by 2 or by 1. */
        if (shifts2[i])
            { c=((c>>2)|(c<<26)); d=((d>>2)|(d<<26)); }
        else
            { c=((c>>1)|(c<<27)); d=((d>>1)|(d<<27)); }
        c&=0x0fffffff;
        d&=0x0fffffff;

        /* could be a few less shifts but I am too lazy at this
         * point in time to investigate */
        s=  skb[0][ (c    )&0x3f                ]|
            skb[1][((c>> 6)&0x03)|((c>> 7)&0x3c)]|
            skb[2][((c>>13)&0x0f)|((c>>14)&0x30)]|
            skb[3][((c>>20)&0x01)|((c>>21)&0x06) |
                                  ((c>>22)&0x38)];
        t=  skb[4][ (d    )&0x3f                ]|
            skb[5][((d>> 7)&0x03)|((d>> 8)&0x3c)]|
            skb[6][ (d>>15)&0x3f                ]|
            skb[7][((d>>21)&0x0f)|((d>>22)&0x30)];

        /* table contained 0213 4657 */
        /* First word: low 16 bits of s, with the low 16 bits of t above. */
        *(k++)=((t<<16)|(s&0x0000ffff))&0xffffffff;

        /* Second word: high halves of s and t, rotated left by 4. */
        s=     ((s>>16)|(t&0xffff0000));
        s=(s<<4)|(s>>28);
        *(k++)=s&0xffffffff;
    }
    return(0);
}