delorie.com/archives/browse.cgi   search  
Mail Archives: djgpp/2000/04/12/13:10:05

From: "Alexei A. Frounze" <alex DOT fru AT mtu-net DOT ru>
Newsgroups: comp.os.msdos.djgpp
Subject: Re: inefficiency of GCC output code & -O problem
Date: Wed, 12 Apr 2000 19:46:13 +0400
Organization: MTU-Intel ISP
Lines: 730
Message-ID: <38F49A45.13F0AB1@mtu-net.ru>
References: <38F20E7A DOT 3330E9A4 AT mtu-net DOT ru> <38F23A21 DOT A59621A1 AT inti DOT gov DOT ar>
NNTP-Posting-Host: ppp98-177.dialup.mtu-net.ru
Mime-Version: 1.0
X-Trace: gavrilo.mtu.ru 955554391 54168 212.188.98.177 (12 Apr 2000 15:46:31 GMT)
X-Complaints-To: usenet-abuse AT mtu DOT ru
NNTP-Posting-Date: 12 Apr 2000 15:46:31 GMT
X-Mailer: Mozilla 4.61 [en] (Win95; I)
X-Accept-Language: en,ru
To: djgpp AT delorie DOT com
DJ-Gateway: from newsgroup comp.os.msdos.djgpp
Reply-To: djgpp AT delorie DOT com

This is a multi-part message in MIME format.
--------------6DF912F1089F64E61B72F12B
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Here is a part of my project. I simply removed as much code as I could so that
what is left stands alone.

Well, it still doesn't compile with the -O2 switch, although it's okay without it.

See the attachment... I renamed the file from .C to .TXT because Netscape treats
a .C file as a binary file due to file associations. :))

Thanks.
Alexei A. Frounze
-----------------------------------------
Homepage: http://alexfru.chat.ru
Mirror:   http://members.xoom.com/alexfru

salvador wrote:
> > 2nd question...
> >
> > Why the "-O2" switch works normally for pure C source code and makes compiler
> > failing on the source with inline assembly (in the .S file made out of such .C
> > an error encounters:
> > "Error: Error: Missing ')' assumed"
> > "Error: Error: Ignoring junk `(%ebp))' after expression")?
> > W/o the -O2 switch it's compiled fine. Isn't it a little bit strange?
> 
> Can you show a small example? I guess that's an error in your inline assembler
> code, but I can't know without the actual code.
--------------6DF912F1089F64E61B72F12B
Content-Type: text/plain; charset=us-ascii;
 name="Tmapping.txt"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="Tmapping.txt"

/***************************************************/
/* Written by Alexei A. Frounze on                 */
/*                  12 of jan  1999 ... 5 mar 2000 */
/*                                                 */
/* Compiler: DJGPP 2.95.2                          */
/*                                                 */
/* E-mail  : alexfru AT chat DOT ru                       */
/* Homepage: http://www.chat.ru/~alexfru           */
/*           http://members.xoom.com/alexfru       */
/***************************************************/

// See Mark Feldman's article about texture mapping:
// "http://www.geocities.com/SiliconValley/2151"
// and Mikael Kalms' article about texture mapping:
// "http://www.lysator.liu.se/~mikaelk/doc/perspectivetexture/"

#include <stdlib.h>
#include <stdio.h>
#include <math.h>

/* Three double-precision components, one per axis. */
typedef struct {
  double X;
  double Y;
  double Z;
} Vector;

// Screen size in pixels and the screen point the projection is centred on.
#define Screen_Width    320
#define Screen_Height   200
#define Center_X        160
#define Center_Y        100
// Offset of scanline y: (y<<8)+(y<<6) = y*256 + y*64 = y*320.
// The argument is parenthesized so that expressions such as y2addr(a | b)
// or y2addr(c ? d : e) expand with the intended precedence.
#define y2addr(y)       (((y)<<8)+((y)<<6))
// Base address of the frame buffer; stubbed out as NULL in this test case.
#define VGAAddr         NULL

// Scale applied to the projected x/z and y/z before centring on screen.
#define KP              256
// Maximum number of polygon edges; vertex arrays hold Max_Edges+1 entries.
#define Max_Edges       64
// Z of the near clipping plane.
#define ZClose          0.01

// Texture coordinates are interpolated linearly over runs of
// SUB_CNT = 2^SUB_BITS pixels, using FP_BITS fractional bits per pixel step.
#define SUB_BITS        4
#define SUB_CNT         (1 << SUB_BITS)
#define FP_BITS         (16-SUB_BITS)

                        /* ORIGINAL POLYGON DATA */

// Number of vertices of the polygon to draw (input to T_Map)
int     VCNT;
// Vertices of the polygon in 3-D space (input to T_Map)
Vector  vertices[Max_Edges+1];
// Texture coordinates u and v of each vertex (input to T_Map)
double  tu[Max_Edges+1], tv[Max_Edges+1];

                        /* TEMPORARY POLYGON DATA */

// NOTE(review): identifiers that start with an underscore followed by an
// upper-case letter (_VCNT, _Vertices) are reserved for the implementation
// in C; consider renaming them file-wide.

// Number of vertices left after clipping against the Z=ZClose plane
int     _VCNT;
// Clipped polygon vertices (3-D)
Vector  _Vertices[Max_Edges+1];
// Texture coordinates u and v of the clipped vertices
double  _tu[Max_Edges+1], _tv[Max_Edges+1];
// Screen coordinates of the clipped vertices
double  XR[Max_Edges+1], YR[Max_Edges+1];
// Per-scanline buffers filled by Scan_Edge, indexed by screen Y.
// Integer left & right screen boundaries:
int     Scan_Buf_L[Screen_Height], Scan_Buf_R[Screen_Height];
// 1/z, u/z and v/z on the left and on the right boundary:
double  inv_z_L[Screen_Height], u_div_z_L[Screen_Height], v_div_z_L[Screen_Height];
double  inv_z_R[Screen_Height], u_div_z_R[Screen_Height], v_div_z_R[Screen_Height];
// Exact (not rounded) X of the left and right boundaries:
double  Scan_Buf_L2[Screen_Height], Scan_Buf_R2[Screen_Height];

                        /* OTHER VARIABLES */

// Minimal & maximal screen X and Y (integer) of a polygon:
int     YminI, YmaxI, XminI, XmaxI;

// Boolean array for the texture mapper: Behind[i] is nonzero when
// vertices[i].Z < ZClose. It is used to clip polygons against the
// Z=ZClose plane
int Behind[Max_Edges+1];

// Selects the interpolation method in T_Map: 0 = divide for every pixel,
// nonzero = linear interpolation over runs of SUB_CNT (16) pixels
int SubDiv=1;

                        /* FUNCTIONS */


// This one finds screen min & max X for each Y of polgon's edge
void Scan_Edge (int N) {
  double X, DX;
  int IsLeftEdge;
  double X1R, Y1R, X2R, Y2R;
  int Y1I, Y2I, XI, YI;

  double iz1, iz2, uz1, uz2, vz1, vz2;
  double iz, uz, vz, diz, duz, dvz;
  double ddd;
  int M;
  short  SW, LW = 0x1B3F;

  M = (N+1)%_VCNT;

  X1R = XR[N]; Y1R = YR[N];
  iz1 = (double)1/_Vertices[N].Z;
  uz1 = _tu[N] * iz1;
  vz1 = _tv[N] * iz1;

  X2R = XR[M]; Y2R = YR[M];
  iz2 = (double)1/_Vertices[M].Z;
  uz2 = _tu[M] * iz2;
  vz2 = _tv[M] * iz2;

  IsLeftEdge = Y2R < Y1R;

  ddd = 1.0 / (Y1R - Y2R);
  DX  = (X1R - X2R) * ddd;
  diz = (iz1 - iz2) * ddd;
  duz = (uz1 - uz2) * ddd;
  dvz = (vz1 - vz2) * ddd;

  if (IsLeftEdge) {
    Y2I = ceil(Y2R); Y1I = ceil(Y1R)-1;
    if (Y2I > Y1I) return;
    ddd = Y2I - Y2R;
    X  = X2R + ddd * DX;
    iz = iz2 + ddd * diz;
    uz = uz2 + ddd * duz;
    vz = vz2 + ddd * dvz;
    YI = Y1I; Y1I = Y2I; Y2I = YI;
  } else {
    Y1I = ceil(Y1R); Y2I = ceil(Y2R)-1;
    if (Y1I > Y2I) return;
    ddd = Y1I - Y1R;
    X  = X1R + ddd * DX;
    iz = iz1 + ddd * diz;
    uz = uz1 + ddd * duz;
    vz = vz1 + ddd * dvz;
  };

  if (Y1I < 0) {
    X  -= Y1I*DX;
    iz -= Y1I*diz;
    uz -= Y1I*duz;
    vz -= Y1I*dvz;
    Y1I = 0;
  };
  if (Y2I > Screen_Height-1)
    Y2I = Screen_Height-1;

  __asm__ __volatile__ ("
    fstcw   (%0)
    fldcw   (%1)
    fldl    (%2)
  "
  :
  : "g" (&SW), "g" (&LW), "g" (&X)
  );

  if (IsLeftEdge)
    for (YI=Y1I;YI<=Y2I;YI++) {
/*
      Scan_Buf_L2[YI] = X;
      XI = ceil(X);
      Scan_Buf_L[YI] = XI;
      X += DX;
*/
      __asm__ __volatile__ ("
        fld     %%st
        fstl    (%0)
        frndint
        fistpl  (%2)
        faddl   (%1)
      "
      :
      : "g" (&Scan_Buf_L2[YI]), "g" (&DX), "g" (&Scan_Buf_L[YI])
      );
/*
      inv_z_L[YI] = iz;
      iz += diz;
      u_div_z_L[YI] = uz;
      uz += duz;
      v_div_z_L[YI] = vz;
      vz += dvz;
*/
      __asm__ __volatile__ ("
        fldl  (%0)
        fstl  (%2)
        faddl (%1)
        fstpl (%0)
      "
      :
      : "g" (&iz), "g" (&diz), "g" (&inv_z_L[YI])
      );
      __asm__ __volatile__ ("
        fldl  (%0)
        fstl  (%2)
        faddl (%1)
        fstpl (%0)
      "
      :
      : "g" (&uz), "g" (&duz), "g" (&u_div_z_L[YI])
      );
      __asm__ __volatile__ ("
        fldl  (%0)
        fstl  (%2)
        faddl (%1)
        fstpl (%0)
      "
      :
      : "g" (&vz), "g" (&dvz), "g" (&v_div_z_L[YI])
      );
    }
   else
    for (YI=Y1I;YI<=Y2I;YI++) {
/*
      Scan_Buf_R2[YI] = X;
      XI = ceil(X)-1;
      Scan_Buf_R[YI] = XI;
      X += DX;
*/
      __asm__ __volatile__ ("
        fld     %%st
        fstl    (%0)
        frndint
        fistpl  (%2)
        faddl   (%1)
        decl    (%2)
      "
      :
      : "g" (&Scan_Buf_R2[YI]), "g" (&DX), "g" (&Scan_Buf_R[YI])
      );
/*
      inv_z_R[YI] = iz;
      iz += diz;
      u_div_z_R[YI] = uz;
      uz += duz;
      v_div_z_R[YI] = vz;
      vz += dvz;
*/
      __asm__ __volatile__ ("
        fldl  (%0)
        fstl  (%2)
        faddl (%1)
        fstpl (%0)
      "
      :
      : "g" (&iz), "g" (&diz), "g" (&inv_z_R[YI])
      );
      __asm__ __volatile__ ("
        fldl  (%0)
        fstl  (%2)
        faddl (%1)
        fstpl (%0)
      "
      :
      : "g" (&uz), "g" (&duz), "g" (&u_div_z_R[YI])
      );
      __asm__ __volatile__ ("
        fldl  (%0)
        fstl  (%2)
        faddl (%1)
        fstpl (%0)
      "
      :
      : "g" (&vz), "g" (&dvz), "g" (&v_div_z_R[YI])
      );
    };

  __asm__ __volatile__ ("
    fstpl   (%1)
    fldcw   (%0)
  "
  :
  : "g" (&SW), "g" (&X)
  );
}

/**********************************/
/* MAIN TEXTURE MAPPING FUNCTIONS */
/**********************************/

// Maps a texture onto an arbitrary polygon
void T_Map (char *texture) {
  double NX, NY, IZ;
  int IndexMin, IndexMax, IndexMinX, IndexMaxX, i, j, k;
  int xl, xr;
  double uz, vz, duz, dvz;
  double iz, diz;
  int u, v;
  int zzz;
  char *scr;
  double iiz;
  int u1, v1, u2, v2, du, dv, n;
  int tmp1, tmp2;
  int sub_cnt = SUB_CNT;
  int CONST = 1 << (SUB_BITS + FP_BITS);
  double sss;
  double diz2, duz2, dvz2;

  if (!texture) return;

  for(i=0;i<VCNT;i++)
    Behind[i] = (vertices[i].Z < ZClose);

  _VCNT=0;
  for (i=0;i<VCNT;i++) {
    if (!Behind[i]) {
      IZ = (double)1/ vertices[i].Z;
      XR[_VCNT] = vertices[i].X * IZ;
      YR[_VCNT] = vertices[i].Y * IZ;
      _Vertices[_VCNT] = vertices[i];
      _tu[_VCNT] = tu[i];
      _tv[_VCNT] = tv[i];
      _VCNT++;
    };
    k = (i+1)%VCNT;
    if (Behind[i] != Behind[k]) {
      IZ = (ZClose-vertices[i].Z) / (vertices[k].Z-vertices[i].Z);
      NX = vertices[i].X + (vertices[k].X-vertices[i].X) * IZ;
      NY = vertices[i].Y + (vertices[k].Y-vertices[i].Y) * IZ;
      XR[_VCNT] = NX / ZClose;
      YR[_VCNT] = NY / ZClose;
      _Vertices[_VCNT].X = NX;
      _Vertices[_VCNT].Y = NY;
      _Vertices[_VCNT].Z = ZClose;
      _tu[_VCNT] = tu[i] + (tu[k]-tu[i]) * IZ;
      _tv[_VCNT] = tv[i] + (tv[k]-tv[i]) * IZ;
      _VCNT++;
    };
  };

  if (_VCNT < 3) return;

  // Computing XR[] and YR[] screen TVertices for a polygon

  for (i=0;i<_VCNT;i++) {
    XR[i] *= KP; XR[i] += Center_X;
    YR[i] *= KP; YR[i] += Center_Y;
  };

  // Done. Computing minimal Y and maximal Y at screen for a polygon

  IndexMin = IndexMax = IndexMinX = IndexMaxX = 0;
  for (i=0;i<_VCNT;i++) {
    if (YR[i] < YR[IndexMin]) IndexMin = i;
    if (YR[i] > YR[IndexMax]) IndexMax = i;
    if (XR[i] < XR[IndexMinX]) IndexMinX = i;
    if (XR[i] > XR[IndexMaxX]) IndexMaxX = i;
  };

  // Getting integer values of min Y and max Y

  YminI = ceil(YR[IndexMin]);
  YmaxI = ceil(YR[IndexMax])-1;
  XminI = ceil(XR[IndexMinX]);
  XmaxI = ceil(XR[IndexMaxX])-1;

  // Is polygon visible at screen?

  if ((YminI > YmaxI) || (XminI > XmaxI)) return;
  tmp1 = 0;//Boundaries[SSP-1].Ymin;
  tmp2 = 319;//Boundaries[SSP-1].Ymax;

  if ((YminI > tmp2) || (YmaxI < tmp1)) return;

  if (YminI < tmp1) YminI = tmp1;
  if (YmaxI > tmp2) YmaxI = tmp2;

  // It's visible. Finding left and right screen boundaries per each y-line.
  // It's done for all edges of a polygon

  for (i=0;i<_VCNT;i++)
    Scan_Edge (i);

  for (i=YminI;i<=YmaxI;i++) {
    xl=Scan_Buf_L[i];
    xr=Scan_Buf_R[i];
    if (xl > xr) continue;
    if ((xl>=Screen_Width) || (xr<0)) continue;

    tmp1 = 0;//(int)Boundaries[SSP-1].bounds[i][0];
    tmp2 = 319;//(int)Boundaries[SSP-1].bounds[i][1];
    if ((tmp1 > tmp2) || (xl > tmp2) || (xr < tmp1)) continue;

    iz = inv_z_L[i];
    uz = u_div_z_L[i];
    vz = v_div_z_L[i];
/*
    sss = Scan_Buf_R2[i]-Scan_Buf_L2[i];
    diz = (inv_z_R[i] - inv_z_L[i]) / sss;
    duz = (u_div_z_R[i] - u_div_z_L[i]) / sss;
    dvz = (v_div_z_R[i] - v_div_z_L[i]) / sss;
*/
    __asm__ __volatile__ ("
      fld1
      fldl  (%0)
      fsubl (%1)"
    :
    : "g" (&Scan_Buf_R2[i]), "g" (&Scan_Buf_L2[i])
    );
    __asm__ __volatile__ ("
      fdivrp
      fld     %%st
      fld     %%st
      fldl    (%0)
      fsubl   (%1)
      fmulp
      fstpl   (%2)"
    :
    : "g" (&inv_z_R[i]), "g" (&inv_z_L[i]), "g" (&diz)
    );
    __asm__ __volatile__ ("
      fldl    (%0)
      fsubl   (%1)
      fmulp
      fstpl   (%2)
      fldl    (%3)
      fsubl   (%4)
      fmulp
      fstpl   (%5)"
    :
    : "g" (&u_div_z_R[i]), "g" (&u_div_z_L[i]), "g" (&duz), "g" (&v_div_z_R[i]), "g" (&v_div_z_L[i]), "g" (&dvz)
    );
/*
    sss = xl - Scan_Buf_L2[i];
    iz += diz * sss;
    uz += duz * sss;
    vz += dvz * sss;
*/
    __asm__ __volatile__ ("
      fildl   (%0)
      fsubl   (%1)
      fld     %%st
      fld     %%st
    "
    :
    : "g" (&xl), "g" (&Scan_Buf_L2[i])
    );
    __asm__ __volatile__ ("
      fmull (%0)
      faddl (%1)
      fstpl (%1)
      fmull (%2)
      faddl (%3)
      fstpl (%3)
      fmull (%4)
      faddl (%5)
      fstpl (%5)
    "
    :
    : "g" (&diz), "g" (&iz), "g" (&duz), "g" (&uz), "g" (&dvz), "g" (&vz)
    );

    if (xl < tmp1) {
/*
      iz += diz*(tmp1-xl);
      uz += duz*(tmp1-xl);
      vz += dvz*(tmp1-xl);
*/
      __asm__ __volatile__ ("
        fildl   (%0)
        fisubl  (%1)
        fld     %%st
        fld     %%st"
      :
      : "g" (&tmp1), "g" (&xl)
      );

      __asm__ __volatile__ ("
        fmull   (%0)
        faddl   (%1)
        fstpl   (%1)
        fmull   (%2)
        faddl   (%3)
        fstpl   (%3)
        fmull   (%4)
        faddl   (%5)
        fstpl   (%5)"
      :
      : "g" (&diz), "g" (&iz), "g" (&duz), "g" (&uz), "g" (&dvz), "g" (&vz)
      );

      xl = tmp1;
    };
    if (xr > tmp2) xr = tmp2;

    zzz = xr-xl+1;
    scr = NULL;//VGAAddr + xl + y2addr (i);

    diz2 = diz;
    duz2 = duz;
    dvz2 = dvz;
    
if (SubDiv) {
if (zzz>=SUB_CNT) {
/*
    diz *= SUB_CNT;
    duz *= SUB_CNT;
    dvz *= SUB_CNT;
*/
    __asm__ __volatile__ ("
      fildl (%0)
      fld   %%st
      fld   %%st
      fldl  (%1)
      fmulp
      fstpl (%1)
      fldl  (%2)
      fmulp
      fstpl (%2)
      fldl  (%3)
      fmulp
      fstpl (%3)"
    :
    : "g" (&sub_cnt), "g" (&diz), "g" (&duz), "g" (&dvz)
    );
/*
    iiz = (1 << (SUB_BITS + FP_BITS)) / iz;
    u1 = (int)(uz * iiz);
    v1 = (int)(vz * iiz);
*/
    __asm__ __volatile__ ("
      fildl   (%0)
      fdivl   (%1)
      fld     %%st
      fmull   (%2)
      fistpl  (%4)
      fmull   (%3)
      fistpl  (%5)"
    :
    : "g" (&CONST), "g" (&iz), "g" (&uz), "g" (&vz), "g" (&u1), "g" (&v1)
    );
    iz += diz;
    uz += duz;
    vz += dvz;
/*
    iiz = (1 << (SUB_BITS + FP_BITS)) / iz;
    u2 = (int)(uz * iiz);
    v2 = (int)(vz * iiz);
*/
    __asm__ __volatile__ ("
      fildl   (%0)
      fdivl   (%1)
      fld     %%st
      fmull   (%2)
      fistpl  (%4)
      fmull   (%3)
      fistpl  (%5)"
    :
    : "g" (&CONST), "g" (&iz), "g" (&uz), "g" (&vz), "g" (&u2), "g" (&v2)
    );
    du = u2 - u1;
    dv = v2 - v1;
//  sar du, SUB_BITS
//  sar dv, SUB_BITS
    __asm__ __volatile__ ("
      sarl %2, (%0)
      sarl %2, (%1)"
      :
      : "g" (&du), "g" (&dv), "g" (SUB_BITS)
    );
//    while(1) {
    while (zzz>=SUB_CNT) {
      n = SUB_CNT;
      zzz -= SUB_CNT;
//      if (zzz<0) n+=zzz;

//      iz += diz;
//      iiz = (1 << (SUB_BITS + FP_BITS)) / iz;

      __asm__ __volatile__ ("
        fldl    (%1)
        faddl   (%2)
        fstl    (%1)
        fidivrl (%0)"
        :
        : "g" (&CONST), "g" (&iz), "g" (&diz)
      );

//      span (scr, texture, n, &u1, &v1, du, dv);

      scr += n;
      if (zzz<SUB_CNT) {
        __asm__ __volatile__ ("
          fstpl (%0)"
          :
          : "g" (&iiz)
        );
        iz -= diz;
        break;
      };

      u1 = u2;
      v1 = v2;
//      iz += diz;
      uz += duz;
      vz += dvz;
/*
      iiz = (1 << (SUB_BITS + FP_BITS)) / iz;
      u2 = (int)(uz * iiz);
      v2 = (int)(vz * iiz);
*/
      __asm__ __volatile__ ("
        fld     %%st
        fmull   (%0)
        fistpl  (%2)
        fmull   (%1)
        fistpl  (%3)"
      :
      : "g" (&uz), "g" (&vz), "g" (&u2), "g" (&v2)
      );
      du = u2 - u1;
      dv = v2 - v1;
//    sar du, SUB_BITS
//    sar dv, SUB_BITS
      __asm__ __volatile__ ("
        sarl %2, (%0)
        sarl %2, (%1)"
        :
        : "g" (&du), "g" (&dv), "g" (SUB_BITS)
      );
    };
};
if (zzz) {
/*
    while (zzz--) {
      u = ((int)(uz / iz))&255;
      v = ((int)(vz / iz))&255;
      *scr++ = *(texture+(v<<8)+u);
      iz += diz2;
      uz += duz2;
      vz += dvz2;
    };
    goto lll;
*/
/*
    iiz = (1 << (SUB_BITS + FP_BITS)) / iz;
    u1 = (int)(uz * iiz);
    v1 = (int)(vz * iiz);
*/
    __asm__ __volatile__ ("
      fildl   (%0)
      fdivl   (%1)
      fld     %%st
      fmull   (%2)
      fistpl  (%4)
      fmull   (%3)
      fistpl  (%5)"
    :
    : "g" (&CONST), "g" (&iz), "g" (&uz), "g" (&vz), "g" (&u1), "g" (&v1)
    );
    iz += diz2*zzz;
    uz += duz2*zzz;
    vz += dvz2*zzz;
/*
    iiz = (1 << (SUB_BITS + FP_BITS)) / iz;
    u2 = (int)(uz * iiz);
    v2 = (int)(vz * iiz);
*/
    __asm__ __volatile__ ("
      fildl   (%0)
      fdivl   (%1)
      fld     %%st
      fmull   (%2)
      fistpl  (%4)
      fmull   (%3)
      fistpl  (%5)"
    :
    : "g" (&CONST), "g" (&iz), "g" (&uz), "g" (&vz), "g" (&u2), "g" (&v2)
    );
    du = u2 - u1;
    dv = v2 - v1;
    du /= zzz;
    dv /= zzz;

//    span (scr, texture, zzz, &u1, &v1, du, dv);
lll:;
}
} else {
    while (zzz--) {
      u = ((int)(uz / iz))&255;
      v = ((int)(vz / iz))&255;
      *scr++ = *(texture+(v<<8)+u);
      iz += diz;
      uz += duz;
      vz += dvz;
    };
};
  };
}

/* Entry point of the test case: calls the texture mapper once without a
   texture and reports success. */
int main() {
  char *no_texture = NULL;

  T_Map (no_texture);
  return 0;
}


--------------6DF912F1089F64E61B72F12B--


- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019