/* Here are some functions that Quake 3 really likes to be fast. */

#define USE_DUAL_LINTERP 0 /* 1 = interpolate with the DUAL_LINTERP asm macro (slower); 0 = plain C */

/* This isn't used yet. */
/*
#define QUAD_XMM_LINTERP(O, T, A, B) {          \
  __asm__ __volatile__(                         \
  "movss  (%%eax),%%xmm0   \n"                  \
  "shufps $0,%%xmm0, %%xmm0   \n"               \
  "movups (%%esi), %%xmm1   \n"                 \
  "movups (%%edx), %%xmm2   \n"                 \
  "subps  %%xmm1, %%xmm2   \n"                  \
  "mulps  %%xmm0, %%xmm2  \n"                   \
  "addps  %%xmm1, %%xmm2  \n"                   \
  "movups  %%xmm2, (%%edi)"                     \
  :                                             \
  :  "D" (&O), "S" (&A), "d" (&B), "a" (&T)     \
  : "memory" );      \
}
*/

/*
 * DUAL_LINTERP(O, T, A, B)
 *
 * Linearly interpolates two consecutive floats on the x87 FPU:
 *
 *     (&O)[k] = (&A)[k] + T * ((&B)[k] - (&A)[k])     for k = 0 and 1
 *
 * All four arguments must be lvalues, because their addresses are passed
 * to the asm in EDI (O), ESI (A), EDX (B) and EAX (T).  T is read as a
 * single float.  Five values are pushed on the x87 stack and all five are
 * popped again before the asm ends.  Only used when USE_DUAL_LINTERP is
 * non-zero.
 */
#define DUAL_LINTERP(O, T, A, B) { \
  __asm__ (  \
  "flds   (%%eax)     \n"   \
  "flds   (%%esi)     \n"   \
  "flds   0x4(%%esi)  \n"   \
  "flds   (%%edx)     \n"   \
  "flds   0x4(%%edx)  \n"   \
  "fxch  %%st(1)     \n"   /* b1 b2 a2 a1 t */ \
  "fsub  %%st(3)     \n"   /* b1 -= a1 */      \
  "fxch  %%st(1)     \n"   /* b2 b1 a2 a1 t */ \
  "fsub  %%st(2)     \n"   /* b2 -= a2 */      \
  "fxch  %%st(1)     \n"   /* b1 b2 a2 a1 t */ \
  "fmul  %%st(4)     \n"   /* b1 *= t */       \
  "fxch  %%st(1)     \n"   /* b2 b1 a2 a1 t */ \
  "fmul  %%st(4)     \n"   /* b2 *= t */       \
  "fxch  %%st(3)     \n"   /* a1 b1 a2 b2 t */ \
  "faddp %%st(1)     \n"   /* a1 += b1 */      \
  "fxch  %%st(2)     \n"   /* b2 a2 b1 t */    \
  "faddp %%st(1)     \n"   /* a2 += b2 */      \
  "fxch  %%st(1)     \n"   /* b1 b2 t */       \
  "fstps  (%%edi)     \n"   \
  "fstps  0x4(%%edi)  \n"   \
  "fstp  %%st(0)     \n"   \
  :  /* no out regs */     \
  :  "D" (&O), "S" (&A), "d" (&B), "a" (&T)    \
  :  "memory" );  \
}



/*
 * INTERP_RGBA_TMU0(O, T, TI, I, J)
 *
 * Interpolates the colour bytes and one pair of floats of a vertex.
 * O, I and J are pointers to the output and the two input vertices.
 *
 *   - The four bytes at byte offsets 16..19 are blended in integer
 *     arithmetic:  out = (unsigned char)(i + (((j - i) * TI) >> 8)).
 *   - The two floats at byte offsets 24 and 28 are blended on the x87 FPU:
 *     out = i + T * (j - i).
 *
 * T  must be a float lvalue (its address is taken).
 * TI is the integer blend weight; the callers in this file pass
 *    FloatToInt(T * 256.0F).
 *
 * Register use: ESI = I, EDX = J, EDI = O, EAX = TI, ECX = &T on entry.
 * ECX is reused as scratch after T has been loaded, so it is declared as
 * an in/out operand; GCC assumes input-only registers are left unchanged.
 * EBX is scratch as well: it is saved with push/pop and also listed as a
 * clobber.  The x87 stack is balanced on exit.
 *
 * The macro expands to a single statement without a trailing semicolon,
 * so it can be used safely in if/else bodies: INTERP_RGBA_TMU0(...);
 */
#define INTERP_RGBA_TMU0(O, T, TI, I , J)   \
do {       \
     const void *interp_rgba_tmu0_ecx_ = &(T); /* in: &T, out: scratch */ \
     __asm__ __volatile__( \
  "flds    (%%ecx)   \n"                /* st: t */ \
     "push %%ebx    \n"      \
     " movzbl 16(%%esi),%%ebx  \n"      /* colour byte 0 */ \
  "flds   24(%%esi)   \n"             \
     " movzbl 16(%%edx),%%ecx  \n"  \
     " subl %%ebx,%%ecx     \n"  \
  "flds   24(%%edx)   \n"             \
     " imull %%eax,%%ecx     \n"  \
  "fsub  %%st(1)     \n"             \
     " shrl  $8,%%ecx        \n"  \
     " addl %%ecx,%%ebx     \n"  \
  "fld   %%st(2)     \n"             \
     " movb %%bl,16(%%edi)   \n"  \
  "fmulp %%st(1)     \n"             \
     " movzbl 17(%%esi),%%ebx  \n"      /* colour byte 1 */ \
     " movzbl 17(%%edx),%%ecx  \n"  \
  "faddp %%st(1)     \n"             \
     " subl %%ebx,%%ecx     \n"  \
     " imull %%eax,%%ecx     \n"  \
  "fstps  24(%%edi)   \n"             \
     " shrl  $8,%%ecx        \n"  \
     " addl %%ecx,%%ebx     \n"  \
  "flds   28(%%esi)   \n"          \
     " movb %%bl, 0x11(%%edi)   \n"  \
     " movzbl 0x12(%%esi),%%ebx  \n"    /* colour byte 2 */ \
  "flds   28(%%edx)   \n"          \
     " movzbl 0x12(%%edx),%%ecx  \n"  \
     " subl %%ebx,%%ecx     \n"  \
  "fsub  %%st(1)     \n"             \
     " imull %%eax,%%ecx     \n"  \
     " shrl  $8,%%ecx        \n"  \
     " addl %%ecx,%%ebx     \n"  \
  "fld   %%st(2)     \n"             \
     " movb %%bl, 0x12(%%edi)   \n"  \
     " movzbl 0x13(%%esi),%%ebx  \n"    /* colour byte 3 */ \
     " movzbl 0x13(%%edx),%%ecx  \n"  \
  "fmulp %%st(1)     \n"             \
     " subl %%ebx,%%ecx     \n"  \
     " imull %%eax,%%ecx     \n"  \
     " shrl  $8,%%ecx        \n"  \
  "faddp %%st(1)     \n"             \
     " addl %%ecx,%%ebx     \n"  \
     " movb %%bl, 0x13(%%edi) \n "  \
  "fstps  28(%%edi)   \n"          \
     " pop %%ebx    \n"               \
  "fstp  %%st(0)     \n"                /* drop t */ \
     : "+c" (interp_rgba_tmu0_ecx_)  \
     : "S" (&(I)[0]), "d" (&(J)[0]), "D" (&(O)[0]), "a" (TI) \
     : "memory", "%ebx" );  \
} while(0)



/*
 * INTERP_RGBA_TMU0_TMU1(O, T, TI, I, J)
 *
 * Interpolates the colour bytes and two pairs of floats of a vertex.
 * O, I and J are pointers to the output and the two input vertices.
 *
 *   - The four bytes at byte offsets 16..19 are blended in integer
 *     arithmetic:  out = (unsigned char)(i + (((j - i) * TI) >> 8)).
 *   - The four floats at byte offsets 24, 28, 32 and 36 are blended on the
 *     x87 FPU:  out = i + T * (j - i).
 *
 * T  must be a float lvalue (its address is taken).
 * TI is the integer blend weight; the caller in this file passes
 *    FloatToInt(T * 256.0F).
 *
 * Register use: ESI = I, EDX = J, EDI = O, EAX = TI, ECX = &T on entry.
 * ECX is reused as scratch after T has been loaded, so it is declared as
 * an in/out operand; GCC assumes input-only registers are left unchanged.
 * EBX is scratch as well: it is saved with push/pop and also listed as a
 * clobber.  The x87 stack is balanced on exit.
 *
 * The macro expands to a single statement without a trailing semicolon,
 * so it can be used safely in if/else bodies: INTERP_RGBA_TMU0_TMU1(...);
 */
#define INTERP_RGBA_TMU0_TMU1(O, T, TI, I , J)   \
do {       \
     const void *interp_rgba_tmu01_ecx_ = &(T); /* in: &T, out: scratch */ \
     __asm__ __volatile__( \
     "push %%ebx    \n"      \
  "flds    (%%ecx)   \n"                /* st: t */ \
     " movzbl 16(%%esi),%%ebx  \n"      /* colour byte 0 */ \
  "flds   24(%%esi)   \n"             \
     " movzbl 16(%%edx),%%ecx  \n"  \
  "flds   24(%%edx)   \n"             \
     " subl %%ebx,%%ecx     \n"  \
  "fsub  %%st(1)     \n"             \
     " imull %%eax,%%ecx     \n"  \
  "fld   %%st(2)     \n"             \
     " shrl  $8,%%ecx        \n"  \
  "fmulp %%st(1)     \n"             \
     " addl %%ecx,%%ebx     \n"  \
  "faddp %%st(1)     \n"             \
     " movb %%bl,16(%%edi)   \n"  \
  "fstps  24(%%edi)   \n"             \
     " movzbl 17(%%esi),%%ebx  \n"      /* colour byte 1 */ \
  "flds   28(%%esi)   \n"          \
     " movzbl 17(%%edx),%%ecx  \n"  \
  "flds   28(%%edx)   \n"          \
     " subl %%ebx,%%ecx     \n"  \
  "fsub  %%st(1)     \n"             \
     " imull %%eax,%%ecx     \n"  \
  "fld   %%st(2)     \n"             \
     " shrl  $8,%%ecx        \n"  \
  "fmulp %%st(1)     \n"             \
     " addl %%ecx,%%ebx     \n"  \
  "faddp %%st(1)     \n"             \
     " movb %%bl, 0x11(%%edi)   \n"  \
  "fstps  28(%%edi)   \n"          \
     " movzbl 0x12(%%esi),%%ebx  \n"    /* colour byte 2 */ \
  "flds   32(%%esi)   \n"             \
     " movzbl 0x12(%%edx),%%ecx  \n"  \
  "flds   32(%%edx)   \n"             \
     " subl %%ebx,%%ecx     \n"  \
  "fsub  %%st(1)     \n"             \
     " imull %%eax,%%ecx     \n"  \
  "fld   %%st(2)     \n"             \
     " shrl  $8,%%ecx        \n"  \
  "fmulp %%st(1)     \n"             \
     " addl %%ecx,%%ebx     \n"  \
  "faddp %%st(1)     \n"             \
     " movb %%bl, 0x12(%%edi)   \n"  \
  "fstps  32(%%edi)   \n"             \
     " movzbl 0x13(%%esi),%%ebx  \n"    /* colour byte 3 */ \
  "flds   36(%%esi)   \n"          \
     " movzbl 0x13(%%edx),%%ecx  \n"  \
  "flds   36(%%edx)   \n"          \
     " subl %%ebx,%%ecx     \n"  \
  "fsub  %%st(1)     \n"             \
     " imull %%eax,%%ecx     \n"  \
  "fld   %%st(2)     \n"             \
     " shrl  $8,%%ecx        \n"  \
  "fmulp %%st(1)     \n"             \
     " addl %%ecx,%%ebx     \n"  \
  "faddp %%st(1)     \n"             \
     " movb %%bl, 0x13(%%edi)  \n"  \
  "fstps  36(%%edi)   \n"          \
     " pop %%ebx        \n"               \
  "fstp  %%st(0)        \n"             /* drop t */ \
     : "+c" (interp_rgba_tmu01_ecx_)  \
     : "S" (&(I)[0]), "d" (&(J)[0]), "D" (&(O)[0]), "a" (TI) \
     : "memory", "%ebx" );  \
} while(0)



/*
 * Perspective-divide and viewport-map a run of vertices in place.
 *
 * Vertices are 64 bytes apart.  For every vertex v in [first, last):
 *
 *     d    = 1 / v[3]
 *     v[0] = v[0] * m[0]  * d + m[12]
 *     v[1] = v[1] * m[5]  * d + m[13]
 *     v[2] = v[2] * m[10] * d + m[14]
 *     v[3] = d
 *
 * first - first vertex to process
 * last  - one past the final vertex; the loop ends only when the running
 *         pointer compares equal to it, so it must be a whole number of
 *         64-byte steps past first
 * m     - matrix; only elements 0, 5, 10, 12, 13 and 14 are read
 *
 * Nothing is done when first == last.  ESI is advanced by the loop, which
 * is why it is also listed as a (dummy) output operand.
 */
void inline gl_mga_x86_project_verts(GLfloat *first,
				     GLfloat *last,
				     const GLfloat *m)
{
        int dummy;
	__asm__ __volatile__(
	"cmpl   %%esi,%%ecx \n"
	"je     2f          \n"
        ".balign 16         \n"
	"1:                 \n"
	"fld1               \n"
	"fdivs  12(%%esi)   \n"
	"flds   (%%esi)     \n"
	"fmuls  (%%edx)     \n"
	"flds   4(%%esi)    \n"
	"fmuls  20(%%edx)   \n"
	"flds   8(%%esi)    \n"
	"fmuls  40(%%edx)   \n"
	"fxch   %%st(1)     \n"  /* y z x*/
	"fmul   %%st(3)     \n"
	"fxch   %%st(2)     \n"  /* x z yd */
	"fmul   %%st(3)     \n"
	"fxch   %%st(1)     \n"  /* z xd yd */
	"fmul   %%st(3)     \n"
	"fxch   %%st(1)     \n"  /* xd zd yd */
        "fadds  48(%%edx)   \n"
	"fxch   %%st(2)     \n"  /* yd zd xda */
        "fadds  52(%%edx)   \n"
	"fxch   %%st(1)     \n"  /* zd yda xda */
        "fadds  56(%%edx)   \n"
	"fxch   %%st(2)     \n"  /* x y z d*/
	"fstps   (%%esi)     \n"
	"fstps   4(%%esi)    \n"
	"fstps   8(%%esi)    \n"
	"fstps   12(%%esi)   \n"
	"addl   $64,%%esi \n"
	"cmpl   %%esi,%%ecx \n"
	"jne    1b          \n"
	"2:                 \n"
	: "=S" (dummy)
	: "d" (m), "S" (first), "c" (last)
	: "memory" );
}

/*
 * Same in-place projection as gl_mga_x86_project_verts, but a vertex is
 * only processed when its clip-mask byte is zero.
 *
 * Vertices are 64 bytes apart and clipmask holds one byte per vertex.
 * For every vertex v in [first, last) whose mask byte is 0:
 *
 *     d    = 1 / v[3]
 *     v[0] = v[0] * m[0]  * d + m[12]
 *     v[1] = v[1] * m[5]  * d + m[13]
 *     v[2] = v[2] * m[10] * d + m[14]
 *     v[3] = d
 *
 * Vertices with a non-zero mask byte are left untouched.
 *
 * first    - first vertex to process
 * last     - one past the final vertex; must be a whole number of 64-byte
 *            steps past first because the loop exits on pointer equality
 * m        - matrix; only elements 0, 5, 10, 12, 13 and 14 are read
 * clipmask - mask byte for the vertex at first; advanced by one per vertex
 *
 * Nothing is done when first == last.  ESI and EDI are advanced by the
 * loop, which is why both are also listed as (dummy) output operands.
 */
void inline gl_mga_x86_project_clipped_verts(GLfloat *first,
				     GLfloat *last,
				     const GLfloat *m,
				     const GLubyte *clipmask )
{
        int dummy, dummy2;
	__asm__ __volatile__(
	"cmpl   %%esi,%%ecx \n"
	"je 3f              \n"
        ".balign 16         \n"
	"1:                 \n"
	"cmpb  $0,(%%edi)   \n"  /* skip this vertex if its mask byte is set */
	"jne 2f             \n"
	"fld1               \n"
	"fdivs  12(%%esi)   \n"
	"flds   (%%esi)     \n"
	"fmuls  (%%edx)     \n"
	"flds   4(%%esi)    \n"
	"fmuls  20(%%edx)   \n"
	"flds   8(%%esi)    \n"
	"fmuls  40(%%edx)   \n"
	"fxch   %%st(1)     \n"  /* y z x*/
	"fmul   %%st(3)     \n"
	"fxch   %%st(2)     \n"  /* x z yd */
	"fmul   %%st(3)     \n"
	"fxch   %%st(1)     \n"  /* z xd yd */
	"fmul   %%st(3)     \n"
	"fxch   %%st(1)     \n"  /* xd zd yd */
        "fadds  48(%%edx)   \n"
	"fxch   %%st(2)     \n"  /* yd zd xda */
        "fadds  52(%%edx)   \n"
	"fxch   %%st(1)     \n"  /* zd yda xda */
        "fadds  56(%%edx)   \n"
	"fxch   %%st(2)     \n"  /* x y z d*/
	"fstps   (%%esi)     \n"
	"fstps   4(%%esi)    \n"
	"fstps   8(%%esi)    \n"
	"fstps   12(%%esi)   \n"
	"2:                 \n"
	"addl   $64,%%esi \n"
	"incl   %%edi       \n"
	"cmpl   %%esi,%%ecx \n"
	"jne    1b          \n"
	"3:                 \n"
	: "=S" (dummy), "=D" (dummy2)
	: "d" (m), "S" (first), "c" (last), "D" (clipmask)
	: "memory");
}


/*
 * Build hardware vertices (transform + colour + one texcoord pair).
 *
 * setup_args is a table of 32-bit words, filled in by
 * mga_setup_full_x86_RGBA_TMU0:
 *
 *   [0] colour source pointer        [1] colour stride (bytes)
 *   [2] texcoord source pointer      [3] texcoord stride (bytes)
 *   [4] first output vertex          [5] one past the last output vertex
 *   [6] 4x4 float matrix m
 *   [7] object coordinate pointer    [8] object coordinate stride (bytes)
 *
 * Output vertices are 64 bytes apart.  For each one:
 *   floats 0..3  : out[k] = x*m[k] + y*m[4+k] + z*m[8+k] + m[12+k],
 *                  where x, y, z are the first three object floats
 *   bytes 16..19 : the source colour word with its bytes reordered from
 *                  (b0,b1,b2,b3) to (b2,b1,b0,b3)
 *   bytes 24..31 : two 32-bit words copied from the texcoord source
 *
 * The loop is do/while shaped: the caller must guarantee at least one
 * vertex, and [5] must be a whole number of 64-byte steps past [4].
 *
 * The asm reads setup_args[] and the source arrays and writes the output
 * vertices, so it carries a "memory" clobber; without it the compiler may
 * treat the caller's stores into setup_args[] as dead or move them past
 * the asm.  All eight x87 registers are in use at the peak of the matrix
 * multiply, so they are declared as clobbered too.  EBX and EBP are
 * saved and restored with push/pop.
 */
void mga_setup_full_x86_with_transform_rgba_tmu0(GLuint *setup_args)
{
        __asm__ __volatile__(
        "push %%ebx              \n"
        "push %%ebp              \n"
        "movl (%%esi), %%edx     \n"    /* edx = colour source */
        "movl 8(%%esi), %%ecx    \n"    /* ecx = texcoord source */
        "movl 16(%%esi), %%edi   \n"    /* edi = output vertex */
        "movl 24(%%esi), %%ebx   \n"    /* ebx = matrix */
        "movl 28(%%esi), %%ebp   \n"    /* ebp = object coordinates */
        ".balign 16              \n"
        "1:                      \n"
        "flds   (%%ebp)          \n"    /* x * m[0..3] */
        "fmuls  (%%ebx)          \n"
        "movl (%%edx),%%eax      \n"    /* fetch colour word */
        "flds   (%%ebp)          \n"
        "fmuls  4(%%ebx)         \n"
        "bswapl %%eax            \n"
        "flds   (%%ebp)          \n"
        "fmuls  8(%%ebx)         \n"
        "rorl $8,%%eax           \n"    /* bswap + ror 8: swap bytes 0 and 2 */
        "flds   (%%ebp)          \n"
        "fmuls  12(%%ebx)        \n"
        "mov %%eax,16(%%edi)     \n"
        "flds   4(%%ebp)         \n"    /* y * m[4..7] */
        "fmuls  16(%%ebx)        \n"
        "movl (%%ecx),%%eax      \n"    /* first texcoord word */
        "flds   4(%%ebp)         \n"
        "fmuls  20(%%ebx)        \n"
        "movl %%eax,24(%%edi)    \n"
        "flds   4(%%ebp)         \n"
        "fmuls  24(%%ebx)        \n"
        "movl 4(%%ecx),%%eax     \n"    /* second texcoord word */
        "flds   4(%%ebp)         \n"
        "fmuls  28(%%ebx)        \n"    /* x87 stack is full here (8 deep) */
        "fxch   %%st(1)          \n"
        "faddp  %%st,%%st(5)     \n"
        "faddp  %%st,%%st(3)     \n"
        "faddp  %%st,%%st(4)     \n"
        "faddp  %%st,%%st(4)     \n"
        "flds   8(%%ebp)         \n"    /* z * m[8..11] */
        "fmuls  32(%%ebx)        \n"
        "movl %%eax,28(%%edi)    \n"
        "flds   8(%%ebp)         \n"
        "fmuls  36(%%ebx)        \n"
        "addl 4(%%esi),%%edx     \n"    /* colour += colour stride */
        "flds   8(%%ebp)         \n"
        "fmuls  40(%%ebx)        \n"
        "flds   8(%%ebp)         \n"
        "fmuls  44(%%ebx)        \n"
        "faddp  %%st,%%st(4)     \n"
        "faddp  %%st,%%st(4)     \n"
        "faddp  %%st,%%st(4)     \n"
        "faddp  %%st,%%st(4)     \n"
        "fxch   %%st(1)          \n"    /* add the translation row m[12..15] */
        "fadds  56(%%ebx)        \n"
        "fxch   %%st(2)          \n"
        "fadds  52(%%ebx)        \n"
        "fxch   %%st(1)          \n"
        "fadds  60(%%ebx)        \n"
        "fxch   %%st(3)          \n"
        "fadds  48(%%ebx)        \n"
        "fstps  (%%edi)          \n"
        "fstps  4(%%edi)         \n"
        "fstps  8(%%edi)         \n"
        "fstps  12(%%edi)        \n"
        "addl   32(%%esi),%%ebp  \n"    /* object += object stride */
        "addl $64,%%edi          \n"    /* next output vertex */
        "addl 12(%%esi),%%ecx    \n"    /* texcoord += texcoord stride */
        "cmpl %%edi,20(%%esi)    \n"    /* reached the end pointer? */
        "jne 1b                  \n"
        "pop %%ebp               \n"
        "pop %%ebx               \n"
        :
        : "S" (setup_args)
        : "%edx", "%edi", "%eax", "%ecx", "memory", "cc",
          "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");

}


/*
 * Build hardware vertices (transform + colour + two texcoord pairs).
 *
 * setup_args is a table of 32-bit words, filled in by
 * mga_setup_full_x86_RGBA_TMU0_TMU1:
 *
 *   [0] colour source pointer         [1] colour stride (bytes)
 *   [2] first texcoord source         [3] its stride (bytes)
 *   [4] second texcoord source        [5] its stride (bytes)
 *   [6] first output vertex           [7] one past the last output vertex
 *   [8] 4x4 float matrix m
 *   [9] object coordinate pointer     [10] object coordinate stride (bytes)
 *
 * Output vertices are 64 bytes apart.  For each one:
 *   floats 0..3  : out[k] = x*m[k] + y*m[4+k] + z*m[8+k] + m[12+k],
 *                  where x, y, z are the first three object floats
 *   bytes 16..19 : the source colour word with its bytes reordered from
 *                  (b0,b1,b2,b3) to (b2,b1,b0,b3)
 *   bytes 24..31 : two 32-bit words copied from the first texcoord source
 *   bytes 32..39 : two 32-bit words copied from the second texcoord source
 *
 * The loop is do/while shaped: the caller must guarantee at least one
 * vertex, and [7] must be a whole number of 64-byte steps past [6].
 *
 * Implementation notes:
 *   - The colour source pointer is advanced by the colour stride after
 *     every vertex, as in the single-texture routine.
 *   - The matrix pointer lives in EAX during the floating-point part and
 *     is reloaded from setup_args[8] at the top of every iteration (EAX
 *     doubles as the copy temporary afterwards).  ESP is never
 *     repurposed: an asynchronous signal delivered while ESP pointed at
 *     the matrix would push its frame over the memory just below it.
 *     setup_args[] is therefore only read, never written.
 *   - The asm reads setup_args[] and the source arrays and writes the
 *     output vertices, hence the "memory" clobber.  All eight x87
 *     registers are in use at the peak of the matrix multiply, so they
 *     are declared as clobbered.  EBX and EBP are saved with push/pop.
 */
void inline mga_setup_full_x86_with_transform_rgba_tmu0_tmu1(GLuint *setup_args)
{
        __asm__ __volatile__(
        "push %%ebx              \n"
        "push %%ebp              \n"
        "movl (%%esi), %%edx     \n"    /* edx = colour source */
        "movl 8(%%esi), %%ecx    \n"    /* ecx = first texcoord source */
        "movl 16(%%esi), %%ebx   \n"    /* ebx = second texcoord source */
        "movl 24(%%esi), %%edi   \n"    /* edi = output vertex */
        "movl 36(%%esi), %%ebp   \n"    /* ebp = object coordinates */
        ".balign 16              \n"
        "1:                      \n"
        "movl 32(%%esi), %%eax   \n"    /* eax = matrix (reloaded each pass) */
        "flds   (%%ebp)          \n"    /* x * m[0..3] */
        "fmuls  (%%eax)          \n"
        "flds   (%%ebp)          \n"
        "fmuls  4(%%eax)         \n"
        "flds   (%%ebp)          \n"
        "fmuls  8(%%eax)         \n"
        "flds   (%%ebp)          \n"
        "fmuls  12(%%eax)        \n"
        "flds   4(%%ebp)         \n"    /* y * m[4..7] */
        "fmuls  16(%%eax)        \n"
        "flds   4(%%ebp)         \n"
        "fmuls  20(%%eax)        \n"
        "flds   4(%%ebp)         \n"
        "fmuls  24(%%eax)        \n"
        "flds   4(%%ebp)         \n"
        "fmuls  28(%%eax)        \n"    /* x87 stack is full here (8 deep) */
        "fxch   %%st(1)          \n"
        "faddp  %%st,%%st(5)     \n"
        "faddp  %%st,%%st(3)     \n"
        "faddp  %%st,%%st(4)     \n"
        "faddp  %%st,%%st(4)     \n"
        "flds   8(%%ebp)         \n"    /* z * m[8..11] */
        "fmuls  32(%%eax)        \n"
        "flds   8(%%ebp)         \n"
        "fmuls  36(%%eax)        \n"
        "flds   8(%%ebp)         \n"
        "fmuls  40(%%eax)        \n"
        "flds   8(%%ebp)         \n"
        "fmuls  44(%%eax)        \n"
        "faddp  %%st,%%st(4)     \n"
        "faddp  %%st,%%st(4)     \n"
        "faddp  %%st,%%st(4)     \n"
        "faddp  %%st,%%st(4)     \n"
        "fxch   %%st(1)          \n"    /* add the translation row m[12..15] */
        "fadds  56(%%eax)        \n"
        "fxch   %%st(2)          \n"
        "fadds  52(%%eax)        \n"
        "fxch   %%st(1)          \n"
        "fadds  60(%%eax)        \n"
        "fxch   %%st(3)          \n"
        "fadds  48(%%eax)        \n"
        "fstps  (%%edi)          \n"
        "fstps  4(%%edi)         \n"
        "fstps  8(%%edi)         \n"
        "fstps  12(%%edi)        \n"
        "addl   40(%%esi),%%ebp  \n"    /* object += object stride */
        "movl (%%edx),%%eax      \n"    /* eax is the copy temporary from here on */
        "bswapl %%eax            \n"
        "rorl $8,%%eax           \n"    /* bswap + ror 8: swap bytes 0 and 2 */
        "mov %%eax,16(%%edi)     \n"
        "addl 4(%%esi),%%edx     \n"    /* colour += colour stride */
        "movl (%%ecx),%%eax      \n"
        "movl %%eax,24(%%edi)    \n"
        "movl 4(%%ecx),%%eax     \n"
        "movl %%eax,28(%%edi)    \n"
        "movl (%%ebx),%%eax      \n"
        "movl %%eax,32(%%edi)    \n"
        "movl 4(%%ebx),%%eax     \n"
        "movl %%eax,36(%%edi)    \n"
        "addl $64,%%edi          \n"    /* next output vertex */
        "addl 12(%%esi),%%ecx    \n"    /* first texcoord += its stride */
        "addl 20(%%esi),%%ebx    \n"    /* second texcoord += its stride */
        "cmpl %%edi,28(%%esi)    \n"    /* reached the end pointer? */
        "jne 1b                  \n"
        "pop %%ebp               \n"
        "pop %%ebx               \n"
        :
        : "S" (setup_args)
        : "%edx" , "%ecx" , "%edi", "%eax", "memory", "cc",
          "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
}


/*
 * Set up the hardware vertices for VB entries [CopyStart, Count) with
 * colour and one texture unit, then optionally clip-test them.
 *
 * VB          - vertex buffer to process
 * do_cliptest - when non-zero, reset the clip masks and run
 *               gl_cliptest_points4_v16 over the freshly built vertices
 *
 * When the range is empty neither the setup nor the clip test runs.
 * In every case clipped_elements.count and last_vert of the driver data
 * are updated before returning.
 */
void  mga_setup_full_x86_RGBA_TMU0 ( struct vertex_buffer *VB, GLuint do_cliptest )
{
   mgaVertexBufferPtr mvb = (mgaVertexBufferPtr) VB->driver_data;
   const GLfloat * const m = VB->ctx->ModelProjectMatrix.m;
   const GLuint start = VB->CopyStart;
   const GLuint count = VB->Count;
   GLuint setup_args[9];

   if (count != start)
   {
      const GLuint t0 = mgaCtx->tmu_source[0];

      /* Argument table consumed by the asm routine. */
      setup_args[0] = (GLuint) VB->ColorPtr->start;
      setup_args[1] = (GLuint) VB->ColorPtr->stride;
      setup_args[2] = (GLuint) VB->TexCoordPtr[t0]->start;
      setup_args[3] = (GLuint) VB->TexCoordPtr[t0]->stride;
      setup_args[4] = (GLuint) mvb->verts[start].f;
      setup_args[5] = (GLuint) (mvb->verts[start].f + (16 * (count - start)));
      setup_args[6] = (GLuint) m;
      setup_args[7] = (GLuint) VB->ObjPtr->start;
      setup_args[8] = (GLuint) VB->ObjPtr->stride;
      mga_setup_full_x86_with_transform_rgba_tmu0(setup_args);

      if (do_cliptest)
      {
         VB->ClipOrMask = 0;
         VB->ClipAndMask = ~0;
         gl_cliptest_points4_v16(mvb->verts[start].f,
                                 mvb->verts[count].f,
                                 &VB->ClipOrMask,
                                 &VB->ClipAndMask,
                                 VB->ClipMask + start);
      }
   }

   mvb->clipped_elements.count = start;
   mvb->last_vert = count;
}

/*
 * Interpolate a vertex between I and J at parameter t and store it in O.
 *
 * Floats 0..3 are blended as I[k] + t * (J[k] - I[k]), either in C or
 * with DUAL_LINTERP depending on USE_DUAL_LINTERP.  The colour bytes and
 * the texture coordinate pair are then handled by INTERP_RGBA_TMU0, which
 * receives t both as a float and as the integer FloatToInt(t * 256).
 */
static void  mga_interp_vert_x86_RGBA_TMU0 ( GLfloat t, 
				  GLfloat *O,
				  const GLfloat *I,
				  const GLfloat *J )
{
   unsigned int ti;
#if USE_DUAL_LINTERP
   DUAL_LINTERP(O[0],t,I[0],J[0]);
   DUAL_LINTERP(O[2],t,I[2],J[2]);
#else
   int k;

   for (k = 0; k < 4; k++)
      O[k] = I[k] + t * (J[k] - I[k]);
#endif

   /* 8-bit fixed-point weight for the integer colour blend. */
   ti = FloatToInt(t*256.0F);
   INTERP_RGBA_TMU0(O, t, ti, I , J);
}

/*
 * Set up the hardware vertices for VB entries [CopyStart, Count) with
 * colour and two texture units, then optionally clip-test them.
 *
 * VB          - vertex buffer to process
 * do_cliptest - when non-zero, reset the clip masks and run
 *               gl_cliptest_points4_v16 over the freshly built vertices
 *
 * When the range is empty neither the setup nor the clip test runs.
 * In every case clipped_elements.count and last_vert of the driver data
 * are updated before returning.
 */
void  mga_setup_full_x86_RGBA_TMU0_TMU1 ( struct vertex_buffer *VB, GLuint do_cliptest )
{
   mgaVertexBufferPtr mvb = (mgaVertexBufferPtr) VB->driver_data;
   const GLfloat * const m = VB->ctx->ModelProjectMatrix.m;
   const GLuint start = VB->CopyStart;
   const GLuint count = VB->Count;

   if (count != start)
   {
      GLuint setup_args[11];
      const GLuint t0 = mgaCtx->tmu_source[0];
      const GLuint t1 = mgaCtx->tmu_source[1];

      /* Argument table consumed by the asm routine. */
      setup_args[0]  = (GLuint) VB->ColorPtr->start;
      setup_args[1]  = (GLuint) VB->ColorPtr->stride;
      setup_args[2]  = (GLuint) VB->TexCoordPtr[t0]->start;
      setup_args[3]  = (GLuint) VB->TexCoordPtr[t0]->stride;
      setup_args[4]  = (GLuint) VB->TexCoordPtr[t1]->start;
      setup_args[5]  = (GLuint) VB->TexCoordPtr[t1]->stride;
      setup_args[6]  = (GLuint) mvb->verts[start].f;
      setup_args[7]  = (GLuint) (mvb->verts[start].f + (16 * (count - start)));
      setup_args[8]  = (GLuint) m;
      setup_args[9]  = (GLuint) VB->ObjPtr->start;
      setup_args[10] = (GLuint) VB->ObjPtr->stride;
      mga_setup_full_x86_with_transform_rgba_tmu0_tmu1(setup_args);

      if (do_cliptest)
      {
         VB->ClipOrMask = 0;
         VB->ClipAndMask = ~0;
         gl_cliptest_points4_v16(mvb->verts[start].f,
                                 mvb->verts[count].f,
                                 &VB->ClipOrMask,
                                 &VB->ClipAndMask,
                                 VB->ClipMask + start);
      }
   }

   mvb->clipped_elements.count = start;
   mvb->last_vert = count;
}

/*
 * Interpolate a two-texture vertex between I and J at parameter t and
 * store it in O.
 *
 * Floats 0..3 are blended as I[k] + t * (J[k] - I[k]), either in C or
 * with DUAL_LINTERP depending on USE_DUAL_LINTERP.
 *
 * The 32-bit colour word sits at float index 4.  When both inputs carry
 * the same colour word it is copied straight through and floats 6..9 are
 * blended in C, which avoids the four integer multiplies of the asm path
 * (quake3 uses multitexture for walls).  Otherwise INTERP_RGBA_TMU0_TMU1
 * blends the colour bytes and floats 6..9 together.
 */
static void  mga_interp_vert_x86_RGBA_TMU0_TMU1 ( GLfloat t, 
				  GLfloat *O,
				  const GLfloat *I,
				  const GLfloat *J )
{
   const unsigned int *color_i = (const unsigned int *)(I + 4);
   const unsigned int *color_j = (const unsigned int *)(J + 4);
   unsigned int *color_o = (unsigned int *)(O + 4);
   int k;

#if USE_DUAL_LINTERP
   DUAL_LINTERP(O[0],t,I[0],J[0]);
   DUAL_LINTERP(O[2],t,I[2],J[2]);
#else
   for (k = 0; k < 4; k++)
      O[k] = I[k] + t * (J[k] - I[k]);
#endif

   if (*color_i == *color_j)
   {
      /* Same colour on both ends: copy it and blend the texcoords in C. */
      *color_o = *color_i;
      for (k = 6; k <= 9; k++)
         O[k] = I[k] + t * (J[k] - I[k]);
   }
   else
   {
      /* Colours differ: let the asm blend colour and texcoords together. */
      unsigned int ti;
      ti = FloatToInt(t*256.0F);
      INTERP_RGBA_TMU0_TMU1(O, t, ti, I , J);
   }
}

