diff options
| author | Keith Whitwell <keith@tungstengraphics.com> | 2005-04-21 13:11:02 +0000 | 
|---|---|---|
| committer | Keith Whitwell <keith@tungstengraphics.com> | 2005-04-21 13:11:02 +0000 | 
| commit | 0ff98b03c6a54db609c82620d6093dbd24cdaec3 (patch) | |
| tree | cc51063fff6714aebe516fc4e0f31cb0cacf2cf9 | |
| parent | 70375aff48292fe0ee78c4a93758166b1324c020 (diff) | |
New software implementation of ARB_vertex_program.  Similar in speed
to existing version, but with the potential for good improvements.
| -rw-r--r-- | src/mesa/tnl/t_vb_arbprogram.c | 1551 | 
1 files changed, 1551 insertions, 0 deletions
| diff --git a/src/mesa/tnl/t_vb_arbprogram.c b/src/mesa/tnl/t_vb_arbprogram.c new file mode 100644 index 0000000000..af91546f2e --- /dev/null +++ b/src/mesa/tnl/t_vb_arbprogram.c @@ -0,0 +1,1551 @@ +/* + * Mesa 3-D graphics library + * Version:  6.3 + * + * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file t_arb_program.c + * Compile vertex programs to an intermediate representation. + * Execute vertex programs over a buffer of vertices. + * \author Keith Whitwell, Brian Paul + */ + +#include "glheader.h" +#include "context.h" +#include "imports.h" +#include "macros.h" +#include "mtypes.h" +#include "arbprogparse.h" +#include "program.h" +#include "math/m_matrix.h" +#include "math/m_translate.h" +#include "t_context.h" +#include "t_pipeline.h" + + + + +/* New, internal instructions: + */ +#define IN1        (VP_OPCODE_XPD+1) +#define IN2        (IN1+1)	/* intput-to-reg MOV */ +#define IN3        (IN1+2) +#define IN4        (IN1+3) +#define OUT        (IN1+4)	/* reg-to-output MOV */ +#define OUM        (IN1+5)	/* reg-to-output MOV with mask */ +#define RSW        (IN1+6) +#define MSK        (IN1+7)	/* reg-to-reg MOV with mask */ +#define PAR        (IN1+8)      /* parameter-to-reg MOV */ +#define PRL        (IN1+9)      /* parameter-to-reg MOV */ + + +/* Layout of register file: + +  0 -- Scratch (Arg0) +  1 -- Scratch (Arg1) +  2 -- Scratch (Arg2) +  3 -- Scratch (Result) +  4 -- Program Temporary 0 +  .. +  31 -- Program Temporary 27 +  32 -- State/Input/Const shadow 0 +  .. +  63 -- State/Input/Const shadow 31 + +*/ + + + +#define REG_ARG0  0 +#define REG_ARG1  1 +#define REG_ARG2  2 +#define REG_RES   3 +#define REG_TMP0  4 +#define REG_TMP_MAX 32 +#define REG_TMP_NR (REG_TMP_MAX-REG_TMP0) +#define REG_PAR0  32 +#define REG_PAR_MAX 64 +#define REG_PAR_NR (REG_PAR_MAX-REG_PAR0) + +#define REG_MAX 64 +#define REG_SWZDST_MAX 16 + +/* ARB_vp instructions are broken down into one or more of the + * following micro-instructions, each representable in a 32 bit packed + * structure. + */ + + +union instruction { +   struct { +      GLuint opcode:6; +      GLuint dst:5; +      GLuint arg0:6; +      GLuint arg1:6; +      GLuint elt:2;		/* x,y,z or w */ +      GLuint pad:7; +   } scl; + + +   struct { +      GLuint opcode:6; +      GLuint dst:5; +      GLuint arg0:6; +      GLuint arg1:6; +      GLuint arg2:6; +      GLuint pad:3; +   } vec; + +   struct { +      GLuint opcode:6; +      GLuint dst:4;		/* NOTE!  REG 0..16 only! */ +      GLuint arg0:6; +      GLuint neg:4;		 +      GLuint swz:12;		 +   } swz; + +   struct { +      GLuint opcode:6; +      GLuint dst:6; +      GLuint arg0:6; +      GLuint neg:1;		/* 1 bit only */ +      GLuint swz:8;		/* xyzw only */ +      GLuint pad:5; +   } rsw; + +   struct { +      GLuint opcode:6; +      GLuint reg:6; +      GLuint file:5; +      GLuint idx:8;		/* plenty? */ +      GLuint rel:1; +      GLuint pad:6; +   } inr; + + +   struct { +      GLuint opcode:6; +      GLuint reg:6; +      GLuint file:5; +      GLuint idx:8;		/* plenty? */ +      GLuint mask:4; +      GLuint pad:3; +   } out; + +   struct { +      GLuint opcode:6; +      GLuint dst:5; +      GLuint arg0:6; +      GLuint mask:4; +      GLuint pad:11; +   } msk; + +   GLuint dword; +}; + + + +struct compilation { +   struct { +      GLuint file:5; +      GLuint idx:8;  +   } reg[REG_PAR_NR]; + +   GLuint par_active; +   GLuint par_protected; +   GLuint tmp_active; +    +   union instruction *csr; + +   struct vertex_buffer *VB;	/* for input sizes! */ +}; + +/*--------------------------------------------------------------------------- */ + +/*! + * Private storage for the vertex program pipeline stage. + */ +struct arb_vp_machine { +   GLfloat reg[REG_MAX][4];	/* Program temporaries, shadowed parameters and inputs, +				   plus some internal values */ + +   GLfloat (*File[8])[4];	/* Src/Dest for PAR/PRL instructions. */ +   GLint AddressReg; + +   union instruction store[1024]; +/*    GLuint store_size; */ + +   union instruction *instructions; +   GLint nr_instructions; + +   GLvector4f attribs[VERT_RESULT_MAX]; /**< result vectors. */ +   GLvector4f ndcCoords;              /**< normalized device coords */ +   GLubyte *clipmask;                 /**< clip flags */ +   GLubyte ormask, andmask;           /**< for clipping */ + +   GLuint vtx_nr;		/**< loop counter */ + +   struct vertex_buffer *VB; +   GLcontext *ctx; +}; + + +/*--------------------------------------------------------------------------- */ + +struct opcode_info { +   GLuint type; +   GLuint nr_args; +   const char *string; +   void (*func)( struct arb_vp_machine *, union instruction ); +   void (*print)( union instruction , const struct opcode_info * ); +}; + + +#define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)(stage->privatePtr)) + + + +/** + * Set x to positive or negative infinity. + * + * XXX: FIXME - type punning. + */ +#if defined(USE_IEEE) || defined(_WIN32) +#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0x7F800000 ) +#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0xFF800000 ) +#elif defined(VMS) +#define SET_POS_INFINITY(x)  x = __MAXFLOAT +#define SET_NEG_INFINITY(x)  x = -__MAXFLOAT +#define IS_INF_OR_NAN(t)   ((t) == __MAXFLOAT) +#else +#define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL +#define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL +#endif + +#define FREXPF(a,b) frexpf(a,b) + +#define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0]) + +/* FIXME: more type punning (despite use of fi_type...) + */ +#define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits + + +static GLfloat RoughApproxLog2(GLfloat t) +{ +   return LOG2(t); +} + +static GLfloat RoughApproxPow2(GLfloat t) +{    +   GLfloat q; +#ifdef USE_IEEE +   GLint ii = (GLint) t; +   ii = (ii < 23) + 0x3f800000; +   SET_FLOAT_BITS(q, ii); +   q = *((GLfloat *) (void *)&ii); +#else +   q = (GLfloat) pow(2.0, floor_t0); +#endif +   return q; +} + +static GLfloat RoughApproxPower(GLfloat x, GLfloat y) +{ +#if 0 +   return (GLfloat) exp(y * log(x)); +#else +   return (GLfloat) _mesa_pow(x, y); +#endif +} + + +static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F }; + + + + +/** + * This is probably the least-optimal part of the process, have to + * multiply out the stride to access each incoming input value. + */ +static GLfloat *get_input( struct arb_vp_machine *m, GLuint index ) +{ +   return VEC_ELT(m->VB->AttribPtr[index], GLfloat, m->vtx_nr); +} + + +/** + * Fetch a 4-element float vector from the given source register. + * Deal with the possibility that not all elements are present. + */ +static void do_IN1( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.inr.reg]; +   const GLfloat *src = get_input(m, op.inr.idx); + +   result[0] = src[0]; +   result[1] = 0; +   result[2] = 0; +   result[3] = 1; +} + +static void do_IN2( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.inr.reg]; +   const GLfloat *src = get_input(m, op.inr.idx); +    +   result[0] = src[0]; +   result[1] = src[1]; +   result[2] = 0; +   result[3] = 1; +} + +static void do_IN3( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.inr.reg]; +   const GLfloat *src = get_input(m, op.inr.idx); + +   result[0] = src[0]; +   result[1] = src[1]; +   result[2] = src[2]; +   result[3] = 1; +} + +static void do_IN4( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.inr.reg]; +   const GLfloat *src = get_input(m, op.inr.idx); +    +   result[0] = src[0]; +   result[1] = src[1]; +   result[2] = src[2]; +   result[3] = src[3]; +} + +/** + * Perform a reduced swizzle: + */ +static void do_RSW( struct arb_vp_machine *m, union instruction op )  +{ +   GLfloat *result = m->reg[op.rsw.dst]; +   const GLfloat *arg0 = m->reg[op.rsw.arg0]; +   GLuint swz = op.rsw.swz; +   GLuint neg = op.rsw.neg; +   GLuint i; + +   if (neg)  +      for (i = 0; i < 4; i++, swz >>= 2)  +	 result[i] = -arg0[swz & 0x3]; +   else +      for (i = 0; i < 4; i++, swz >>= 2)  +	 result[i] = arg0[swz & 0x3]; +} + + + +/** + * Store 4 floats into an external address. + */ +static void do_OUM( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *dst = m->attribs[op.out.idx].data[m->vtx_nr]; +   const GLfloat *value = m->reg[op.out.reg]; + +   if (op.out.mask & 0x1) dst[0] = value[0]; +   if (op.out.mask & 0x2) dst[1] = value[1]; +   if (op.out.mask & 0x4) dst[2] = value[2]; +   if (op.out.mask & 0x8) dst[3] = value[3]; +} + +static void do_OUT( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *dst = m->attribs[op.out.idx].data[m->vtx_nr]; +   const GLfloat *value = m->reg[op.out.reg]; + +   dst[0] = value[0]; +   dst[1] = value[1]; +   dst[2] = value[2]; +   dst[3] = value[3]; +} + +/* Register-to-register MOV with writemask. + */ +static void do_MSK( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *dst = m->reg[op.msk.dst]; +   const GLfloat *arg0 = m->reg[op.msk.arg0]; +  +   if (op.msk.mask & 0x1) dst[0] = arg0[0]; +   if (op.msk.mask & 0x2) dst[1] = arg0[1]; +   if (op.msk.mask & 0x4) dst[2] = arg0[2]; +   if (op.msk.mask & 0x8) dst[3] = arg0[3]; +} + + +/* Retreive parameters and other constant values: + */ +static void do_PAR( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.inr.reg]; +   const GLfloat *src = m->File[op.inr.file][op.inr.idx]; + +   result[0] = src[0]; +   result[1] = src[1]; +   result[2] = src[2]; +   result[3] = src[3]; +} + + +#define RELADDR_MASK MAX_NV_VERTEX_PROGRAM_PARAMS + +static void do_PRL( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.inr.reg]; +   GLuint index = (op.inr.idx + m->AddressReg) & RELADDR_MASK; +   const GLfloat *src = m->File[op.inr.file][index]; + +   result[0] = src[0]; +   result[1] = src[1]; +   result[2] = src[2]; +   result[3] = src[3]; +} + +static void do_PRT( struct arb_vp_machine *m, union instruction op ) +{ +   const GLfloat *arg0 = m->reg[op.vec.arg0]; +    +   _mesa_printf("%d: %f %f %f %f\n", m->vtx_nr,  +		arg0[0], arg0[1], arg0[2], arg0[3]); +} + + +/** + * The traditional ALU and texturing instructions.  All operate on + * internal registers and ignore write masks and swizzling issues. + */ + +static void do_ABS( struct arb_vp_machine *m, union instruction op )  +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; + +   result[0] = (arg0[0] < 0.0) ? -arg0[0] : arg0[0]; +   result[1] = (arg0[1] < 0.0) ? -arg0[1] : arg0[1]; +   result[2] = (arg0[2] < 0.0) ? -arg0[2] : arg0[2]; +   result[3] = (arg0[3] < 0.0) ? -arg0[3] : arg0[3]; +} + +static void do_ADD( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; +   const GLfloat *arg1 = m->reg[op.vec.arg1]; + +   result[0] = arg0[0] + arg1[0]; +   result[1] = arg0[1] + arg1[1]; +   result[2] = arg0[2] + arg1[2]; +   result[3] = arg0[3] + arg1[3]; +} + + +static void do_ARL( struct arb_vp_machine *m, union instruction op ) +{ +   const GLfloat *arg0 = m->reg[op.out.reg]; +   m->AddressReg = (GLint) floor(arg0[0]); +} + + +static void do_DP3( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.scl.dst]; +   const GLfloat *arg0 = m->reg[op.scl.arg0]; +   const GLfloat *arg1 = m->reg[op.scl.arg1]; + +   result[0] = (arg0[0] * arg1[0] +  +		arg0[1] * arg1[1] +  +		arg0[2] * arg1[2]); + +   PUFF(result); +} + +static void do_DP4( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.scl.dst]; +   const GLfloat *arg0 = m->reg[op.scl.arg0]; +   const GLfloat *arg1 = m->reg[op.scl.arg1]; + +   result[0] = (arg0[0] * arg1[0] +  +		arg0[1] * arg1[1] +  +		arg0[2] * arg1[2] +  +		arg0[3] * arg1[3]); + +   PUFF(result); +} + +static void do_DPH( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.scl.dst]; +   const GLfloat *arg0 = m->reg[op.scl.arg0]; +   const GLfloat *arg1 = m->reg[op.scl.arg1]; + +   result[0] = (arg0[0] * arg1[0] +  +		arg0[1] * arg1[1] +  +		arg0[2] * arg1[2] +  +		1.0     * arg1[3]); +    +   PUFF(result); +} + +static void do_DST( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; +   const GLfloat *arg1 = m->reg[op.vec.arg1]; + +   result[0] = 1.0F; +   result[1] = arg0[1] * arg1[1]; +   result[2] = arg0[2]; +   result[3] = arg1[3]; +} + + +static void do_EX2( struct arb_vp_machine *m, union instruction op )  +{ +   GLfloat *result = m->reg[op.scl.dst]; +   const GLfloat *arg0 = m->reg[op.scl.arg0]; + +   result[0] = (GLfloat)RoughApproxPow2(arg0[0]); +   PUFF(result); +} + +static void do_EXP( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; +   GLfloat tmp = arg0[0]; +   GLfloat flr_tmp = FLOORF(tmp); + +   /* KW: previous definition of this instruction was really messed +    * up...  Maybe the nv instruction is quite different? +    */ +   result[0] = (GLfloat) (1 << (int)flr_tmp); +   result[1] = tmp - flr_tmp; +   result[2] = RoughApproxPow2(tmp); +   result[3] = 1.0F; +} + +static void do_FLR( struct arb_vp_machine *m, union instruction op )  +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; + +   result[0] = FLOORF(arg0[0]); +   result[1] = FLOORF(arg0[1]); +   result[2] = FLOORF(arg0[2]); +   result[3] = FLOORF(arg0[3]); +} + +static void do_FRC( struct arb_vp_machine *m, union instruction op )  +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; + +   result[0] = arg0[0] - FLOORF(arg0[0]); +   result[1] = arg0[1] - FLOORF(arg0[1]); +   result[2] = arg0[2] - FLOORF(arg0[2]); +   result[3] = arg0[3] - FLOORF(arg0[3]); +} + +static void do_LG2( struct arb_vp_machine *m, union instruction op )  +{ +   GLfloat *result = m->reg[op.scl.dst]; +   const GLfloat *arg0 = m->reg[op.scl.arg0]; + +   result[0] = RoughApproxLog2(arg0[0]); +   PUFF(result); +} + + + +static void do_LIT( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; + +   const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */ +   GLfloat tmp[4]; + +   tmp[0] = MAX2(arg0[0], 0.0F); +   tmp[1] = MAX2(arg0[1], 0.0F); +   tmp[3] = CLAMP(arg0[3], -(128.0F - epsilon), (128.0F - epsilon)); + +   result[0] = 1.0; +   result[1] = tmp[0]; +   result[2] = (tmp[0] > 0.0) ? RoughApproxPower(tmp[1], tmp[3]) : 0.0F; +   result[3] = 1.0; +} + + +static void do_LOG( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; +   GLfloat tmp = FABSF(arg0[0]); +   int exponent; +   GLfloat mantissa = FREXPF(tmp, &exponent); + +   result[0] = (GLfloat) (exponent - 1); +   result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */ +   result[2] = result[0] + LOG2(result[1]); +   result[3] = 1.0; +} + + +static void do_MAD( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; +   const GLfloat *arg1 = m->reg[op.vec.arg1]; +   const GLfloat *arg2 = m->reg[op.vec.arg2]; + +   result[0] = arg0[0] * arg1[0] + arg2[0]; +   result[1] = arg0[1] * arg1[1] + arg2[1]; +   result[2] = arg0[2] * arg1[2] + arg2[2]; +   result[3] = arg0[3] * arg1[3] + arg2[3]; +} + +static void do_MAX( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; +   const GLfloat *arg1 = m->reg[op.vec.arg1]; + +   result[0] = (arg0[0] > arg1[0]) ? arg0[0] : arg1[0]; +   result[1] = (arg0[1] > arg1[1]) ? arg0[1] : arg1[1]; +   result[2] = (arg0[2] > arg1[2]) ? arg0[2] : arg1[2]; +   result[3] = (arg0[3] > arg1[3]) ? arg0[3] : arg1[3]; +} + + +static void do_MIN( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; +   const GLfloat *arg1 = m->reg[op.vec.arg1]; + +   result[0] = (arg0[0] < arg1[0]) ? arg0[0] : arg1[0]; +   result[1] = (arg0[1] < arg1[1]) ? arg0[1] : arg1[1]; +   result[2] = (arg0[2] < arg1[2]) ? arg0[2] : arg1[2]; +   result[3] = (arg0[3] < arg1[3]) ? arg0[3] : arg1[3]; +} + +static void do_MOV( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; + +   result[0] = arg0[0]; +   result[1] = arg0[1]; +   result[2] = arg0[2]; +   result[3] = arg0[3]; +} + +static void do_MUL( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; +   const GLfloat *arg1 = m->reg[op.vec.arg1]; + +   result[0] = arg0[0] * arg1[0]; +   result[1] = arg0[1] * arg1[1]; +   result[2] = arg0[2] * arg1[2]; +   result[3] = arg0[3] * arg1[3]; +} + + +static void do_POW( struct arb_vp_machine *m, union instruction op )  +{ +   GLfloat *result = m->reg[op.scl.dst]; +   const GLfloat *arg0 = m->reg[op.scl.arg0]; +   const GLfloat *arg1 = m->reg[op.scl.arg1]; + +   result[0] = (GLfloat)RoughApproxPower(arg0[0], arg1[0]); +   PUFF(result); +} + +static void do_RCP( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.scl.dst]; +   const GLfloat *arg0 = m->reg[op.scl.arg0]; + +   result[0] = 1.0F / arg0[0];   +   PUFF(result); +} + +static void do_RSQ( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.scl.dst]; +   const GLfloat *arg0 = m->reg[op.scl.arg0]; + +   result[0] = INV_SQRTF(FABSF(arg0[0])); +   PUFF(result); +} + + +static void do_SGE( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; +   const GLfloat *arg1 = m->reg[op.vec.arg1]; + +   result[0] = (arg0[0] >= arg1[0]) ? 1.0F : 0.0F; +   result[1] = (arg0[1] >= arg1[1]) ? 1.0F : 0.0F; +   result[2] = (arg0[2] >= arg1[2]) ? 1.0F : 0.0F; +   result[3] = (arg0[3] >= arg1[3]) ? 1.0F : 0.0F; +} + + +static void do_SLT( struct arb_vp_machine *m, union instruction op ) +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; +   const GLfloat *arg1 = m->reg[op.vec.arg1]; + +   result[0] = (arg0[0] < arg1[0]) ? 1.0F : 0.0F; +   result[1] = (arg0[1] < arg1[1]) ? 1.0F : 0.0F; +   result[2] = (arg0[2] < arg1[2]) ? 1.0F : 0.0F; +   result[3] = (arg0[3] < arg1[3]) ? 1.0F : 0.0F; +} + +static void do_SWZ( struct arb_vp_machine *m, union instruction op )  +{ +   GLfloat *result = m->reg[op.swz.dst]; +   const GLfloat *arg0 = m->reg[op.swz.arg0]; +   GLuint swz = op.swz.swz; +   GLuint neg = op.swz.neg; +   GLuint i; + +   for (i = 0; i < 4; i++, swz >>= 3, neg >>= 1) { +      switch (swz & 0x7) { +      case SWIZZLE_ZERO: result[i] = 0.0; break; +      case SWIZZLE_ONE:  result[i] = 1.0; break; +      default:           result[i] = arg0[swz & 0x7]; break; +      } +      if (neg & 0x1)     result[i] = -result[i]; +   } +} + +static void do_SUB( struct arb_vp_machine *m, union instruction op )  +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; +   const GLfloat *arg1 = m->reg[op.vec.arg1]; + +   result[0] = arg0[0] - arg1[0]; +   result[1] = arg0[1] - arg1[1]; +   result[2] = arg0[2] - arg1[2]; +   result[3] = arg0[3] - arg1[3]; +} + + +static void do_XPD( struct arb_vp_machine *m, union instruction op )  +{ +   GLfloat *result = m->reg[op.vec.dst]; +   const GLfloat *arg0 = m->reg[op.vec.arg0]; +   const GLfloat *arg1 = m->reg[op.vec.arg1]; + +   result[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1]; +   result[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2]; +   result[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0]; +} + +static void do_NOP( struct arb_vp_machine *m, union instruction op )  +{ +} + +/* Some useful debugging functions: + */ +static void print_reg( GLuint reg ) +{ +   if (reg == REG_RES)  +      _mesa_printf("RES"); +   else if (reg >= REG_ARG0 && reg <= REG_ARG2) +      _mesa_printf("ARG%d", reg - REG_ARG0); +   else if (reg >= REG_TMP0 && reg < REG_TMP_MAX) +      _mesa_printf("TMP%d", reg - REG_TMP0); +   else if (reg >= REG_PAR0 && reg < REG_PAR_MAX) +      _mesa_printf("PAR%d", reg - REG_PAR0); +   else +      _mesa_printf("???");      +} + +static void print_mask( GLuint mask ) +{ +   _mesa_printf("."); +   if (mask&0x1) _mesa_printf("x"); +   if (mask&0x2) _mesa_printf("y"); +   if (mask&0x4) _mesa_printf("z"); +   if (mask&0x8) _mesa_printf("w"); +} + +static void print_extern( GLuint file, GLuint idx ) +{ +   static const char *reg_file[] = { +      "TEMPORARY", +      "INPUT", +      "OUTPUT", +      "LOCAL_PARAM", +      "ENV_PARAM", +      "NAMED_PARAM", +      "STATE_VAR", +      "WRITE_ONLY", +      "ADDRESS" +   }; + +   _mesa_printf("%s:%d", reg_file[file], idx); +} + + + +static void print_SWZ( union instruction op, const struct opcode_info *info ) +{ +   GLuint swz = op.swz.swz; +   GLuint neg = op.swz.neg; +   GLuint i; + +   _mesa_printf("%s ", info->string); +   print_reg(op.swz.dst); +   _mesa_printf(", "); +   print_reg(op.swz.arg0); +   _mesa_printf("."); +   for (i = 0; i < 4; i++, swz >>= 3, neg >>= 1) { +      const char *cswz = "xyzw01??"; +      if (neg & 0x1)    +	 _mesa_printf("-"); +      _mesa_printf("%c", cswz[swz&0x7]); +   } +   _mesa_printf("\n"); +} + +static void print_RSW( union instruction op, const struct opcode_info *info ) +{ +   GLuint swz = op.rsw.swz; +   GLuint neg = op.rsw.neg; +   GLuint i; + +   _mesa_printf("%s ", info->string); +   print_reg(op.rsw.dst); +   _mesa_printf(", "); +   print_reg(op.rsw.arg0); +   _mesa_printf("."); +   for (i = 0; i < 4; i++, swz >>= 2) { +      const char *cswz = "xyzw"; +      if (neg)    +	 _mesa_printf("-"); +      _mesa_printf("%c", cswz[swz&0x3]); +   } +   _mesa_printf("\n"); +} + + +static void print_SCL( union instruction op, const struct opcode_info *info ) +{ +   _mesa_printf("%s ", info->string); +   print_reg(op.scl.dst); +   _mesa_printf(", "); +   print_reg(op.scl.arg0); +   if (info->nr_args > 1) { +      _mesa_printf(", "); +      print_reg(op.scl.arg1); +   } +   _mesa_printf("\n"); +} + + +static void print_VEC( union instruction op, const struct opcode_info *info ) +{ +   _mesa_printf("%s ", info->string); +   print_reg(op.vec.dst); +   _mesa_printf(", "); +   print_reg(op.vec.arg0); +   if (info->nr_args > 1) { +      _mesa_printf(", "); +      print_reg(op.vec.arg1); +   } +   if (info->nr_args > 2) { +      _mesa_printf(", "); +      print_reg(op.vec.arg2); +   } +   _mesa_printf("\n"); +} + +static void print_MSK( union instruction op, const struct opcode_info *info ) +{ +   _mesa_printf("%s ", info->string); +   print_reg(op.msk.dst); +   print_mask(op.msk.mask); +   _mesa_printf(", "); +   print_reg(op.msk.arg0); +   _mesa_printf("\n"); +} + +static void print_IN( union instruction op, const struct opcode_info *info ) +{ +   _mesa_printf("%s ", info->string); +   print_reg(op.inr.reg); +   _mesa_printf(", "); +   print_extern(op.inr.file, op.inr.idx); +   _mesa_printf("\n"); +} + +static void print_OUT( union instruction op, const struct opcode_info *info ) +{ +   _mesa_printf("%s ", info->string); +   print_extern(op.out.file, op.out.idx); +   if (op.out.opcode == OUM) +      print_mask(op.out.mask); +   _mesa_printf(", "); +   print_reg(op.out.reg); +   _mesa_printf("\n"); +} + +static void print_NOP( union instruction op, const struct opcode_info *info ) +{ +} + +#define NOP 0 +#define VEC 1 +#define SCL 2 +#define SWZ 3 + +static const struct opcode_info opcode_info[] =  +{ +   { VEC, 1, "ABS", do_ABS, print_VEC }, +   { VEC, 2, "ADD", do_ADD, print_VEC }, +   { OUT, 1, "ARL", do_ARL, print_OUT }, +   { SCL, 2, "DP3", do_DP3, print_SCL }, +   { SCL, 2, "DP4", do_DP4, print_SCL }, +   { SCL, 2, "DPH", do_DPH, print_SCL }, +   { VEC, 2, "DST", do_DST, print_VEC }, +   { NOP, 0, "END", do_NOP, print_NOP }, +   { SCL, 1, "EX2", do_EX2, print_VEC }, +   { VEC, 1, "EXP", do_EXP, print_VEC }, +   { VEC, 1, "FLR", do_FLR, print_VEC }, +   { VEC, 1, "FRC", do_FRC, print_VEC }, +   { SCL, 1, "LG2", do_LG2, print_VEC }, +   { VEC, 1, "LIT", do_LIT, print_VEC }, +   { VEC, 1, "LOG", do_LOG, print_VEC }, +   { VEC, 3, "MAD", do_MAD, print_VEC }, +   { VEC, 2, "MAX", do_MAX, print_VEC }, +   { VEC, 2, "MIN", do_MIN, print_VEC }, +   { VEC, 1, "MOV", do_MOV, print_VEC }, +   { VEC, 2, "MUL", do_MUL, print_VEC }, +   { SCL, 2, "POW", do_POW, print_VEC }, +   { VEC, 1, "PRT", do_PRT, print_VEC }, /* PRINT */ +   { NOP, 1, "RCC", do_NOP, print_NOP }, +   { SCL, 1, "RCP", do_RCP, print_VEC }, +   { SCL, 1, "RSQ", do_RSQ, print_VEC }, +   { VEC, 2, "SGE", do_SGE, print_VEC }, +   { VEC, 2, "SLT", do_SLT, print_VEC }, +   { VEC, 2, "SUB", do_SUB, print_VEC }, +   { SWZ, 1, "SWZ", do_SWZ, print_SWZ }, +   { VEC, 2, "XPD", do_XPD, print_VEC }, +   { IN4, 1, "IN1", do_IN1, print_IN }, /* Internals */ +   { IN4, 1, "IN2", do_IN2, print_IN }, +   { IN4, 1, "IN3", do_IN3, print_IN }, +   { IN4, 1, "IN4", do_IN4, print_IN }, +   { OUT, 1, "OUT", do_OUT, print_OUT }, +   { OUT, 1, "OUM", do_OUM, print_OUT }, +   { SWZ, 1, "RSW", do_RSW, print_RSW }, +   { MSK, 1, "MSK", do_MSK, print_MSK }, +   { IN4, 1, "PAR", do_PAR, print_IN }, +   { IN4, 1, "PRL", do_PRL, print_IN }, +}; + + +static GLuint cvp_load_reg( struct compilation *cp, +			    GLuint file, +			    GLuint index, +			    GLuint rel ) +{ +   GLuint i, op; + +   if (file == PROGRAM_TEMPORARY) +      return index + REG_TMP0; + +   /* Don't try to cache relatively addressed values yet: +    */ +   if (!rel) { +      for (i = 0; i < REG_PAR_NR; i++) { +	 if ((cp->par_active & (1<<i)) && +	     cp->reg[i].file == file && +	     cp->reg[i].idx == index) { +	    cp->par_protected |= (1<<i); +	    return i + REG_PAR0; +	 } +      } +   } + +   /* Not already loaded, so identify a slot and load it.   +    * TODO: preload these values once only! +    * TODO: better eviction strategy! +    */ +   if (cp->par_active == ~0) { +      assert(cp->par_protected != ~0); +      cp->par_active = cp->par_protected; +   } + +   i = ffs(~cp->par_active); +   assert(i); +   i--; + + +   if (file == PROGRAM_INPUT)  +      op = IN1 + cp->VB->AttribPtr[index]->size - 1; +   else if (rel) +      op = PRL; +   else +      op = PAR; + +   cp->csr->dword = 0; +   cp->csr->inr.opcode = op; +   cp->csr->inr.reg = i + REG_PAR0; +   cp->csr->inr.file = file; +   cp->csr->inr.idx = index; +   cp->csr++; + +   cp->reg[i].file = file; +   cp->reg[i].idx = index; +   cp->par_protected |= (1<<i); +   cp->par_active |= (1<<i); +   return i + REG_PAR0; +} + +static void cvp_release_regs( struct compilation *cp ) +{ +   cp->par_protected = 0; +} + + + +static GLuint cvp_emit_arg( struct compilation *cp, +			    const struct vp_src_register *src, +			    GLuint arg ) +{ +   GLuint reg = cvp_load_reg( cp, src->File, src->Index, src->RelAddr ); +   union instruction rsw, noop; + +   /* Emit any necessary swizzling.   +    */ +   rsw.dword = 0; +   rsw.rsw.neg = src->Negate ? 1 : 0; +   rsw.rsw.swz = ((GET_SWZ(src->Swizzle, 0) << 0) | +		  (GET_SWZ(src->Swizzle, 1) << 2) | +		  (GET_SWZ(src->Swizzle, 2) << 4) | +		  (GET_SWZ(src->Swizzle, 3) << 6)); + +   noop.dword = 0; +   noop.rsw.neg = 0; +   noop.rsw.swz = ((0<<0) | +		   (1<<2) | +		   (2<<4) | +		   (3<<6)); + +   if (rsw.dword != noop.dword) { +      GLuint rsw_reg = arg; +      cp->csr->dword = rsw.dword; +      cp->csr->rsw.opcode = RSW; +      cp->csr->rsw.arg0 = reg; +      cp->csr->rsw.dst = rsw_reg; +      cp->csr++; +      return rsw_reg; +   } +   else +      return reg; +} + +static GLuint cvp_choose_result( struct compilation *cp, +				 const struct vp_dst_register *dst, +				 union instruction *fixup, +				 GLuint maxreg) +{ +   GLuint mask = dst->WriteMask; + +   if (dst->File == PROGRAM_TEMPORARY) { +       +      /* Optimization: When writing (with a writemask) to an undefined +       * value for the first time, the writemask may be ignored.  In +       * practise this means that the MSK instruction to implement the +       * writemask can be dropped. +       */ +      if (dst->Index < maxreg && +	  (mask == 0xf || !(cp->tmp_active & (1<<dst->Index)))) { +	 fixup->dword = 0; +	 cp->tmp_active |= (1<<dst->Index); +	 return REG_TMP0 + dst->Index; +      } +      else if (mask != 0xf) { +	 fixup->msk.opcode = MSK; +	 fixup->msk.arg0 = REG_RES; +	 fixup->msk.dst = REG_TMP0 + dst->Index; +	 fixup->msk.mask = mask; +	 cp->tmp_active |= (1<<dst->Index); +	 return REG_RES; +      } +      else { +	 fixup->vec.opcode = VP_OPCODE_MOV; +	 fixup->vec.arg0 = REG_RES; +	 fixup->vec.dst = REG_TMP0 + dst->Index; +	 cp->tmp_active |= (1<<dst->Index); +	 return REG_RES; +      } +   } +   else { +      assert(dst->File == PROGRAM_OUTPUT); +      fixup->out.opcode = (mask == 0xf) ? OUT : OUM; +      fixup->out.reg = REG_RES; +      fixup->out.file = dst->File; +      fixup->out.idx = dst->Index; +      fixup->out.mask = mask; +      return REG_RES; +   } +} + + +static void cvp_emit_inst( struct compilation *cp, +			   const struct vp_instruction *inst ) +{ +   const struct opcode_info *info = &opcode_info[inst->Opcode]; +   union instruction fixup; +   GLuint reg[3]; +   GLuint result, i; + +   /* Need to handle SWZ, ARL specially. +    */ +   switch (info->type) { +   case OUT: +      assert(inst->Opcode == VP_OPCODE_ARL); +      reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 ); + +      cp->csr->dword = 0; +      cp->csr->out.opcode = inst->Opcode; +      cp->csr->out.reg = reg[0]; +      cp->csr->out.file = PROGRAM_ADDRESS; +      cp->csr->out.idx = 0; +      break; +   case SWZ: +      assert(inst->Opcode == VP_OPCODE_SWZ); +      result = cvp_choose_result( cp, &inst->DstReg, &fixup, REG_SWZDST_MAX ); + +      reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 ); + +      cp->csr->dword = 0; +      cp->csr->swz.opcode = VP_OPCODE_SWZ; +      cp->csr->swz.arg0 = reg[0]; +      cp->csr->swz.dst = result; +      cp->csr->swz.neg = inst->SrcReg[0].Negate; +      cp->csr->swz.swz = inst->SrcReg[0].Swizzle; +      cp->csr++; + +      if (result == REG_RES) { +	 cp->csr->dword = fixup.dword; +	 cp->csr++; +      } +      break; + +   case VEC: +   case SCL:			/* for now */ +      result = cvp_choose_result( cp, &inst->DstReg, &fixup, REG_MAX ); + +      reg[0] = reg[1] = reg[2] = 0; + +      for (i = 0; i < info->nr_args; i++) +	 reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0 + i ); + +      cp->csr->dword = 0; +      cp->csr->vec.opcode = inst->Opcode; +      cp->csr->vec.arg0 = reg[0]; +      cp->csr->vec.arg1 = reg[1]; +      cp->csr->vec.arg2 = reg[2]; +      cp->csr->vec.dst = result; +      cp->csr++; + +      if (result == REG_RES) { +	 cp->csr->dword = fixup.dword; +	 cp->csr++; +      }      	  +      break; + + +   case NOP: +      break; + +   default: +      assert(0); +      break; +   } + +   cvp_release_regs( cp ); +} + + +static void compile_vertex_program( struct arb_vp_machine *m, +				    const struct vertex_program *program ) +{  +   struct compilation cp; +   GLuint i; + +   /* Initialize cp: +    */ +   memset(&cp, 0, sizeof(cp)); +   cp.VB = m->VB; +   cp.csr = m->store; + +   /* Compile instructions: +    */ +   for (i = 0; i < program->Base.NumInstructions; i++) { +      cvp_emit_inst(&cp, &program->Instructions[i]); +   } + +   /* Finish up: +    */ +   m->instructions = m->store; +   m->nr_instructions = cp.csr - m->store; + + +   /* Print/disassemble: +    */ +   if (0) { +      for (i = 0; i < m->nr_instructions; i++) { +	 union instruction insn = m->instructions[i]; +	 const struct opcode_info *info = &opcode_info[insn.vec.opcode]; +	 info->print( insn, info ); +      } +      _mesa_printf("\n\n"); +   } +} + + + + +/* ---------------------------------------------------------------------- + * Execution + */ +static void userclip( GLcontext *ctx, +		      GLvector4f *clip, +		      GLubyte *clipmask, +		      GLubyte *clipormask, +		      GLubyte *clipandmask ) +{ +   GLuint p; + +   for (p = 0; p < ctx->Const.MaxClipPlanes; p++) +      if (ctx->Transform.ClipPlanesEnabled & (1 << p)) { +	 GLuint nr, i; +	 const GLfloat a = ctx->Transform._ClipUserPlane[p][0]; +	 const GLfloat b = ctx->Transform._ClipUserPlane[p][1]; +	 const GLfloat c = ctx->Transform._ClipUserPlane[p][2]; +	 const GLfloat d = ctx->Transform._ClipUserPlane[p][3]; +         GLfloat *coord = (GLfloat *)clip->data; +         GLuint stride = clip->stride; +         GLuint count = clip->count; + +	 for (nr = 0, i = 0 ; i < count ; i++) { +	    GLfloat dp = (coord[0] * a +  +			  coord[1] * b + +			  coord[2] * c + +			  coord[3] * d); + +	    if (dp < 0) { +	       nr++; +	       clipmask[i] |= CLIP_USER_BIT; +	    } + +	    STRIDE_F(coord, stride); +	 } + +	 if (nr > 0) { +	    *clipormask |= CLIP_USER_BIT; +	    if (nr == count) { +	       *clipandmask |= CLIP_USER_BIT; +	       return; +	    } +	 } +      } +} + + +static GLboolean do_ndc_cliptest( struct arb_vp_machine *m ) +{ +   GLcontext *ctx = m->ctx; +   TNLcontext *tnl = TNL_CONTEXT(ctx); +   struct vertex_buffer *VB = m->VB; + +   /* Cliptest and perspective divide.  Clip functions must clear +    * the clipmask. +    */ +   m->ormask = 0; +   m->andmask = CLIP_ALL_BITS; + +   if (tnl->NeedNdcCoords) { +      VB->NdcPtr = +         _mesa_clip_tab[VB->ClipPtr->size]( VB->ClipPtr, +                                            &m->ndcCoords, +                                            m->clipmask, +                                            &m->ormask, +                                            &m->andmask ); +   } +   else { +      VB->NdcPtr = NULL; +      _mesa_clip_np_tab[VB->ClipPtr->size]( VB->ClipPtr, +                                            NULL, +                                            m->clipmask, +                                            &m->ormask, +                                            &m->andmask ); +   } + +   if (m->andmask) { +      /* All vertices are outside the frustum */ +      return GL_FALSE; +   } + +   /* Test userclip planes.  This contributes to VB->ClipMask. +    */ +   if (ctx->Transform.ClipPlanesEnabled && !ctx->VertexProgram._Enabled) { +      userclip( ctx, +		VB->ClipPtr, +		m->clipmask, +		&m->ormask, +		&m->andmask ); + +      if (m->andmask) { +	 return GL_FALSE; +      } +   } + +   VB->ClipAndMask = m->andmask; +   VB->ClipOrMask = m->ormask; +   VB->ClipMask = m->clipmask; + +   return GL_TRUE; +} + + + + +/** + * Execute the given vertex program.   + *  + * TODO: Integrate the t_vertex.c code here, to build machine vertices + * directly at this point. + * + * TODO: Eliminate the VB struct entirely and just use + * struct arb_vertex_machine. + */ +static GLboolean +run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage) +{ +   struct vertex_program *program = (ctx->VertexProgram._Enabled ? +				     ctx->VertexProgram.Current : +				     &ctx->_TnlProgram); +   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; +   struct arb_vp_machine *m = ARB_VP_MACHINE(stage); +   GLuint i, j, outputs = program->OutputsWritten; + +   if (program->Parameters) { +      _mesa_load_state_parameters(ctx, program->Parameters); +      m->File[PROGRAM_STATE_VAR] = program->Parameters->ParameterValues; +   }    + +   /* Run the actual program: +    */ +   for (m->vtx_nr = 0; m->vtx_nr < VB->Count; m->vtx_nr++) { +      for (j = 0; j < m->nr_instructions; j++) { +	 union instruction inst = m->instructions[j];	  +	 opcode_info[inst.vec.opcode].func( m, inst ); +      } +   } + +   /* Setup the VB pointers so that the next pipeline stages get +    * their data from the right place (the program output arrays). +    * +    * TODO: 1) Have tnl use these RESULT values for outputs rather +    * than trying to shoe-horn inputs and outputs into one set of +    * values. +    * +    * TODO: 2) Integrate t_vertex.c so that we just go straight ahead +    * and build machine vertices here. +    */ +   VB->ClipPtr = &m->attribs[VERT_RESULT_HPOS]; +   VB->ClipPtr->count = VB->Count; + +   if (outputs & (1<<VERT_RESULT_COL0)) { +      VB->ColorPtr[0] = &m->attribs[VERT_RESULT_COL0]; +      VB->AttribPtr[VERT_ATTRIB_COLOR0] = VB->ColorPtr[0]; +   } + +   if (outputs & (1<<VERT_RESULT_BFC0)) { +      VB->ColorPtr[1] = &m->attribs[VERT_RESULT_BFC0]; +   } + +   if (outputs & (1<<VERT_RESULT_COL1)) { +      VB->SecondaryColorPtr[0] = &m->attribs[VERT_RESULT_COL1]; +      VB->AttribPtr[VERT_ATTRIB_COLOR1] = VB->SecondaryColorPtr[0]; +   } + +   if (outputs & (1<<VERT_RESULT_BFC1)) { +      VB->SecondaryColorPtr[1] = &m->attribs[VERT_RESULT_BFC1]; +   } + +   if (outputs & (1<<VERT_RESULT_FOGC)) { +      VB->FogCoordPtr = &m->attribs[VERT_RESULT_FOGC]; +      VB->AttribPtr[VERT_ATTRIB_FOG] = VB->FogCoordPtr; +   } + +   if (outputs & (1<<VERT_RESULT_PSIZ)) { +      VB->PointSizePtr = &m->attribs[VERT_RESULT_PSIZ]; +      VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &m->attribs[VERT_RESULT_PSIZ]; +   } + +   for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { +      if (outputs & (1<<(VERT_RESULT_TEX0+i))) { +	 VB->TexCoordPtr[i] = &m->attribs[VERT_RESULT_TEX0 + i]; +	 VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i]; +      } +   } + +#if 0 +   for (i = 0; i < VB->Count; i++) { +      printf("Out %d: %f %f %f %f %f %f %f %f\n", i, +	     VEC_ELT(VB->ClipPtr, GLfloat, i)[0], +	     VEC_ELT(VB->ClipPtr, GLfloat, i)[1], +	     VEC_ELT(VB->ClipPtr, GLfloat, i)[2], +	     VEC_ELT(VB->ClipPtr, GLfloat, i)[3], +	     VEC_ELT(VB->ColorPtr[0], GLfloat, i)[0], +	     VEC_ELT(VB->ColorPtr[0], GLfloat, i)[1], +	     VEC_ELT(VB->ColorPtr[0], GLfloat, i)[2], +	     VEC_ELT(VB->ColorPtr[0], GLfloat, i)[3]); +   } +#endif + +   /* Perform NDC and cliptest operations: +    */ +   return do_ndc_cliptest(m); +} + + +static void +validate_vertex_program( GLcontext *ctx, struct tnl_pipeline_stage *stage ) +{ +   struct arb_vp_machine *m = ARB_VP_MACHINE(stage); +   struct vertex_program *program = (ctx->VertexProgram._Enabled ? +				     ctx->VertexProgram.Current : +				     &ctx->_TnlProgram); + +   compile_vertex_program( m, program ); + +   /* Grab the state GL state and put into registers: +    */ +   m->File[PROGRAM_LOCAL_PARAM] = program->Base.LocalParams; +   m->File[PROGRAM_ENV_PARAM] = ctx->VertexProgram.Parameters; +   m->File[PROGRAM_STATE_VAR] = 0; +} + + + + + + + +/** + * Called the first time stage->run is called.  In effect, don't + * allocate data until the first time the stage is run. + */ +static void init_vertex_program( GLcontext *ctx, +				 struct tnl_pipeline_stage *stage ) +{ +   TNLcontext *tnl = TNL_CONTEXT(ctx); +   struct vertex_buffer *VB = &(tnl->vb); +   struct arb_vp_machine *m; +   const GLuint size = VB->Size; +   GLuint i; + +   stage->privatePtr = MALLOC(sizeof(*m)); +   m = ARB_VP_MACHINE(stage); +   if (!m) +      return; + +   /* arb_vertex_machine struct should subsume the VB: +    */ +   m->VB = VB; +   m->ctx = ctx; + +   /* Allocate arrays of vertex output values */ +   for (i = 0; i < VERT_RESULT_MAX; i++) { +      _mesa_vector4f_alloc( &m->attribs[i], 0, size, 32 ); +      m->attribs[i].size = 4; +   } + +   /* a few other misc allocations */ +   _mesa_vector4f_alloc( &m->ndcCoords, 0, size, 32 ); +   m->clipmask = (GLubyte *) ALIGN_MALLOC(sizeof(GLubyte)*size, 32 ); +} + + + + +/** + * Destructor for this pipeline stage. + */ +static void dtr( struct tnl_pipeline_stage *stage ) +{ +   struct arb_vp_machine *m = ARB_VP_MACHINE(stage); + +   if (m) { +      GLuint i; + +      /* free the vertex program result arrays */ +      for (i = 0; i < VERT_RESULT_MAX; i++) +         _mesa_vector4f_free( &m->attribs[i] ); + +      /* free misc arrays */ +      _mesa_vector4f_free( &m->ndcCoords ); +      ALIGN_FREE( m->clipmask ); + +      FREE( m ); +      stage->privatePtr = NULL; +   } +} + +/** + * Public description of this pipeline stage. + */ +const struct tnl_pipeline_stage _tnl_arb_vertex_program_stage = +{ +   "vertex-program", +   NULL,			/* private_data */ +   init_vertex_program,		/* create */ +   dtr,				/* destroy */ +   validate_vertex_program,	/* validate */ +   run_arb_vertex_program	/* run */ +}; | 
