diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300')
-rw-r--r-- | src/mesa/drivers/dri/r300/Makefile | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_cmdbuf.c | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_context.c | 23 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_context.h | 95 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_emit.c | 73 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_emit.h | 10 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_ioctl.c | 24 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_ioctl.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_reg.h | 31 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_render.c | 30 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_shader.c | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_state.c | 405 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_state.h | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_swtcl.c | 712 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_swtcl.h | 45 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_texmem.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_vertprog.c | 1679 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_vertprog.h | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/radeon_lock.h | 3 |
19 files changed, 2099 insertions, 1062 deletions
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index c1d223c760..44248964fd 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -41,6 +41,7 @@ DRIVER_SOURCES = \ r300_fragprog.c \ r300_shader.c \ r300_emit.c \ + r300_swtcl.c \ $(EGL_SOURCES) C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 7055286ba9..9eca41fa38 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -321,8 +321,12 @@ void r300InitCmdBuf(r300ContextPtr r300) r300->hw.unk221C.cmd[0] = cmdpacket0(R300_VAP_UNKNOWN_221C, 1); ALLOC_STATE(vap_clip, always, 5, 0); r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_CLIP_X_0, 4); - ALLOC_STATE(unk2288, always, 2, 0); - r300->hw.unk2288.cmd[0] = cmdpacket0(R300_VAP_UNKNOWN_2288, 1); + + if (has_tcl) { + ALLOC_STATE(unk2288, always, 2, 0); + r300->hw.unk2288.cmd[0] = cmdpacket0(R300_VAP_UNKNOWN_2288, 1); + } + ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0); r300->hw.vof.cmd[R300_VOF_CMD_0] = cmdpacket0(R300_VAP_OUTPUT_VTX_FMT_0, 2); diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 9ea14ab4c7..14e0f052fd 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -63,6 +63,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_ioctl.h" #include "r300_tex.h" #include "r300_emit.h" +#include "r300_swtcl.h" #ifdef USER_BUFFERS #include "r300_mem.h" @@ -317,15 +318,17 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _tnl_allow_vertex_fog(ctx, GL_TRUE); /* currently bogus data */ - ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeInstructions = - VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ - ctx->Const.VertexProgram.MaxTemps = 32; - ctx->Const.VertexProgram.MaxNativeTemps = - /*VSF_MAX_FRAGMENT_TEMPS */ 32; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + if (screen->chip_flags & RADEON_CHIPSET_TCL) { + ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeInstructions = + VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ + ctx->Const.VertexProgram.MaxTemps = 32; + ctx->Const.VertexProgram.MaxNativeTemps = + /*VSF_MAX_FRAGMENT_TEMPS */ 32; + ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + } ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS; ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ @@ -363,6 +366,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, radeonInitSpanFuncs(ctx); r300InitCmdBuf(r300); r300InitState(r300); + if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) + r300InitSwtcl(ctx); TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 076bb49a00..be6909724a 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -568,38 +568,21 @@ struct r300_vertex_shader_fragment { union { GLuint d[VSF_MAX_FRAGMENT_LENGTH]; float f[VSF_MAX_FRAGMENT_LENGTH]; - VERTEX_SHADER_INSTRUCTION i[VSF_MAX_FRAGMENT_LENGTH / 4]; + GLuint i[VSF_MAX_FRAGMENT_LENGTH]; } body; }; -#define VSF_DEST_PROGRAM 0x0 -#define VSF_DEST_MATRIX0 0x200 -#define VSF_DEST_MATRIX1 0x204 -#define VSF_DEST_MATRIX2 0x208 -#define VSF_DEST_VECTOR0 0x20c -#define VSF_DEST_VECTOR1 0x20d -#define VSF_DEST_UNKNOWN1 0x400 -#define VSF_DEST_UNKNOWN2 0x406 - struct r300_vertex_shader_state { struct r300_vertex_shader_fragment program; - - struct r300_vertex_shader_fragment unknown1; - struct r300_vertex_shader_fragment unknown2; - - int program_start; - int unknown_ptr1; /* pointer within program space */ - int program_end; - - int param_offset; - int param_count; - - int unknown_ptr2; /* pointer within program space */ - int unknown_ptr3; /* pointer within program space */ }; extern int hw_tcl_on; +#define COLOR_IS_RGBA +#define TAG(x) r300##x +#include "tnl_dd/t_dd_vertex.h" +#undef TAG + //#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current) #define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->selected_vp) @@ -796,13 +779,10 @@ struct r300_state { GLuint *Elts; struct r300_dma_region elt_dma; - DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. + struct r300_dma_region swtcl_dma; + DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. They are the same as tnl->render_inputs for fixed pipeline */ - struct { - int transform_offset; /* Transform matrix offset, -1 if none */ - } vap_param; /* vertex processor parameter allocation - tells where to write parameters */ - struct r300_stencilbuffer_state stencil; }; @@ -811,6 +791,62 @@ struct r300_state { #define R300_FALLBACK_TCL 1 #define R300_FALLBACK_RAST 2 +/* r300_swtcl.c + */ +struct r300_swtcl_info { + GLuint RenderIndex; + + /** + * Size of a hardware vertex. This is calculated when \c ::vertex_attrs is + * installed in the Mesa state vector. + */ + GLuint vertex_size; + + /** + * Attributes instructing the Mesa TCL pipeline where / how to put vertex + * data in the hardware buffer. + */ + struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; + + /** + * Number of elements of \c ::vertex_attrs that are actually used. + */ + GLuint vertex_attr_count; + + /** + * Cached pointer to the buffer where Mesa will store vertex data. + */ + GLubyte *verts; + + /* Fallback rasterization functions + */ + // r200_point_func draw_point; + // r200_line_func draw_line; + // r200_tri_func draw_tri; + + GLuint hw_primitive; + GLenum render_primitive; + GLuint numverts; + + /** + * Offset of the 4UB color data within a hardware (swtcl) vertex. + */ + GLuint coloroffset; + + /** + * Offset of the 3UB specular color data within a hardware (swtcl) vertex. + */ + GLuint specoffset; + + /** + * Should Mesa project vertex data or will the hardware do it? + */ + GLboolean needproj; + + struct r300_dma_region indexed_verts; +}; + + /** * \brief R300 context structure. */ @@ -849,6 +885,9 @@ struct r300_context { GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; GLboolean disable_lowimpact_fallback; + + DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */ + struct r300_swtcl_info swtcl; }; struct r300_buffer_object { diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index 4670c28a02..424bf44e59 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -217,14 +217,14 @@ static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, dw = R300_INPUT_ROUTE_FLOAT | (inputs[tab[i]] << 8) | (attribptr[tab[i]]->size - 1); dw |= (R300_INPUT_ROUTE_FLOAT | (inputs[tab[i + 1]] << 8) | (attribptr[tab[i + 1]]->size - 1)) << 16; if (i + 2 == nr) { - dw |= (1 << (13 + 16)); + dw |= (R300_VAP_INPUT_ROUTE_END << 16); } dst[i >> 1] = dw; } if (nr & 1) { dw = R300_INPUT_ROUTE_FLOAT | (inputs[tab[nr - 1]] << 8) | (attribptr[tab[nr - 1]]->size - 1); - dw |= 1 << 13; + dw |= R300_VAP_INPUT_ROUTE_END; dst[nr >> 1] = dw; } @@ -239,7 +239,7 @@ static GLuint r300VAPInputRoute1Swizzle(int swizzle[4]) (swizzle[3] << R300_INPUT_ROUTE_W_SHIFT); } -static GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr) +GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr) { GLuint i; @@ -255,14 +255,14 @@ static GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr) return (nr + 1) >> 1; } -static GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead) +GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead) { /* No idea what this value means. I have seen other values written to * this register... */ return 0x5555; } -static GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) +GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) { r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint i, vic_1 = 0; @@ -286,7 +286,7 @@ static GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) return vic_1; } -static GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten) +GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten) { GLuint ret = 0; @@ -299,13 +299,14 @@ static GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten) if (OutputsWritten & (1 << VERT_RESULT_COL1)) ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; -#if 0 - if (OutputsWritten & (1 << VERT_RESULT_BFC0)) - ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT; - - if (OutputsWritten & (1 << VERT_RESULT_BFC1)) - ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; + if (OutputsWritten & (1 << VERT_RESULT_BFC0) + || OutputsWritten & (1 << VERT_RESULT_BFC1)) + ret |= + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; +#if 0 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ; #endif @@ -315,7 +316,7 @@ static GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten) return ret; } -static GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten) +GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten) { GLuint i, ret = 0; @@ -358,9 +359,11 @@ int r300EmitArrays(GLcontext * ctx) DECLARE_RENDERINPUTS(render_inputs_bitset); RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); + vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr; + assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)); assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0); - assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)); + //assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)); if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) { InputsRead |= 1 << VERT_ATTRIB_POS; @@ -392,20 +395,18 @@ int r300EmitArrays(GLcontext * ctx) } } - if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { - /* Fixed, apply to vir0 only */ - memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int)); - inputs = vir_inputs; - if (InputsRead & VERT_ATTRIB_POS) - inputs[VERT_ATTRIB_POS] = 0; - if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) - inputs[VERT_ATTRIB_COLOR0] = 2; - if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) - inputs[VERT_ATTRIB_COLOR1] = 3; - for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) - if (InputsRead & (1 << i)) - inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); - } + /* Fixed, apply to vir0 only */ + memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int)); + inputs = vir_inputs; + if (InputsRead & VERT_ATTRIB_POS) + inputs[VERT_ATTRIB_POS] = 0; + if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) + inputs[VERT_ATTRIB_COLOR0] = 2; + if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) + inputs[VERT_ATTRIB_COLOR1] = 3; + for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) + if (InputsRead & (1 << i)) + inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); } @@ -532,3 +533,19 @@ void r300ReleaseArrays(GLcontext * ctx) r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__); } } + +void r300EmitCacheFlush(r300ContextPtr rmesa) +{ + int cmd_reserved = 0; + int cmd_written = 0; + + drm_radeon_cmd_header_t *cmd = NULL; + + reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); + e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); + + reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); + e32(R300_RB3D_ZCACHE_UNKNOWN_03); + + +} diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index 2f79ee3a23..a6d69ec5ff 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -225,5 +225,15 @@ void r300UseArrays(GLcontext * ctx); #endif extern void r300ReleaseArrays(GLcontext * ctx); +extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); +extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); + +extern void r300EmitCacheFlush(r300ContextPtr rmesa); + +extern GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr); +extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead); +extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); +extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten); +extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten); #endif diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 15c2cf3ad7..90f5027c9a 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -172,11 +172,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]); cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]); - reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); - - reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); - e32(R300_RB3D_ZCACHE_UNKNOWN_03); + r300EmitCacheFlush(rmesa); cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN); } @@ -412,19 +408,22 @@ static void r300Clear(GLcontext * ctx, GLbitfield mask) void r300Flush(GLcontext * ctx) { - r300ContextPtr r300 = R300_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); - if (r300->cmdbuf.count_used > r300->cmdbuf.count_reemit) - r300FlushCmdBuf(r300, __FUNCTION__); + if (rmesa->dma.flush) + rmesa->dma.flush( rmesa ); + + if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit) + r300FlushCmdBuf(rmesa, __FUNCTION__); } #ifdef USER_BUFFERS #include "r300_mem.h" -static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) +void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) { struct r300_dma_buffer *dmabuf; size = MAX2(size, RADEON_BUFFER_SIZE * 16); @@ -436,9 +435,12 @@ static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) rmesa->dma.flush(rmesa); } - if (rmesa->dma.current.buf) + if (rmesa->dma.current.buf) { +#ifdef USER_BUFFERS + r300_mem_use(rmesa, rmesa->dma.current.buf->id); +#endif r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); - + } if (rmesa->dma.nr_released_bufs > 4) r300FlushCmdBuf(rmesa, __FUNCTION__); diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h index 7a19a2cf3f..e1143fb6c3 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.h +++ b/src/mesa/drivers/dri/r300/r300_ioctl.h @@ -56,4 +56,5 @@ extern void r300AllocDmaRegion(r300ContextPtr rmesa, extern void r300InitIoctlFuncs(struct dd_function_table *functions); +extern void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size); #endif /* __R300_IOCTL_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 3ce09c16d3..1baa74c526 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -282,9 +282,32 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 # define R300_PVS_UPLOAD_PROGRAM 0x00000000 +/* gap */ # define R300_PVS_UPLOAD_PARAMETERS 0x00000200 +/* gap */ +# define R300_PVS_UPLOAD_CLIP_PLANE0 0x00000400 +# define R300_PVS_UPLOAD_CLIP_PLANE1 0x00000401 +# define R300_PVS_UPLOAD_CLIP_PLANE2 0x00000402 +# define R300_PVS_UPLOAD_CLIP_PLANE3 0x00000403 +# define R300_PVS_UPLOAD_CLIP_PLANE4 0x00000404 +# define R300_PVS_UPLOAD_CLIP_PLANE5 0x00000405 # define R300_PVS_UPLOAD_POINTSIZE 0x00000406 +/* + * These are obsolete defines form r300_context.h, but they might give some + * clues when investigating the addresses further... + */ +#if 0 +#define VSF_DEST_PROGRAM 0x0 +#define VSF_DEST_MATRIX0 0x200 +#define VSF_DEST_MATRIX1 0x204 +#define VSF_DEST_MATRIX2 0x208 +#define VSF_DEST_VECTOR0 0x20c +#define VSF_DEST_VECTOR1 0x20d +#define VSF_DEST_UNKNOWN1 0x400 +#define VSF_DEST_UNKNOWN2 0x406 +#endif + /* gap */ #define R300_VAP_PVS_UPLOAD_DATA 0x2208 @@ -336,13 +359,15 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * The meaning of the two UNKNOWN fields is obviously not known. However, * experiments so far have shown that both *must* point to an instruction * inside the vertex program, otherwise the GPU locks up. + * * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and - * CNTL_1_UNKNOWN points to instruction where last write to position takes - * place. + * R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to + * position takes place. + * * Most likely this is used to ignore rest of the program in cases * where group of verts arent visible. For some reason this "section" * is sometimes accepted other instruction that have no relationship with - *position calculations. + * position calculations. */ #define R300_VAP_PVS_CNTL_1 0x22D0 # define R300_PVS_CNTL_1_PROGRAM_START_SHIFT 0 diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 83999307b5..eee1e803a0 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -79,7 +79,7 @@ extern int future_hw_tcl_on; /** * \brief Convert a OpenGL primitive type into a R300 primitive type. */ -static int r300PrimitiveType(r300ContextPtr rmesa, GLcontext * ctx, int prim) +int r300PrimitiveType(r300ContextPtr rmesa, int prim) { switch (prim & PRIM_MODE_MASK) { case GL_POINTS: @@ -119,7 +119,7 @@ static int r300PrimitiveType(r300ContextPtr rmesa, GLcontext * ctx, int prim) } } -static int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) +int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) { int verts_off = 0; @@ -261,7 +261,7 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *vb = &tnl->vb; - type = r300PrimitiveType(rmesa, ctx, prim); + type = r300PrimitiveType(rmesa, prim); num_verts = r300NumVerts(rmesa, end - start, prim); if (type < 0 || num_verts <= 0) @@ -287,29 +287,20 @@ static GLboolean r300RunRender(GLcontext * ctx, { r300ContextPtr rmesa = R300_CONTEXT(ctx); int i; - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *vb = &tnl->vb; + if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); - if (hw_tcl_on == GL_FALSE) - vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr; r300UpdateShaders(rmesa); if (r300EmitArrays(ctx)) return GL_TRUE; r300UpdateShaderStates(rmesa); - reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); - - reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); - e32(R300_RB3D_ZCACHE_UNKNOWN_03); - + r300EmitCacheFlush(rmesa); r300EmitState(rmesa); for (i = 0; i < vb->PrimitiveCount; i++) { @@ -319,11 +310,7 @@ static GLboolean r300RunRender(GLcontext * ctx, r300RunRenderPrimitive(rmesa, ctx, start, end, prim); } - reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); - - reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); - e32(R300_RB3D_ZCACHE_UNKNOWN_03); + r300EmitCacheFlush(rmesa); #ifdef USER_BUFFERS r300UseArrays(ctx); @@ -384,12 +371,17 @@ static int r300Fallback(GLcontext * ctx) static GLboolean r300RunNonTCLRender(GLcontext * ctx, struct tnl_pipeline_stage *stage) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); + if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); if (r300Fallback(ctx) >= R300_FALLBACK_RAST) return GL_TRUE; + if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + return GL_TRUE; + return r300RunRender(ctx, stage); } diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 5f5ac7c4c7..77abf86a8e 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -1,8 +1,7 @@ -#include "glheader.h" -#include "macros.h" -#include "enums.h" -#include "program.h" +#include "main/glheader.h" + +#include "shader/program.h" #include "tnl/tnl.h" #include "r300_context.h" #include "r300_fragprog.h" diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index bdd6855802..088216c76e 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -82,6 +82,8 @@ static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) rmesa->hw.blend_color.cmd[1] = PACK_COLOR_8888(color[3], color[0], color[1], color[2]); + rmesa->hw.blend_color.cmd[2] = 0; + rmesa->hw.blend_color.cmd[3] = 0; } /** @@ -317,20 +319,34 @@ static void r300UpdateCulling(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); uint32_t val = 0; - R300_STATECHANGE(r300, cul); if (ctx->Polygon.CullFlag) { - if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) - val = R300_CULL_FRONT | R300_CULL_BACK; - else if (ctx->Polygon.CullFaceMode == GL_FRONT) + switch (ctx->Polygon.CullFaceMode) { + case GL_FRONT: val = R300_CULL_FRONT; - else + break; + case GL_BACK: val = R300_CULL_BACK; + break; + case GL_FRONT_AND_BACK: + val = R300_CULL_FRONT | R300_CULL_BACK; + break; + default: + break; + } + } - if (ctx->Polygon.FrontFace == GL_CW) - val |= R300_FRONT_FACE_CW; - else - val |= R300_FRONT_FACE_CCW; + switch (ctx->Polygon.FrontFace) { + case GL_CW: + val |= R300_FRONT_FACE_CW; + break; + case GL_CCW: + val |= R300_FRONT_FACE_CCW; + break; + default: + break; } + + R300_STATECHANGE(r300, cul); r300->hw.cul.cmd[R300_CUL_CULL] = val; } @@ -344,6 +360,20 @@ static void r300SetEarlyZState(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); R300_STATECHANGE(r300, zstencil_format); + switch (ctx->Visual.depthBits) { + case 16: + r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z; + break; + case 24: + r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z; + break; + default: + fprintf(stderr, "Error: Unsupported depth %d... exiting\n", ctx->Visual.depthBits); + _mesa_exit(-1); + } + + // r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32; + if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) /* disable early Z */ r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; @@ -355,6 +385,9 @@ static void r300SetEarlyZState(GLcontext * ctx) /* disable early Z */ r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; } + + r300->hw.zstencil_format.cmd[3] = 0x00000003; + r300->hw.zstencil_format.cmd[4] = 0x00000000; } static void r300SetAlphaState(GLcontext * ctx) @@ -403,6 +436,7 @@ static void r300SetAlphaState(GLcontext * ctx) R300_STATECHANGE(r300, at); r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc; + r300->hw.at.cmd[R300_AT_UNKNOWN] = 0; r300SetEarlyZState(ctx); } @@ -513,6 +547,9 @@ static void r300UpdatePolygonMode(GLcontext * ctx) R300_STATECHANGE(r300, polygon_mode); r300->hw.polygon_mode.cmd[1] = hw_mode; } + + r300->hw.polygon_mode.cmd[2] = 0x00000001; + r300->hw.polygon_mode.cmd[3] = 0x00000000; } /** @@ -696,8 +733,8 @@ static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) static void r300PointSize(GLcontext * ctx, GLfloat size) { r300ContextPtr r300 = R300_CONTEXT(ctx); - - size = ctx->Point._Size; + /* same size limits for AA, non-AA points */ + size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize); R300_STATECHANGE(r300, ps); r300->hw.ps.cmd[R300_PS_POINTSIZE] = @@ -712,8 +749,9 @@ static void r300LineWidth(GLcontext * ctx, GLfloat widthf) { r300ContextPtr r300 = R300_CONTEXT(ctx); - widthf = ctx->Line._Width; - + widthf = CLAMP(widthf, + ctx->Const.MinPointSize, + ctx->Const.MaxPointSize); R300_STATECHANGE(r300, lcntl); r300->hw.lcntl.cmd[1] = R300_LINE_CNT_HO | R300_LINE_CNT_VE | (int)(widthf * 6.0); @@ -762,6 +800,7 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode) r300ContextPtr rmesa = R300_CONTEXT(ctx); R300_STATECHANGE(rmesa, shade); + rmesa->hw.shade.cmd[1] = 0x00000002; switch (mode) { case GL_FLAT: rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT; @@ -772,6 +811,8 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode) default: return; } + rmesa->hw.shade.cmd[3] = 0x00000000; + rmesa->hw.shade.cmd[4] = 0x00000000; } static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, @@ -1477,9 +1518,7 @@ static void r300SetupRSUnit(GLcontext * ctx) if(_nc>_p->vpu.count)_p->vpu.count=_nc;\ }while(0) -void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, struct - r300_vertex_shader_fragment - *vsf) +static inline void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest, struct r300_vertex_shader_fragment *vsf) { int i; @@ -1487,8 +1526,7 @@ void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, s return; if (vsf->length & 0x3) { - fprintf(stderr, - "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n"); + fprintf(stderr, "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n"); _mesa_exit(-1); } @@ -1496,147 +1534,101 @@ void static inline setup_vertex_shader_fragment(r300ContextPtr r300, int dest, s case 0: R300_STATECHANGE(r300, vpi); for (i = 0; i < vsf->length; i++) - r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + - 4 * (dest & 0xff)] = (vsf->body.d[i]); - bump_vpu_count(r300->hw.vpi.cmd, - vsf->length + 4 * (dest & 0xff)); + r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); + bump_vpu_count(r300->hw.vpi.cmd, vsf->length + 4 * (dest & 0xff)); break; case 2: R300_STATECHANGE(r300, vpp); for (i = 0; i < vsf->length; i++) - r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + - 4 * (dest & 0xff)] = (vsf->body.d[i]); - bump_vpu_count(r300->hw.vpp.cmd, - vsf->length + 4 * (dest & 0xff)); + r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); + bump_vpu_count(r300->hw.vpp.cmd, vsf->length + 4 * (dest & 0xff)); break; case 4: R300_STATECHANGE(r300, vps); for (i = 0; i < vsf->length; i++) - r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = - (vsf->body.d[i]); - bump_vpu_count(r300->hw.vps.cmd, - vsf->length + 4 * (dest & 0xff)); + r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); + bump_vpu_count(r300->hw.vps.cmd, vsf->length + 4 * (dest & 0xff)); break; default: - fprintf(stderr, - "%s:%s don't know how to handle dest %04x\n", - __FILE__, __FUNCTION__, dest); + fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest); _mesa_exit(-1); } } -/* just a skeleton for now.. */ - -/* Generate a vertex shader that simply transforms vertex and texture coordinates, - while leaving colors intact. Nothing fancy (like lights) - - If implementing lights make a copy first, so it is easy to switch between the two versions */ -static void r300GenerateSimpleVertexShader(r300ContextPtr r300) +static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) { - int i; + struct r300_vertex_shader_state *prog = &(rmesa->state.vertex_shader); GLuint o_reg = 0; - - /* Allocate parameters */ - r300->state.vap_param.transform_offset = 0x0; /* transform matrix */ - r300->state.vertex_shader.param_offset = 0x0; - r300->state.vertex_shader.param_count = 0x4; /* 4 vector values - 4x4 matrix */ - - r300->state.vertex_shader.program_start = 0x0; - r300->state.vertex_shader.unknown_ptr1 = 0x4; /* magic value ? */ - r300->state.vertex_shader.program_end = 0x0; - - r300->state.vertex_shader.unknown_ptr2 = 0x0; /* magic value */ - r300->state.vertex_shader.unknown_ptr3 = 0x4; /* magic value */ - - r300->state.vertex_shader.unknown1.length = 0; - r300->state.vertex_shader.unknown2.length = 0; - -#define WRITE_OP(oper,source1,source2,source3) {\ - r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].op=(oper); \ - r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[0]=(source1); \ - r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[1]=(source2); \ - r300->state.vertex_shader.program.body.i[r300->state.vertex_shader.program_end].src[2]=(source3); \ - r300->state.vertex_shader.program_end++; \ - } - - for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) - if (r300->state.sw_tcl_inputs[i] != -1) { - WRITE_OP(EASY_VSF_OP(MUL, o_reg++, ALL, RESULT), - VSF_REG(r300->state.sw_tcl_inputs[i]), - VSF_ATTR_UNITY(r300->state. - sw_tcl_inputs[i]), - VSF_UNITY(r300->state.sw_tcl_inputs[i]) - ) - + int i; + int inst_count = 0; + int param_count = 0; + int program_end = 0; + + for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) { + if (rmesa->state.sw_tcl_inputs[i] != -1) { + prog->program.body.i[program_end + 0] = EASY_VSF_OP(MUL, o_reg++, ALL, RESULT); + prog->program.body.i[program_end + 1] = VSF_REG(rmesa->state.sw_tcl_inputs[i]); + prog->program.body.i[program_end + 2] = VSF_ATTR_UNITY(rmesa->state.sw_tcl_inputs[i]); + prog->program.body.i[program_end + 3] = VSF_UNITY(rmesa->state.sw_tcl_inputs[i]); + program_end += 4; } + } - r300->state.vertex_shader.program_end--; /* r300 wants program length to be one more - no idea why */ - r300->state.vertex_shader.program.length = - (r300->state.vertex_shader.program_end + 1) * 4; + prog->program.length = program_end; - r300->state.vertex_shader.unknown_ptr1 = r300->state.vertex_shader.program_end; /* magic value ? */ - r300->state.vertex_shader.unknown_ptr2 = r300->state.vertex_shader.program_end; /* magic value ? */ - r300->state.vertex_shader.unknown_ptr3 = r300->state.vertex_shader.program_end; /* magic value ? */ + r300SetupVertexProgramFragment(rmesa, R300_PVS_UPLOAD_PROGRAM, + &(prog->program)); + inst_count = (prog->program.length / 4) - 1; + R300_STATECHANGE(rmesa, pvs); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = + (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | + (inst_count << R300_PVS_CNTL_1_POS_END_SHIFT) | + (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = + (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) | + (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = + (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) | + (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT); } -static void r300SetupVertexProgram(r300ContextPtr rmesa) +static void r300SetupRealVertexProgram(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; - int inst_count; - int param_count; - struct r300_vertex_program *prog = - (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + struct r300_vertex_program *prog = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + int inst_count = 0; + int param_count = 0; - ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; + /* FIXME: r300SetupVertexProgramFragment */ R300_STATECHANGE(rmesa, vpp); param_count = - r300VertexProgUpdateParams(ctx, (struct r300_vertex_program_cont *) - ctx->VertexProgram._Current /*prog */ , + r300VertexProgUpdateParams(ctx, + (struct r300_vertex_program_cont *) + ctx->VertexProgram._Current, (float *)&rmesa->hw.vpp. cmd[R300_VPP_PARAM_0]); bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; - /* Reset state, in case we don't use something */ - ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; - ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; - - setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM, &(prog->program)); - -#if 0 - setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1, - &(rmesa->state.vertex_shader.unknown1)); - setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2, - &(rmesa->state.vertex_shader.unknown2)); -#endif - - inst_count = prog->program.length / 4 - 1; + r300SetupVertexProgramFragment(rmesa, R300_PVS_UPLOAD_PROGRAM, &(prog->program)); + inst_count = (prog->program.length / 4) - 1; R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = - (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) - | (inst_count /*pos_end */ << R300_PVS_CNTL_1_POS_END_SHIFT) - | (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); + (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) | + (inst_count << R300_PVS_CNTL_1_POS_END_SHIFT) | + (inst_count << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = - (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) - | (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); + (0 << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) | + (param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = - (0 /*rmesa->state.vertex_shader.unknown_ptr2 */ << - R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) - | (inst_count /*rmesa->state.vertex_shader.unknown_ptr3 */ << - 0); - - /* This is done for vertex shader fragments, but also needs to be done for vap_pvs, - so I leave it as a reminder */ -#if 0 - reg_start(R300_VAP_PVS_WAITIDLE, 0); - e32(0x00000000); -#endif + (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) | + (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT); } -static void r300SetupVertexShader(r300ContextPtr rmesa) +static void r300SetupVertexProgram(r300ContextPtr rmesa) { GLcontext *ctx = rmesa->radeon.glCtx; @@ -1649,46 +1641,16 @@ static void r300SetupVertexShader(r300ContextPtr rmesa) 0x400 area might have something to do with pixel shaders as it appears right after pfs programming. 0x406 is set to { 0.0, 0.0, 1.0, 0.0 } most of the time but should change with smooth points and in other rare cases. */ //setup_vertex_shader_fragment(rmesa, 0x406, &unk4); - if (hw_tcl_on - && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))-> - translated) { - r300SetupVertexProgram(rmesa); - return; + if (hw_tcl_on && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))->translated) { + r300SetupRealVertexProgram(rmesa); + } else { + /* FIXME: This needs to be replaced by vertex shader generation code. */ + r300SetupDefaultVertexProgram(rmesa); } - /* This needs to be replaced by vertex shader generation code */ - r300GenerateSimpleVertexShader(rmesa); - - setup_vertex_shader_fragment(rmesa, VSF_DEST_PROGRAM, - &(rmesa->state.vertex_shader.program)); -#if 0 - setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN1, - &(rmesa->state.vertex_shader.unknown1)); - setup_vertex_shader_fragment(rmesa, VSF_DEST_UNKNOWN2, - &(rmesa->state.vertex_shader.unknown2)); -#endif - - R300_STATECHANGE(rmesa, pvs); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = - (rmesa->state.vertex_shader. - program_start << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) - | (rmesa->state.vertex_shader. - unknown_ptr1 << R300_PVS_CNTL_1_POS_END_SHIFT) - | (rmesa->state.vertex_shader. - program_end << R300_PVS_CNTL_1_PROGRAM_END_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = - (rmesa->state.vertex_shader. - param_offset << R300_PVS_CNTL_2_PARAM_OFFSET_SHIFT) - | (rmesa->state.vertex_shader. - param_count << R300_PVS_CNTL_2_PARAM_COUNT_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = - (rmesa->state.vertex_shader. - unknown_ptr2 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT) - | (rmesa->state.vertex_shader.unknown_ptr3 << 0); - - /* This is done for vertex shader fragments, but also needs to be done for vap_pvs, - so I leave it as a reminder */ + /* FIXME: This is done for vertex shader fragments, but also needs to be + * done for vap_pvs, so I leave it as a reminder. */ #if 0 reg_start(R300_VAP_PVS_WAITIDLE, 0); e32(0x00000000); @@ -1862,13 +1824,15 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.vap_clip.cmd[4] = r300PackFloat32(1.0); /* Y */ /* XXX: Other families? */ - switch (r300->radeon.radeonScreen->chip_family) { - case CHIP_FAMILY_R300: - r300->hw.unk2288.cmd[1] = R300_2288_R300; - break; - default: - r300->hw.unk2288.cmd[1] = R300_2288_RV350; - break; + if (has_tcl) { + switch (r300->radeon.radeonScreen->chip_family) { + case CHIP_FAMILY_R300: + r300->hw.unk2288.cmd[1] = R300_2288_R300; + break; + default: + r300->hw.unk2288.cmd[1] = R300_2288_RV350; + break; + } } r300->hw.gb_enable.cmd[1] = R300_GB_POINT_STUFF_ENABLE @@ -1926,19 +1890,16 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.unk4260.cmd[2] = r300PackFloat32(0.0); r300->hw.unk4260.cmd[3] = r300PackFloat32(1.0); - r300->hw.shade.cmd[1] = 0x00000002; r300ShadeModel(ctx, ctx->Light.ShadeModel); - r300->hw.shade.cmd[3] = 0x00000000; - r300->hw.shade.cmd[4] = 0x00000000; r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode); r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode); - r300->hw.polygon_mode.cmd[2] = 0x00000001; - r300->hw.polygon_mode.cmd[3] = 0x00000000; r300->hw.zbias_cntl.cmd[1] = 0x00000000; r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor, ctx->Polygon.OffsetUnits); + r300Enable(ctx, GL_POLYGON_OFFSET_POINT, ctx->Polygon.OffsetPoint); + r300Enable(ctx, GL_POLYGON_OFFSET_LINE, ctx->Polygon.OffsetLine); r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill); r300->hw.unk42C0.cmd[1] = 0x4B7FFFFF; @@ -1963,14 +1924,11 @@ static void r300ResetHwState(r300ContextPtr r300) r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); r300Fogfv(ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL); - r300->hw.at.cmd[R300_AT_UNKNOWN] = 0; r300->hw.unk4BD8.cmd[1] = 0; r300->hw.unk4E00.cmd[1] = 0; r300BlendColor(ctx, ctx->Color.BlendColor); - r300->hw.blend_color.cmd[2] = 0; - r300->hw.blend_color.cmd[3] = 0; /* Again, r300ClearBuffer uses this */ r300->hw.cb.cmd[R300_CB_OFFSET] = @@ -2001,25 +1959,6 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.unk4EA0.cmd[1] = 0x00000000; r300->hw.unk4EA0.cmd[2] = 0xffffffff; - switch (ctx->Visual.depthBits) { - case 16: - r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z; - break; - case 24: - r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z; - break; - default: - fprintf(stderr, "Error: Unsupported depth %d... exiting\n", - ctx->Visual.depthBits); - _mesa_exit(-1); - - } - /* z compress? */ - //r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32; - - r300->hw.zstencil_format.cmd[3] = 0x00000003; - r300->hw.zstencil_format.cmd[4] = 0x00000000; - r300->hw.zb.cmd[R300_ZB_OFFSET] = r300->radeon.radeonScreen->depthOffset + r300->radeon.radeonScreen->fbLocation; @@ -2092,7 +2031,6 @@ void r300UpdateShaders(r300ContextPtr rmesa) } r300UpdateStateParameters(ctx, _NEW_PROGRAM); } - } static void r300SetupPixelShader(r300ContextPtr rmesa) @@ -2111,62 +2049,61 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) __FUNCTION__); return; } -#define OUTPUT_FIELD(st, reg, field) \ - R300_STATECHANGE(rmesa, st); \ - for(i=0;i<=fp->alu_end;i++) \ - rmesa->hw.st.cmd[R300_FPI_INSTR_0+i]=fp->alu.inst[i].field;\ - rmesa->hw.st.cmd[R300_FPI_CMD_0]=cmdpacket0(reg, fp->alu_end+1); - OUTPUT_FIELD(fpi[0], R300_PFS_INSTR0_0, inst0); - OUTPUT_FIELD(fpi[1], R300_PFS_INSTR1_0, inst1); - OUTPUT_FIELD(fpi[2], R300_PFS_INSTR2_0, inst2); - OUTPUT_FIELD(fpi[3], R300_PFS_INSTR3_0, inst3); -#undef OUTPUT_FIELD + R300_STATECHANGE(rmesa, fpi[0]); + rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR0_0, fp->alu_end + 1); + for (i = 0; i <= fp->alu_end; i++) { + rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst0; + } + + R300_STATECHANGE(rmesa, fpi[1]); + rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR1_0, fp->alu_end + 1); + for (i = 0; i <= fp->alu_end; i++) { + rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst1; + } + + R300_STATECHANGE(rmesa, fpi[2]); + rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR2_0, fp->alu_end + 1); + for (i = 0; i <= fp->alu_end; i++) { + rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst2; + } + + R300_STATECHANGE(rmesa, fpi[3]); + rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_PFS_INSTR3_0, fp->alu_end + 1); + for (i = 0; i <= fp->alu_end; i++) { + rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst3; + } R300_STATECHANGE(rmesa, fp); + rmesa->hw.fp.cmd[R300_FP_CNTL0] = fp->cur_node | (fp->first_node_has_tex << 3); + rmesa->hw.fp.cmd[R300_FP_CNTL1] = fp->max_temp_idx; + rmesa->hw.fp.cmd[R300_FP_CNTL2] = + (fp->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | + (fp->alu_end << R300_PFS_CNTL_ALU_END_SHIFT) | + (fp->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | + (fp->tex_end << R300_PFS_CNTL_TEX_END_SHIFT); /* I just want to say, the way these nodes are stored.. weird.. */ for (i = 0, k = (4 - (fp->cur_node + 1)); i < 4; i++, k++) { if (i < (fp->cur_node + 1)) { rmesa->hw.fp.cmd[R300_FP_NODE0 + k] = - (fp->node[i]. - alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT) - | (fp->node[i]. - alu_end << R300_PFS_NODE_ALU_END_SHIFT) - | (fp->node[i]. - tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT) - | (fp->node[i]. - tex_end << R300_PFS_NODE_TEX_END_SHIFT) - | fp->node[i].flags; + (fp->node[i].alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT) | + (fp->node[i].alu_end << R300_PFS_NODE_ALU_END_SHIFT) | + (fp->node[i].tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT) | + (fp->node[i].tex_end << R300_PFS_NODE_TEX_END_SHIFT) | + fp->node[i].flags; } else { rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0; } } - /* PFS_CNTL_0 */ - rmesa->hw.fp.cmd[R300_FP_CNTL0] = - fp->cur_node | (fp->first_node_has_tex << 3); - /* PFS_CNTL_1 */ - rmesa->hw.fp.cmd[R300_FP_CNTL1] = fp->max_temp_idx; - /* PFS_CNTL_2 */ - rmesa->hw.fp.cmd[R300_FP_CNTL2] = - (fp->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT) - | (fp->alu_end << R300_PFS_CNTL_ALU_END_SHIFT) - | (fp->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT) - | (fp->tex_end << R300_PFS_CNTL_TEX_END_SHIFT); - R300_STATECHANGE(rmesa, fpp); + rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, fp->const_nr * 4); for (i = 0; i < fp->const_nr; i++) { - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = - r300PackFloat24(fp->constant[i][0]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = - r300PackFloat24(fp->constant[i][1]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = - r300PackFloat24(fp->constant[i][2]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = - r300PackFloat24(fp->constant[i][3]); - } - rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = - cmdpacket0(R300_PFS_PARAM_0_X, fp->const_nr * 4); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(fp->constant[i][0]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(fp->constant[i][1]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(fp->constant[i][2]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(fp->constant[i][3]); + } } void r300UpdateShaderStates(r300ContextPtr rmesa) @@ -2180,7 +2117,7 @@ void r300UpdateShaderStates(r300ContextPtr rmesa) r300SetupTextures(ctx); if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) - r300SetupVertexShader(rmesa); + r300SetupVertexProgram(rmesa); r300SetupRSUnit(ctx); } diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h index 21a49b7f36..365f7ecd0c 100644 --- a/src/mesa/drivers/dri/r300/r300_state.h +++ b/src/mesa/drivers/dri/r300/r300_state.h @@ -37,8 +37,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" +#define R300_NEWPRIM( rmesa ) \ + do { \ + if ( rmesa->dma.flush ) \ + rmesa->dma.flush( rmesa ); \ + } while (0) + #define R300_STATECHANGE(r300, atom) \ do { \ + R300_NEWPRIM(r300); \ r300->hw.atom.dirty = GL_TRUE; \ r300->hw.is_dirty = GL_TRUE; \ } while(0) diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c new file mode 100644 index 0000000000..a732bdb559 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -0,0 +1,712 @@ +/************************************************************************** + +Copyright (C) 2007 Dave Airlie + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Dave Airlie <airlied@linux.ie> + */ + +/* derived from r200 swtcl path */ + + + +#include "glheader.h" +#include "mtypes.h" +#include "colormac.h" +#include "enums.h" +#include "image.h" +#include "imports.h" +#include "light.h" +#include "macros.h" + +#include "swrast/s_context.h" +#include "swrast/s_fog.h" +#include "swrast_setup/swrast_setup.h" +#include "math/m_translate.h" +#include "tnl/tnl.h" +#include "tnl/t_context.h" +#include "tnl/t_pipeline.h" + +#include "r300_context.h" +#include "r300_swtcl.h" +#include "r300_state.h" +#include "r300_ioctl.h" +#include "r300_emit.h" +#include "r300_mem.h" + +static void flush_last_swtcl_prim( r300ContextPtr rmesa ); + + +void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset); +void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr); +#define EMIT_ATTR( ATTR, STYLE ) \ +do { \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \ + rmesa->swtcl.vertex_attr_count++; \ +} while (0) + +#define EMIT_PAD( N ) \ +do { \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \ + rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \ + rmesa->swtcl.vertex_attr_count++; \ +} while (0) + +/* this differs from the VIR0 in emit.c - TODO merge them using another option */ +static GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, + int *inputs, GLint * tab, GLuint nr) +{ + GLuint i, dw; + + /* type, inputs, stop bit, size */ + for (i = 0; i + 1 < nr; i += 2) { + dw = (inputs[tab[i]] << 8) | 0x3; + dw |= ((inputs[tab[i + 1]] << 8) | 0x3) << 16; + if (i + 2 == nr) { + dw |= (R300_VAP_INPUT_ROUTE_END << 16); + } + dst[i >> 1] = dw; + } + + if (nr & 1) { + dw = (inputs[tab[nr - 1]] << 8) | 0x3; + dw |= R300_VAP_INPUT_ROUTE_END; + dst[nr >> 1] = dw; + } + + return (nr + 1) >> 1; +} + +static void r300SetVertexFormat( GLcontext *ctx ) +{ + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + DECLARE_RENDERINPUTS(index_bitset); + GLuint InputsRead = 0, OutputsWritten = 0; + int vap_fmt_0 = 0; + int vap_vte_cntl = 0; + int offset = 0; + int vte = 0; + GLint inputs[VERT_ATTRIB_MAX]; + GLint tab[VERT_ATTRIB_MAX]; + int swizzle[VERT_ATTRIB_MAX][4]; + GLuint i, nr; + + DECLARE_RENDERINPUTS(render_inputs_bitset); + RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); + RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset ); + RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); + + /* Important: + */ + if ( VB->NdcPtr != NULL ) { + VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; + } + else { + VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; + } + + assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); + rmesa->swtcl.vertex_attr_count = 0; + + /* EMIT_ATTR's must be in order as they tell t_vertex.c how to + * build up a hardware vertex. + */ + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) { + vap_vte_cntl |= R300_VTX_W0_FMT; + InputsRead |= 1 << VERT_ATTRIB_POS; + OutputsWritten |= 1 << VERT_RESULT_HPOS; + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); + } else + EMIT_PAD(4 * sizeof(float)); + + offset = 4; + + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) { + EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); + vap_fmt_0 |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + offset += 1; + } + + if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) { + rmesa->swtcl.coloroffset = offset; + InputsRead |= 1 << VERT_ATTRIB_COLOR0; + OutputsWritten |= 1 << VERT_RESULT_COL0; + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4F ); + } + + offset += 4; + + rmesa->swtcl.specoffset = 0; + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { + rmesa->swtcl.specoffset = offset; + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4F ); + InputsRead |= 1 << VERT_ATTRIB_COLOR1; + OutputsWritten |= 1 << VERT_RESULT_COL1; + } + + if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + int i; + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { + InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); + OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); + EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_4F ); + } + } + } + + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) { + inputs[i] = nr++; + } else { + inputs[i] = -1; + } + } + + /* Fixed, apply to vir0 only */ + if (InputsRead & (1 << VERT_ATTRIB_POS)) + inputs[VERT_ATTRIB_POS] = 0; + if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) + inputs[VERT_ATTRIB_COLOR0] = 2; + if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) + inputs[VERT_ATTRIB_COLOR1] = 3; + for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) + if (InputsRead & (1 << i)) + inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); + + for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) { + tab[nr++] = i; + } + } + + for (i = 0; i < nr; i++) { + int ci; + + swizzle[i][0] = SWIZZLE_ZERO; + swizzle[i][1] = SWIZZLE_ZERO; + swizzle[i][2] = SWIZZLE_ZERO; + swizzle[i][3] = SWIZZLE_ONE; + + for (ci = 0; ci < VB->AttribPtr[tab[i]]->size; ci++) { + swizzle[i][ci] = ci; + } + } + + R300_NEWPRIM(rmesa); + R300_STATECHANGE(rmesa, vir[0]); + ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = + r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], + VB->AttribPtr, inputs, tab, nr); + R300_STATECHANGE(rmesa, vir[1]); + ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = + r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, + nr); + + R300_STATECHANGE(rmesa, vic); + rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); + rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); + + R300_STATECHANGE(rmesa, vof); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten); + + rmesa->swtcl.vertex_size = + _tnl_install_attrs( ctx, + rmesa->swtcl.vertex_attrs, + rmesa->swtcl.vertex_attr_count, + NULL, 0 ); + + rmesa->swtcl.vertex_size /= 4; + + RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); + + vte = rmesa->hw.vte.cmd[1]; + R300_STATECHANGE(rmesa, vte); + rmesa->hw.vte.cmd[1] = vte; + rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size; +} + + +/* Flush vertices in the current dma region. + */ +static void flush_last_swtcl_prim( r300ContextPtr rmesa ) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s\n", __FUNCTION__); + + rmesa->dma.flush = NULL; + + if (rmesa->dma.current.buf) { + struct r300_dma_region *current = &rmesa->dma.current; + GLuint current_offset = GET_START(current); + + assert (current->start + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == + current->ptr); + + if (rmesa->dma.current.start != rmesa->dma.current.ptr) { + + r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__); + + r300EmitState(rmesa); + + r300EmitVertexAOS( rmesa, + rmesa->swtcl.vertex_size, + current_offset); + + r300EmitVbufPrim( rmesa, + rmesa->swtcl.hw_primitive, + rmesa->swtcl.numverts); + + r300EmitCacheFlush(rmesa); + } + + rmesa->swtcl.numverts = 0; + current->start = current->ptr; + } +} + +/* Alloc space in the current dma region. + */ +static void * +r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize ) +{ + GLuint bytes = vsize * nverts; + + if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) + r300RefillCurrentDmaRegion( rmesa, bytes); + + if (!rmesa->dma.flush) { + rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; + rmesa->dma.flush = flush_last_swtcl_prim; + } + + ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); + ASSERT( rmesa->dma.flush == flush_last_swtcl_prim ); + ASSERT( rmesa->dma.current.start + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == + rmesa->dma.current.ptr ); + + { + GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr); + rmesa->dma.current.ptr += bytes; + rmesa->swtcl.numverts += nverts; + return head; + } +} + +static GLuint reduced_prim[] = { + GL_POINTS, + GL_LINES, + GL_LINES, + GL_LINES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, +}; + +static void r300RasterPrimitive( GLcontext *ctx, GLuint prim ); +static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); +//static void r300ResetLineStipple( GLcontext *ctx ); + +/*********************************************************************** + * Emit primitives as inline vertices * + ***********************************************************************/ + + +#define HAVE_POINTS 1 +#define HAVE_LINES 1 +#define HAVE_LINE_STRIPS 1 +#define HAVE_TRIANGLES 1 +#define HAVE_TRI_STRIPS 1 +#define HAVE_TRI_STRIP_1 0 +#define HAVE_TRI_FANS 1 +#define HAVE_QUADS 0 +#define HAVE_QUAD_STRIPS 0 +#define HAVE_POLYGONS 1 +#define HAVE_ELTS 1 + +#undef LOCAL_VARS +#undef ALLOC_VERTS +#define CTX_ARG r300ContextPtr rmesa +#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size +#define ALLOC_VERTS( n, size ) r300AllocDmaLowVerts( rmesa, n, size * 4 ) +#define LOCAL_VARS \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + const char *r300verts = (char *)rmesa->swtcl.verts; +#define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int))) +#define VERTEX r300Vertex +#define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS)) +#define PRINT_VERTEX(x) +#undef TAG +#define TAG(x) r300_##x +#include "tnl_dd/t_dd_triemit.h" + + + +/*********************************************************************** + * Macros for t_dd_tritmp.h to draw basic primitives * + ***********************************************************************/ + +#define QUAD( a, b, c, d ) r300_quad( rmesa, a, b, c, d ) +#define TRI( a, b, c ) r300_triangle( rmesa, a, b, c ) +#define LINE( a, b ) r300_line( rmesa, a, b ) +#define POINT( a ) r300_point( rmesa, a ) + +/*********************************************************************** + * Build render functions from dd templates * + ***********************************************************************/ + +#define R300_TWOSIDE_BIT 0x01 +#define R300_UNFILLED_BIT 0x02 +#define R300_MAX_TRIFUNC 0x04 + +static struct { + tnl_points_func points; + tnl_line_func line; + tnl_triangle_func triangle; + tnl_quad_func quad; +} rast_tab[R300_MAX_TRIFUNC]; + +#define DO_FALLBACK 0 +#define DO_UNFILLED (IND & R300_UNFILLED_BIT) +#define DO_TWOSIDE (IND & R300_TWOSIDE_BIT) +#define DO_FLAT 0 +#define DO_OFFSET 0 +#define DO_TRI 1 +#define DO_QUAD 1 +#define DO_LINE 1 +#define DO_POINTS 1 +#define DO_FULL_QUAD 1 + +#define HAVE_RGBA 1 +#define HAVE_SPEC 1 +#define HAVE_BACK_COLORS 0 +#define HAVE_HW_FLATSHADE 1 +#define TAB rast_tab + +#define DEPTH_SCALE 1.0 +#define UNFILLED_TRI unfilled_tri +#define UNFILLED_QUAD unfilled_quad +#define VERT_X(_v) _v->v.x +#define VERT_Y(_v) _v->v.y +#define VERT_Z(_v) _v->v.z +#define AREA_IS_CCW( a ) (a < 0) +#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int))) + +/* Only used to pull back colors into vertices (ie, we know color is + * floating point). + */ +#define R300_COLOR( dst, src ) \ +do { \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[3], (src)[3]); \ +} while (0) + +#define VERT_SET_RGBA( v, c ) if (coloroffset) R300_COLOR( v->ub4[coloroffset], c ) +#define VERT_COPY_RGBA( v0, v1 ) if (coloroffset) v0->ui[coloroffset] = v1->ui[coloroffset] +#define VERT_SAVE_RGBA( idx ) if (coloroffset) color[idx] = v[idx]->ui[coloroffset] +#define VERT_RESTORE_RGBA( idx ) if (coloroffset) v[idx]->ui[coloroffset] = color[idx] + +#define R300_SPEC( dst, src ) \ +do { \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \ +} while (0) + +#define VERT_SET_SPEC( v, c ) if (specoffset) R300_SPEC( v->ub4[specoffset], c ) +#define VERT_COPY_SPEC( v0, v1 ) if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset]) +#define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset] +#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx] + +#undef LOCAL_VARS +#undef TAG +#undef INIT + +#define LOCAL_VARS(n) \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + GLuint color[n], spec[n]; \ + GLuint coloroffset = rmesa->swtcl.coloroffset; \ + GLuint specoffset = rmesa->swtcl.specoffset; \ + (void) color; (void) spec; (void) coloroffset; (void) specoffset; + +/*********************************************************************** + * Helpers for rendering unfilled primitives * + ***********************************************************************/ + +#define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] ) +#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive +#undef TAG +#define TAG(x) x +#include "tnl_dd/t_dd_unfilled.h" +#undef IND + + +/*********************************************************************** + * Generate GL render functions * + ***********************************************************************/ + + +#define IND (0) +#define TAG(x) x +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R300_TWOSIDE_BIT) +#define TAG(x) x##_twoside +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R300_UNFILLED_BIT) +#define TAG(x) x##_unfilled +#include "tnl_dd/t_dd_tritmp.h" + +#define IND (R300_TWOSIDE_BIT|R300_UNFILLED_BIT) +#define TAG(x) x##_twoside_unfilled +#include "tnl_dd/t_dd_tritmp.h" + + + +static void init_rast_tab( void ) +{ + init(); + init_twoside(); + init_unfilled(); + init_twoside_unfilled(); +} + +/**********************************************************************/ +/* Render unclipped begin/end objects */ +/**********************************************************************/ + +#define RENDER_POINTS( start, count ) \ + for ( ; start < count ; start++) \ + r300_point( rmesa, VERT(start) ) +#define RENDER_LINE( v0, v1 ) \ + r300_line( rmesa, VERT(v0), VERT(v1) ) +#define RENDER_TRI( v0, v1, v2 ) \ + r300_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) ) +#define RENDER_QUAD( v0, v1, v2, v3 ) \ + r300_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) ) +#define INIT(x) do { \ + r300RenderPrimitive( ctx, x ); \ +} while (0) +#undef LOCAL_VARS +#define LOCAL_VARS \ + r300ContextPtr rmesa = R300_CONTEXT(ctx); \ + const GLuint vertsize = rmesa->swtcl.vertex_size; \ + const char *r300verts = (char *)rmesa->swtcl.verts; \ + const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ + const GLboolean stipple = ctx->Line.StippleFlag; \ + (void) elt; (void) stipple; +#define RESET_STIPPLE //if ( stipple ) r200ResetLineStipple( ctx ); +#define RESET_OCCLUSION +#define PRESERVE_VB_DEFS +#define ELT(x) (x) +#define TAG(x) r300_##x##_verts +#include "tnl/t_vb_rendertmp.h" +#undef ELT +#undef TAG +#define TAG(x) r300_##x##_elts +#define ELT(x) elt[x] +#include "tnl/t_vb_rendertmp.h" + + + + +/**********************************************************************/ +/* Choose render functions */ +/**********************************************************************/ +static void r300ChooseRenderState( GLcontext *ctx ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint index = 0; + GLuint flags = ctx->_TriangleCaps; + + if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT; + if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT; + + if (index != rmesa->swtcl.RenderIndex) { + tnl->Driver.Render.Points = rast_tab[index].points; + tnl->Driver.Render.Line = rast_tab[index].line; + tnl->Driver.Render.ClippedLine = rast_tab[index].line; + tnl->Driver.Render.Triangle = rast_tab[index].triangle; + tnl->Driver.Render.Quad = rast_tab[index].quad; + + if (index == 0) { + tnl->Driver.Render.PrimTabVerts = r300_render_tab_verts; + tnl->Driver.Render.PrimTabElts = r300_render_tab_elts; + tnl->Driver.Render.ClippedPolygon = r300_fast_clipped_poly; + } else { + tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts; + tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts; + tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon; + } + + rmesa->swtcl.RenderIndex = index; + } +} + + +static void r300RenderStart(GLcontext *ctx) +{ + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + // fprintf(stderr, "%s\n", __FUNCTION__); + + r300ChooseRenderState(ctx); + r300SetVertexFormat(ctx); + + r300UpdateShaderStates(rmesa); + + r300EmitCacheFlush(rmesa); + + if (rmesa->dma.flush != 0 && + rmesa->dma.flush != flush_last_swtcl_prim) + rmesa->dma.flush( rmesa ); + +} + +static void r300RenderFinish(GLcontext *ctx) +{ +} + +static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + if (rmesa->swtcl.hw_primitive != hwprim) { + R300_NEWPRIM( rmesa ); + rmesa->swtcl.hw_primitive = hwprim; + } +} + +static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) +{ + + r300ContextPtr rmesa = R300_CONTEXT(ctx); + rmesa->swtcl.render_primitive = prim; + + if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED)) + return; + + r300RasterPrimitive( ctx, reduced_prim[prim] ); + // fprintf(stderr, "%s\n", __FUNCTION__); + +} + +static void r300ResetLineStipple(GLcontext *ctx) +{ + + +} + +void r300InitSwtcl(GLcontext *ctx) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + static int firsttime = 1; + + if (firsttime) { + init_rast_tab(); + firsttime = 0; + } + + tnl->Driver.Render.Start = r300RenderStart; + tnl->Driver.Render.Finish = r300RenderFinish; + tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; + tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple; + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; + + /* FIXME: what are these numbers? */ + _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + 48 * sizeof(GLfloat) ); + + rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; + rmesa->swtcl.RenderIndex = ~0; + rmesa->swtcl.render_primitive = GL_TRIANGLES; + rmesa->swtcl.hw_primitive = 0; + + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); + RENDERINPUTS_ZERO( rmesa->tnl_index_bitset ); + + _tnl_need_projected_coords( ctx, GL_FALSE ); + r300ChooseRenderState(ctx); + + _mesa_validate_all_lighting_tables( ctx ); + + tnl->Driver.NotifyMaterialChange = + _mesa_validate_all_lighting_tables; +} + +void r300DestroySwtcl(GLcontext *ctx) +{ +} + +void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset) +{ + int cmd_reserved = 0; + int cmd_written = 0; + + drm_radeon_cmd_header_t *cmd = NULL; + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", + __FUNCTION__, vertex_size, offset); + + start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2); + e32(1); + e32(vertex_size | (vertex_size << 8)); + e32(offset); +} + +void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) +{ + + int cmd_reserved = 0; + int cmd_written = 0; + int type, num_verts; + drm_radeon_cmd_header_t *cmd = NULL; + + type = r300PrimitiveType(rmesa, primitive); + num_verts = r300NumVerts(rmesa, vertex_nr, primitive); + + start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); + e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); +} diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.h b/src/mesa/drivers/dri/r300/r300_swtcl.h new file mode 100644 index 0000000000..2ea6ceded7 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_swtcl.h @@ -0,0 +1,45 @@ +/* +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> - original r200 code + * Dave Airlie <airlied@linux.ie> + */ + +#ifndef __R300_SWTCL_H__ +#define __R300_SWTCL_H__ + +#include "mtypes.h" +#include "swrast/swrast.h" +#include "r300_context.h" + +extern void r300InitSwtcl( GLcontext *ctx ); +extern void r300DestroySwtcl( GLcontext *ctx ); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c index 38f0da8b7c..723601ac4a 100644 --- a/src/mesa/drivers/dri/r300/r300_texmem.c +++ b/src/mesa/drivers/dri/r300/r300_texmem.c @@ -505,7 +505,7 @@ int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face) t->base.lastLevel); } - if (!t || t->base.totalSize == 0) + if (t->base.totalSize == 0) return 0; if (RADEON_DEBUG & DEBUG_SYNC) { diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 16dddf6557..4dd3fd6a67 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -29,12 +29,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * \file * * \author Aapo Tahkola <aet@rasterburn.org> + * + * \author Oliver McFadden <z3ro.geek@gmail.com> + * + * For a description of the vertex program instruction set see r300_reg.h. */ -#include "glheader.h" -#include "macros.h" -#include "enums.h" -#include "program.h" +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/program.h" #include "shader/prog_instruction.h" #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" @@ -55,54 +59,58 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #error Cannot change these! #endif -#define SCALAR_FLAG (1<<31) -#define FLAG_MASK (1<<31) -#define OP_MASK (0xf) /* we are unlikely to have more than 15 */ -#define OPN(operator, ip) {#operator, OPCODE_##operator, ip} - -static struct { - char *name; - int opcode; - unsigned long ip; /* number of input operands and flags */ -} op_names[] = { - /* *INDENT-OFF* */ - OPN(ABS, 1), - OPN(ADD, 2), - OPN(ARL, 1 | SCALAR_FLAG), - OPN(DP3, 2), - OPN(DP4, 2), - OPN(DPH, 2), - OPN(DST, 2), - OPN(EX2, 1 | SCALAR_FLAG), - OPN(EXP, 1 | SCALAR_FLAG), - OPN(FLR, 1), - OPN(FRC, 1), - OPN(LG2, 1 | SCALAR_FLAG), - OPN(LIT, 1), - OPN(LOG, 1 | SCALAR_FLAG), - OPN(MAD, 3), - OPN(MAX, 2), - OPN(MIN, 2), - OPN(MOV, 1), - OPN(MUL, 2), - OPN(POW, 2 | SCALAR_FLAG), - OPN(RCP, 1 | SCALAR_FLAG), - OPN(RSQ, 1 | SCALAR_FLAG), - OPN(SGE, 2), - OPN(SLT, 2), - OPN(SUB, 2), - OPN(SWZ, 1), - OPN(XPD, 2), - OPN(RCC, 0), //extra - OPN(PRINT, 0), - OPN(END, 0) - /* *INDENT-ON* */ -}; - -#undef OPN +/* TODO: Get rid of t_src_class call */ +#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ + ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \ + t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \ + (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \ + t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \ + +#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) + +#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) + +#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) + +#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) + +#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) + +#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) + +/* DP4 version seems to trigger some hw peculiarity */ +//#define PREFER_DP4 + +#define FREE_TEMPS() \ + do { \ + if(u_temp_i < vp->num_temporaries) { \ + WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \ + vp->native = GL_FALSE; \ + } \ + u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ + } while (0) int r300VertexProgUpdateParams(GLcontext * ctx, - struct r300_vertex_program_cont *vp, float *dst) + struct r300_vertex_program_cont *vp, + float *dst) { int pi; struct gl_vertex_program *mesa_vp = &vp->mesa_program; @@ -234,8 +242,8 @@ static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) int i; if (vp == NULL) { - fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, - caller); + fprintf(stderr, "vp null in call to %s from %s\n", + __FUNCTION__, caller); return; } @@ -276,6 +284,8 @@ static unsigned long t_src_index(struct r300_vertex_program *vp, } } +/* these two functions should probably be merged... */ + static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src) { @@ -294,7 +304,9 @@ static unsigned long t_src(struct r300_vertex_program *vp, static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src) { - + /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ return MAKE_VSF_SOURCE(t_src_index(vp, src), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), @@ -306,128 +318,741 @@ static unsigned long t_src_scalar(struct r300_vertex_program *vp, (src->RelAddr << 4); } -static unsigned long t_opcode(enum prog_opcode opcode) +static GLboolean valid_dst(struct r300_vertex_program *vp, + struct prog_dst_register *dst) { + if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { + return GL_FALSE; + } else if (dst->File == PROGRAM_ADDRESS) { + assert(dst->Index == 0); + } - switch (opcode) { - /* *INDENT-OFF* */ - case OPCODE_ARL: return R300_VPI_OUT_OP_ARL; - case OPCODE_DST: return R300_VPI_OUT_OP_DST; - case OPCODE_EX2: return R300_VPI_OUT_OP_EX2; - case OPCODE_EXP: return R300_VPI_OUT_OP_EXP; - case OPCODE_FRC: return R300_VPI_OUT_OP_FRC; - case OPCODE_LG2: return R300_VPI_OUT_OP_LG2; - case OPCODE_LOG: return R300_VPI_OUT_OP_LOG; - case OPCODE_MAX: return R300_VPI_OUT_OP_MAX; - case OPCODE_MIN: return R300_VPI_OUT_OP_MIN; - case OPCODE_MUL: return R300_VPI_OUT_OP_MUL; - case OPCODE_RCP: return R300_VPI_OUT_OP_RCP; - case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ; - case OPCODE_SGE: return R300_VPI_OUT_OP_SGE; - case OPCODE_SLT: return R300_VPI_OUT_OP_SLT; - case OPCODE_DP4: return R300_VPI_OUT_OP_DOT; - /* *INDENT-ON* */ + return GL_TRUE; +} - default: - fprintf(stderr, "%s: Should not be called with opcode %d!", - __FUNCTION__, opcode); - } - _mesa_exit(-1); - return 0; +/* + * Instruction Inputs Output Description + * ----------- ------ ------ -------------------------------- + * ABS v v absolute value + * ADD v,v v add + * ARL s a address register load + * DP3 v,v ssss 3-component dot product + * DP4 v,v ssss 4-component dot product + * DPH v,v ssss homogeneous dot product + * DST v,v v distance vector + * EX2 s ssss exponential base 2 + * EXP s v exponential base 2 (approximate) + * FLR v v floor + * FRC v v fraction + * LG2 s ssss logarithm base 2 + * LIT v v compute light coefficients + * LOG s v logarithm base 2 (approximate) + * MAD v,v,v v multiply and add + * MAX v,v v maximum + * MIN v,v v minimum + * MOV v v move + * MUL v,v v multiply + * POW s,s ssss exponentiate + * RCP s ssss reciprocal + * RSQ s ssss reciprocal square root + * SGE v,v v set on greater than or equal + * SLT v,v v set on less than + * SUB v,v v subtract + * SWZ v v extended swizzle + * XPD v,v v cross product + * + * Table X.5: Summary of vertex program instructions. "v" indicates a + * floating-point vector input or output, "s" indicates a floating-point + * scalar input, "ssss" indicates a scalar output replicated across a + * 4-component result vector, and "a" indicates a single address register + * component. + */ + +static GLuint *t_opcode_abs(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), + t_src_class(src[0].File), + (!src[0]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[3] = 0; + + return inst; } -static unsigned long op_operands(enum prog_opcode opcode) +static GLuint *t_opcode_add(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) { - int i; + unsigned long hw_op; - /* Can we trust mesas opcodes to be in order ? */ - for (i = 0; i < sizeof(op_names) / sizeof(*op_names); i++) - if (op_names[i].opcode == opcode) - return op_names[i].ip; +#if 1 + hw_op = (src[0].File == PROGRAM_TEMPORARY + && src[1].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + inst[0] = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = ONE_SRC_0; + inst[2] = t_src(vp, &src[0]); + inst[3] = t_src(vp, &src[1]); +#else + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; - fprintf(stderr, "op %d not found in op_names\n", opcode); - _mesa_exit(-1); - return 0; +#endif + + return inst; } -static GLboolean valid_dst(struct r300_vertex_program *vp, - struct prog_dst_register *dst) +static GLuint *t_opcode_arl(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) { - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { - assert(dst->Index == 0); - } + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ARL, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); - return GL_TRUE; + inst[1] = t_src(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; } -/* TODO: Get rid of t_src_class call */ -#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ - ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \ - t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \ - (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \ - t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \ +static GLuint *t_opcode_dp3(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + SWIZZLE_ZERO, t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + + inst[2] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + SWIZZLE_ZERO, t_src_class(src[1].File), + src[1]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + + inst[3] = ZERO_SRC_1; + + return inst; +} -#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) +static GLuint *t_opcode_dp4(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); -#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; -#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) + return inst; +} -#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) +static GLuint *t_opcode_dph(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + VSF_IN_COMPONENT_ONE, t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; + + return inst; +} -#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) +static GLuint *t_opcode_dst(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_DST, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); -#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; -/* DP4 version seems to trigger some hw peculiarity */ -//#define PREFER_DP4 + return inst; +} -#define FREE_TEMPS() \ - do { \ - if(u_temp_i < vp->num_temporaries) { \ - WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \ - vp->native = GL_FALSE; \ - } \ - u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ - } while (0) +static GLuint *t_opcode_ex2(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_EX2, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); -static void r300TranslateVertexShader(struct r300_vertex_program *vp, - struct prog_instruction *vpi) + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; +} + +static GLuint *t_opcode_exp(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_EXP, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; +} + +static GLuint *t_opcode_flr(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3], int *u_temp_i) +{ + /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} + ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, *u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), + VSF_OUT_CLASS_TMP); + + inst[1] = t_src(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + inst += 4; + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = + MAKE_VSF_SOURCE(*u_temp_i, VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP, + /* Not 100% sure about this */ + (!src[0]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE + /*VSF_FLAG_ALL */ ); + + inst[3] = ZERO_SRC_0; + (*u_temp_i)--; + + return inst; +} + +static GLuint *t_opcode_frc(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; +} + +static GLuint *t_opcode_lg2(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; +} + +static GLuint *t_opcode_lit(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + inst[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0].RelAddr << 4); + inst[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0].RelAddr << 4); + inst[3] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0].RelAddr << 4); + + return inst; +} + +static GLuint *t_opcode_log(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_LOG, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; +} + +static GLuint *t_opcode_mad(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) { - int i, cur_reg = 0; - VERTEX_SHADER_INSTRUCTION *o_inst; - unsigned long operands; - int are_srcs_scalar; unsigned long hw_op; - /* Initial value should be last tmp reg that hw supports. - Strangely enough r300 doesnt mind even though these would be out of range. - Smart enough to realize that it doesnt need it? */ - int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; - struct prog_src_register src[3]; - vp->pos_end = 0; /* Not supported yet */ - vp->program.length = 0; - /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */ + hw_op = (src[0].File == PROGRAM_TEMPORARY + && src[1].File == PROGRAM_TEMPORARY + && src[2].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + inst[0] = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = t_src(vp, &src[2]); + + return inst; +} + +static GLuint *t_opcode_max(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; + + return inst; +} + +static GLuint *t_opcode_min(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_MIN, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; + + return inst; +} + +static GLuint *t_opcode_mov(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + +#if 1 + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; +#else + hw_op = + (src[0].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + inst[0] = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = ONE_SRC_0; + inst[3] = ZERO_SRC_0; +#endif + + return inst; +} + +static GLuint *t_opcode_mul(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + unsigned long hw_op; + + // HW mul can take third arg but appears to have some other limitations. + + hw_op = (src[0].File == PROGRAM_TEMPORARY + && src[1].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + inst[0] = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + + inst[3] = ZERO_SRC_1; + + return inst; +} + +static GLuint *t_opcode_pow(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_POW, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = t_src_scalar(vp, &src[1]); + + return inst; +} + +static GLuint *t_opcode_rcp(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_RCP, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; +} + +static GLuint *t_opcode_rsq(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_RSQ, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + + return inst; +} + +static GLuint *t_opcode_sge(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_SGE, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; + + return inst; +} + +static GLuint *t_opcode_slt(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_SLT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = ZERO_SRC_1; + + return inst; +} + +static GLuint *t_opcode_sub(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + unsigned long hw_op; + + //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + +#if 1 + hw_op = (src[0].File == PROGRAM_TEMPORARY + && src[1].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + inst[0] = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = ONE_SRC_0; + inst[3] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); +#else + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = t_src(vp, &src[0]); + inst[2] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + inst[3] = 0; +#endif + + return inst; +} + +static GLuint *t_opcode_swz(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + +#if 1 + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; +#else + hw_op = + (src[0].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + inst[0] = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = ONE_SRC_0; + inst[3] = ZERO_SRC_0; +#endif + + return inst; +} + +static GLuint *t_opcode_xpd(struct r300_vertex_program *vp, + struct prog_instruction *vpi, GLuint * inst, + struct prog_src_register src[3], int *u_temp_i) +{ + /* mul r0, r1.yzxw, r2.zxyw + mad r0, -r2.yzxw, r1.zxyw, r0 + NOTE: might need MAD_2 + */ + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, *u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), + VSF_OUT_CLASS_TMP); + + inst[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0].RelAddr << 4); + + inst[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w + t_src_class(src[1].File), + src[1]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[1].RelAddr << 4); + + inst[3] = ZERO_SRC_1; + inst += 4; + (*u_temp_i)--; + + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + inst[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[1].RelAddr << 4); + + inst[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0].RelAddr << 4); + + inst[3] = + MAKE_VSF_SOURCE(*u_temp_i + 1, VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP, + VSF_FLAG_NONE); + + return inst; +} + +static void t_inputs_outputs(struct r300_vertex_program *vp) +{ + int i; + int cur_reg = 0; for (i = 0; i < VERT_ATTRIB_MAX; i++) vp->inputs[i] = -1; @@ -437,39 +1062,71 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)); - /* Assign outputs */ - if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) { vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) { vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) { vp->outputs[VERT_RESULT_COL0] = cur_reg++; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) - vp->outputs[VERT_RESULT_COL1] = cur_reg++; - -#if 0 /* Not supported yet */ - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) - vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) { + vp->outputs[VERT_RESULT_COL1] = + vp->outputs[VERT_RESULT_COL0] + 1; + cur_reg = vp->outputs[VERT_RESULT_COL1] + 1; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) - vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { + vp->outputs[VERT_RESULT_BFC0] = + vp->outputs[VERT_RESULT_COL0] + 2; + cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + vp->outputs[VERT_RESULT_BFC1] = + vp->outputs[VERT_RESULT_COL0] + 3; + cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1; + } +#if 0 + if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) { vp->outputs[VERT_RESULT_FOGC] = cur_reg++; + } #endif - for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) - if (vp->key.OutputsWritten & (1 << i)) + for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { + if (vp->key.OutputsWritten & (1 << i)) { vp->outputs[i] = cur_reg++; + } + } +} + +static void r300TranslateVertexShader(struct r300_vertex_program *vp, + struct prog_instruction *vpi) +{ + int i; + GLuint *inst; + unsigned long num_operands; + /* Initial value should be last tmp reg that hw supports. + Strangely enough r300 doesnt mind even though these would be out of range. + Smart enough to realize that it doesnt need it? */ + int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; + struct prog_src_register src[3]; + vp->pos_end = 0; /* Not supported yet */ + vp->program.length = 0; + /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */ vp->translated = GL_TRUE; vp->native = GL_TRUE; - o_inst = vp->program.body.i; - for (; vpi->Opcode != OPCODE_END; vpi++, o_inst++) { + t_inputs_outputs(vp); + + for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END; + vpi++, inst += 4) { + FREE_TEMPS(); if (!valid_dst(vp, &vpi->DstReg)) { @@ -478,61 +1135,62 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, vpi->DstReg.Index = u_temp_i; } - operands = op_operands(vpi->Opcode); - are_srcs_scalar = operands & SCALAR_FLAG; - operands &= OP_MASK; + num_operands = _mesa_num_inst_src_regs(vpi->Opcode); - for (i = 0; i < operands; i++) + /* copy the sources (src) from mesa into a local variable... is this needed? */ + for (i = 0; i < num_operands; i++) { src[i] = vpi->SrcReg[i]; + } - if (operands == 3) { /* TODO: scalars */ + if (num_operands == 3) { /* TODO: scalars */ if (CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2])) { - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i, - VSF_FLAG_ALL, + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, + u_temp_i, VSF_FLAG_ALL, VSF_OUT_CLASS_TMP); - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), + inst[1] = + MAKE_VSF_SOURCE(t_src_index + (vp, &src[2]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, - t_src_class(src[2].File), - VSF_FLAG_NONE) | (src[2]. - RelAddr << - 4); + t_src_class(src[2]. + File), + VSF_FLAG_NONE) | + (src[2].RelAddr << 4); - o_inst->src[1] = ZERO_SRC_2; - o_inst->src[2] = ZERO_SRC_2; - o_inst++; + inst[2] = ZERO_SRC_2; + inst[3] = ZERO_SRC_2; + inst += 4; src[2].File = PROGRAM_TEMPORARY; src[2].Index = u_temp_i; src[2].RelAddr = 0; u_temp_i--; } - } - if (operands >= 2) { + if (num_operands >= 2) { if (CMP_SRCS(src[1], src[0])) { - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i, - VSF_FLAG_ALL, + inst[0] = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, + u_temp_i, VSF_FLAG_ALL, VSF_OUT_CLASS_TMP); - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + inst[1] = + MAKE_VSF_SOURCE(t_src_index + (vp, &src[0]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, - t_src_class(src[0].File), - VSF_FLAG_NONE) | (src[0]. - RelAddr << - 4); + t_src_class(src[0]. + File), + VSF_FLAG_NONE) | + (src[0].RelAddr << 4); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - o_inst++; + inst[2] = ZERO_SRC_0; + inst[3] = ZERO_SRC_0; + inst += 4; src[0].File = PROGRAM_TEMPORARY; src[0].Index = u_temp_i; @@ -541,517 +1199,103 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, } } - /* These ops need special handling. */ switch (vpi->Opcode) { - case OPCODE_POW: - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_POW, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = t_src_scalar(vp, &src[1]); - goto next; - - case OPCODE_MOV: //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - case OPCODE_SWZ: -#if 1 - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; -#else - hw_op = - (src[0].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ONE_SRC_0; - o_inst->src[2] = ZERO_SRC_0; -#endif - - goto next; - + case OPCODE_ABS: + inst = t_opcode_abs(vp, vpi, inst, src); + break; case OPCODE_ADD: -#if 1 - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = ONE_SRC_0; - o_inst->src[1] = t_src(vp, &src[0]); - o_inst->src[2] = t_src(vp, &src[1]); -#else - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - -#endif - goto next; - - case OPCODE_MAD: - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == PROGRAM_TEMPORARY && - src[2].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = t_src(vp, &src[2]); - goto next; - - case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */ - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - - o_inst->src[2] = ZERO_SRC_1; - goto next; - - case OPCODE_DP3: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - - o_inst->src[1] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ - (src[1].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[1].File), - src[1]. - NegateBase ? VSF_FLAG_XYZ : - VSF_FLAG_NONE) | (src[1]. - RelAddr << 4); - - o_inst->src[2] = ZERO_SRC_1; - goto next; - - case OPCODE_SUB: //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W -#if 1 - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ONE_SRC_0; - o_inst->src[2] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ - (src[1].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 2)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - NegateBase) ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[1]. - RelAddr << 4); -#else - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ - (src[1].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 2)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - NegateBase) ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[1]. - RelAddr << 4); - o_inst->src[2] = 0; -#endif - goto next; - - case OPCODE_ABS: //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 2)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 3)), - t_src_class(src[0].File), - (!src[0]. - NegateBase) ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - o_inst->src[2] = 0; - goto next; - + inst = t_opcode_add(vp, vpi, inst, src); + break; + case OPCODE_ARL: + inst = t_opcode_arl(vp, vpi, inst, src); + break; + case OPCODE_DP3: + inst = t_opcode_dp3(vp, vpi, inst, src); + break; + case OPCODE_DP4: + inst = t_opcode_dp4(vp, vpi, inst, src); + break; + case OPCODE_DPH: + inst = t_opcode_dph(vp, vpi, inst, src); + break; + case OPCODE_DST: + inst = t_opcode_dst(vp, vpi, inst, src); + break; + case OPCODE_EX2: + inst = t_opcode_ex2(vp, vpi, inst, src); + break; + case OPCODE_EXP: + inst = t_opcode_exp(vp, vpi, inst, src); + break; case OPCODE_FLR: - /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} - ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ - - o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i, - t_dst_mask(vpi->DstReg. - WriteMask), - VSF_OUT_CLASS_TMP); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - o_inst++; - - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = MAKE_VSF_SOURCE(u_temp_i, - VSF_IN_COMPONENT_X, - VSF_IN_COMPONENT_Y, - VSF_IN_COMPONENT_Z, - VSF_IN_COMPONENT_W, - VSF_IN_CLASS_TMP, - /* Not 100% sure about this */ - (!src[0]. - NegateBase) ? - VSF_FLAG_ALL : - VSF_FLAG_NONE - /*VSF_FLAG_ALL */ ); - - o_inst->src[2] = ZERO_SRC_0; - u_temp_i--; - goto next; - - case OPCODE_LG2: // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - goto next; - - case OPCODE_LIT: //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - /* NOTE: Users swizzling might not work. */ - o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - o_inst->src[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - goto next; - - case OPCODE_DPH: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 2)), - VSF_IN_COMPONENT_ONE, - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - goto next; - - case OPCODE_XPD: - /* mul r0, r1.yzxw, r2.zxyw - mad r0, -r2.yzxw, r1.zxyw, r0 - NOTE: might need MAD_2 - */ - - o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i, - t_dst_mask(vpi->DstReg. - WriteMask), - VSF_OUT_CLASS_TMP); - - o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - - o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w - t_src_class(src[1]. - File), - src[1]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - - o_inst->src[2] = ZERO_SRC_1; - o_inst++; - u_temp_i--; - - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w - t_src_class(src[1]. - File), - (!src[1]. - NegateBase) ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - - o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - - o_inst->src[2] = MAKE_VSF_SOURCE(u_temp_i + 1, - VSF_IN_COMPONENT_X, - VSF_IN_COMPONENT_Y, - VSF_IN_COMPONENT_Z, - VSF_IN_COMPONENT_W, - VSF_IN_CLASS_TMP, - VSF_FLAG_NONE); - - goto next; - - case OPCODE_RCC: - fprintf(stderr, "Dont know how to handle op %d yet\n", - vpi->Opcode); - _mesa_exit(-1); + inst = + t_opcode_flr(vp, vpi, inst, src, /* FIXME */ + &u_temp_i); + break; + case OPCODE_FRC: + inst = t_opcode_frc(vp, vpi, inst, src); + break; + case OPCODE_LG2: + inst = t_opcode_lg2(vp, vpi, inst, src); + break; + case OPCODE_LIT: + inst = t_opcode_lit(vp, vpi, inst, src); + break; + case OPCODE_LOG: + inst = t_opcode_log(vp, vpi, inst, src); + break; + case OPCODE_MAD: + inst = t_opcode_mad(vp, vpi, inst, src); + break; + case OPCODE_MAX: + inst = t_opcode_max(vp, vpi, inst, src); + break; + case OPCODE_MIN: + inst = t_opcode_min(vp, vpi, inst, src); + break; + case OPCODE_MOV: + inst = t_opcode_mov(vp, vpi, inst, src); break; - case OPCODE_END: + case OPCODE_MUL: + inst = t_opcode_mul(vp, vpi, inst, src); + break; + case OPCODE_POW: + inst = t_opcode_pow(vp, vpi, inst, src); + break; + case OPCODE_RCP: + inst = t_opcode_rcp(vp, vpi, inst, src); + break; + case OPCODE_RSQ: + inst = t_opcode_rsq(vp, vpi, inst, src); + break; + case OPCODE_SGE: + inst = t_opcode_sge(vp, vpi, inst, src); + break; + case OPCODE_SLT: + inst = t_opcode_slt(vp, vpi, inst, src); + break; + case OPCODE_SUB: + inst = t_opcode_sub(vp, vpi, inst, src); + break; + case OPCODE_SWZ: + inst = t_opcode_swz(vp, vpi, inst, src); + break; + case OPCODE_XPD: + inst = + t_opcode_xpd(vp, vpi, inst, src, /* FIXME */ + &u_temp_i); break; default: + assert(0); break; } - - o_inst->op = - MAKE_VSF_OP(t_opcode(vpi->Opcode), - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - if (are_srcs_scalar) { - switch (operands) { - case 1: - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - break; - - case 2: - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = t_src_scalar(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - break; - - case 3: - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = t_src_scalar(vp, &src[1]); - o_inst->src[2] = t_src_scalar(vp, &src[2]); - break; - - default: - fprintf(stderr, - "scalars and op RCC not handled yet"); - _mesa_exit(-1); - break; - } - } else { - switch (operands) { - case 1: - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - break; - - case 2: - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - break; - - case 3: - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = t_src(vp, &src[2]); - break; - - default: - fprintf(stderr, - "scalars and op RCC not handled yet"); - _mesa_exit(-1); - break; - } - } - next:; } - /* Will most likely segfault before we get here... fix later. */ - if (o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH / 4) { + vp->program.length = (inst - vp->program.body.i); + if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) { vp->program.length = 0; vp->native = GL_FALSE; - return; } - vp->program.length = (o_inst - vp->program.body.i) * 4; #if 0 fprintf(stderr, "hw program:\n"); for (i = 0; i < vp->program.length; i++) @@ -1065,7 +1309,8 @@ static void position_invariant(struct gl_program *prog) struct gl_program_parameter_list *paramList; int i; - gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 }; + gl_state_index tokens[STATE_LENGTH] = + { STATE_MVP_MATRIX, 0, 0, 0, 0 }; /* tokens[4] = matrix modifier */ #ifdef PREFER_DP4 @@ -1159,8 +1404,8 @@ static void insert_wpos(struct r300_vertex_program *vp, prog->NumInstructions - 1); /* END */ _mesa_copy_instructions(&vpi[prog->NumInstructions + 1], - &prog->Instructions[prog->NumInstructions - 1], - 1); + &prog->Instructions[prog->NumInstructions - + 1], 1); vpi_insert = &vpi[prog->NumInstructions - 1]; vpi_insert[i].Opcode = OPCODE_MOV; @@ -1206,8 +1451,8 @@ static void pos_as_texcoord(struct r300_vertex_program *vp, prog->NumTemporaries++; for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && - vpi->DstReg.Index == VERT_RESULT_HPOS) { + if (vpi->DstReg.File == PROGRAM_OUTPUT + && vpi->DstReg.Index == VERT_RESULT_HPOS) { vpi->DstReg.File = PROGRAM_TEMPORARY; vpi->DstReg.Index = tempregi; } @@ -1223,20 +1468,18 @@ static struct r300_vertex_program *build_program(struct r300_vertex_program_key vp = _mesa_calloc(sizeof(*vp)); _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - vp->wpos_idx = wpos_idx; if (mesa_vp->IsPositionInvariant) { position_invariant(&mesa_vp->Base); } - if (wpos_idx > -1) + if (wpos_idx > -1) { pos_as_texcoord(vp, &mesa_vp->Base); + } assert(mesa_vp->Base.NumInstructions); - vp->num_temporaries = mesa_vp->Base.NumTemporaries; - r300TranslateVertexShader(vp, mesa_vp->Base.Instructions); return vp; @@ -1252,11 +1495,10 @@ void r300SelectVertexShader(r300ContextPtr r300) struct r300_vertex_program *vp; GLint wpos_idx; - vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + vpc = + (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; - wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS; - wpos_idx = -1; if (InputsRead & FRAG_BIT_WPOS) { for (i = 0; i < ctx->Const.MaxTextureUnits; i++) @@ -1271,28 +1513,35 @@ void r300SelectVertexShader(r300ContextPtr r300) InputsRead |= (FRAG_BIT_TEX0 << i); wpos_idx = i; } + wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; + wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten; + + wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS; - if (InputsRead & FRAG_BIT_COL0) + if (InputsRead & FRAG_BIT_COL0) { wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0; + } - if ((InputsRead & FRAG_BIT_COL1) /*|| - (InputsRead & FRAG_BIT_FOGC) */ ) + if ((InputsRead & FRAG_BIT_COL1)) { wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1; + } - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (InputsRead & (FRAG_BIT_TEX0 << i)) + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) { wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); + } + } - wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; if (vpc->mesa_program.IsPositionInvariant) { /* we wan't position don't we ? */ wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); + wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS); } for (vp = vpc->progs; vp; vp = vp->next) - if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == - 0) { + if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) + == 0) { r300->selected_vp = vp; return; } diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h index 252d5a901f..3df0eee799 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.h +++ b/src/mesa/drivers/dri/r300/r300_vertprog.h @@ -3,11 +3,6 @@ #include "r300_reg.h" -typedef struct { - GLuint op; - GLuint src[3]; -} VERTEX_SHADER_INSTRUCTION; - #define VSF_FLAG_X 1 #define VSF_FLAG_Y 2 #define VSF_FLAG_Z 4 diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h index c47adc9575..a344837f47 100644 --- a/src/mesa/drivers/dri/r300/radeon_lock.h +++ b/src/mesa/drivers/dri/r300/radeon_lock.h @@ -42,9 +42,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #ifndef __RADEON_LOCK_H__ #define __RADEON_LOCK_H__ -#if 0 -#include "r200_ioctl.h" -#endif #include "radeon_context.h" extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags); |