From 11cd795940723e79f99e7887a2e2dd8410352572 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Fri, 24 Jul 2009 00:32:41 +0200 Subject: r300: Cleanup vertex_program structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/r300_draw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_draw.c') diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 9769ff5399..e2e92fde48 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -341,7 +341,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar { int i, tmp; - tmp = r300->selected_vp->Base->Base.InputsRead; + tmp = r300->selected_vp->InputsRead; i = 0; vbuf->num_attribs = 0; while (tmp) { @@ -437,7 +437,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, if (r300->fallback) return GL_FALSE; - r300SetupVAP(ctx, r300->selected_vp->Base->Base.InputsRead, r300->selected_vp->Base->Base.OutputsWritten); + r300SetupVAP(ctx, r300->selected_vp->InputsRead, r300->selected_vp->OutputsWritten); r300UpdateShaderStates(r300); -- cgit v1.2.3 From 84445273ed554ea6fa65c894bbe098eb3f3d1230 Mon Sep 17 00:00:00 2001 From: Nicolai Hähnle Date: Thu, 23 Jul 2009 18:40:41 +0200 Subject: r300: Move vertex program compilation to compiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is just the first step of refactoring. The separation is not yet clean enough with this commit. 
Signed-off-by: Nicolai Hähnle --- src/mesa/drivers/dri/r300/compiler/Makefile | 1 + src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 1533 +++++++++++++++++++ src/mesa/drivers/dri/r300/compiler/radeon_code.h | 25 + .../drivers/dri/r300/compiler/radeon_compiler.h | 10 + src/mesa/drivers/dri/r300/r300_context.h | 31 +- src/mesa/drivers/dri/r300/r300_draw.c | 4 +- src/mesa/drivers/dri/r300/r300_ioctl.c | 9 +- src/mesa/drivers/dri/r300/r300_reg.h | 18 + src/mesa/drivers/dri/r300/r300_state.c | 4 +- src/mesa/drivers/dri/r300/r300_vertprog.c | 1549 +------------------- src/mesa/drivers/dri/r300/r300_vertprog.h | 28 - src/mesa/shader/prog_instruction.h | 9 +- 12 files changed, 1625 insertions(+), 1596 deletions(-) create mode 100644 src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c (limited to 'src/mesa/drivers/dri/r300/r300_draw.c') diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index c0fd85c181..4e2ff50c69 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -17,6 +17,7 @@ C_SOURCES = \ r300_fragprog_emit.c \ r500_fragprog.c \ r500_fragprog_emit.c \ + r3xx_vertprog.c \ \ memory_pool.c diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c new file mode 100644 index 0000000000..b074c98ee9 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -0,0 +1,1533 @@ +/* + * Copyright 2009 Nicolai Hähnle + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_compiler.h" + +#include "../r300_reg.h" + +#include "radeon_nqssadce.h" + +#include "shader/prog_optimize.h" +#include "shader/prog_print.h" + + +/* TODO: Get rid of t_src_class call */ +#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ + ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \ + t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \ + (t_src_class(a.File) == PVS_SRC_REG_INPUT && \ + t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \ + +/* + * Take an already-setup and valid source then swizzle it appropriately to + * obtain a constant ZERO or ONE source. 
+ */ +#define __CONST(x, y) \ + (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_swizzle(y), \ + t_src_class(src[x].File), \ + NEGATE_NONE) | (src[x].RelAddr << 4)) + + + + +static unsigned long t_dst_mask(GLuint mask) +{ + /* WRITEMASK_* is equivalent to VSF_FLAG_* */ + return mask & WRITEMASK_XYZW; +} + +static unsigned long t_dst_class(gl_register_file file) +{ + + switch (file) { + case PROGRAM_TEMPORARY: + return PVS_DST_REG_TEMPORARY; + case PROGRAM_OUTPUT: + return PVS_DST_REG_OUT; + case PROGRAM_ADDRESS: + return PVS_DST_REG_A0; + /* + case PROGRAM_INPUT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static unsigned long t_dst_index(struct r300_vertex_program_code *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT) + return vp->outputs[dst->Index]; + + return dst->Index; +} + +static unsigned long t_src_class(gl_register_file file) +{ + switch (file) { + case PROGRAM_TEMPORARY: + return PVS_SRC_REG_TEMPORARY; + case PROGRAM_INPUT: + return PVS_SRC_REG_INPUT; + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + case PROGRAM_STATE_VAR: + return PVS_SRC_REG_CONSTANT; + /* + case PROGRAM_OUTPUT: + case PROGRAM_WRITE_ONLY: + case PROGRAM_ADDRESS: + */ + default: + fprintf(stderr, "problem in %s", __FUNCTION__); + _mesa_exit(-1); + return -1; + } +} + +static INLINE unsigned long t_swizzle(GLubyte swizzle) +{ + /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + return swizzle; +} + +static unsigned long t_src_index(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + if (src->File == PROGRAM_INPUT) { + assert(vp->inputs[src->Index] != -1); + return 
vp->inputs[src->Index]; + } else { + if (src->Index < 0) { + fprintf(stderr, + "negative offsets for indirect addressing do not work.\n"); + return 0; + } + return src->Index; + } +} + +/* these two functions should probably be merged... */ + +static unsigned long t_src(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + /* src->Negate uses the NEGATE_ flags from prog_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 1)), + t_swizzle(GET_SWZ(src->Swizzle, 2)), + t_swizzle(GET_SWZ(src->Swizzle, 3)), + t_src_class(src->File), + src->Negate) | (src->RelAddr << 4); +} + +static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, + struct prog_src_register *src) +{ + /* src->Negate uses the NEGATE_ flags from prog_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ + return PVS_SRC_OPERAND(t_src_index(vp, src), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_swizzle(GET_SWZ(src->Swizzle, 0)), + t_src_class(src->File), + src->Negate ? 
NEGATE_XYZW : NEGATE_NONE) | + (src->RelAddr << 4); +} + +static GLboolean valid_dst(struct r300_vertex_program_code *vp, + struct prog_dst_register *dst) +{ + if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { + return GL_FALSE; + } else if (dst->File == PROGRAM_ADDRESS) { + assert(dst->Index == 0); + } + + return GL_TRUE; +} + +static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + + inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), + t_src_class(src[0].File), + (!src[0]. + Negate) ? 
NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[3] = 0; + + return inst; +} + +static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + SWIZZLE_ZERO, + t_src_class(src[0].File), + src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = + PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, + t_src_class(src[1].File), + src[1].Negate ? 
NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} + inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + PVS_SRC_SELECT_FORCE_1, + t_src_class(src[0].File), + src[0].Negate ? 
NEGATE_XYZ : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3], + int *u_temp_i) +{ + /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} + ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ + + inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, + GL_FALSE, + GL_FALSE, + *u_temp_i, + 
t_dst_mask(vpi->DstReg.WriteMask), + PVS_DST_REG_TEMPORARY); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + inst += 4; + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(*u_temp_i, + PVS_SRC_SELECT_X, + PVS_SRC_SELECT_Y, + PVS_SRC_SELECT_Z, + PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, + /* Not 100% sure about this */ + (!src[0]. + Negate) ? NEGATE_XYZW : NEGATE_NONE); + inst[3] = __CONST(0, SWIZZLE_ZERO); + (*u_temp_i)--; + + return inst; +} + +static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} + + inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_src_class(src[0].File), + src[0].Negate ? 
NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: User-specified swizzles might not work. */ + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_src_class(src[0].File), + src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_src_class(src[0].File), + src[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + PVS_SRC_SELECT_FORCE_0, // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0].Negate ? 
NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + + return inst; +} + +static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, + GL_FALSE, + GL_TRUE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = t_src(vp, &src[2]); + + return inst; +} + +static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMOV(struct 
r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = t_src_scalar(vp, &src[1]); + + return inst; +} + +static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program_code *vp, + struct 
prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX, + GL_TRUE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src_scalar(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = t_src(vp, &src[1]); + inst[3] = __CONST(1, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + +#if 0 + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + 
t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + Negate) ? NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); + inst[3] = 0; +#else + inst[0] = + PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ONE); + inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + Negate) ? NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); +#endif + + return inst; +} + +static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = t_src(vp, &src[0]); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + + return inst; +} + +static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program_code *vp, + struct prog_instruction *vpi, + GLuint * inst, + struct prog_src_register src[3], + int *u_temp_i) +{ + /* mul r0, r1.yzxw, r2.zxyw + mad r0, -r2.yzxw, r1.zxyw, r0 + */ + + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + *u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), + PVS_DST_REG_TEMPORARY); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0].Negate ? 
NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W + t_src_class(src[1].File), + src[1].Negate ? NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); + inst[3] = __CONST(1, SWIZZLE_ZERO); + inst += 4; + + inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, + GL_FALSE, + GL_FALSE, + t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W + t_src_class(src[1].File), + (!src[1]. + Negate) ? NEGATE_XYZW : NEGATE_NONE) | + (src[1].RelAddr << 4); + inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W + t_src_class(src[0].File), + src[0].Negate ? 
NEGATE_XYZW : NEGATE_NONE) | + (src[0].RelAddr << 4); + inst[3] = + PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, + PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, + PVS_SRC_REG_TEMPORARY, NEGATE_NONE); + + (*u_temp_i)--; + + return inst; +} + +static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp) +{ + int i; + int cur_reg; + GLuint OutputsWritten, InputsRead; + + OutputsWritten = glvp->OutputsWritten; + InputsRead = glvp->InputsRead; + + cur_reg = -1; + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) + vp->inputs[i] = ++cur_reg; + else + vp->inputs[i] = -1; + } + + cur_reg = 0; + for (i = 0; i < VERT_RESULT_MAX; i++) + vp->outputs[i] = -1; + + assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); + + if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { + vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { + vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + } + + /* If we're writing back-facing colors we need to send + * four colors to make front/back face color selection work. + * If the vertex program doesn't write all 4 colors, let's + * pretend it does by skipping output index reg so the colors + * get written into appropriate output vectors. 
+ */ + if (OutputsWritten & (1 << VERT_RESULT_COL0)) { + vp->outputs[VERT_RESULT_COL0] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_COL1)) { + vp->outputs[VERT_RESULT_COL1] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { + vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { + vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { + cur_reg++; + } + + for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { + if (OutputsWritten & (1 << i)) { + vp->outputs[i] = cur_reg++; + } + } + + if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { + vp->outputs[VERT_RESULT_FOGC] = cur_reg++; + } +} + +static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * compiler) +{ + struct prog_instruction *vpi = compiler->program->Instructions; + int i; + GLuint *inst; + unsigned long num_operands; + /* Initial value should be last tmp reg that hw supports. + Strangely enough r300 doesn't mind even though these would be out of range. + Smart enough to realize that it doesn't need it? 
*/ + int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; + struct prog_src_register src[3]; + struct r300_vertex_program_code * vp = compiler->code; + + compiler->code->pos_end = 0; /* Not supported yet */ + compiler->code->length = 0; + + t_inputs_outputs(compiler->code, compiler->program); + + for (inst = compiler->code->body.d; vpi->Opcode != OPCODE_END; + vpi++, inst += 4) { + + { + int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; + if((compiler->code->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { + fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", compiler->code->num_temporaries, u_temp_used); + return GL_FALSE; + } + u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; + } + + if (!valid_dst(compiler->code, &vpi->DstReg)) { + /* redirect result to unused temp */ + vpi->DstReg.File = PROGRAM_TEMPORARY; + vpi->DstReg.Index = u_temp_i; + } + + num_operands = _mesa_num_inst_src_regs(vpi->Opcode); + + /* copy the sources (src) from mesa into a local variable... is this needed? */ + for (i = 0; i < num_operands; i++) { + src[i] = vpi->SrcReg[i]; + } + + if (num_operands == 3) { /* TODO: scalars */ + if (CMP_SRCS(src[1], src[2]) + || CMP_SRCS(src[0], src[2])) { + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + u_temp_i, + WRITEMASK_XYZW, + PVS_DST_REG_TEMPORARY); + inst[1] = + PVS_SRC_OPERAND(t_src_index(compiler->code, &src[2]), + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W, + t_src_class(src[2].File), + NEGATE_NONE) | (src[2]. 
+ RelAddr << + 4); + inst[2] = __CONST(2, SWIZZLE_ZERO); + inst[3] = __CONST(2, SWIZZLE_ZERO); + inst += 4; + + src[2].File = PROGRAM_TEMPORARY; + src[2].Index = u_temp_i; + src[2].RelAddr = 0; + u_temp_i--; + } + } + + if (num_operands >= 2) { + if (CMP_SRCS(src[1], src[0])) { + inst[0] = PVS_OP_DST_OPERAND(VE_ADD, + GL_FALSE, + GL_FALSE, + u_temp_i, + WRITEMASK_XYZW, + PVS_DST_REG_TEMPORARY); + inst[1] = + PVS_SRC_OPERAND(t_src_index(compiler->code, &src[0]), + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_W, + t_src_class(src[0].File), + NEGATE_NONE) | (src[0]. + RelAddr << + 4); + inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[3] = __CONST(0, SWIZZLE_ZERO); + inst += 4; + + src[0].File = PROGRAM_TEMPORARY; + src[0].Index = u_temp_i; + src[0].RelAddr = 0; + u_temp_i--; + } + } + + switch (vpi->Opcode) { + case OPCODE_ABS: + inst = r300TranslateOpcodeABS(compiler->code, vpi, inst, src); + break; + case OPCODE_ADD: + inst = r300TranslateOpcodeADD(compiler->code, vpi, inst, src); + break; + case OPCODE_ARL: + inst = r300TranslateOpcodeARL(compiler->code, vpi, inst, src); + break; + case OPCODE_DP3: + inst = r300TranslateOpcodeDP3(compiler->code, vpi, inst, src); + break; + case OPCODE_DP4: + inst = r300TranslateOpcodeDP4(compiler->code, vpi, inst, src); + break; + case OPCODE_DPH: + inst = r300TranslateOpcodeDPH(compiler->code, vpi, inst, src); + break; + case OPCODE_DST: + inst = r300TranslateOpcodeDST(compiler->code, vpi, inst, src); + break; + case OPCODE_EX2: + inst = r300TranslateOpcodeEX2(compiler->code, vpi, inst, src); + break; + case OPCODE_EXP: + inst = r300TranslateOpcodeEXP(compiler->code, vpi, inst, src); + break; + case OPCODE_FLR: + inst = r300TranslateOpcodeFLR(compiler->code, vpi, inst, src, /* FIXME */ + &u_temp_i); + break; + case OPCODE_FRC: + inst = r300TranslateOpcodeFRC(compiler->code, vpi, inst, src); + break; + case OPCODE_LG2: + inst = r300TranslateOpcodeLG2(compiler->code, vpi, inst, src); + break; + case OPCODE_LIT: + inst = 
r300TranslateOpcodeLIT(compiler->code, vpi, inst, src); + break; + case OPCODE_LOG: + inst = r300TranslateOpcodeLOG(compiler->code, vpi, inst, src); + break; + case OPCODE_MAD: + inst = r300TranslateOpcodeMAD(compiler->code, vpi, inst, src); + break; + case OPCODE_MAX: + inst = r300TranslateOpcodeMAX(compiler->code, vpi, inst, src); + break; + case OPCODE_MIN: + inst = r300TranslateOpcodeMIN(compiler->code, vpi, inst, src); + break; + case OPCODE_MOV: + inst = r300TranslateOpcodeMOV(compiler->code, vpi, inst, src); + break; + case OPCODE_MUL: + inst = r300TranslateOpcodeMUL(compiler->code, vpi, inst, src); + break; + case OPCODE_POW: + inst = r300TranslateOpcodePOW(compiler->code, vpi, inst, src); + break; + case OPCODE_RCP: + inst = r300TranslateOpcodeRCP(compiler->code, vpi, inst, src); + break; + case OPCODE_RSQ: + inst = r300TranslateOpcodeRSQ(compiler->code, vpi, inst, src); + break; + case OPCODE_SGE: + inst = r300TranslateOpcodeSGE(compiler->code, vpi, inst, src); + break; + case OPCODE_SLT: + inst = r300TranslateOpcodeSLT(compiler->code, vpi, inst, src); + break; + case OPCODE_SUB: + inst = r300TranslateOpcodeSUB(compiler->code, vpi, inst, src); + break; + case OPCODE_SWZ: + inst = r300TranslateOpcodeSWZ(compiler->code, vpi, inst, src); + break; + case OPCODE_XPD: + inst = r300TranslateOpcodeXPD(compiler->code, vpi, inst, src, /* FIXME */ + &u_temp_i); + break; + default: + return GL_FALSE; + } + } + + compiler->code->length = (inst - compiler->code->body.d); + if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) { + return GL_FALSE; + } + + return GL_TRUE; +} + +static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id) +{ + struct prog_instruction *vpi; + + _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2); + + vpi = &prog->Instructions[prog->NumInstructions - 3]; + + vpi->Opcode = OPCODE_MOV; + + vpi->DstReg.File = PROGRAM_OUTPUT; + vpi->DstReg.Index = VERT_RESULT_HPOS; + vpi->DstReg.WriteMask = WRITEMASK_XYZW; + 
vpi->DstReg.CondMask = COND_TR; + + vpi->SrcReg[0].File = PROGRAM_TEMPORARY; + vpi->SrcReg[0].Index = temp_index; + vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++vpi; + + vpi->Opcode = OPCODE_MOV; + + vpi->DstReg.File = PROGRAM_OUTPUT; + vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; + vpi->DstReg.WriteMask = WRITEMASK_XYZW; + vpi->DstReg.CondMask = COND_TR; + + vpi->SrcReg[0].File = PROGRAM_TEMPORARY; + vpi->SrcReg[0].Index = temp_index; + vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++vpi; + + vpi->Opcode = OPCODE_END; +} + +static void pos_as_texcoord(struct gl_program *prog, int tex_id) +{ + struct prog_instruction *vpi; + GLuint tempregi = prog->NumTemporaries; + + prog->NumTemporaries++; + + for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { + if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) { + vpi->DstReg.File = PROGRAM_TEMPORARY; + vpi->DstReg.Index = tempregi; + } + } + + insert_wpos(prog, tempregi, tex_id); + + prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); +} + +/** + * The fogcoord attribute is special in that only the first component + * is relevant, and the remaining components are always fixed (when read + * from by the fragment program) to yield an X001 pattern. + * + * We need to enforce this either in the vertex program or in the fragment + * program, and this code chooses not to enforce it in the vertex program. + * This is slightly cheaper, as long as the fragment program does not use + * weird swizzles. + * + * And it seems that usually, weird swizzles are not used, so... + * + * See also the counterpart rewriting for fragment programs. 
+ */ +static void fog_as_texcoord(struct gl_program *prog, int tex_id) +{ + struct prog_instruction *vpi; + + vpi = prog->Instructions; + while (vpi->Opcode != OPCODE_END) { + if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) { + vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; + vpi->DstReg.WriteMask = WRITEMASK_X; + } + + ++vpi; + } + + prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC); + prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); +} + +static int translateABS(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_MAX; + inst->SrcReg[1] = inst->SrcReg[0]; + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + + return 0; +} + +static int translateDP3(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_DP4; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + return 0; +} + +static int translateDPH(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_DP4; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + + return 0; +} + +static int translateFLR(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + struct prog_dst_register dst; + int tmp_idx; + + tmp_idx = prog->NumTemporaries++; + + _mesa_insert_instructions(prog, pos + 1, 1); + + inst = &prog->Instructions[pos]; + dst = inst->DstReg; + + inst->Opcode = OPCODE_FRC; + inst->DstReg.File = PROGRAM_TEMPORARY; + inst->DstReg.Index = tmp_idx; + ++inst; + + inst->Opcode = OPCODE_ADD; + inst->DstReg = dst; + inst->SrcReg[0] = (inst-1)->SrcReg[0]; + inst->SrcReg[1].File = PROGRAM_TEMPORARY; + inst->SrcReg[1].Index = tmp_idx; + inst->SrcReg[1].Negate = NEGATE_XYZW; + + return 1; +} + +static int 
translateSUB(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + + inst = &prog->Instructions[pos]; + + inst->Opcode = OPCODE_ADD; + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + + return 0; +} + +static int translateSWZ(struct gl_program *prog, int pos) +{ + prog->Instructions[pos].Opcode = OPCODE_MOV; + + return 0; +} + +static int translateXPD(struct gl_program *prog, int pos) +{ + struct prog_instruction *inst; + int tmp_idx; + + tmp_idx = prog->NumTemporaries++; + + _mesa_insert_instructions(prog, pos + 1, 1); + + inst = &prog->Instructions[pos]; + + *(inst+1) = *inst; + + inst->Opcode = OPCODE_MUL; + inst->DstReg.File = PROGRAM_TEMPORARY; + inst->DstReg.Index = tmp_idx; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); + inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); + ++inst; + + inst->Opcode = OPCODE_MAD; + inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); + inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); + inst->SrcReg[1].Negate ^= NEGATE_XYZW; + inst->SrcReg[2].File = PROGRAM_TEMPORARY; + inst->SrcReg[2].Index = tmp_idx; + + return 1; +} + +static void translateInsts(struct gl_program *prog) +{ + struct prog_instruction *inst; + int i; + + for (i = 0; i < prog->NumInstructions; ++i) { + inst = &prog->Instructions[i]; + + switch (inst->Opcode) { + case OPCODE_ABS: + i += translateABS(prog, i); + break; + case OPCODE_DP3: + i += translateDP3(prog, i); + break; + case OPCODE_DPH: + i += translateDPH(prog, i); + break; + case OPCODE_FLR: + i += translateFLR(prog, i); + break; + case OPCODE_SUB: + i += translateSUB(prog, i); + break; + case OPCODE_SWZ: + i += translateSWZ(prog, i); + break; + case OPCODE_XPD: + i += translateXPD(prog, i); + break; + default: + break; + } + } +} + 
+#define ADD_OUTPUT(fp_attr, vp_result) \ + do { \ + if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \ + OutputsAdded |= 1 << (vp_result); \ + count++; \ + } \ + } while (0) + +static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) +{ + GLuint OutputsAdded, FpReads; + int i, count; + + OutputsAdded = 0; + count = 0; + FpReads = compiler->state.FpReads; + + ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); + ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); + + for (i = 0; i < 7; ++i) { + ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); + } + + /* Some outputs may be artificially added, to match the inputs of the fragment program. + * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by + * vertex program are undefined, so just use MOV [vertex_result], CONST[0] + */ + if (count > 0) { + struct prog_instruction *inst; + + _mesa_insert_instructions(compiler->program, compiler->program->NumInstructions - 1, count); + inst = &compiler->program->Instructions[compiler->program->NumInstructions - 1 - count]; + + for (i = 0; i < VERT_RESULT_MAX; ++i) { + if (OutputsAdded & (1 << i)) { + inst->Opcode = OPCODE_MOV; + + inst->DstReg.File = PROGRAM_OUTPUT; + inst->DstReg.Index = i; + inst->DstReg.WriteMask = WRITEMASK_XYZW; + inst->DstReg.CondMask = COND_TR; + + inst->SrcReg[0].File = PROGRAM_CONSTANT; + inst->SrcReg[0].Index = 0; + inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++inst; + } + } + + compiler->program->OutputsWritten |= OutputsAdded; + } +} + +#undef ADD_OUTPUT + +static void nqssadceInit(struct nqssadce_state* s) +{ + struct r300_vertex_program_compiler * compiler = s->UserData; + GLuint fp_reads; + + fp_reads = compiler->state.FpReads; + { + if (fp_reads & FRAG_BIT_COL0) { + s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW; + s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW; + } + + if (fp_reads & FRAG_BIT_COL1) { + 
s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW; + s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW; + } + } + + { + int i; + for (i = 0; i < 8; ++i) { + if (fp_reads & FRAG_BIT_TEX(i)) { + s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW; + } + } + } + + s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW; + if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ)) + s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X; +} + +static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) +{ + (void) opcode; + (void) reg; + + return GL_TRUE; +} + + + +GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler, GLcontext * ctx) +{ + GLboolean success; + + if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) { + pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0); + } + + if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) { + fog_as_texcoord(compiler->program, compiler->state.FogAttr - FRAG_ATTRIB_TEX0); + } + + addArtificialOutputs(compiler); + + translateInsts(compiler->program); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after native rewrite:\n"); + _mesa_print_program(compiler->program); + fflush(stdout); + } + + { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadceInit, + .IsNativeSwizzle = &swizzleIsNative, + .BuildSwizzle = NULL + }; + radeonNqssaDce(compiler->program, &nqssadce, compiler); + + /* We need this step for reusing temporary registers */ + _mesa_optimize_program(ctx, compiler->program); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after NQSSADCE:\n"); + _mesa_print_program(compiler->program); + fflush(stdout); + } + } + + assert(compiler->program->NumInstructions); + { + struct prog_instruction *inst; + int max, i, tmp; + + inst = compiler->program->Instructions; + max = -1; + while (inst->Opcode != OPCODE_END) { + tmp = _mesa_num_inst_src_regs(inst->Opcode); + for (i = 0; i < tmp; ++i) { + if 
(inst->SrcReg[i].File == PROGRAM_TEMPORARY) { + if ((int) inst->SrcReg[i].Index > max) { + max = inst->SrcReg[i].Index; + } + } + } + + if (_mesa_num_inst_dst_regs(inst->Opcode)) { + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + if ((int) inst->DstReg.Index > max) { + max = inst->DstReg.Index; + } + } + } + ++inst; + } + + /* We actually want highest index of used temporary register, + * not the number of temporaries used. + * These values aren't always the same. + */ + compiler->code->num_temporaries = max + 1; + } + + success = translate_vertex_program(compiler); + + compiler->code->InputsRead = compiler->program->InputsRead; + compiler->code->OutputsWritten = compiler->program->OutputsWritten; + + return success; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index 7d8bf483e7..e89e7bc17b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -147,4 +147,29 @@ struct rX00_fragment_program_code { }; +#define VSF_MAX_FRAGMENT_LENGTH (255*4) +#define VSF_MAX_FRAGMENT_TEMPS (14) + +struct r300_vertex_program_external_state { + GLuint FpReads; + GLuint FogAttr; + GLuint WPosAttr; +}; + +struct r300_vertex_program_code { + int length; + union { + GLuint d[VSF_MAX_FRAGMENT_LENGTH]; + float f[VSF_MAX_FRAGMENT_LENGTH]; + } body; + + int pos_end; + int num_temporaries; /* Number of temp vars used by program */ + int inputs[VERT_ATTRIB_MAX]; + int outputs[VERT_RESULT_MAX]; + + GLbitfield InputsRead; + GLbitfield OutputsWritten; +}; + #endif /* RADEON_CODE_H */ \ No newline at end of file diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index a5f70173b7..f8e4b3c681 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -66,4 +66,14 @@ struct r300_fragment_program_compiler { GLboolean 
r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); + +struct r300_vertex_program_compiler { + struct radeon_compiler Base; + struct r300_vertex_program_code *code; + struct r300_vertex_program_external_state state; + struct gl_program *program; +}; + +GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c, GLcontext * ctx); + #endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index d14d992366..5c575441d7 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -390,46 +390,19 @@ struct r300_hw_state { /* Vertex shader state */ -/* Perhaps more if we store programs in vmem? */ -/* drm_r300_cmd_header_t->vpu->count is unsigned char */ -#define VSF_MAX_FRAGMENT_LENGTH (255*4) - -/* Can be tested with colormat currently. */ -#define VSF_MAX_FRAGMENT_TEMPS (14) - #define COLOR_IS_RGBA #define TAG(x) r300##x #include "tnl_dd/t_dd_vertex.h" #undef TAG -struct r300_vertex_program_key { - GLuint FpReads; - GLuint FogAttr; - GLuint WPosAttr; -}; - struct r300_vertex_program { struct gl_vertex_program *Base; struct r300_vertex_program *next; - struct r300_vertex_program_key key; - GLbitfield InputsRead; - GLbitfield OutputsWritten; - - struct r300_vertex_shader_hw_code { - int length; - union { - GLuint d[VSF_MAX_FRAGMENT_LENGTH]; - float f[VSF_MAX_FRAGMENT_LENGTH]; - } body; - } hw_code; + struct r300_vertex_program_external_state key; + struct r300_vertex_program_code code; GLboolean error; - - int pos_end; - int num_temporaries; /* Number of temp vars used by program */ - int inputs[VERT_ATTRIB_MAX]; - int outputs[VERT_RESULT_MAX]; }; struct r300_vertex_program_cont { diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index e2e92fde48..fcfd309933 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -341,7 +341,7 @@ static 
void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar { int i, tmp; - tmp = r300->selected_vp->InputsRead; + tmp = r300->selected_vp->code.InputsRead; i = 0; vbuf->num_attribs = 0; while (tmp) { @@ -437,7 +437,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, if (r300->fallback) return GL_FALSE; - r300SetupVAP(ctx, r300->selected_vp->InputsRead, r300->selected_vp->OutputsWritten); + r300SetupVAP(ctx, r300->selected_vp->code.InputsRead, r300->selected_vp->code.OutputsWritten); r300UpdateShaderStates(r300); diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 2fa626bab2..5bded642ef 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -567,12 +567,12 @@ static void r300EmitClearState(GLcontext * ctx) 0, 0xf, PVS_DST_REG_OUT); vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[4] = 0x0; vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, @@ -580,13 +580,12 @@ static void r300EmitClearState(GLcontext * ctx) vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, - - VSF_FLAG_NONE); + NEGATE_NONE); vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, - PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + PVS_SRC_REG_INPUT, NEGATE_NONE); vpu.cmd[8] = 0x0; r300->vap_flush_needed = GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 357c600af9..dd32e6c730 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ 
b/src/mesa/drivers/dri/r300/r300_reg.h @@ -2667,6 +2667,24 @@ enum { PVS_SRC_ADDR_MODE_1_SHIFT = 32, }; + +#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ + (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ + | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ + | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ + | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ + | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) + +#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ + (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ + | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ + | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ + | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ + | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ + | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ + | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) + /*\}*/ /* BEGIN: Packet 3 commands */ diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index ad57b7e2f1..e3e8a6fb3d 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1458,7 +1458,7 @@ static void r300SetupRSUnit(GLcontext * ctx) hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = r300->selected_vp->OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten; else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); @@ -1552,7 +1552,7 @@ static void r500SetupRSUnit(GLcontext * ctx) hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = r300->selected_vp->OutputsWritten; + 
OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten; else RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 95cedd9d91..ec4ba9ca7d 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -40,39 +40,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "shader/prog_statevars.h" #include "tnl/tnl.h" +#include "compiler/radeon_compiler.h" #include "compiler/radeon_nqssadce.h" #include "r300_context.h" #include "r300_state.h" -/* TODO: Get rid of t_src_class call */ -#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ - ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \ - t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \ - (t_src_class(a.File) == PVS_SRC_REG_INPUT && \ - t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \ - -/* - * Take an already-setup and valid source then swizzle it appropriately to - * obtain a constant ZERO or ONE source. 
- */ -#define __CONST(x, y) \ - (PVS_SRC_OPERAND(t_src_index(vp, &src[x]), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_swizzle(y), \ - t_src_class(src[x].File), \ - VSF_FLAG_NONE) | (src[x].RelAddr << 4)) - -#define FREE_TEMPS() \ - do { \ - int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \ - if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \ - WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \ - vp->error = GL_TRUE; \ - } \ - u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ - } while (0) static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst) { @@ -125,1513 +97,38 @@ static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program return dst - dst_o; } -static unsigned long t_dst_mask(GLuint mask) -{ - /* WRITEMASK_* is equivalent to VSF_FLAG_* */ - return mask & VSF_FLAG_ALL; -} - -static unsigned long t_dst_class(gl_register_file file) -{ - - switch (file) { - case PROGRAM_TEMPORARY: - return PVS_DST_REG_TEMPORARY; - case PROGRAM_OUTPUT: - return PVS_DST_REG_OUT; - case PROGRAM_ADDRESS: - return PVS_DST_REG_A0; - /* - case PROGRAM_INPUT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_STATE_VAR: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; - } -} - -static unsigned long t_dst_index(struct r300_vertex_program *vp, - struct prog_dst_register *dst) -{ - if (dst->File == PROGRAM_OUTPUT) - return vp->outputs[dst->Index]; - - return dst->Index; -} - -static unsigned long t_src_class(gl_register_file file) -{ - switch (file) { - case PROGRAM_TEMPORARY: - return PVS_SRC_REG_TEMPORARY; - case PROGRAM_INPUT: - return PVS_SRC_REG_INPUT; - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - case PROGRAM_STATE_VAR: - return PVS_SRC_REG_CONSTANT; 
- /* - case PROGRAM_OUTPUT: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; - } -} - -static INLINE unsigned long t_swizzle(GLubyte swizzle) -{ -/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ - return swizzle; -} - -#if 0 -static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) -{ - int i; - - if (vp == NULL) { - fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, - caller); - return; - } - - fprintf(stderr, "%s:<", caller); - for (i = 0; i < VERT_ATTRIB_MAX; i++) - fprintf(stderr, "%d ", vp->inputs[i]); - fprintf(stderr, ">\n"); - -} -#endif - -static unsigned long t_src_index(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - if (src->File == PROGRAM_INPUT) { - assert(vp->inputs[src->Index] != -1); - return vp->inputs[src->Index]; - } else { - if (src->Index < 0) { - fprintf(stderr, - "negative offsets for indirect addressing do not work.\n"); - return 0; - } - return src->Index; - } -} - -/* these two functions should probably be merged... */ - -static unsigned long t_src(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - /* src->Negate uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. - */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 1)), - t_swizzle(GET_SWZ(src->Swizzle, 2)), - t_swizzle(GET_SWZ(src->Swizzle, 3)), - t_src_class(src->File), - src->Negate) | (src->RelAddr << 4); -} - -static unsigned long t_src_scalar(struct r300_vertex_program *vp, - struct prog_src_register *src) -{ - /* src->Negate uses the NEGATE_ flags from program_instruction.h, - * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. 
- */ - return PVS_SRC_OPERAND(t_src_index(vp, src), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_swizzle(GET_SWZ(src->Swizzle, 0)), - t_src_class(src->File), - src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src->RelAddr << 4); -} - -static GLboolean valid_dst(struct r300_vertex_program *vp, - struct prog_dst_register *dst) -{ - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { - assert(dst->Index == 0); - } - - return GL_TRUE; -} - -static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - - inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), - t_src_class(src[0].File), - (!src[0]. - Negate) ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = 0; - - return inst; -} - -static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = - PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO, - t_src_class(src[1].File), - src[1].Negate ? 
VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} - inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), - PVS_SRC_SELECT_FORCE_1, - t_src_class(src[0].File), - src[0].Negate ? 
VSF_FLAG_XYZ : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3], - int *u_temp_i) -{ - /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} - ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ - - inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, - GL_FALSE, - GL_FALSE, - *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - 
PVS_DST_REG_TEMPORARY); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(*u_temp_i, - PVS_SRC_SELECT_X, - PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, - PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY, - /* Not 100% sure about this */ - (!src[0]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE - /*VSF_FLAG_ALL */ ); - inst[3] = __CONST(0, SWIZZLE_ZERO); - (*u_temp_i)--; - - return inst; -} - -static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} - - inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), - t_src_class(src[0].File), - src[0].Negate ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} - - inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - /* NOTE: Users swizzling might not work. */ - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_src_class(src[0].File), - src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - PVS_SRC_SELECT_FORCE_0, // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - - return inst; -} - -static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, - GL_FALSE, - GL_TRUE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = t_src(vp, &src[2]); - - return inst; -} - -static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp, - struct 
prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = t_src_scalar(vp, &src[1]); - - return inst; -} - -static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register 
src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX, - GL_TRUE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src_scalar(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = t_src(vp, &src[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - -#if 0 - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - Negate) ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = 0; -#else - inst[0] = - PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ONE); - inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); -#endif - - return inst; -} - -static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3]) -{ - //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = t_src(vp, &src[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - - return inst; -} - -static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, - struct prog_instruction *vpi, - GLuint * inst, - struct prog_src_register src[3], - int *u_temp_i) -{ - /* mul r0, r1.yzxw, r2.zxyw - mad r0, -r2.yzxw, r1.zxyw, r0 - */ - - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - *u_temp_i, - t_dst_mask(vpi->DstReg.WriteMask), - PVS_DST_REG_TEMPORARY); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W - t_src_class(src[1].File), - src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[3] = __CONST(1, SWIZZLE_ZERO); - inst += 4; - - inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // W - t_src_class(src[1].File), - (!src[1]. - Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // Z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // X - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // Y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // W - t_src_class(src[0].File), - src[0].Negate ? 
VSF_FLAG_ALL : VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - inst[3] = - PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, - PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, - PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE); - - (*u_temp_i)--; - - return inst; -} - -static void t_inputs_outputs(struct r300_vertex_program *vp, struct gl_program * glvp) -{ - int i; - int cur_reg; - GLuint OutputsWritten, InputsRead; - - OutputsWritten = glvp->OutputsWritten; - InputsRead = glvp->InputsRead; - - cur_reg = -1; - for (i = 0; i < VERT_ATTRIB_MAX; i++) { - if (InputsRead & (1 << i)) - vp->inputs[i] = ++cur_reg; - else - vp->inputs[i] = -1; - } - - cur_reg = 0; - for (i = 0; i < VERT_RESULT_MAX; i++) - vp->outputs[i] = -1; - - assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); - - if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { - vp->outputs[VERT_RESULT_HPOS] = cur_reg++; - } - - if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { - vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; - } - - /* If we're writing back facing colors we need to send - * four colors to make front/back face colors selection work. - * If the vertex program doesn't write all 4 colors, lets - * pretend it does by skipping output index reg so the colors - * get written into appropriate output vectors. 
- */ - if (OutputsWritten & (1 << VERT_RESULT_COL0)) { - vp->outputs[VERT_RESULT_COL0] = cur_reg++; - } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || - OutputsWritten & (1 << VERT_RESULT_BFC1)) { - cur_reg++; - } - - if (OutputsWritten & (1 << VERT_RESULT_COL1)) { - vp->outputs[VERT_RESULT_COL1] = cur_reg++; - } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || - OutputsWritten & (1 << VERT_RESULT_BFC1)) { - cur_reg++; - } - - if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { - vp->outputs[VERT_RESULT_BFC0] = cur_reg++; - } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { - cur_reg++; - } - - if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { - vp->outputs[VERT_RESULT_BFC1] = cur_reg++; - } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { - cur_reg++; - } - - for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { - if (OutputsWritten & (1 << i)) { - vp->outputs[i] = cur_reg++; - } - } - - if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { - vp->outputs[VERT_RESULT_FOGC] = cur_reg++; - } -} - -static void translate_vertex_program(struct r300_vertex_program *vp, struct gl_program * glvp) -{ - struct prog_instruction *vpi = glvp->Instructions; - int i; - GLuint *inst; - unsigned long num_operands; - /* Initial value should be last tmp reg that hw supports. - Strangely enough r300 doesnt mind even though these would be out of range. - Smart enough to realize that it doesnt need it? 
*/ - int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; - struct prog_src_register src[3]; - - vp->pos_end = 0; /* Not supported yet */ - vp->hw_code.length = 0; - vp->error = GL_FALSE; - - t_inputs_outputs(vp, glvp); - - for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END; - vpi++, inst += 4) { - - FREE_TEMPS(); - - if (!valid_dst(vp, &vpi->DstReg)) { - /* redirect result to unused temp */ - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = u_temp_i; - } - - num_operands = _mesa_num_inst_src_regs(vpi->Opcode); - - /* copy the sources (src) from mesa into a local variable... is this needed? */ - for (i = 0; i < num_operands; i++) { - src[i] = vpi->SrcReg[i]; - } - - if (num_operands == 3) { /* TODO: scalars */ - if (CMP_SRCS(src[1], src[2]) - || CMP_SRCS(src[0], src[2])) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - u_temp_i, - VSF_FLAG_ALL, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SRC_OPERAND(t_src_index(vp, &src[2]), - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - t_src_class(src[2].File), - VSF_FLAG_NONE) | (src[2]. - RelAddr << - 4); - inst[2] = __CONST(2, SWIZZLE_ZERO); - inst[3] = __CONST(2, SWIZZLE_ZERO); - inst += 4; - - src[2].File = PROGRAM_TEMPORARY; - src[2].Index = u_temp_i; - src[2].RelAddr = 0; - u_temp_i--; - } - } - - if (num_operands >= 2) { - if (CMP_SRCS(src[1], src[0])) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - u_temp_i, - VSF_FLAG_ALL, - PVS_DST_REG_TEMPORARY); - inst[1] = - PVS_SRC_OPERAND(t_src_index(vp, &src[0]), - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_W, - t_src_class(src[0].File), - VSF_FLAG_NONE) | (src[0]. 
- RelAddr << - 4); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - - src[0].File = PROGRAM_TEMPORARY; - src[0].Index = u_temp_i; - src[0].RelAddr = 0; - u_temp_i--; - } - } - - switch (vpi->Opcode) { - case OPCODE_ABS: - inst = r300TranslateOpcodeABS(vp, vpi, inst, src); - break; - case OPCODE_ADD: - inst = r300TranslateOpcodeADD(vp, vpi, inst, src); - break; - case OPCODE_ARL: - inst = r300TranslateOpcodeARL(vp, vpi, inst, src); - break; - case OPCODE_DP3: - inst = r300TranslateOpcodeDP3(vp, vpi, inst, src); - break; - case OPCODE_DP4: - inst = r300TranslateOpcodeDP4(vp, vpi, inst, src); - break; - case OPCODE_DPH: - inst = r300TranslateOpcodeDPH(vp, vpi, inst, src); - break; - case OPCODE_DST: - inst = r300TranslateOpcodeDST(vp, vpi, inst, src); - break; - case OPCODE_EX2: - inst = r300TranslateOpcodeEX2(vp, vpi, inst, src); - break; - case OPCODE_EXP: - inst = r300TranslateOpcodeEXP(vp, vpi, inst, src); - break; - case OPCODE_FLR: - inst = r300TranslateOpcodeFLR(vp, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; - case OPCODE_FRC: - inst = r300TranslateOpcodeFRC(vp, vpi, inst, src); - break; - case OPCODE_LG2: - inst = r300TranslateOpcodeLG2(vp, vpi, inst, src); - break; - case OPCODE_LIT: - inst = r300TranslateOpcodeLIT(vp, vpi, inst, src); - break; - case OPCODE_LOG: - inst = r300TranslateOpcodeLOG(vp, vpi, inst, src); - break; - case OPCODE_MAD: - inst = r300TranslateOpcodeMAD(vp, vpi, inst, src); - break; - case OPCODE_MAX: - inst = r300TranslateOpcodeMAX(vp, vpi, inst, src); - break; - case OPCODE_MIN: - inst = r300TranslateOpcodeMIN(vp, vpi, inst, src); - break; - case OPCODE_MOV: - inst = r300TranslateOpcodeMOV(vp, vpi, inst, src); - break; - case OPCODE_MUL: - inst = r300TranslateOpcodeMUL(vp, vpi, inst, src); - break; - case OPCODE_POW: - inst = r300TranslateOpcodePOW(vp, vpi, inst, src); - break; - case OPCODE_RCP: - inst = r300TranslateOpcodeRCP(vp, vpi, inst, src); - break; - case OPCODE_RSQ: - inst = 
r300TranslateOpcodeRSQ(vp, vpi, inst, src); - break; - case OPCODE_SGE: - inst = r300TranslateOpcodeSGE(vp, vpi, inst, src); - break; - case OPCODE_SLT: - inst = r300TranslateOpcodeSLT(vp, vpi, inst, src); - break; - case OPCODE_SUB: - inst = r300TranslateOpcodeSUB(vp, vpi, inst, src); - break; - case OPCODE_SWZ: - inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src); - break; - case OPCODE_XPD: - inst = r300TranslateOpcodeXPD(vp, vpi, inst, src, /* FIXME */ - &u_temp_i); - break; - default: - vp->error = GL_TRUE; - break; - } - } - - vp->hw_code.length = (inst - vp->hw_code.body.d); - if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) { - vp->error = GL_TRUE; - } -} - -static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id) -{ - struct prog_instruction *vpi; - - _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2); - - vpi = &prog->Instructions[prog->NumInstructions - 3]; - - vpi->Opcode = OPCODE_MOV; - - vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_HPOS; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - vpi->DstReg.CondMask = COND_TR; - - vpi->SrcReg[0].File = PROGRAM_TEMPORARY; - vpi->SrcReg[0].Index = temp_index; - vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++vpi; - - vpi->Opcode = OPCODE_MOV; - - vpi->DstReg.File = PROGRAM_OUTPUT; - vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_XYZW; - vpi->DstReg.CondMask = COND_TR; - - vpi->SrcReg[0].File = PROGRAM_TEMPORARY; - vpi->SrcReg[0].Index = temp_index; - vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++vpi; - - vpi->Opcode = OPCODE_END; -} - -static void pos_as_texcoord(struct gl_program *prog, int tex_id) -{ - struct prog_instruction *vpi; - GLuint tempregi = prog->NumTemporaries; - - prog->NumTemporaries++; - - for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) { - vpi->DstReg.File = PROGRAM_TEMPORARY; - vpi->DstReg.Index = tempregi; - 
} - } - - insert_wpos(prog, tempregi, tex_id); - - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); -} - -/** - * The fogcoord attribute is special in that only the first component - * is relevant, and the remaining components are always fixed (when read - * from by the fragment program) to yield an X001 pattern. - * - * We need to enforce this either in the vertex program or in the fragment - * program, and this code chooses not to enforce it in the vertex program. - * This is slightly cheaper, as long as the fragment program does not use - * weird swizzles. - * - * And it seems that usually, weird swizzles are not used, so... - * - * See also the counterpart rewriting for fragment programs. - */ -static void fog_as_texcoord(struct gl_program *prog, int tex_id) -{ - struct prog_instruction *vpi; - - vpi = prog->Instructions; - while (vpi->Opcode != OPCODE_END) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) { - vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id; - vpi->DstReg.WriteMask = WRITEMASK_X; - } - - ++vpi; - } - - prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC); - prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id); -} - -static int translateABS(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_MAX; - inst->SrcReg[1] = inst->SrcReg[0]; - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - - return 0; -} - -static int translateDP3(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - return 0; -} - -static int translateDPH(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_DP4; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, 
SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); - - return 0; -} - -static int translateFLR(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - struct prog_dst_register dst; - int tmp_idx; - - tmp_idx = prog->NumTemporaries++; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - dst = inst->DstReg; - - inst->Opcode = OPCODE_FRC; - inst->DstReg.File = PROGRAM_TEMPORARY; - inst->DstReg.Index = tmp_idx; - ++inst; - - inst->Opcode = OPCODE_ADD; - inst->DstReg = dst; - inst->SrcReg[0] = (inst-1)->SrcReg[0]; - inst->SrcReg[1].File = PROGRAM_TEMPORARY; - inst->SrcReg[1].Index = tmp_idx; - inst->SrcReg[1].Negate = NEGATE_XYZW; - - return 1; -} - -static int translateSUB(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - - inst = &prog->Instructions[pos]; - - inst->Opcode = OPCODE_ADD; - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - - return 0; -} - -static int translateSWZ(struct gl_program *prog, int pos) -{ - prog->Instructions[pos].Opcode = OPCODE_MOV; - - return 0; -} - -static int translateXPD(struct gl_program *prog, int pos) -{ - struct prog_instruction *inst; - int tmp_idx; - - tmp_idx = prog->NumTemporaries++; - - _mesa_insert_instructions(prog, pos + 1, 1); - - inst = &prog->Instructions[pos]; - - *(inst+1) = *inst; - - inst->Opcode = OPCODE_MUL; - inst->DstReg.File = PROGRAM_TEMPORARY; - inst->DstReg.Index = tmp_idx; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); - ++inst; - - inst->Opcode = OPCODE_MAD; - inst->SrcReg[0].Swizzle = combine_swizzles4(inst->SrcReg[0].Swizzle, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W); - inst->SrcReg[1].Swizzle = combine_swizzles4(inst->SrcReg[1].Swizzle, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W); - inst->SrcReg[1].Negate ^= NEGATE_XYZW; - inst->SrcReg[2].File = 
PROGRAM_TEMPORARY; - inst->SrcReg[2].Index = tmp_idx; - - return 1; -} - -static void translateInsts(struct gl_program *prog) -{ - struct prog_instruction *inst; - int i; - - for (i = 0; i < prog->NumInstructions; ++i) { - inst = &prog->Instructions[i]; - - switch (inst->Opcode) { - case OPCODE_ABS: - i += translateABS(prog, i); - break; - case OPCODE_DP3: - i += translateDP3(prog, i); - break; - case OPCODE_DPH: - i += translateDPH(prog, i); - break; - case OPCODE_FLR: - i += translateFLR(prog, i); - break; - case OPCODE_SUB: - i += translateSUB(prog, i); - break; - case OPCODE_SWZ: - i += translateSWZ(prog, i); - break; - case OPCODE_XPD: - i += translateXPD(prog, i); - break; - default: - break; - } - } -} - -#define ADD_OUTPUT(fp_attr, vp_result) \ - do { \ - if ((FpReads & (1 << (fp_attr))) && !(prog->OutputsWritten & (1 << (vp_result)))) { \ - OutputsAdded |= 1 << (vp_result); \ - count++; \ - } \ - } while (0) - -static void addArtificialOutputs(GLcontext *ctx, struct gl_program *prog) -{ - r300ContextPtr r300 = R300_CONTEXT(ctx); - GLuint OutputsAdded, FpReads; - int i, count; - - OutputsAdded = 0; - count = 0; - FpReads = r300->selected_fp->InputsRead; - - ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); - ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); - - for (i = 0; i < 7; ++i) { - ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); - } - - /* Some outputs may be artificially added, to match the inputs of the fragment program. 
- * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by - * vertex program are undefined, so just use MOV [vertex_result], CONST[0] - */ - if (count > 0) { - struct prog_instruction *inst; - - _mesa_insert_instructions(prog, prog->NumInstructions - 1, count); - inst = &prog->Instructions[prog->NumInstructions - 1 - count]; - - for (i = 0; i < VERT_RESULT_MAX; ++i) { - if (OutputsAdded & (1 << i)) { - inst->Opcode = OPCODE_MOV; - - inst->DstReg.File = PROGRAM_OUTPUT; - inst->DstReg.Index = i; - inst->DstReg.WriteMask = WRITEMASK_XYZW; - inst->DstReg.CondMask = COND_TR; - - inst->SrcReg[0].File = PROGRAM_CONSTANT; - inst->SrcReg[0].Index = 0; - inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; - - ++inst; - } - } - - prog->OutputsWritten |= OutputsAdded; - } -} - -#undef ADD_OUTPUT - -static void nqssadceInit(struct nqssadce_state* s) -{ - r300ContextPtr r300 = (r300ContextPtr)(s->UserData); - GLuint fp_reads; - - fp_reads = r300->selected_fp->InputsRead; - { - if (fp_reads & FRAG_BIT_COL0) { - s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW; - s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW; - } - - if (fp_reads & FRAG_BIT_COL1) { - s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW; - s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW; - } - } - - { - int i; - for (i = 0; i < 8; ++i) { - if (fp_reads & FRAG_BIT_TEX(i)) { - s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW; - } - } - } - - s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW; - if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ)) - s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X; -} - -static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) -{ - (void) opcode; - (void) reg; - - return GL_TRUE; -} - static struct r300_vertex_program *build_program(GLcontext *ctx, - struct r300_vertex_program_key *wanted_key, + struct r300_vertex_program_external_state *wanted_key, const struct gl_vertex_program *mesa_vp) { - 
r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_program *vp; - struct gl_vertex_program * glvp = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); - struct gl_program *prog; + struct r300_vertex_program_compiler compiler; vp = _mesa_calloc(sizeof(*vp)); - vp->Base = glvp; + vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - prog = &glvp->Base; - - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Initial vertex program:\n"); - _mesa_print_program(prog); - fflush(stdout); - } - - if (glvp->IsPositionInvariant) { - _mesa_insert_mvp_code(ctx, glvp); - } - - if (r300->selected_fp->code.wpos_attr != FRAG_ATTRIB_MAX) { - pos_as_texcoord(&glvp->Base, r300->selected_fp->code.wpos_attr - FRAG_ATTRIB_TEX0); - } - - if (r300->selected_fp->code.fog_attr != FRAG_ATTRIB_MAX) { - fog_as_texcoord(&glvp->Base, r300->selected_fp->code.fog_attr - FRAG_ATTRIB_TEX0); - } - - addArtificialOutputs(ctx, prog); + rc_init(&compiler.Base); + compiler.Base.Debug = (RADEON_DEBUG & DEBUG_VERTS) ? 
GL_TRUE : GL_FALSE; - translateInsts(prog); + compiler.code = &vp->code; + compiler.state = vp->key; + compiler.program = vp->Base; - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Vertex program after native rewrite:\n"); - _mesa_print_program(prog); + if (compiler.Base.Debug) { + fprintf(stderr, "Initial vertex program:\n"); + _mesa_print_program(compiler.program); fflush(stdout); } - { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadceInit, - .IsNativeSwizzle = &swizzleIsNative, - .BuildSwizzle = NULL - }; - radeonNqssaDce(prog, &nqssadce, r300); - - /* We need this step for reusing temporary registers */ - _mesa_optimize_program(ctx, prog); - - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "Vertex program after NQSSADCE:\n"); - _mesa_print_program(prog); - fflush(stdout); - } - } - - assert(prog->NumInstructions); - { - struct prog_instruction *inst; - int max, i, tmp; - - inst = prog->Instructions; - max = -1; - while (inst->Opcode != OPCODE_END) { - tmp = _mesa_num_inst_src_regs(inst->Opcode); - for (i = 0; i < tmp; ++i) { - if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) { - if ((int) inst->SrcReg[i].Index > max) { - max = inst->SrcReg[i].Index; - } - } - } - - if (_mesa_num_inst_dst_regs(inst->Opcode)) { - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - if ((int) inst->DstReg.Index > max) { - max = inst->DstReg.Index; - } - } - } - ++inst; - } - - /* We actually want highest index of used temporary register, - * not the number of temporaries used. - * These values aren't always the same. 
- */ - vp->num_temporaries = max + 1; + if (mesa_vp->IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, (struct gl_vertex_program *)compiler.program); } - translate_vertex_program(vp, &glvp->Base); + if (!r3xx_compile_vertex_program(&compiler, ctx)) + vp->error = GL_TRUE; - vp->InputsRead = glvp->Base.InputsRead; - vp->OutputsWritten = glvp->Base.OutputsWritten; + rc_destroy(&compiler.Base); return vp; } @@ -1639,7 +136,7 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_vertex_program_key wanted_key = { 0 }; + struct r300_vertex_program_external_state wanted_key = { 0 }; struct r300_vertex_program_cont *vpc; struct r300_vertex_program *vp; @@ -1669,7 +166,7 @@ struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) if(_nc>_p->vpu.count)_p->vpu.count=_nc; \ } while(0) -static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code) +static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code) { int i; @@ -1717,11 +214,11 @@ void r300SetupVertexProgram(r300ContextPtr rmesa) bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; - r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code)); - inst_count = (prog->hw_code.length / 4) - 1; + r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code)); + inst_count = (prog->code.length / 4) - 1; - r300VapCntl(rmesa, _mesa_bitcount(prog->InputsRead), - _mesa_bitcount(prog->OutputsWritten), prog->num_temporaries); + r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead), + _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries); R300_STATECHANGE(rmesa, pvs); rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | diff --git 
a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h index 896699ffe2..ccec896be4 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.h +++ b/src/mesa/drivers/dri/r300/r300_vertprog.h @@ -3,34 +3,6 @@ #include "r300_reg.h" -#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \ - (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \ - | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \ - | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \ - | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \ - | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \ - | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT)) - -#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \ - (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \ - | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \ - | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \ - | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \ - | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \ - | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \ - | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT)) - -#if 1 - -#define VSF_FLAG_X 1 -#define VSF_FLAG_Y 2 -#define VSF_FLAG_Z 4 -#define VSF_FLAG_W 8 -#define VSF_FLAG_XYZ (VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z) -#define VSF_FLAG_ALL 0xf -#define VSF_FLAG_NONE 0 - -#endif void r300SetupVertexProgram(r300ContextPtr rmesa); diff --git a/src/mesa/shader/prog_instruction.h b/src/mesa/shader/prog_instruction.h index 40ad998f79..39a221eeab 100644 --- a/src/mesa/shader/prog_instruction.h +++ b/src/mesa/shader/prog_instruction.h @@ -133,6 +133,7 @@ #define NEGATE_Y 0x2 #define NEGATE_Z 0x4 #define NEGATE_W 0x8 +#define NEGATE_XYZ 0x7 #define NEGATE_XYZW 0xf #define NEGATE_NONE 0x0 /*@}*/ @@ -303,11 +304,11 @@ 
struct prog_dst_register * Condition code swizzle value. */ GLuint CondSwizzle:12; - + /** * Selects the condition code register to use for conditional destination * update masking. In NV_fragmnet_program or NV_vertex_program2 mode, only - * condition code register 0 is available. In NV_vertex_program3 mode, + * condition code register 0 is available. In NV_vertex_program3 mode, * condition code registers 0 and 1 are available. */ GLuint CondSrc:1; @@ -359,7 +360,7 @@ struct prog_instruction * NV_fragment_program, NV_fragment_program_option, NV_vertex_program3. */ GLuint SaturateMode:2; - + /** * Per-instruction selectable precision: FLOAT32, FLOAT16, FIXED12. * @@ -374,7 +375,7 @@ struct prog_instruction /*@{*/ /** Source texture unit. */ GLuint TexSrcUnit:5; - + /** Source texture target, one of TEXTURE_{1D,2D,3D,CUBE,RECT}_INDEX */ GLuint TexSrcTarget:3; -- cgit v1.2.3 From 2708ddfb06a36d8568e2aa130bf1f7d551fcd309 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 11 Aug 2009 12:31:01 -0700 Subject: vbo: Avoid extra validation of DrawElements. This saves mapping the index buffer to get a bounds on the indices that drivers just drop on the floor in the VBO case (cache win), saves a bonus walk of the indices in the CheckArrayBounds case, and other miscellaneous validation. On intel it's a particularly large win (50-100% in my app) because even though we let the indices stay in both CPU and GPU caches, we still end up waiting for the GPU to be done with the buffer before reading from it. Drivers that want the min/max_index fields must now check index_bounds_valid and use vbo_get_minmax_index before using them.
--- src/mesa/drivers/dri/i965/brw_draw.c | 53 +++------- src/mesa/drivers/dri/i965/brw_draw.h | 1 + src/mesa/drivers/dri/r300/r300_draw.c | 7 ++ src/mesa/state_tracker/st_cb_rasterpos.c | 2 +- src/mesa/state_tracker/st_draw.c | 5 + src/mesa/state_tracker/st_draw.h | 2 + src/mesa/state_tracker/st_draw_feedback.c | 4 + src/mesa/tnl/t_context.c | 2 +- src/mesa/tnl/t_draw.c | 18 +++- src/mesa/tnl/tnl.h | 10 ++ src/mesa/vbo/vbo.h | 6 +- src/mesa/vbo/vbo_exec_array.c | 158 ++++++++++++++++-------------- src/mesa/vbo/vbo_exec_draw.c | 1 + src/mesa/vbo/vbo_rebase.c | 1 + src/mesa/vbo/vbo_save_draw.c | 1 + src/mesa/vbo/vbo_split_copy.c | 1 + src/mesa/vbo/vbo_split_inplace.c | 1 + 17 files changed, 156 insertions(+), 117 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_draw.c') diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 5152c3f3a5..8c94c904c1 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -422,54 +422,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, return retval; } -static GLboolean brw_need_rebase( GLcontext *ctx, - const struct gl_client_array *arrays[], - const struct _mesa_index_buffer *ib, - GLuint min_index ) -{ - if (min_index == 0) - return GL_FALSE; - - if (ib) { - if (!vbo_all_varyings_in_vbos(arrays)) - return GL_TRUE; - else - return GL_FALSE; - } - else { - /* Hmm. This isn't quite what I wanted. BRW can actually - * handle the mixed case well enough that we shouldn't need to - * rebase. 
However, it's probably not very common, nor hugely - * expensive to do it this way: - */ - if (!vbo_all_varyings_in_vbos(arrays)) - return GL_TRUE; - else - return GL_FALSE; - } -} - - void brw_draw_prims( GLcontext *ctx, const struct gl_client_array *arrays[], const struct _mesa_prim *prim, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index ) { GLboolean retval; - /* Decide if we want to rebase. If so we end up recursing once - * only into this function. - */ - if (brw_need_rebase( ctx, arrays, ib, min_index )) { - vbo_rebase_prims( ctx, arrays, - prim, nr_prims, - ib, min_index, max_index, - brw_draw_prims ); - - return; + if (!vbo_all_varyings_in_vbos(arrays)) { + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + + /* Decide if we want to rebase. If so we end up recursing once + * only into this function. + */ + if (min_index != 0) { + vbo_rebase_prims(ctx, arrays, + prim, nr_prims, + ib, min_index, max_index, + brw_draw_prims ); + return; + } } /* Make a first attempt at drawing: diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h index 9aebbdb1b8..2a14db217f 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.h +++ b/src/mesa/drivers/dri/i965/brw_draw.h @@ -39,6 +39,7 @@ void brw_draw_prims( GLcontext *ctx, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index ); diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index fcfd309933..aedc6cfb2a 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -462,6 +462,7 @@ static void r300DrawPrims(GLcontext *ctx, const struct _mesa_prim *prim, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index) { @@ -476,6 +477,12 @@ static void 
r300DrawPrims(GLcontext *ctx, limits.max_indices = 65535; limits.max_vb_size = 1024*1024; + /* This check should get folded into just the places that + * min/max index are really needed. + */ + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + if (min_index) { vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims ); return; diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c index 3bcccd0df4..d82b2a2035 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.c +++ b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -251,7 +251,7 @@ st_RasterPos(GLcontext *ctx, const GLfloat v[4]) rs->array[0].Ptr = (GLubyte *) v; /* draw the point */ - st_feedback_draw_vbo(ctx, rs->arrays, &rs->prim, 1, NULL, 0, 1); + st_feedback_draw_vbo(ctx, rs->arrays, &rs->prim, 1, NULL, GL_TRUE, 0, 1); } diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 914a507bef..503a5f34a3 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -533,6 +533,7 @@ st_draw_vbo(GLcontext *ctx, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index) { @@ -545,6 +546,10 @@ st_draw_vbo(GLcontext *ctx, unsigned num_vbuffers, num_velements; GLboolean userSpace; + /* Gallium probably doesn't want this in some cases. 
*/ + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index); + /* sanity check for pointer arithmetic below */ assert(sizeof(arrays[0]->Ptr[0]) == 1); diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h index dcfe7e1536..3e0face656 100644 --- a/src/mesa/state_tracker/st_draw.h +++ b/src/mesa/state_tracker/st_draw.h @@ -47,6 +47,7 @@ st_draw_vbo(GLcontext *ctx, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index); @@ -56,6 +57,7 @@ st_feedback_draw_vbo(GLcontext *ctx, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index); diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index 2712c131c0..b2d682ef64 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -96,6 +96,7 @@ st_feedback_draw_vbo(GLcontext *ctx, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index) { @@ -114,6 +115,9 @@ st_feedback_draw_vbo(GLcontext *ctx, st_validate_state(ctx->st); + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index); + /* must get these after state validation! 
*/ vp = ctx->st->vp; vs = &st->vp->state; diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c index f69b122046..f2771cde09 100644 --- a/src/mesa/tnl/t_context.c +++ b/src/mesa/tnl/t_context.c @@ -81,7 +81,7 @@ _tnl_CreateContext( GLcontext *ctx ) tnl->nr_blocks = 0; /* plug in the VBO drawing function */ - vbo_set_draw_func(ctx, _tnl_draw_prims); + vbo_set_draw_func(ctx, _tnl_vbo_draw_prims); _math_init_transformation(); _math_init_translate(); diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c index 2ec65d5323..c64c2c2077 100644 --- a/src/mesa/tnl/t_draw.c +++ b/src/mesa/tnl/t_draw.c @@ -360,6 +360,20 @@ static void unmap_vbos( GLcontext *ctx, } +void _tnl_vbo_draw_prims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); +} /* This is the main entrypoint into the slimmed-down software tnl * module. 
In a regular swtnl driver, this can be plugged straight @@ -393,7 +407,7 @@ void _tnl_draw_prims( GLcontext *ctx, */ vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, - _tnl_draw_prims ); + _tnl_vbo_draw_prims ); return; } else if (max_index > max) { @@ -411,7 +425,7 @@ void _tnl_draw_prims( GLcontext *ctx, */ vbo_split_prims( ctx, arrays, prim, nr_prims, ib, 0, max_index, - _tnl_draw_prims, + _tnl_vbo_draw_prims, &limits ); } else { diff --git a/src/mesa/tnl/tnl.h b/src/mesa/tnl/tnl.h index 4d628aa9a6..9c66d3b019 100644 --- a/src/mesa/tnl/tnl.h +++ b/src/mesa/tnl/tnl.h @@ -81,6 +81,16 @@ _tnl_draw_prims( GLcontext *ctx, GLuint min_index, GLuint max_index); +void +_tnl_vbo_draw_prims( GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index); + extern void _mesa_load_tracked_matrices(GLcontext *ctx); diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index 5362226c2f..5986e93576 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -69,6 +69,7 @@ typedef void (*vbo_draw_func)( GLcontext *ctx, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index ); @@ -112,7 +113,10 @@ void vbo_rebase_prims( GLcontext *ctx, GLuint min_index, GLuint max_index, vbo_draw_func draw ); - +void +vbo_get_minmax_index(GLcontext *ctx, const struct _mesa_prim *prim, + const struct _mesa_index_buffer *ib, + GLuint *min_index, GLuint *max_index); void vbo_use_buffer_objects(GLcontext *ctx); diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index f4b9b2f744..4fb4845f6b 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -41,15 +41,29 @@ /** * Compute min and max elements for glDraw[Range]Elements() calls. 
*/ -static void -get_minmax_index(GLuint count, GLuint type, const GLvoid *indices, - GLuint *min_index, GLuint *max_index) +void +vbo_get_minmax_index(GLcontext *ctx, + const struct _mesa_prim *prim, + const struct _mesa_index_buffer *ib, + GLuint *min_index, GLuint *max_index) { GLuint i; + GLsizei count = prim->count; + const void *indices; + + if (ib->obj->Name) { + const GLvoid *map = ctx->Driver.MapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + GL_READ_ONLY, + ib->obj); + indices = ADD_POINTERS(map, ib->ptr); + } else { + indices = ib->ptr; + } - switch(type) { + switch (ib->type) { case GL_UNSIGNED_INT: { - const GLuint *ui_indices = (const GLuint *)indices; + const GLuint *ui_indices = (const GLuint *)ib->ptr; GLuint max_ui = ui_indices[count-1]; GLuint min_ui = ui_indices[0]; for (i = 0; i < count; i++) { @@ -88,6 +102,12 @@ get_minmax_index(GLuint count, GLuint type, const GLvoid *indices, assert(0); break; } + + if (ib->obj->Name != 0) { + ctx->Driver.UnmapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + ib->obj); + } } @@ -500,7 +520,7 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) prim[0].indexed = 0; vbo->draw_prims( ctx, exec->array.inputs, prim, 1, NULL, - start, start + count - 1 ); + GL_TRUE, start, start + count - 1 ); #if 0 print_draw_arrays(ctx, exec, mode, start, count); @@ -566,53 +586,19 @@ dump_element_buffer(GLcontext *ctx, GLenum type) ctx->Array.ElementArrayBufferObj); } - -static void GLAPIENTRY -vbo_exec_DrawRangeElements(GLenum mode, - GLuint start, GLuint end, - GLsizei count, GLenum type, const GLvoid *indices) +/* Inner support for both _mesa_DrawElements and _mesa_DrawRangeElements */ +static void +vbo_validated_drawrangeelements(GLcontext *ctx, GLenum mode, + GLboolean index_bounds_valid, + GLuint start, GLuint end, + GLsizei count, GLenum type, + const GLvoid *indices) { - GET_CURRENT_CONTEXT(ctx); struct vbo_context *vbo = vbo_context(ctx); struct vbo_exec_context *exec = &vbo->exec; struct _mesa_index_buffer ib; 
struct _mesa_prim prim[1]; - if (!_mesa_validate_DrawRangeElements( ctx, mode, start, end, count, - type, indices )) - return; - - if (end >= ctx->Array.ArrayObj->_MaxElement) { - /* the max element is out of bounds of one or more enabled arrays */ - _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, count %d, " - "type 0x%x, indices=%p)\n" - "\tindex=%u is out of bounds (max=%u) " - "Element Buffer %u (size %d)", - start, end, count, type, indices, end, - ctx->Array.ArrayObj->_MaxElement - 1, - ctx->Array.ElementArrayBufferObj->Name, - ctx->Array.ElementArrayBufferObj->Size); - - if (0) - dump_element_buffer(ctx, type); - - if (0) - _mesa_print_arrays(ctx); - return; - } - else if (0) { - _mesa_printf("glDraw[Range]Elements" - "(start %u, end %u, type 0x%x, count %d) ElemBuf %u\n", - start, end, type, count, - ctx->Array.ElementArrayBufferObj->Name); - } - -#if 0 - check_draw_elements_data(ctx, count, type, indices); -#else - (void) check_draw_elements_data; -#endif - FLUSH_CURRENT( ctx, 0 ); if (ctx->NewState) @@ -623,13 +609,13 @@ vbo_exec_DrawRangeElements(GLenum mode, return; } - bind_arrays( ctx ); - if (ctx->NewState) _mesa_update_state( ctx ); + bind_arrays( ctx ); + ib.count = count; - ib.type = type; + ib.type = type; ib.obj = ctx->Array.ElementArrayBufferObj; ib.ptr = indices; @@ -673,44 +659,68 @@ vbo_exec_DrawRangeElements(GLenum mode, * for the latter case elsewhere. 
*/ - vbo->draw_prims( ctx, exec->array.inputs, prim, 1, &ib, start, end ); + vbo->draw_prims( ctx, exec->array.inputs, prim, 1, &ib, + index_bounds_valid, start, end ); } - static void GLAPIENTRY -vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, - const GLvoid *indices) +vbo_exec_DrawRangeElements(GLenum mode, + GLuint start, GLuint end, + GLsizei count, GLenum type, const GLvoid *indices) { GET_CURRENT_CONTEXT(ctx); - GLuint min_index = 0; - GLuint max_index = 0; - if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices )) + if (!_mesa_validate_DrawRangeElements( ctx, mode, start, end, count, + type, indices )) return; - if (!vbo_validate_shaders(ctx)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawElements(bad shader)"); + if (end >= ctx->Array.ArrayObj->_MaxElement) { + /* the max element is out of bounds of one or more enabled arrays */ + _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, count %d, " + "type 0x%x, indices=%p)\n" + "\tindex=%u is out of bounds (max=%u) " + "Element Buffer %u (size %d)", + start, end, count, type, indices, end, + ctx->Array.ArrayObj->_MaxElement - 1, + ctx->Array.ElementArrayBufferObj->Name, + ctx->Array.ElementArrayBufferObj->Size); + + if (0) + dump_element_buffer(ctx, type); + + if (0) + _mesa_print_arrays(ctx); return; } + else if (0) { + _mesa_printf("glDraw[Range]Elements" + "(start %u, end %u, type 0x%x, count %d) ElemBuf %u\n", + start, end, type, count, + ctx->Array.ElementArrayBufferObj->Name); + } - if (ctx->Array.ElementArrayBufferObj->Name) { - const GLvoid *map = ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_READ_ONLY, - ctx->Array.ElementArrayBufferObj); +#if 0 + check_draw_elements_data(ctx, count, type, indices); +#else + (void) check_draw_elements_data; +#endif + + vbo_validated_drawrangeelements(ctx, mode, GL_TRUE, start, end, + count, type, indices); +} - get_minmax_index(count, type, ADD_POINTERS(map, indices), - &min_index, &max_index); - 
ctx->Driver.UnmapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - ctx->Array.ElementArrayBufferObj); - } - else { - get_minmax_index(count, type, indices, &min_index, &max_index); - } +static void GLAPIENTRY +vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices) +{ + GET_CURRENT_CONTEXT(ctx); + + if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices )) + return; - vbo_exec_DrawRangeElements(mode, min_index, max_index, count, type, indices); + vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0, + count, type, indices); } diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 18419928b2..a988424d21 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -378,6 +378,7 @@ vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap ) exec->vtx.prim, exec->vtx.prim_count, NULL, + GL_TRUE, 0, exec->vtx.vert_count - 1); diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c index ea87dede64..3bf7ef580f 100644 --- a/src/mesa/vbo/vbo_rebase.c +++ b/src/mesa/vbo/vbo_rebase.c @@ -208,6 +208,7 @@ void vbo_rebase_prims( GLcontext *ctx, prim, nr_prims, ib, + GL_TRUE, 0, max_index - min_index ); diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c index 5110648c28..d834fa1f2e 100644 --- a/src/mesa/vbo/vbo_save_draw.c +++ b/src/mesa/vbo/vbo_save_draw.c @@ -279,6 +279,7 @@ void vbo_save_playback_vertex_list( GLcontext *ctx, void *data ) node->prim, node->prim_count, NULL, + GL_TRUE, 0, /* Node is a VBO, so this is ok */ node->count - 1); } diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c index d7ffebf607..3f8a222805 100644 --- a/src/mesa/vbo/vbo_split_copy.c +++ b/src/mesa/vbo/vbo_split_copy.c @@ -194,6 +194,7 @@ flush( struct copy_context *copy ) copy->dstprim, copy->dstprim_nr, &copy->dstib, + GL_TRUE, 0, copy->dstbuf_nr ); diff --git a/src/mesa/vbo/vbo_split_inplace.c b/src/mesa/vbo/vbo_split_inplace.c index
9628227e7c..da84eaa6ea 100644 --- a/src/mesa/vbo/vbo_split_inplace.c +++ b/src/mesa/vbo/vbo_split_inplace.c @@ -85,6 +85,7 @@ static void flush_vertex( struct split_context *split ) split->dstprim, split->dstprim_nr, NULL, + GL_TRUE, min_index, max_index); -- cgit v1.2.3 From 5fb5ea97f4439184f03075f57fa1fda56caf51b4 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 11 Jul 2009 20:40:51 +0200 Subject: r300: use VBOs for vertex attributes --- src/mesa/drivers/dri/r300/r300_context.c | 2 + src/mesa/drivers/dri/r300/r300_context.h | 5 +- src/mesa/drivers/dri/r300/r300_draw.c | 264 +++++++++++++++++++++---------- 3 files changed, 187 insertions(+), 84 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_draw.c') diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index db404b3847..799e6134cd 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -67,6 +67,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r300_render.h" #include "r300_swtcl.h" #include "radeon_bocs_wrapper.h" +#include "radeon_buffer_objects.h" #include "vblank.h" @@ -398,6 +399,7 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitStateFuncs(&functions); r300InitTextureFuncs(&functions); r300InitShaderFuncs(&functions); + radeonInitBufferObjectFuncs(&functions); if (!radeonInitContext(&r300->radeon, &functions, glVisual, driContextPriv, diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 24dc6bc6a3..09de898748 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -478,11 +478,12 @@ struct r300_vertex_buffer { struct vertex_attribute { /* generic */ GLubyte element; - GLvoid *data; - GLboolean free_needed; GLuint stride; GLuint dwords; GLubyte size; /* number of components */ + GLboolean is_named_bo; + struct radeon_bo *bo; + GLint bo_offset; /* hw specific */ uint32_t data_type:4; diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index fcfd309933..99c73d27a2 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -39,17 +39,20 @@ #include "r300_state.h" #include "r300_tex.h" +#include "radeon_buffer_objects.h" + #include "tnl/tnl.h" #include "tnl/t_vp_build.h" #include "vbo/vbo_context.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" -static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf, struct gl_buffer_object **bo, GLuint *nr_bo) +static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_index_buffer *ind_buf = &r300->ind_buf; GLvoid *src_ptr; + GLboolean mapped_bo = GL_FALSE; if (!mesa_ind_buf) { ind_buf->ptr = NULL; @@ -58,9 +61,8 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer ind_buf->count = 
mesa_ind_buf->count; if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { - bo[*nr_bo] = mesa_ind_buf->obj; - (*nr_bo)++; ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + mapped_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); @@ -110,6 +112,10 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer ind_buf->free_needed = GL_FALSE; ind_buf->is_32bit = GL_TRUE; } + + if (mapped_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } } static int getTypeSize(GLenum type) @@ -161,26 +167,118 @@ static int getTypeSize(GLenum type) } \ } while (0) -static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input, struct gl_buffer_object **bo, GLuint *nr_bo) +/** + * Convert attribute data type to float + * If the attribute uses named buffer object replace the bo with newly allocated bo + */ +static void r300ConvertAttrib(GLcontext *ctx, int count, const struct gl_client_array *input, struct vertex_attribute *attr) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_vertex_buffer *vbuf = &r300->vbuf; - struct vertex_attribute r300_attr; - const void *src_ptr; - GLenum type; + const GLvoid *src_ptr; + GLboolean mapped_named_bo = GL_FALSE; + GLfloat *dst_ptr; GLuint stride; + stride = (input->StrideB == 0) ? 
getTypeSize(input->Type) * input->Size : input->StrideB; + + /* Convert value for first element only */ + if (input->StrideB == 0) + count = 1; + if (input->BufferObj->Name) { if (!input->BufferObj->Pointer) { - bo[*nr_bo] = input->BufferObj; - (*nr_bo)++; ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); - assert(input->BufferObj->Pointer != NULL); + mapped_named_bo = GL_TRUE; } src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); - } else + } else { src_ptr = input->Ptr; + } + + radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, sizeof(GLfloat) * input->Size * count, 32); + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + + if (RADEON_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "%s: Converting vertex attributes, attribute data format %x,", __FUNCTION__, input->Type); + fprintf(stderr, "stride %d, components %d\n", stride, input->Size); + } + + assert(src_ptr != NULL); + + switch (input->Type) { + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + assert(input->Format != GL_BGRA); + CONVERT(GLubyte, UBYTE_TO_FLOAT); + break; + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + default: + assert(0); + break; + } + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } +} + +static void r300AlignDataToDword(GLcontext *ctx, const struct gl_client_array *input, int count, struct vertex_attribute *attr) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + const int dst_stride = (input->StrideB + 3) & ~3; + const int size = getTypeSize(input->Type) * input->Size * count; + GLboolean mapped_named_bo = GL_FALSE; + + radeonAllocDmaRegion(&r300->radeon, &attr->bo, 
&attr->bo_offset, size, 32); + + if (!input->BufferObj->Pointer) { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + { + GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + int i; + + for (i = 0; i < count; ++i) { + _mesa_memcpy(dst_ptr, src_ptr, input->StrideB); + src_ptr += input->StrideB; + dst_ptr += dst_stride; + } + } + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } + + attr->stride = dst_stride; +} + +static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + struct vertex_attribute r300_attr; + GLenum type; + GLuint stride; stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; @@ -189,62 +287,57 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st getTypeSize(input->Type) != 4 || #endif stride < 4) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) { - fprintf(stderr, "%s: Converting vertex attributes, attribute data format %x,", __FUNCTION__, input->Type); - fprintf(stderr, "stride %d, components %d\n", stride, input->Size); - } - - GLfloat *dst_ptr, *tmp; - - /* Convert value for first element only */ - if (input->StrideB == 0) - count = 1; - - tmp = dst_ptr = _mesa_malloc(sizeof(GLfloat) * input->Size * count); - - switch (input->Type) { - case GL_DOUBLE: - CONVERT(GLdouble, (GLfloat)); - break; - case GL_UNSIGNED_INT: - CONVERT(GLuint, UINT_TO_FLOAT); - break; - case GL_INT: - CONVERT(GLint, INT_TO_FLOAT); - break; - case GL_UNSIGNED_SHORT: - CONVERT(GLushort, USHORT_TO_FLOAT); - break; - case GL_SHORT: - CONVERT(GLshort, SHORT_TO_FLOAT); - break; - case GL_UNSIGNED_BYTE: - assert(input->Format != GL_BGRA); - CONVERT(GLubyte, UBYTE_TO_FLOAT); - break; 
- case GL_BYTE: - CONVERT(GLbyte, BYTE_TO_FLOAT); - break; - default: - assert(0); - break; - } type = GL_FLOAT; - r300_attr.free_needed = GL_TRUE; - r300_attr.data = tmp; + + r300ConvertAttrib(ctx, count, input, &r300_attr); if (input->StrideB == 0) { r300_attr.stride = 0; } else { r300_attr.stride = sizeof(GLfloat) * input->Size; } r300_attr.dwords = input->Size; + r300_attr.is_named_bo = GL_FALSE; } else { type = input->Type; - r300_attr.free_needed = GL_FALSE; - r300_attr.data = (GLvoid *)src_ptr; - r300_attr.stride = input->StrideB; - r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4; + r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4; + if (input->BufferObj->Name) { + if (stride % 4 != 0) { + assert(((int) input->Ptr) % input->StrideB == 0); + r300AlignDataToDword(ctx, input, count, &r300_attr); + r300_attr.is_named_bo = GL_FALSE; + } else { + r300_attr.stride = input->StrideB; + r300_attr.bo_offset = (GLuint) input->Ptr; + r300_attr.bo = get_radeon_buffer_object(input->BufferObj)->bo; + r300_attr.is_named_bo = GL_TRUE; + } + } else { + int size; + uint32_t *dst; + + if (input->StrideB == 0) { + size = getTypeSize(input->Type) * input->Size; + count = 1; + r300_attr.stride = 0; + } else { + size = getTypeSize(input->Type) * input->Size * count; + r300_attr.stride = (getTypeSize(type) * input->Size + 3) & ~3; + } + + radeonAllocDmaRegion(&r300->radeon, &r300_attr.bo, &r300_attr.bo_offset, size, 32); + assert(r300_attr.bo->ptr != NULL); + dst = (uint32_t *)ADD_POINTERS(r300_attr.bo->ptr, r300_attr.bo_offset); + switch (r300_attr.dwords) { + case 1: radeonEmitVec4(dst, input->Ptr, input->StrideB, count); break; + case 2: radeonEmitVec8(dst, input->Ptr, input->StrideB, count); break; + case 3: radeonEmitVec12(dst, input->Ptr, input->StrideB, count); break; + case 4: radeonEmitVec16(dst, input->Ptr, input->StrideB, count); break; + default: assert(0); break; + } + + r300_attr.is_named_bo = GL_FALSE; + } } r300_attr.size = input->Size; @@ 
-333,7 +426,7 @@ static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const st ++vbuf->num_attribs; } -static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count, struct gl_buffer_object **bo, GLuint *nr_bo) +static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count) { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_buffer *vbuf = &r300->vbuf; @@ -351,7 +444,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar ++i; } - r300TranslateAttrib(ctx, i, count, arrays[i], bo, nr_bo); + r300TranslateAttrib(ctx, i, count, arrays[i]); tmp >>= 1; ++i; @@ -366,39 +459,49 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar int i; for (i = 0; i < vbuf->num_attribs; i++) { - rcommon_emit_vector(ctx, &r300->radeon.tcl.aos[i], - vbuf->attribs[i].data, vbuf->attribs[i].dwords, - vbuf->attribs[i].stride, count); + struct radeon_aos *aos = &r300->radeon.tcl.aos[i]; + + aos->count = vbuf->attribs[i].stride == 0 ? 
1 : count; + aos->stride = vbuf->attribs[i].stride / sizeof(float); + aos->offset = vbuf->attribs[i].bo_offset; + aos->components = vbuf->attribs[i].dwords; + aos->bo = vbuf->attribs[i].bo; + + if (vbuf->attribs[i].is_named_bo) { + radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, + aos->bo, + RADEON_GEM_DOMAIN_GTT, 0); + } } r300->radeon.tcl.aos_count = vbuf->num_attribs; } } -static void r300FreeData(GLcontext *ctx, struct gl_buffer_object **bo, GLuint nr_bo) +static void r300FreeData(GLcontext *ctx) { + r300ContextPtr r300 = R300_CONTEXT(ctx); { - struct r300_vertex_buffer *vbuf = &R300_CONTEXT(ctx)->vbuf; int i; - for (i = 0; i < vbuf->num_attribs; i++) { - if (vbuf->attribs[i].free_needed) - _mesa_free(vbuf->attribs[i].data); + for (i = 0; i < r300->vbuf.num_attribs; i++) { + if (!r300->vbuf.attribs[i].is_named_bo) { + radeon_bo_unref(r300->vbuf.attribs[i].bo); + } + r300->radeon.tcl.aos[i].bo = NULL; } } { struct r300_index_buffer *ind_buf = &R300_CONTEXT(ctx)->ind_buf; - if (ind_buf->free_needed) + if (ind_buf->free_needed) { _mesa_free(ind_buf->ptr); - } - - { - int i; + } - for (i = 0; i < nr_bo; ++i) { - ctx->Driver.UnmapBuffer(ctx, 0, bo[i]); + if (r300->radeon.tcl.elt_dma_bo) { + radeon_bo_unref(r300->radeon.tcl.elt_dma_bo); } + r300->radeon.tcl.elt_dma_bo = NULL; } } @@ -411,8 +514,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, GLuint max_index ) { struct r300_context *r300 = R300_CONTEXT(ctx); - struct gl_buffer_object *bo[VERT_ATTRIB_MAX+1]; - GLuint i, nr_bo = 0; + GLuint i; if (ctx->NewState) _mesa_update_state( ctx ); @@ -424,7 +526,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx)); - r300FixupIndexBuffer(ctx, ib, bo, &nr_bo); + r300FixupIndexBuffer(ctx, ib); /* ensure we have the cmd buf space in advance to cover * the state + DMA AOS pointers */ @@ -432,7 +534,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300->radeon.hw.max_state_size + 
(50*sizeof(int)), __FUNCTION__); - r300SetVertexFormat(ctx, arrays, max_index + 1, bo, &nr_bo); + r300SetVertexFormat(ctx, arrays, max_index + 1); if (r300->fallback) return GL_FALSE; @@ -450,9 +552,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300EmitCacheFlush(r300); - radeonReleaseArrays(ctx, ~0); - - r300FreeData(ctx, bo, nr_bo); + r300FreeData(ctx); return GL_TRUE; } -- cgit v1.2.3 From 9e018d822523e559fa8d92c3b5a83dd5554a0676 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Fri, 14 Aug 2009 16:59:26 +0200 Subject: r300: rework index buffer setup Copy elements directly to DMA bo to get rid of one memcpy, and prepare for using VBOs for index buffer. --- src/mesa/drivers/dri/r300/r300_context.h | 5 +- src/mesa/drivers/dri/r300/r300_draw.c | 155 ++++++++++++++++++------------- src/mesa/drivers/dri/r300/r300_render.c | 85 +++++++---------- 3 files changed, 126 insertions(+), 119 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_draw.c') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 09de898748..d620417422 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -498,9 +498,10 @@ struct r300_vertex_buffer { }; struct r300_index_buffer { - GLvoid *ptr; + struct radeon_bo *bo; + int bo_offset; + GLboolean is_32bit; - GLboolean free_needed; GLuint count; }; diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 99c73d27a2..1d6e6db773 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -47,32 +47,53 @@ #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" + +static int getTypeSize(GLenum type) +{ + switch (type) { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case 
GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_index_buffer *ind_buf = &r300->ind_buf; GLvoid *src_ptr; - GLboolean mapped_bo = GL_FALSE; + GLuint *out; + int i; - if (!mesa_ind_buf) { - ind_buf->ptr = NULL; - return; - } - - ind_buf->count = mesa_ind_buf->count; if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); - mapped_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) { + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); GLubyte *in = (GLubyte *)src_ptr; - GLuint *out = _mesa_malloc(sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1)); - int i; - ind_buf->ptr = out; + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); + + assert(r300->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { *out++ = in[i] | in[i + 1] << 16; @@ -82,16 +103,15 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *out++ = in[i]; } - ind_buf->free_needed = GL_TRUE; - ind_buf->is_32bit = GL_FALSE; - } else if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) { #if MESA_BIG_ENDIAN + } else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ GLushort *in = (GLushort *)src_ptr; - GLuint *out = _mesa_malloc(sizeof(GLushort) * - ((mesa_ind_buf->count + 1) & ~1)); - int i; + size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, 
&r300->ind_buf.bo_offet, size, 4); - ind_buf->ptr = out; + assert(r300->ind_buf.bo->ptr != NULL) + out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { *out++ = in[i] | in[i + 1] << 16; @@ -100,46 +120,52 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer if (i < mesa_ind_buf->count) { *out++ = in[i]; } - - ind_buf->free_needed = GL_TRUE; -#else - ind_buf->ptr = src_ptr; - ind_buf->free_needed = GL_FALSE; #endif - ind_buf->is_32bit = GL_FALSE; - } else { - ind_buf->ptr = src_ptr; - ind_buf->free_needed = GL_FALSE; - ind_buf->is_32bit = GL_TRUE; } - if (mapped_bo) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); - } + r300->ind_buf.is_32bit = GL_FALSE; + r300->ind_buf.count = mesa_ind_buf->count; } -static int getTypeSize(GLenum type) + +static void r300SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { - switch (type) { - case GL_DOUBLE: - return sizeof(GLdouble); - case GL_FLOAT: - return sizeof(GLfloat); - case GL_INT: - return sizeof(GLint); - case GL_UNSIGNED_INT: - return sizeof(GLuint); - case GL_SHORT: - return sizeof(GLshort); - case GL_UNSIGNED_SHORT: - return sizeof(GLushort); - case GL_BYTE: - return sizeof(GLbyte); - case GL_UNSIGNED_BYTE: - return sizeof(GLubyte); - default: - assert(0); - return 0; + r300ContextPtr r300 = R300_CONTEXT(ctx); + GLboolean mapped_named_bo = GL_FALSE; + + if (!mesa_ind_buf) { + r300->ind_buf.bo = NULL; + return; + } + +#if MESA_BIG_ENDIAN + if (mesa_ind_buf->type == GL_UNSIGNED_INT) { +#else + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) { +#endif + const GLvoid *src_ptr; + GLvoid *dst_ptr; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + mapped_named_bo = GL_TRUE; + } + + src_ptr = 
ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); + + radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4); + + assert(r300->ind_buf.bo->ptr != NULL); + dst_ptr = ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset); + _mesa_memcpy(dst_ptr, src_ptr, size); + + r300->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); + r300->ind_buf.count = mesa_ind_buf->count; + } else { + r300FixupIndexBuffer(ctx, mesa_ind_buf); } } @@ -473,13 +499,22 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar RADEON_GEM_DOMAIN_GTT, 0); } } - r300->radeon.tcl.aos_count = vbuf->num_attribs; + + if (r300->ind_buf.bo) { + radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, + r300->ind_buf.bo, + RADEON_GEM_DOMAIN_GTT, 0); + } } } static void r300FreeData(GLcontext *ctx) { + /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo + * to prevent double unref in radeonReleaseArrays + * called during context destroy + */ r300ContextPtr r300 = R300_CONTEXT(ctx); { int i; @@ -493,15 +528,9 @@ static void r300FreeData(GLcontext *ctx) } { - struct r300_index_buffer *ind_buf = &R300_CONTEXT(ctx)->ind_buf; - if (ind_buf->free_needed) { - _mesa_free(ind_buf->ptr); - } - - if (r300->radeon.tcl.elt_dma_bo) { - radeon_bo_unref(r300->radeon.tcl.elt_dma_bo); + if (r300->ind_buf.bo != NULL) { + radeon_bo_unref(r300->ind_buf.bo); } - r300->radeon.tcl.elt_dma_bo = NULL; } } @@ -526,7 +555,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx)); - r300FixupIndexBuffer(ctx, ib); + r300SetupIndexBuffer(ctx, ib); /* ensure we have the cmd buf space in advance to cover * the state + DMA AOS pointers */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 22b0d316cf..196cb47fef 100644 --- 
a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -172,64 +172,42 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) return num_verts - verts_off; } -static void r300EmitElts(GLcontext * ctx, unsigned long n_elts) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - void *out; - GLuint size; - - size = ((rmesa->ind_buf.is_32bit ? 4 : 2) * n_elts + 3) & ~3; - - radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo, - &rmesa->radeon.tcl.elt_dma_offset, size, 4); - radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1); - out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset; - memcpy(out, rmesa->ind_buf.ptr, size); - radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo); -} - static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) { BATCH_LOCALS(&rmesa->radeon); + int size; - r300_emit_scissor(rmesa->radeon.glCtx); - if (vertex_count > 0) { - int size; - - BEGIN_BATCH(10); - OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); - if (rmesa->ind_buf.is_32bit) { - size = vertex_count; - OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - ((vertex_count + 0) << 16) | type | - R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - } else { - size = (vertex_count + 1) >> 1; - OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - ((vertex_count + 0) << 16) | type); - } + r300_emit_scissor(rmesa->radeon.glCtx); - if (!rmesa->radeon.radeonScreen->kernel_mm) { - OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); - OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | - (R300_VAP_PORT_IDX0 >> 2)); - OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset, - rmesa->radeon.tcl.elt_dma_bo, - rmesa->radeon.tcl.elt_dma_offset, - RADEON_GEM_DOMAIN_GTT, 0, 0); - OUT_BATCH(size); - } else { - OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); - OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | - (R300_VAP_PORT_IDX0 >> 2)); - OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset); - 
OUT_BATCH(size); - radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, - rmesa->radeon.tcl.elt_dma_bo, - RADEON_GEM_DOMAIN_GTT, 0, 0); - } - END_BATCH(); + BEGIN_BATCH(10); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); + if (rmesa->ind_buf.is_32bit) { + size = vertex_count; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (vertex_count << 16) | type | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + } else { + size = (vertex_count + 1) >> 1; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + (vertex_count << 16) | type); + } + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH(size); + } else { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH(rmesa->ind_buf.bo_offset); + OUT_BATCH(size); + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0); } + END_BATCH(); } static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) @@ -365,8 +343,7 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) */ rcommonEnsureCmdBufSpace(&rmesa->radeon, 128, __FUNCTION__); - if (rmesa->ind_buf.ptr) { - r300EmitElts(ctx, num_verts); + if (rmesa->ind_buf.bo) { r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0); if (rmesa->radeon.radeonScreen->kernel_mm) { BEGIN_BATCH_NO_AUTOSTATE(2); -- cgit v1.2.3 From cdaf63d0eac3787c2e153c91925ced5237ed7941 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Fri, 14 Aug 2009 17:04:08 +0200 Subject: r300: remove broken vertex splitting Revert to previous behaviour of dropping to big render operations. 
--- src/mesa/drivers/dri/r300/r300_draw.c | 13 ------------- src/mesa/drivers/dri/r300/r300_render.c | 5 +++++ 2 files changed, 5 insertions(+), 13 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_draw.c') diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 1d6e6db773..e261d94eb0 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -594,25 +594,12 @@ static void r300DrawPrims(GLcontext *ctx, GLuint min_index, GLuint max_index) { - struct split_limits limits; GLboolean retval; - if (ib) - limits.max_verts = 0xffffffff; - else - limits.max_verts = 65535; - - limits.max_indices = 65535; - limits.max_vb_size = 1024*1024; - if (min_index) { vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims ); return; } - if ((ib && ib->count > 65535)) { - vbo_split_prims (ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims, &limits); - return; - } /* Make an attempt at drawing */ retval = r300TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 196cb47fef..26953cd9d1 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -337,6 +337,11 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) if (type < 0 || num_verts <= 0) return; + if (num_verts > 65535) { + WARN_ONCE("Can't handle more then 65535 vertices at once\n"); + return; + } + /* Make space for at least 128 dwords. * This is supposed to ensure that we can get all rendering * commands into a single command buffer. 
-- cgit v1.2.3 From cd703049db2adaeecc6149dfa224cc17d4613142 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Fri, 14 Aug 2009 22:48:03 +0200 Subject: r300: unmap buffer objects after usage --- src/mesa/drivers/dri/r300/r300_draw.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r300/r300_draw.c') diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index e261d94eb0..37445af1ad 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -79,9 +79,11 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer GLvoid *src_ptr; GLuint *out; int i; + GLboolean mapped_named_bo = GL_FALSE; if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + mapped_named_bo = GL_TRUE; assert(mesa_ind_buf->obj->Pointer != NULL); } src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); @@ -125,13 +127,16 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer r300->ind_buf.is_32bit = GL_FALSE; r300->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } } static void r300SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) { r300ContextPtr r300 = R300_CONTEXT(ctx); - GLboolean mapped_named_bo = GL_FALSE; if (!mesa_ind_buf) { r300->ind_buf.bo = NULL; @@ -145,6 +150,7 @@ static void r300SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer #endif const GLvoid *src_ptr; GLvoid *dst_ptr; + GLboolean mapped_named_bo = GL_FALSE; if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); @@ -164,6 +170,10 @@ static void r300SetupIndexBuffer(GLcontext *ctx, const struct 
_mesa_index_buffer r300->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); r300->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } } else { r300FixupIndexBuffer(ctx, mesa_ind_buf); } -- cgit v1.2.3 From 7fe0dd2e6e927e4ec3e532e08aa0551ebaec4cc1 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Fri, 14 Aug 2009 22:32:57 +0200 Subject: r300: mark VBO buffer objects as persistent --- src/mesa/drivers/dri/r300/r300_draw.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_draw.c') diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index 37445af1ad..cebb9a10d8 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -503,10 +503,13 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar aos->components = vbuf->attribs[i].dwords; aos->bo = vbuf->attribs[i].bo; + radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, + r300->vbuf.attribs[i].bo, + RADEON_GEM_DOMAIN_GTT, 0); if (vbuf->attribs[i].is_named_bo) { - radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, - aos->bo, - RADEON_GEM_DOMAIN_GTT, 0); + radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, + r300->vbuf.attribs[i].bo, + RADEON_GEM_DOMAIN_GTT, 0); } } r300->radeon.tcl.aos_count = vbuf->num_attribs; -- cgit v1.2.3 From 5e4e8effecb1914b31b869e2aa91f2299e57229d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 15 Aug 2009 20:19:09 +1000 Subject: radeon: enable vertex splitting for IBs Based on Maciej's code, just fixed up the alignments for INDX_BUFFER ut2004 runs AS-Convoy --- src/mesa/drivers/dri/r300/r300_draw.c | 2 +- src/mesa/drivers/dri/r300/r300_render.c | 58 ++++++++++++++++++++++++++------- 2 files changed, 47 insertions(+), 13 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_draw.c') diff --git 
a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index ab2287a5e2..cb0e62ae49 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -573,7 +573,7 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, /* ensure we have the cmd buf space in advance to cover * the state + DMA AOS pointers */ rcommonEnsureCmdBufSpace(&r300->radeon, - r300->radeon.hw.max_state_size + (50*sizeof(int)), + r300->radeon.hw.max_state_size + (60*sizeof(int)), __FUNCTION__); r300SetVertexFormat(ctx, arrays, max_index + 1); diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 26953cd9d1..8e6b4967ef 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -64,6 +64,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "vbo/vbo.h" +#include "vbo/vbo_split.h" #include "tnl/tnl.h" #include "tnl/t_vp_build.h" #include "radeon_reg.h" @@ -172,21 +173,24 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) return num_verts - verts_off; } -static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) +static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type, int offset) { BATCH_LOCALS(&rmesa->radeon); int size; - r300_emit_scissor(rmesa->radeon.glCtx); - + /* offset is in indices */ BEGIN_BATCH(10); OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); if (rmesa->ind_buf.is_32bit) { + /* convert to bytes */ + offset *= 4; size = vertex_count; OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); } else { + /* convert to bytes */ + offset *= 2; size = (vertex_count + 1) >> 1; OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type); @@ -196,13 +200,13 @@ static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) 
OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | (R300_VAP_PORT_IDX0 >> 2)); - OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset + offset, RADEON_GEM_DOMAIN_GTT, 0, 0); OUT_BATCH(size); } else { OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | (R300_VAP_PORT_IDX0 >> 2)); - OUT_BATCH(rmesa->ind_buf.bo_offset); + OUT_BATCH(rmesa->ind_buf.bo_offset + offset); OUT_BATCH(size); radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0); @@ -318,7 +322,7 @@ static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) { BATCH_LOCALS(&rmesa->radeon); - r300_emit_scissor(rmesa->radeon.glCtx); + r300_emit_scissor(rmesa->radeon.glCtx); BEGIN_BATCH(3); OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); @@ -337,11 +341,6 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) if (type < 0 || num_verts <= 0) return; - if (num_verts > 65535) { - WARN_ONCE("Can't handle more then 65535 vertices at once\n"); - return; - } - /* Make space for at least 128 dwords. * This is supposed to ensure that we can get all rendering * commands into a single command buffer. 
@@ -349,6 +348,15 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) rcommonEnsureCmdBufSpace(&rmesa->radeon, 128, __FUNCTION__); if (rmesa->ind_buf.bo) { + GLuint first, incr, offset = 0; + + if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) && + num_verts > 65500) { + WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim); + return; + } + + r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0); if (rmesa->radeon.radeonScreen->kernel_mm) { BEGIN_BATCH_NO_AUTOSTATE(2); @@ -356,8 +364,34 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) OUT_BATCH(rmesa->radeon.tcl.aos[0].count); END_BATCH(); } - r300FireEB(rmesa, num_verts, type); + + r300_emit_scissor(rmesa->radeon.glCtx); + while (num_verts > 0) { + int nr; + int align; + + nr = MIN2(num_verts, 65535); + nr -= (nr - first) % incr; + + /* get alignment for IB correct */ + if (nr != num_verts) { + do { + align = nr * (rmesa->ind_buf.is_32bit ? 4 : 2); + if (align % 4) + nr -= incr; + } while(align % 4); + } + r300FireEB(rmesa, nr, type, offset); + + num_verts -= nr; + offset += nr; + } + } else { + if (num_verts > 65535) { + WARN_ONCE("Fixme: can't handle more then 65535 vertices"); + return; + } r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start); r300FireAOS(rmesa, num_verts, type); } -- cgit v1.2.3 From a6cc45e135fbcf2360950c59ddef94e1f5574f2a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 15 Aug 2009 21:18:30 +1000 Subject: r300: fixup space checks since VBO code Hopefully this gets the ordering correct so the space checks don't fail. 
--- src/mesa/drivers/dri/r300/r300_draw.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_draw.c') diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index cb0e62ae49..d6ebdcbfe9 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -466,7 +466,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar { r300ContextPtr r300 = R300_CONTEXT(ctx); struct r300_vertex_buffer *vbuf = &r300->vbuf; - + int ret; { int i, tmp; @@ -503,22 +503,15 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar aos->components = vbuf->attribs[i].dwords; aos->bo = vbuf->attribs[i].bo; - radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, - r300->vbuf.attribs[i].bo, - RADEON_GEM_DOMAIN_GTT, 0); if (vbuf->attribs[i].is_named_bo) { - radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, - r300->vbuf.attribs[i].bo, - RADEON_GEM_DOMAIN_GTT, 0); + radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, r300->vbuf.attribs[i].bo, RADEON_GEM_DOMAIN_GTT, 0); } } + r300->radeon.tcl.aos_count = vbuf->num_attribs; - - if (r300->ind_buf.bo) { - radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, - r300->ind_buf.bo, - RADEON_GEM_DOMAIN_GTT, 0); - } + ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, r300->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); + if (ret) + r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, GL_TRUE); } } @@ -568,13 +561,13 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx)); - r300SetupIndexBuffer(ctx, ib); - /* ensure we have the cmd buf space in advance to cover * the state + DMA AOS pointers */ rcommonEnsureCmdBufSpace(&r300->radeon, r300->radeon.hw.max_state_size + (60*sizeof(int)), - __FUNCTION__); + __FUNCTION__); + + r300SetupIndexBuffer(ctx, ib); 
r300SetVertexFormat(ctx, arrays, max_index + 1); -- cgit v1.2.3 From 0d0f01e2e0b37ed5152614ceeff34da8e46b5e37 Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Thu, 11 Jun 2009 16:13:23 +0200 Subject: r300: add occlusion queries support TODO: - use proper interface for checking if bo is idle when it's available - disable ZTOP only when needed - make it work under KMS --- src/mesa/drivers/dri/r300/Makefile | 1 + src/mesa/drivers/dri/r300/r300_context.c | 8 ++ src/mesa/drivers/dri/r300/r300_context.h | 17 +++ src/mesa/drivers/dri/r300/r300_draw.c | 7 +- src/mesa/drivers/dri/r300/r300_queryobj.c | 229 ++++++++++++++++++++++++++++++ src/mesa/drivers/dri/r300/r300_queryobj.h | 34 +++++ src/mesa/drivers/dri/r300/r300_reg.h | 12 ++ src/mesa/drivers/dri/r300/r300_render.c | 1 + 8 files changed, 308 insertions(+), 1 deletion(-) create mode 100644 src/mesa/drivers/dri/r300/r300_queryobj.c create mode 100644 src/mesa/drivers/dri/r300/r300_queryobj.h (limited to 'src/mesa/drivers/dri/r300/r300_draw.c') diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 2390d1896a..77b3d168f3 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -54,6 +54,7 @@ DRIVER_SOURCES = \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ + r300_queryobj.c \ $(RADEON_COMMON_SOURCES) \ $(EGL_SOURCES) \ $(CS_SOURCES) diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 1baae8fc76..d37a37ca46 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -74,6 +74,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "xmlpool.h" /* for symbolic values of enum-type options */ #define need_GL_VERSION_2_0 +#define need_GL_ARB_occlusion_query #define need_GL_ARB_point_parameters #define need_GL_ARB_vertex_program #define need_GL_EXT_blend_equation_separate @@ -310,6 +311,11 @@ static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) ctx->Const.FragmentProgram.MaxNativeTexIndirections = R300_PFS_MAX_TEX_INDIRECT; ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; } + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) + r300->num_z_pipes = 2; + else + r300->num_z_pipes = r300->radeon.radeonScreen->num_gb_pipes; } static void r300ParseOptions(r300ContextPtr r300, radeonScreenPtr screen) @@ -439,6 +445,8 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, r300InitGLExtensions(ctx); + make_empty_list(&r300->query.not_flushed_head); + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index d620417422..3ba3426608 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -505,6 +505,16 @@ struct r300_index_buffer { GLuint count; }; +struct r300_query_object { + struct gl_query_object Base; + struct radeon_bo *bo; + int curr_offset; + GLboolean emitted_begin; + + /* Double linked list of not flushed query objects */ + struct r300_query_object *prev, *next; +}; + /** * \brief R300 context structure. 
*/ @@ -539,6 +549,13 @@ struct r300_context { uint32_t fallback; DECLARE_RENDERINPUTS(render_inputs_bitset); + + struct { + struct r300_query_object *current; + struct r300_query_object not_flushed_head; + } query; + + int num_z_pipes; }; #define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx)) diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index d6ebdcbfe9..fb416a05c0 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -36,6 +36,7 @@ #include "r300_context.h" #include "r300_emit.h" #include "r300_render.h" +#include "r300_queryobj.h" #include "r300_state.h" #include "r300_tex.h" @@ -507,7 +508,7 @@ static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *ar radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, r300->vbuf.attribs[i].bo, RADEON_GEM_DOMAIN_GTT, 0); } } - + r300->radeon.tcl.aos_count = vbuf->num_attribs; ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, r300->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); if (ret) @@ -581,12 +582,16 @@ static GLboolean r300TryDrawPrims(GLcontext *ctx, r300EmitCacheFlush(r300); radeonEmitState(&r300->radeon); + r300EmitQueryBegin(ctx); + for (i = 0; i < nr_prims; ++i) { r300RunRenderPrimitive(ctx, prim[i].start, prim[i].start + prim[i].count, prim[i].mode); } r300EmitCacheFlush(r300); + r300EmitQueryEnd(ctx); + r300FreeData(ctx); return GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_queryobj.c b/src/mesa/drivers/dri/r300/r300_queryobj.c new file mode 100644 index 0000000000..830a9ed737 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_queryobj.c @@ -0,0 +1,229 @@ +/* + * Copyright © 2008-2009 Maciej Cencora + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, 
publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Maciej Cencora + * + */ + +#include "r300_queryobj.h" +#include "r300_emit.h" + +#include "main/imports.h" +#include "main/simple_list.h" + +#define PAGE_SIZE 4096 + +static void r300QueryGetResult(GLcontext *ctx, struct gl_query_object *q) +{ + struct r300_query_object *query = (struct r300_query_object *)q; + uint32_t *result; + int i; + + radeon_bo_map(query->bo, GL_FALSE); + + result = query->bo->ptr; + + query->Base.Result = 0; + for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) { + query->Base.Result += result[i]; + } + + radeon_bo_unmap(query->bo); +} + +static struct gl_query_object * r300NewQueryObject(GLcontext *ctx, GLuint id) +{ + struct r300_query_object *query; + + query = _mesa_calloc(sizeof(struct r300_query_object)); + + query->Base.Id = id; + query->Base.Result = 0; + query->Base.Active = GL_FALSE; + query->Base.Ready = GL_TRUE; + + return &query->Base; +} + +static void r300DeleteQuery(GLcontext *ctx, struct gl_query_object *q) +{ + struct r300_query_object *query = (struct r300_query_object *)q; + + if (query->bo) { + radeon_bo_unref(query->bo); + } + + _mesa_free(query); +} + +static 
void r300BeginQuery(GLcontext *ctx, struct gl_query_object *q) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_query_object *query = (struct r300_query_object *)q; + + assert(r300->query.current == NULL); + + if (!query->bo) { + query->bo = radeon_bo_open(r300->radeon.radeonScreen->bom, 0, PAGE_SIZE, PAGE_SIZE, RADEON_GEM_DOMAIN_GTT, 0); + } + query->curr_offset = 0; + + r300->query.current = query; + insert_at_tail(&r300->query.not_flushed_head, query); +} + +static void r300EndQuery(GLcontext *ctx, struct gl_query_object *q) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + r300EmitQueryEnd(ctx); + + r300->query.current = NULL; +} + +static void r300WaitQuery(GLcontext *ctx, struct gl_query_object *q) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_query_object *tmp, *query = (struct r300_query_object *)q; + + /* If the cmdbuf with packets for this query hasn't been flushed yet, do it now */ + { + GLboolean found = GL_FALSE; + foreach(tmp, &r300->query.not_flushed_head) { + if (tmp == query) { + found = GL_TRUE; + break; + } + } + + if (found) + ctx->Driver.Flush(ctx); + } + + r300QueryGetResult(ctx, q); + + query->Base.Ready = GL_TRUE; +} + + +/** + * TODO: + * should check if bo is idle, but there's no interface to do it + * just wait for result now + */ +static void r300CheckQuery(GLcontext *ctx, struct gl_query_object *q) +{ + r300WaitQuery(ctx, q); +} + +void r300EmitQueryBegin(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_query_object *query = r300->query.current; + BATCH_LOCALS(&r300->radeon); + + if (!query || query->emitted_begin) + return; + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + OUT_BATCH_REGVAL(R300_ZB_ZPASS_DATA, 0); + END_BATCH(); + } else { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); +
OUT_BATCH_REGVAL(R300_ZB_ZPASS_DATA, 0); + END_BATCH(); + } + + query->emitted_begin = GL_TRUE; +} + +void r300EmitQueryEnd(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_query_object *query = r300->query.current; + BATCH_LOCALS(&r300->radeon); + + if (!query || !query->emitted_begin) + return; + + radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, + query->bo, + 0, RADEON_GEM_DOMAIN_GTT); + + if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) { + BEGIN_BATCH_NO_AUTOSTATE(14); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); + END_BATCH(); + } else { + BEGIN_BATCH_NO_AUTOSTATE(3 * 2 *r300->num_z_pipes + 2); + switch (r300->num_z_pipes) { + case 4: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+3*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 3: + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_2); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+2*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 2: + if (r300->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV380) { + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3); + } else { + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_1); + } + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset+1*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0); + case 1: + default: + 
OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_0); + OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1); + OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0); + break; + } + OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); + END_BATCH(); + } + + query->curr_offset += r300->num_z_pipes * sizeof(uint32_t); + assert(query->curr_offset < PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + +void r300InitQueryObjFunctions(struct dd_function_table *functions) +{ + functions->NewQueryObject = r300NewQueryObject; + functions->DeleteQuery = r300DeleteQuery; + functions->BeginQuery = r300BeginQuery; + functions->EndQuery = r300EndQuery; + functions->CheckQuery = r300CheckQuery; + functions->WaitQuery = r300WaitQuery; +} diff --git a/src/mesa/drivers/dri/r300/r300_queryobj.h b/src/mesa/drivers/dri/r300/r300_queryobj.h new file mode 100644 index 0000000000..f301f0b113 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_queryobj.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2008 Maciej Cencora + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Maciej Cencora + * + */ + +#include "main/imports.h" +#include "r300_context.h" + +extern void r300EmitQueryBegin(GLcontext *ctx); +extern void r300EmitQueryEnd(GLcontext *ctx); + +extern void r300InitQueryObjFunctions(struct dd_function_table *functions); diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index dd32e6c730..39b4b61a10 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1128,6 +1128,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* SU Depth Offset value */ #define R300_SU_DEPTH_OFFSET 0x42c4 +#define R300_SU_REG_DEST 0x42c8 +# define R300_RASTER_PIPE_SELECT_0 (1 << 0) +# define R300_RASTER_PIPE_SELECT_1 (1 << 1) +# define R300_RASTER_PIPE_SELECT_2 (1 << 2) +# define R300_RASTER_PIPE_SELECT_3 (1 << 3) +# define R300_RASTER_PIPE_SELECT_ALL 0xf + /* BEGIN: Rasterization / Interpolators - many guesses */ @@ -2014,6 +2021,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R500_FG_ALPHA_VALUE 0x4be0 # define R500_FG_ALPHA_VALUE_MASK 0x0000ffff +#define RV530_FG_ZBREG_DEST 0x4be8 +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 (1 << 0) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 (1 << 1) +# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL (3 << 0) + /* gap */ /* Fragment program parameters in 7.16 floating point */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 4bf09c2e89..369c3edcd0 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -76,6 +76,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include "r300_tex.h" #include "r300_emit.h" #include "r300_fragprog_common.h" +#include "r300_queryobj.h" #include "r300_swtcl.h" /** -- cgit v1.2.3 From c80bc3abcd3939e5e2d45aea4b01ff22bfec244b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 18 Aug 2009 13:55:12 +1000 Subject: r300: fix big endian build --- src/mesa/drivers/dri/r300/r300_draw.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers/dri/r300/r300_draw.c') diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c index fb416a05c0..d524d60299 100644 --- a/src/mesa/drivers/dri/r300/r300_draw.c +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -108,6 +108,7 @@ static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer #if MESA_BIG_ENDIAN } else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ + GLuint size; GLushort *in = (GLushort *)src_ptr; size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); -- cgit v1.2.3