diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300/r300_vertprog.c')
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_vertprog.c | 414 |
1 files changed, 414 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c new file mode 100644 index 0000000000..a1fe378029 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -0,0 +1,414 @@ +/************************************************************************** + +Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org> +Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com> + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* Radeon R5xx Acceleration, Revision 1.2 */ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/program.h" +#include "shader/programopt.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_statevars.h" +#include "tnl/tnl.h" + +#include "compiler/radeon_compiler.h" +#include "radeon_mesa_to_rc.h" +#include "r300_context.h" +#include "r300_fragprog_common.h" +#include "r300_state.h" + +/** + * Write parameter array for the given vertex program into dst. + * Return the total number of components written. + */ +static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst) +{ + int i; + + if (vp->Base->IsNVProgram) { + _mesa_load_tracked_matrices(ctx); + } else { + if (vp->Base->Base.Parameters) { + _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters); + } + } + + for(i = 0; i < vp->code.constants.Count; ++i) { + const float * src = 0; + const struct rc_constant * constant = &vp->code.constants.Constants[i]; + + switch(constant->Type) { + case RC_CONSTANT_EXTERNAL: + if (vp->Base->IsNVProgram) { + src = ctx->VertexProgram.Parameters[constant->u.External]; + } else { + src = vp->Base->Base.Parameters->ParameterValues[constant->u.External]; + } + break; + + case RC_CONSTANT_IMMEDIATE: + src = constant->u.Immediate; + break; + } + + assert(src); + dst[4*i] = src[0]; + dst[4*i + 1] = src[1]; + dst[4*i + 2] = src[2]; + dst[4*i + 3] = src[3]; + } + + return 4 * vp->code.constants.Count; +} + +static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads) +{ + GLbitfield outputs = 0; + int i; + +#define ADD_OUTPUT(fp_attr, vp_result) \ + do { \ + if (fpreads & (1 << (fp_attr))) \ + outputs |= (1 << (vp_result)); \ + } while (0) + + ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); + ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1); + + for (i = 0; i <= 7; ++i) { + ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i); + } + +#undef ADD_OUTPUT + + if ((fpreads & (1 << FRAG_ATTRIB_COL0)) && + (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0))) + outputs |= 1 << VERT_RESULT_BFC0; + if ((fpreads & (1 << FRAG_ATTRIB_COL1)) && + (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1))) + outputs |= 1 << VERT_RESULT_BFC1; + + outputs |= 1 << VERT_RESULT_HPOS; + if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) + outputs |= 1 << VERT_RESULT_PSIZ; + + return outputs; +} + + +static void t_inputs_outputs(struct r300_vertex_program_compiler * c) +{ + int i; + int cur_reg; + GLuint OutputsWritten, InputsRead; + + OutputsWritten = c->Base.Program.OutputsWritten; + InputsRead = c->Base.Program.InputsRead; + + cur_reg = -1; + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + if (InputsRead & (1 << i)) + c->code->inputs[i] = ++cur_reg; + else + c->code->inputs[i] = -1; + } + + cur_reg = 0; + for (i = 0; i < VERT_RESULT_MAX; i++) + c->code->outputs[i] = -1; + + assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); + + if (OutputsWritten & (1 << VERT_RESULT_HPOS)) { + c->code->outputs[VERT_RESULT_HPOS] = cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) { + c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++; + } + + /* If we're writing back facing colors we need to send + * four colors to make front/back face colors selection work. + * If the vertex program doesn't write all 4 colors, lets + * pretend it does by skipping output index reg so the colors + * get written into appropriate output vectors. + */ + if (OutputsWritten & (1 << VERT_RESULT_COL0)) { + c->code->outputs[VERT_RESULT_COL0] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_COL1)) { + c->code->outputs[VERT_RESULT_COL1] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) || + OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { + c->code->outputs[VERT_RESULT_BFC0] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; + } + + if (OutputsWritten & (1 << VERT_RESULT_BFC1)) { + c->code->outputs[VERT_RESULT_BFC1] = cur_reg++; + } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) { + cur_reg++; + } + + for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { + if (OutputsWritten & (1 << i)) { + c->code->outputs[i] = cur_reg++; + } + } + + if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { + c->code->outputs[VERT_RESULT_FOGC] = cur_reg++; + } +} + +/** + * The NV_vertex_program spec mandates that all registers be + * initialized to zero. We do this here unconditionally. + * + * \note We rely on dead-code elimination in the compiler. + */ +static void initialize_NV_registers(struct radeon_compiler * compiler) +{ + unsigned int reg; + struct rc_instruction * inst; + + for(reg = 0; reg < 12; ++reg) { + inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = reg; + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + } + + inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); + inst->U.I.Opcode = RC_OPCODE_ARL; + inst->U.I.DstReg.File = RC_FILE_ADDRESS; + inst->U.I.DstReg.Index = 0; + inst->U.I.DstReg.WriteMask = WRITEMASK_X; + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; +} + +static struct r300_vertex_program *build_program(GLcontext *ctx, + struct r300_vertex_program_key *wanted_key, + const struct gl_vertex_program *mesa_vp) +{ + struct r300_vertex_program *vp; + struct r300_vertex_program_compiler compiler; + + vp = calloc(1, sizeof(*vp)); + vp->Base = _mesa_clone_vertex_program(ctx, mesa_vp); + memcpy(&vp->key, wanted_key, sizeof(vp->key)); + + rc_init(&compiler.Base); + compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE; + + compiler.code = &vp->code; + compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads); + compiler.SetHwInputOutput = &t_inputs_outputs; + + if (compiler.Base.Debug) { + fprintf(stderr, "Initial vertex program:\n"); + _mesa_print_program(&vp->Base->Base); + fflush(stderr); + } + + if (mesa_vp->IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, vp->Base); + } + + radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base); + + if (mesa_vp->IsNVProgram) + initialize_NV_registers(&compiler.Base); + + rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X); + + if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) { + unsigned int vp_wpos_attr = vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0; + + /* Set empty writemask for instructions writing to vp_wpos_attr + * before moving the wpos attr there. + * Such instructions will be removed by DCE. + */ + rc_move_output(&compiler.Base, vp_wpos_attr, vp->key.WPosAttr, 0); + rc_copy_output(&compiler.Base, VERT_RESULT_HPOS, vp_wpos_attr); + } + + if (vp->key.FogAttr != FRAG_ATTRIB_MAX) { + unsigned int vp_fog_attr = vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0; + + /* Set empty writemask for instructions writing to vp_fog_attr + * before moving the fog attr there. + * Such instructions will be removed by DCE. + */ + rc_move_output(&compiler.Base, vp_fog_attr, vp->key.FogAttr, 0); + rc_move_output(&compiler.Base, VERT_RESULT_FOGC, vp_fog_attr, WRITEMASK_X); + } + + r3xx_compile_vertex_program(&compiler); + + if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) { + rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n"); + } + + vp->error = compiler.Base.Error; + + vp->Base->Base.InputsRead = vp->code.InputsRead; + vp->Base->Base.OutputsWritten = vp->code.OutputsWritten; + + rc_destroy(&compiler.Base); + + return vp; +} + +struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_program_key wanted_key = { 0 }; + struct r300_vertex_program_cont *vpc; + struct r300_vertex_program *vp; + + vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + + if (!r300->selected_fp) { + /* This can happen when GetProgramiv is called to check + * whether the program runs natively. + * + * To be honest, this is not a very good solution, + * but solving the problem of reporting good values + * for those queries is tough anyway considering that + * we recompile vertex programs based on the precise + * fragment program that is in use. + */ + r300SelectAndTranslateFragmentShader(ctx); + } + + assert(r300->selected_fp); + wanted_key.FpReads = r300->selected_fp->InputsRead; + wanted_key.FogAttr = r300->selected_fp->fog_attr; + wanted_key.WPosAttr = r300->selected_fp->wpos_attr; + + for (vp = vpc->progs; vp; vp = vp->next) { + if (memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) { + return r300->selected_vp = vp; + } + } + + vp = build_program(ctx, &wanted_key, &vpc->mesa_program); + vp->next = vpc->progs; + vpc->progs = vp; + + return r300->selected_vp = vp; +} + +#define bump_vpu_count(ptr, new_count) do { \ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \ + int _nc=(new_count)/4; \ + if(_nc>_p->vpu.count)_p->vpu.count=_nc; \ + } while(0) + +static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code) +{ + int i; + + assert((code->length > 0) && (code->length % 4 == 0)); + + switch ((dest >> 8) & 0xf) { + case 0: + R300_STATECHANGE(r300, vpi); + for (i = 0; i < code->length; i++) + r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff)); + break; + case 2: + R300_STATECHANGE(r300, vpp); + for (i = 0; i < code->length; i++) + r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff)); + break; + case 4: + R300_STATECHANGE(r300, vps); + for (i = 0; i < code->length; i++) + r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff)); + break; + default: + fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest); + exit(-1); + } +} + +void r300SetupVertexProgram(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r300_vertex_program *prog = rmesa->selected_vp; + int inst_count = 0; + int param_count = 0; + + /* Reset state, in case we don't use something */ + ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; + + R300_STATECHANGE(rmesa, vap_cntl); + R300_STATECHANGE(rmesa, vpp); + param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); + if (!rmesa->radeon.radeonScreen->kernel_mm && param_count > 255 * 4) { + WARN_ONCE("Too many VP params, expect rendering errors\n"); + } + /* Prevent the overflow (vpu.count is u8) */ + bump_vpu_count(rmesa->hw.vpp.cmd, MIN2(255 * 4, param_count)); + param_count /= 4; + + r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code)); + inst_count = (prog->code.length / 4) - 1; + + r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead), + _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries); + + R300_STATECHANGE(rmesa, pvs); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | + (inst_count << R300_PVS_LAST_INST_SHIFT); + + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | ((param_count - 1) << R300_PVS_MAX_CONST_ADDR_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); +} |