From e34dc8227c1fa8bc9ffcd311de701053a633a7ec Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 14 Jun 2008 01:46:19 +0200 Subject: r300_fragprog: Refactor TEX transformation Streamlining source and destination registers, as well as texcoord scaling for RECT textures is now done in a radeon_program based transformation. The idea is that this will allow us to optimize away unnecessary indirections more easily. --- src/mesa/drivers/dri/r300/r300_fragprog.c | 131 +++++++++++++++++++++++++ src/mesa/drivers/dri/r300/r300_fragprog.h | 1 + src/mesa/drivers/dri/r300/r300_fragprog_emit.c | 103 ++----------------- src/mesa/drivers/dri/r300/radeon_program.c | 98 ++++++++++++++++++ src/mesa/drivers/dri/r300/radeon_program.h | 53 ++++++++++ 5 files changed, 293 insertions(+), 93 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 94cb11afec..4c6289298e 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -50,6 +50,130 @@ #include "r300_state.h" +static void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + * + * \todo If/when r5xx uses the radeon_program architecture, this can probably + * be reused. 
+ */ +static GLboolean transform_TEX( + struct radeon_program_transform_context* context, + struct prog_instruction* orig_inst, void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + struct prog_instruction inst = *orig_inst; + struct prog_instruction* tgt; + GLboolean destredirect = GL_FALSE; + + if (inst.Opcode != OPCODE_TEX && + inst.Opcode != OPCODE_TXB && + inst.Opcode != OPCODE_TXP && + inst.Opcode != OPCODE_KIL) + return GL_FALSE; + + /* Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + */ + if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) { + gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, + 0 + }; + + int tempreg = radeonCompilerAllocateTemporary(context->compiler); + int factor_index; + + tokens[2] = inst.TexSrcUnit; + factor_index = + _mesa_add_state_reference( + compiler->fp->mesa_program.Base.Parameters, tokens); + + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tempreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + tgt->SrcReg[1].File = PROGRAM_STATE_VAR; + tgt->SrcReg[1].Index = factor_index; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = SWIZZLE_0000; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tempreg; + } + + /* Texture operations do not support swizzles etc. in hardware, + * so emit an additional arithmetic operation if necessary. 
+ */ + if (inst.SrcReg[0].Swizzle != SWIZZLE_NOOP || + inst.SrcReg[0].Abs || inst.SrcReg[0].NegateBase || inst.SrcReg[0].NegateAbs) { + int tempreg = radeonCompilerAllocateTemporary(context->compiler); + + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tempreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + tgt->SrcReg[1].File = PROGRAM_BUILTIN; + tgt->SrcReg[1].Swizzle = SWIZZLE_1111; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = SWIZZLE_0000; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tempreg; + } + + if (inst.Opcode != OPCODE_KIL) { + if (inst.DstReg.File != PROGRAM_TEMPORARY || + inst.DstReg.WriteMask != WRITEMASK_XYZW) { + int tempreg = radeonCompilerAllocateTemporary(context->compiler); + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; + destredirect = GL_TRUE; + } + } + + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + _mesa_copy_instructions(tgt, &inst, 1); + + if (destredirect) { + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg = orig_inst->DstReg; + tgt->SrcReg[0].File = PROGRAM_TEMPORARY; + tgt->SrcReg[0].Index = inst.DstReg.Index; + tgt->SrcReg[1].File = PROGRAM_BUILTIN; + tgt->SrcReg[1].Swizzle = SWIZZLE_1111; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = SWIZZLE_0000; + } + + return GL_TRUE; +} + + static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp) { struct gl_fragment_program *mp = &fp->mesa_program; @@ -170,6 +294,13 @@ void r300TranslateFragmentShader(r300ContextPtr r300, insert_WPOS_trailer(&compiler); + struct radeon_program_transformation 
transformations[1] = { + { &transform_TEX, &compiler } + }; + radeonClauseLocalTransform(&compiler.compiler, + &compiler.compiler.Clauses[0], + 1, transformations); + if (!r300FragmentProgramEmit(&compiler)) fp->error = GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index 8c836c4bda..7c1e210b04 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -149,6 +149,7 @@ struct r300_fragment_program_compiler { struct radeon_compiler compiler; }; +extern void r300FPTransformTextures(struct r300_fragment_program_compiler *compiler); extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c index fe8a347a62..aec202a129 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c @@ -527,32 +527,6 @@ static GLuint get_temp_reg(struct r300_pfs_compile_state *cs) return r; } -/** - * Create a new Mesa temporary register that will act as the destination - * register for a texture read. - */ -static GLuint get_temp_reg_tex(struct r300_pfs_compile_state *cs) -{ - COMPILE_STATE; - GLuint r = undef; - GLuint index; - - index = ffs(~cs->temp_in_use); - if (!index) { - ERROR("Out of program temps\n"); - return r; - } - - cs->temp_in_use |= (1 << --index); - cs->temps[index].refcount = 0xFFFFFFFF; - cs->temps[index].reg = get_hw_temp_tex(cs); - - REG_SET_TYPE(r, REG_TYPE_TEMP); - REG_SET_INDEX(r, index); - REG_SET_VALID(r, GL_TRUE); - return r; -} - /** * Free a Mesa temporary and the associated R300 temporary. 
*/ @@ -847,6 +821,15 @@ static GLuint t_src(struct r300_pfs_compile_state *cs, fp->mesa_program.Base.Parameters-> ParameterValues[fpsrc.Index]); break; + case PROGRAM_BUILTIN: + switch(fpsrc.Swizzle) { + case SWIZZLE_1111: r = pfs_one; break; + case SWIZZLE_0000: r = pfs_zero; break; + default: + ERROR("bad PROGRAM_BUILTIN swizzle %u\n", fpsrc.Swizzle); + break; + } + break; default: ERROR("unknown SrcReg->File %x\n", fpsrc.File); return r; @@ -1003,56 +986,10 @@ static void emit_tex(struct r300_pfs_compile_state *cs, { COMPILE_STATE; GLuint coord = t_src(cs, fpi->SrcReg[0]); - GLuint dest = undef, rdest = undef; + GLuint dest = undef; GLuint din, uin; int unit = fpi->TexSrcUnit; int hwsrc, hwdest; - GLuint tempreg = 0; - - /** - * Hardware uses [0..1]x[0..1] range for rectangle textures - * instead of [0..Width]x[0..Height]. - * Add a scaling instruction. - * - * \todo Refactor this once we have proper rewriting/optimization - * support for programs. - */ - if (opcode != R300_TEX_OP_KIL && fpi->TexSrcTarget == TEXTURE_RECT_INDEX) { - gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, - 0 - }; - int factor_index; - GLuint factorreg; - - tokens[2] = unit; - factor_index = - _mesa_add_state_reference(fp->mesa_program.Base. - Parameters, tokens); - factorreg = - emit_const4fv(cs, - fp->mesa_program.Base.Parameters-> - ParameterValues[factor_index]); - tempreg = keep(get_temp_reg(cs)); - - emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, - coord, factorreg, pfs_zero, 0); - - coord = tempreg; - } - - /* Texture operations do not support swizzles etc. in hardware, - * so emit an additional arithmetic operation if necessary. 
- */ - if (REG_GET_VSWZ(coord) != SWIZZLE_XYZ || - REG_GET_SSWZ(coord) != SWIZZLE_W || - coord & (REG_NEGV_MASK | REG_NEGS_MASK | REG_ABS_MASK)) { - assert(tempreg == 0); - tempreg = keep(get_temp_reg(cs)); - emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, - coord, pfs_one, pfs_zero, 0); - coord = tempreg; - } /* Ensure correct node indirection */ uin = cs->used_in_node; @@ -1064,15 +1001,6 @@ static void emit_tex(struct r300_pfs_compile_state *cs, if (opcode != R300_TEX_OP_KIL) { dest = t_dst(cs, fpi->DstReg); - /* r300 doesn't seem to be able to do TEX->output reg */ - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - rdest = dest; - dest = get_temp_reg_tex(cs); - } else if (fpi->DstReg.WriteMask != WRITEMASK_XYZW) { - /* in case write mask isn't XYZW */ - rdest = dest; - dest = get_temp_reg_tex(cs); - } hwdest = t_hw_dst(cs, dest, GL_TRUE, code->node[code->cur_node].alu_offset); @@ -1132,17 +1060,6 @@ static void emit_tex(struct r300_pfs_compile_state *cs, cs->used_in_node |= (1 << hwsrc); code->node[code->cur_node].tex_end++; - - /* Copy from temp to output if needed */ - if (REG_GET_VALID(rdest)) { - emit_arith(cs, PFS_OP_MAD, rdest, fpi->DstReg.WriteMask, dest, - pfs_one, pfs_zero, 0); - free_temp(cs, dest); - } - - /* Free temp register */ - if (tempreg != 0) - free_temp(cs, tempreg); } /** diff --git a/src/mesa/drivers/dri/r300/radeon_program.c b/src/mesa/drivers/dri/r300/radeon_program.c index 7b03fa6523..41cedbe61d 100644 --- a/src/mesa/drivers/dri/r300/radeon_program.c +++ b/src/mesa/drivers/dri/r300/radeon_program.c @@ -149,3 +149,101 @@ void radeonCompilerEraseClauses( _mesa_free(oldClauses); } + + +/** + * Insert new instructions at the given position, initialize them as NOPs + * and return a pointer to the first new instruction. 
+ */ +struct prog_instruction* radeonClauseInsertInstructions( + struct radeon_compiler *compiler, + struct radeon_clause *clause, + int position, int count) +{ + int newNumInstructions = clause->NumInstructions + count; + + assert(position >= 0 && position <= clause->NumInstructions); + + if (newNumInstructions <= clause->ReservedInstructions) { + memmove(clause->Instructions + position + count, clause->Instructions + position, + (clause->NumInstructions - position) * sizeof(struct prog_instruction)); + } else { + struct prog_instruction *oldInstructions = clause->Instructions; + + clause->ReservedInstructions *= 2; + if (newNumInstructions > clause->ReservedInstructions) + clause->ReservedInstructions = newNumInstructions; + + clause->Instructions = (struct prog_instruction*) + _mesa_malloc(clause->ReservedInstructions * sizeof(struct prog_instruction)); + + if (oldInstructions) { + _mesa_memcpy(clause->Instructions, oldInstructions, + position * sizeof(struct prog_instruction)); + _mesa_memcpy(clause->Instructions + position + count, oldInstructions + position, + (clause->NumInstructions - position) * sizeof(struct prog_instruction)); + + _mesa_free(oldInstructions); + } + } + + clause->NumInstructions = newNumInstructions; + _mesa_init_instructions(clause->Instructions + position, count); + return clause->Instructions + position; +} + + +/** + * Transform the given clause in the following way: + * 1. Replace it with an empty clause + * 2. For every instruction in the original clause, try the given + * transformations in order. + * 3. If one of the transformations returns GL_TRUE, assume that it + * has emitted the appropriate instruction(s) into the new clause; + * otherwise, copy the instruction verbatim. + * + * \note The transformation is currently not recursive; in other words, + * instructions emitted by transformations are not transformed. + * + * \note The transform is called 'local' because it can only look at + * one instruction at a time. 
+ */ +void radeonClauseLocalTransform( + struct radeon_compiler *compiler, + struct radeon_clause *clause, + int num_transformations, + struct radeon_program_transformation* transformations) +{ + struct radeon_program_transform_context context; + struct radeon_clause source; + int ip; + + source = *clause; + clause->Instructions = 0; + clause->NumInstructions = 0; + clause->ReservedInstructions = 0; + + context.compiler = compiler; + context.dest = clause; + context.src = &source; + + for(ip = 0; ip < source.NumInstructions; ++ip) { + struct prog_instruction *instr = source.Instructions + ip; + int i; + + for(i = 0; i < num_transformations; ++i) { + struct radeon_program_transformation* t = transformations + i; + + if (t->function(&context, instr, t->userData)) + break; + } + + if (i >= num_transformations) { + struct prog_instruction *tgt = + radeonClauseInsertInstructions(compiler, clause, clause->NumInstructions, 1); + _mesa_copy_instructions(tgt, instr, 1); + } + } + + _mesa_free_instructions(source.Instructions, source.NumInstructions); +} diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/radeon_program.h index 18091ac02a..3cde4d4f6f 100644 --- a/src/mesa/drivers/dri/r300/radeon_program.h +++ b/src/mesa/drivers/dri/r300/radeon_program.h @@ -41,6 +41,13 @@ enum { CLAUSE_TEX }; +enum { + PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ +}; + +#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) +#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) + /** * A clause is simply a sequence of instructions that are executed * in order. 
@@ -107,4 +114,50 @@ void radeonCompilerEraseClauses( int start, int end); +struct prog_instruction* radeonClauseInsertInstructions( + struct radeon_compiler *compiler, + struct radeon_clause *clause, + int position, int count); + +/** + * Context passed to every transformation function by radeonClauseLocalTransform. + */ +struct radeon_program_transform_context { + struct radeon_compiler *compiler; + + /** + * Destination clause where new instructions must be written. + */ + struct radeon_clause *dest; + + /** + * Original clause that is currently being transformed. + */ + struct radeon_clause *src; +}; + +/** + * A transformation that can be passed to \ref radeonClauseLocalTransform. + * + * The function will be called once for each instruction. + * It has to either emit the appropriate transformed code for the instruction + * and return GL_TRUE, or return GL_FALSE if it doesn't understand the + * instruction. + * + * The function gets passed the userData as last parameter. + */ +struct radeon_program_transformation { + GLboolean (*function)( + struct radeon_program_transform_context*, + struct prog_instruction*, + void*); + void *userData; +}; + +void radeonClauseLocalTransform( + struct radeon_compiler *compiler, + struct radeon_clause *clause, + int num_transformations, + struct radeon_program_transformation* transformations); + #endif -- cgit v1.2.3