summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolai Haehnle <nhaehnle@gmail.com>2008-06-14 01:46:19 +0200
committerNicolai Haehnle <nhaehnle@gmail.com>2008-06-14 04:14:56 +0200
commite34dc8227c1fa8bc9ffcd311de701053a633a7ec (patch)
treec4337d9c41714bd771199cf3d061967e7dc28ca4
parentb5170bc9d32530ec93dae4b543d3552e83d6b4a1 (diff)
r300_fragprog: Refactor TEX transformation
Streamlining source and destination registers, as well as texcoord scaling for RECT textures is now done in a radeon_program based transformation. The idea is that this will allow us to optimize away unnecessary indirections more easily.
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog.c131
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog.h1
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_emit.c103
-rw-r--r--src/mesa/drivers/dri/r300/radeon_program.c98
-rw-r--r--src/mesa/drivers/dri/r300/radeon_program.h53
5 files changed, 293 insertions, 93 deletions
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
index 94cb11afec..4c6289298e 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
@@ -50,6 +50,130 @@
#include "r300_state.h"
+static void reset_srcreg(struct prog_src_register* reg)
+{
+ _mesa_bzero(reg, sizeof(*reg));
+ reg->Swizzle = SWIZZLE_NOOP;
+}
+
+/**
+ * Transform TEX, TXP, TXB, and KIL instructions in the following way:
+ * - premultiply texture coordinates for RECT
+ * - extract operand swizzles
+ * - introduce a temporary register when write masks are needed
+ *
+ * \todo If/when r5xx uses the radeon_program architecture, this can probably
+ * be reused.
+ */
+static GLboolean transform_TEX(
+ struct radeon_program_transform_context* context,
+ struct prog_instruction* orig_inst, void* data)
+{
+ struct r300_fragment_program_compiler *compiler =
+ (struct r300_fragment_program_compiler*)data;
+ struct prog_instruction inst = *orig_inst;
+ struct prog_instruction* tgt;
+ GLboolean destredirect = GL_FALSE;
+
+ if (inst.Opcode != OPCODE_TEX &&
+ inst.Opcode != OPCODE_TXB &&
+ inst.Opcode != OPCODE_TXP &&
+ inst.Opcode != OPCODE_KIL)
+ return GL_FALSE;
+
+ /* Hardware uses [0..1]x[0..1] range for rectangle textures
+ * instead of [0..Width]x[0..Height].
+ * Add a scaling instruction.
+ */
+ if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) {
+ gl_state_index tokens[STATE_LENGTH] = {
+ STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
+ 0
+ };
+
+ int tempreg = radeonCompilerAllocateTemporary(context->compiler);
+ int factor_index;
+
+ tokens[2] = inst.TexSrcUnit;
+ factor_index =
+ _mesa_add_state_reference(
+ compiler->fp->mesa_program.Base.Parameters, tokens);
+
+ tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
+ context->dest->NumInstructions, 1);
+
+ tgt->Opcode = OPCODE_MAD;
+ tgt->DstReg.File = PROGRAM_TEMPORARY;
+ tgt->DstReg.Index = tempreg;
+ tgt->SrcReg[0] = inst.SrcReg[0];
+ tgt->SrcReg[1].File = PROGRAM_STATE_VAR;
+ tgt->SrcReg[1].Index = factor_index;
+ tgt->SrcReg[2].File = PROGRAM_BUILTIN;
+ tgt->SrcReg[2].Swizzle = SWIZZLE_0000;
+
+ reset_srcreg(&inst.SrcReg[0]);
+ inst.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst.SrcReg[0].Index = tempreg;
+ }
+
+ /* Texture operations do not support swizzles etc. in hardware,
+ * so emit an additional arithmetic operation if necessary.
+ */
+ if (inst.SrcReg[0].Swizzle != SWIZZLE_NOOP ||
+ inst.SrcReg[0].Abs || inst.SrcReg[0].NegateBase || inst.SrcReg[0].NegateAbs) {
+ int tempreg = radeonCompilerAllocateTemporary(context->compiler);
+
+ tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
+ context->dest->NumInstructions, 1);
+
+ tgt->Opcode = OPCODE_MAD;
+ tgt->DstReg.File = PROGRAM_TEMPORARY;
+ tgt->DstReg.Index = tempreg;
+ tgt->SrcReg[0] = inst.SrcReg[0];
+ tgt->SrcReg[1].File = PROGRAM_BUILTIN;
+ tgt->SrcReg[1].Swizzle = SWIZZLE_1111;
+ tgt->SrcReg[2].File = PROGRAM_BUILTIN;
+ tgt->SrcReg[2].Swizzle = SWIZZLE_0000;
+
+ reset_srcreg(&inst.SrcReg[0]);
+ inst.SrcReg[0].File = PROGRAM_TEMPORARY;
+ inst.SrcReg[0].Index = tempreg;
+ }
+
+ if (inst.Opcode != OPCODE_KIL) {
+ if (inst.DstReg.File != PROGRAM_TEMPORARY ||
+ inst.DstReg.WriteMask != WRITEMASK_XYZW) {
+ int tempreg = radeonCompilerAllocateTemporary(context->compiler);
+
+ inst.DstReg.File = PROGRAM_TEMPORARY;
+ inst.DstReg.Index = tempreg;
+ inst.DstReg.WriteMask = WRITEMASK_XYZW;
+ destredirect = GL_TRUE;
+ }
+ }
+
+ tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
+ context->dest->NumInstructions, 1);
+ _mesa_copy_instructions(tgt, &inst, 1);
+
+ if (destredirect) {
+ tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
+ context->dest->NumInstructions, 1);
+
+ tgt->Opcode = OPCODE_MAD;
+ tgt->DstReg = orig_inst->DstReg;
+ tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
+ tgt->SrcReg[0].Index = inst.DstReg.Index;
+ tgt->SrcReg[1].File = PROGRAM_BUILTIN;
+ tgt->SrcReg[1].Swizzle = SWIZZLE_1111;
+ tgt->SrcReg[2].File = PROGRAM_BUILTIN;
+ tgt->SrcReg[2].Swizzle = SWIZZLE_0000;
+ }
+
+ return GL_TRUE;
+}
+
+
static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp)
{
struct gl_fragment_program *mp = &fp->mesa_program;
@@ -170,6 +294,13 @@ void r300TranslateFragmentShader(r300ContextPtr r300,
insert_WPOS_trailer(&compiler);
+ struct radeon_program_transformation transformations[1] = {
+ { &transform_TEX, &compiler }
+ };
+ radeonClauseLocalTransform(&compiler.compiler,
+ &compiler.compiler.Clauses[0],
+ 1, transformations);
+
if (!r300FragmentProgramEmit(&compiler))
fp->error = GL_TRUE;
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h
index 8c836c4bda..7c1e210b04 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.h
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.h
@@ -149,6 +149,7 @@ struct r300_fragment_program_compiler {
struct radeon_compiler compiler;
};
+extern void r300FPTransformTextures(struct r300_fragment_program_compiler *compiler);
extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler);
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
index fe8a347a62..aec202a129 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c
@@ -528,32 +528,6 @@ static GLuint get_temp_reg(struct r300_pfs_compile_state *cs)
}
/**
- * Create a new Mesa temporary register that will act as the destination
- * register for a texture read.
- */
-static GLuint get_temp_reg_tex(struct r300_pfs_compile_state *cs)
-{
- COMPILE_STATE;
- GLuint r = undef;
- GLuint index;
-
- index = ffs(~cs->temp_in_use);
- if (!index) {
- ERROR("Out of program temps\n");
- return r;
- }
-
- cs->temp_in_use |= (1 << --index);
- cs->temps[index].refcount = 0xFFFFFFFF;
- cs->temps[index].reg = get_hw_temp_tex(cs);
-
- REG_SET_TYPE(r, REG_TYPE_TEMP);
- REG_SET_INDEX(r, index);
- REG_SET_VALID(r, GL_TRUE);
- return r;
-}
-
-/**
* Free a Mesa temporary and the associated R300 temporary.
*/
static void free_temp(struct r300_pfs_compile_state *cs, GLuint r)
@@ -847,6 +821,15 @@ static GLuint t_src(struct r300_pfs_compile_state *cs,
fp->mesa_program.Base.Parameters->
ParameterValues[fpsrc.Index]);
break;
+ case PROGRAM_BUILTIN:
+ switch(fpsrc.Swizzle) {
+ case SWIZZLE_1111: r = pfs_one; break;
+ case SWIZZLE_0000: r = pfs_zero; break;
+ default:
+ ERROR("bad PROGRAM_BUILTIN swizzle %u\n", fpsrc.Swizzle);
+ break;
+ }
+ break;
default:
ERROR("unknown SrcReg->File %x\n", fpsrc.File);
return r;
@@ -1003,56 +986,10 @@ static void emit_tex(struct r300_pfs_compile_state *cs,
{
COMPILE_STATE;
GLuint coord = t_src(cs, fpi->SrcReg[0]);
- GLuint dest = undef, rdest = undef;
+ GLuint dest = undef;
GLuint din, uin;
int unit = fpi->TexSrcUnit;
int hwsrc, hwdest;
- GLuint tempreg = 0;
-
- /**
- * Hardware uses [0..1]x[0..1] range for rectangle textures
- * instead of [0..Width]x[0..Height].
- * Add a scaling instruction.
- *
- * \todo Refactor this once we have proper rewriting/optimization
- * support for programs.
- */
- if (opcode != R300_TEX_OP_KIL && fpi->TexSrcTarget == TEXTURE_RECT_INDEX) {
- gl_state_index tokens[STATE_LENGTH] = {
- STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
- 0
- };
- int factor_index;
- GLuint factorreg;
-
- tokens[2] = unit;
- factor_index =
- _mesa_add_state_reference(fp->mesa_program.Base.
- Parameters, tokens);
- factorreg =
- emit_const4fv(cs,
- fp->mesa_program.Base.Parameters->
- ParameterValues[factor_index]);
- tempreg = keep(get_temp_reg(cs));
-
- emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
- coord, factorreg, pfs_zero, 0);
-
- coord = tempreg;
- }
-
- /* Texture operations do not support swizzles etc. in hardware,
- * so emit an additional arithmetic operation if necessary.
- */
- if (REG_GET_VSWZ(coord) != SWIZZLE_XYZ ||
- REG_GET_SSWZ(coord) != SWIZZLE_W ||
- coord & (REG_NEGV_MASK | REG_NEGS_MASK | REG_ABS_MASK)) {
- assert(tempreg == 0);
- tempreg = keep(get_temp_reg(cs));
- emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
- coord, pfs_one, pfs_zero, 0);
- coord = tempreg;
- }
/* Ensure correct node indirection */
uin = cs->used_in_node;
@@ -1064,15 +1001,6 @@ static void emit_tex(struct r300_pfs_compile_state *cs,
if (opcode != R300_TEX_OP_KIL) {
dest = t_dst(cs, fpi->DstReg);
- /* r300 doesn't seem to be able to do TEX->output reg */
- if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
- rdest = dest;
- dest = get_temp_reg_tex(cs);
- } else if (fpi->DstReg.WriteMask != WRITEMASK_XYZW) {
- /* in case write mask isn't XYZW */
- rdest = dest;
- dest = get_temp_reg_tex(cs);
- }
hwdest =
t_hw_dst(cs, dest, GL_TRUE,
code->node[code->cur_node].alu_offset);
@@ -1132,17 +1060,6 @@ static void emit_tex(struct r300_pfs_compile_state *cs,
cs->used_in_node |= (1 << hwsrc);
code->node[code->cur_node].tex_end++;
-
- /* Copy from temp to output if needed */
- if (REG_GET_VALID(rdest)) {
- emit_arith(cs, PFS_OP_MAD, rdest, fpi->DstReg.WriteMask, dest,
- pfs_one, pfs_zero, 0);
- free_temp(cs, dest);
- }
-
- /* Free temp register */
- if (tempreg != 0)
- free_temp(cs, tempreg);
}
/**
diff --git a/src/mesa/drivers/dri/r300/radeon_program.c b/src/mesa/drivers/dri/r300/radeon_program.c
index 7b03fa6523..41cedbe61d 100644
--- a/src/mesa/drivers/dri/r300/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/radeon_program.c
@@ -149,3 +149,101 @@ void radeonCompilerEraseClauses(
_mesa_free(oldClauses);
}
+
+
+/**
+ * Insert new instructions at the given position, initialize them as NOPs
+ * and return a pointer to the first new instruction.
+ */
+struct prog_instruction* radeonClauseInsertInstructions(
+ struct radeon_compiler *compiler,
+ struct radeon_clause *clause,
+ int position, int count)
+{
+ int newNumInstructions = clause->NumInstructions + count;
+
+ assert(position >= 0 && position <= clause->NumInstructions);
+
+ if (newNumInstructions <= clause->ReservedInstructions) {
+ memmove(clause->Instructions + position + count, clause->Instructions + position,
+ (clause->NumInstructions - position) * sizeof(struct prog_instruction));
+ } else {
+ struct prog_instruction *oldInstructions = clause->Instructions;
+
+ clause->ReservedInstructions *= 2;
+ if (newNumInstructions > clause->ReservedInstructions)
+ clause->ReservedInstructions = newNumInstructions;
+
+ clause->Instructions = (struct prog_instruction*)
+ _mesa_malloc(clause->ReservedInstructions * sizeof(struct prog_instruction));
+
+ if (oldInstructions) {
+ _mesa_memcpy(clause->Instructions, oldInstructions,
+ position * sizeof(struct prog_instruction));
+ _mesa_memcpy(clause->Instructions + position + count, oldInstructions + position,
+ (clause->NumInstructions - position) * sizeof(struct prog_instruction));
+
+ _mesa_free(oldInstructions);
+ }
+ }
+
+ clause->NumInstructions = newNumInstructions;
+ _mesa_init_instructions(clause->Instructions + position, count);
+ return clause->Instructions + position;
+}
+
+
+/**
+ * Transform the given clause in the following way:
+ * 1. Replace it with an empty clause
+ * 2. For every instruction in the original clause, try the given
+ * transformations in order.
+ * 3. If one of the transformations returns GL_TRUE, assume that it
+ * has emitted the appropriate instruction(s) into the new clause;
+ * otherwise, copy the instruction verbatim.
+ *
+ * \note The transformation is currently not recursive; in other words,
+ * instructions emitted by transformations are not transformed.
+ *
+ * \note The transform is called 'local' because it can only look at
+ * one instruction at a time.
+ */
+void radeonClauseLocalTransform(
+ struct radeon_compiler *compiler,
+ struct radeon_clause *clause,
+ int num_transformations,
+ struct radeon_program_transformation* transformations)
+{
+ struct radeon_program_transform_context context;
+ struct radeon_clause source;
+ int ip;
+
+ source = *clause;
+ clause->Instructions = 0;
+ clause->NumInstructions = 0;
+ clause->ReservedInstructions = 0;
+
+ context.compiler = compiler;
+ context.dest = clause;
+ context.src = &source;
+
+ for(ip = 0; ip < source.NumInstructions; ++ip) {
+ struct prog_instruction *instr = source.Instructions + ip;
+ int i;
+
+ for(i = 0; i < num_transformations; ++i) {
+ struct radeon_program_transformation* t = transformations + i;
+
+ if (t->function(&context, instr, t->userData))
+ break;
+ }
+
+ if (i >= num_transformations) {
+ struct prog_instruction *tgt =
+ radeonClauseInsertInstructions(compiler, clause, clause->NumInstructions, 1);
+ _mesa_copy_instructions(tgt, instr, 1);
+ }
+ }
+
+ _mesa_free_instructions(source.Instructions, source.NumInstructions);
+}
diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/radeon_program.h
index 18091ac02a..3cde4d4f6f 100644
--- a/src/mesa/drivers/dri/r300/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/radeon_program.h
@@ -41,6 +41,13 @@ enum {
CLAUSE_TEX
};
+enum {
+ PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */
+};
+
+#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO)
+#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE)
+
/**
* A clause is simply a sequence of instructions that are executed
* in order.
@@ -107,4 +114,50 @@ void radeonCompilerEraseClauses(
int start,
int end);
+struct prog_instruction* radeonClauseInsertInstructions(
+ struct radeon_compiler *compiler,
+ struct radeon_clause *clause,
+ int position, int count);
+
+/**
+ *
+ */
+struct radeon_program_transform_context {
+ struct radeon_compiler *compiler;
+
+ /**
+ * Destination clause where new instructions must be written.
+ */
+ struct radeon_clause *dest;
+
+ /**
+ * Original clause that is currently being transformed.
+ */
+ struct radeon_clause *src;
+};
+
+/**
+ * A transformation that can be passed to \ref radeonClauseLinearTransform.
+ *
+ * The function will be called once for each instruction.
+ * It has to either emit the appropriate transformed code for the instruction
+ * and return GL_TRUE, or return GL_FALSE if it doesn't understand the
+ * instruction.
+ *
+ * The function gets passed the userData as last parameter.
+ */
+struct radeon_program_transformation {
+ GLboolean (*function)(
+ struct radeon_program_transform_context*,
+ struct prog_instruction*,
+ void*);
+ void *userData;
+};
+
+void radeonClauseLocalTransform(
+ struct radeon_compiler *compiler,
+ struct radeon_clause *clause,
+ int num_transformations,
+ struct radeon_program_transformation* transformations);
+
#endif