summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c321
1 files changed, 249 insertions, 72 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index d694725c9b..b1b14394b6 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -37,10 +37,6 @@
*
* \author Corbin Simpson <MostAwesomeDude@gmail.com>
*
- * \todo Depth write, WPOS/FOGC inputs
- *
- * \todo FogOption
- *
*/
#include "r500_fragprog.h"
@@ -51,7 +47,6 @@
#define PROG_CODE \
- struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
struct r500_fragment_program_code *code = &c->code->code.r500
#define error(fmt, args...) do { \
@@ -60,63 +55,80 @@
} while(0)
-static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode)
+struct branch_info {
+ int If;
+ int Else;
+ int Endif;
+};
+
+struct emit_state {
+ struct radeon_compiler * C;
+ struct r500_fragment_program_code * Code;
+
+ struct branch_info * Branches;
+ unsigned int CurrentBranchDepth;
+ unsigned int BranchesReserved;
+
+ unsigned int MaxBranchDepth;
+};
+
+static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
switch(opcode) {
- case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
- case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
- case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
- case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
- case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
- case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
+ case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
+ case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
+ case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
+ case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
+ case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
+ case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
default:
error("translate_rgb_op(%d): unknown opcode\n", opcode);
/* fall through */
- case OPCODE_NOP:
+ case RC_OPCODE_NOP:
/* fall through */
- case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
- case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
- case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
- case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
+ case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
+ case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
+ case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
+ case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
}
}
-static GLuint translate_alpha_op(struct r300_fragment_program_compiler *c, GLuint opcode)
+static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
switch(opcode) {
- case OPCODE_CMP: return R500_ALPHA_OP_CMP;
- case OPCODE_COS: return R500_ALPHA_OP_COS;
- case OPCODE_DDX: return R500_ALPHA_OP_MDH;
- case OPCODE_DDY: return R500_ALPHA_OP_MDV;
- case OPCODE_DP3: return R500_ALPHA_OP_DP;
- case OPCODE_DP4: return R500_ALPHA_OP_DP;
- case OPCODE_EX2: return R500_ALPHA_OP_EX2;
- case OPCODE_FRC: return R500_ALPHA_OP_FRC;
- case OPCODE_LG2: return R500_ALPHA_OP_LN2;
+ case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
+ case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
+ case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
+ case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
+ case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
+ case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
+ case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
+ case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
+ case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
default:
error("translate_alpha_op(%d): unknown opcode\n", opcode);
/* fall through */
- case OPCODE_NOP:
+ case RC_OPCODE_NOP:
/* fall through */
- case OPCODE_MAD: return R500_ALPHA_OP_MAD;
- case OPCODE_MAX: return R500_ALPHA_OP_MAX;
- case OPCODE_MIN: return R500_ALPHA_OP_MIN;
- case OPCODE_RCP: return R500_ALPHA_OP_RCP;
- case OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
- case OPCODE_SIN: return R500_ALPHA_OP_SIN;
+ case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
+ case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
+ case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
+ case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
+ case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
+ case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
}
}
-static GLuint fix_hw_swizzle(GLuint swz)
+static unsigned int fix_hw_swizzle(unsigned int swz)
{
if (swz == 5) swz = 6;
- if (swz == SWIZZLE_NIL) swz = 4;
+ if (swz == RC_SWIZZLE_UNUSED) swz = 4;
return swz;
}
-static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg)
+static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
{
- GLuint t = inst->RGB.Arg[arg].Source;
+ unsigned int t = inst->RGB.Arg[arg].Source;
int comp;
t |= inst->RGB.Arg[arg].Negate << 11;
t |= inst->RGB.Arg[arg].Abs << 12;
@@ -127,39 +139,57 @@ static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg)
return t;
}
-static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i)
+static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
{
- GLuint t = inst->Alpha.Arg[i].Source;
+ unsigned int t = inst->Alpha.Arg[i].Source;
t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
t |= inst->Alpha.Arg[i].Negate << 5;
t |= inst->Alpha.Arg[i].Abs << 6;
return t;
}
-static void use_temporary(struct r500_fragment_program_code* code, GLuint index)
+static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
+{
+ switch(func) {
+ case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
+ case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
+ case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
+ case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
+ default:
+ rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
+ return 0;
+ }
+}
+
+static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
{
if (index > code->max_temp_idx)
code->max_temp_idx = index;
}
-static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
+static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
{
- if (!src.Constant)
+ if (src.File == RC_FILE_CONSTANT) {
+ return src.Index | 0x100;
+ } else if (src.File == RC_FILE_TEMPORARY) {
use_temporary(code, src.Index);
- return src.Index | src.Constant << 8;
+ return src.Index;
+ }
+
+ return 0;
}
/**
* Emit a paired ALU instruction.
*/
-static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
+static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
{
PROG_CODE;
if (code->inst_end >= 511) {
error("emit_alu: Too many instructions");
- return GL_FALSE;
+ return;
}
int ip = ++code->inst_end;
@@ -167,17 +197,22 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
- if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask)
+ if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
code->inst[ip].inst0 = R500_INST_TYPE_OUT;
- else
+ if (inst->WriteALUResult) {
+ error("%s: cannot write output and ALU result at the same time");
+ return;
+ }
+ } else {
code->inst[ip].inst0 = R500_INST_TYPE_ALU;
+ }
code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
if (inst->Alpha.DepthWriteMask) {
code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
- c->code->writes_depth = GL_TRUE;
+ c->code->writes_depth = 1;
}
code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
@@ -206,12 +241,21 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
- return GL_TRUE;
+ if (inst->WriteALUResult) {
+ code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
+
+ if (inst->WriteALUResult == RC_ALURESULT_X)
+ code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
+ else
+ code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
+
+ code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
+ }
}
-static GLuint translate_strq_swizzle(GLuint swizzle)
+static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
- GLuint swiz = 0;
+ unsigned int swiz = 0;
int i;
for (i = 0; i < 4; i++)
swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2;
@@ -221,67 +265,193 @@ static GLuint translate_strq_swizzle(GLuint swizzle)
/**
* Emit a single TEX instruction
*/
-static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *inst)
+static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
{
PROG_CODE;
if (code->inst_end >= 511) {
error("emit_tex: Too many instructions");
- return GL_FALSE;
+ return 0;
}
int ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_TEX
- | (inst->WriteMask << 11)
+ | (inst->DstReg.WriteMask << 11)
| R500_INST_TEX_SEM_WAIT;
code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
| R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
- if (inst->TexSrcTarget == TEXTURE_RECT_INDEX)
- code->inst[ip].inst1 |= R500_TEX_UNSCALED;
+ if (inst->TexSrcTarget == RC_TEXTURE_RECT)
+ code->inst[ip].inst1 |= R500_TEX_UNSCALED;
switch (inst->Opcode) {
- case RADEON_OPCODE_KIL:
+ case RC_OPCODE_KIL:
code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
break;
- case RADEON_OPCODE_TEX:
+ case RC_OPCODE_TEX:
code->inst[ip].inst1 |= R500_TEX_INST_LD;
break;
- case RADEON_OPCODE_TXB:
+ case RC_OPCODE_TXB:
code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
break;
- case RADEON_OPCODE_TXP:
+ case RC_OPCODE_TXP:
code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
break;
default:
error("emit_tex can't handle opcode %x\n", inst->Opcode);
}
- code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcIndex)
- | (translate_strq_swizzle(inst->SrcSwizzle) << 8)
- | R500_TEX_DST_ADDR(inst->DestIndex)
+ use_temporary(code, inst->SrcReg[0].Index);
+ if (inst->Opcode != RC_OPCODE_KIL)
+ use_temporary(code, inst->DstReg.Index);
+
+ code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
+ | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
+ | R500_TEX_DST_ADDR(inst->DstReg.Index)
| R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
| R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
- return GL_TRUE;
+ return 1;
}
-static const struct radeon_pair_handler pair_handler = {
- .EmitPaired = emit_paired,
- .EmitTex = emit_tex,
- .MaxHwTemps = 128
-};
+static void grow_branches(struct emit_state * s)
+{
+ unsigned int newreserved = s->BranchesReserved * 2;
+ struct branch_info * newbranches;
+
+ if (!newreserved)
+ newreserved = 4;
+
+ newbranches = memory_pool_malloc(&s->C->Pool, newreserved*sizeof(struct branch_info));
+ memcpy(newbranches, s->Branches, s->CurrentBranchDepth*sizeof(struct branch_info));
+
+ s->Branches = newbranches;
+ s->BranchesReserved = newreserved;
+}
+
+static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
+{
+ if (s->Code->inst_end >= 511) {
+ rc_error(s->C, "emit_tex: Too many instructions");
+ return;
+ }
+
+ unsigned int newip = ++s->Code->inst_end;
+
+ s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
+
+ if (inst->U.I.Opcode == RC_OPCODE_IF) {
+ if (s->CurrentBranchDepth >= 32) {
+ rc_error(s->C, "Branch depth exceeds hardware limit");
+ return;
+ }
+
+ if (s->CurrentBranchDepth >= s->BranchesReserved)
+ grow_branches(s);
+
+ struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++];
+ branch->If = newip;
+ branch->Else = -1;
+ branch->Endif = -1;
+
+ if (s->CurrentBranchDepth > s->MaxBranchDepth)
+ s->MaxBranchDepth = s->CurrentBranchDepth;
+
+ /* actual instruction is filled in at ENDIF time */
+ } else if (inst->U.I.Opcode == RC_OPCODE_ELSE) {
+ if (!s->CurrentBranchDepth) {
+ rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+ return;
+ }
+
+ struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+ branch->Else = newip;
+
+ /* actual instruction is filled in at ENDIF time */
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ if (!s->CurrentBranchDepth) {
+ rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+ return;
+ }
+
+ struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+ branch->Endif = newip;
+
+ s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
+ | R500_FC_B_OP0_INCR /* increment branch counter if stay */
+ ;
+
+ if (branch->Else >= 0) {
+ /* increment branch counter also if jump */
+ s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
+ s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
+
+ s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_B_ELSE /* all active pixels want to jump */
+ | R500_FC_B_OP0_NONE /* no counter op if stay */
+ | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
+ | R500_FC_B_POP_CNT(1)
+ ;
+ s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+ } else {
+ /* don't touch branch counter on jump */
+ s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
+ s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+ }
+
+ s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+ | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+ | R500_FC_B_OP1_NONE /* no branch counter if stay */
+ | R500_FC_B_POP_CNT(1)
+ ;
+ s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+
+ s->CurrentBranchDepth--;
+ } else {
+ rc_error(s->C, "%s: unknown opcode %i\n", __FUNCTION__, inst->U.I.Opcode);
+ }
+}
void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
{
+ struct emit_state s;
struct r500_fragment_program_code *code = &compiler->code->code.r500;
- _mesa_bzero(code, sizeof(*code));
+ memset(&s, 0, sizeof(s));
+ s.C = &compiler->Base;
+ s.Code = code;
+
+ memset(code, 0, sizeof(*code));
code->max_temp_idx = 1;
code->inst_end = -1;
- radeonPairProgram(compiler, &pair_handler, compiler);
+ for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
+ inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+ inst = inst->Next) {
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->IsFlowControl) {
+ emit_flowcontrol(&s, inst);
+ } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+ continue;
+ } else {
+ emit_tex(compiler, &inst->U.I);
+ }
+ } else {
+ emit_paired(compiler, &inst->U.P);
+ }
+ }
+
+ if (code->max_temp_idx >= 128)
+ rc_error(&compiler->Base, "Too many hardware temporaries used");
+
if (compiler->Base.Error)
return;
@@ -296,4 +466,11 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
int ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
+
+ if (s.MaxBranchDepth >= 4) {
+ if (code->max_temp_idx < 1)
+ code->max_temp_idx = 1;
+
+ code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
+ }
}