diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300')
15 files changed, 578 insertions, 54 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index 3167d49bca..d0eb170784 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -23,6 +23,7 @@ C_SOURCES = \ radeon_dataflow_deadcode.c \ radeon_dataflow_swizzles.c \ radeon_optimize.c \ + radeon_remove_constants.c \ radeon_rename_regs.c \ r3xx_fragprog.c \ r300_fragprog.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript index c6f47a6f8a..847857b142 100755 --- a/src/mesa/drivers/dri/r300/compiler/SConscript +++ b/src/mesa/drivers/dri/r300/compiler/SConscript @@ -22,6 +22,7 @@ r300compiler = env.ConvenienceLibrary( 'radeon_pair_schedule.c', 'radeon_pair_regalloc.c', 'radeon_optimize.c', + 'radeon_remove_constants.c', 'radeon_rename_regs.c', 'radeon_emulate_branches.c', 'radeon_emulate_loops.c', diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index d2fa816894..8613ec5109 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -30,6 +30,7 @@ #include "radeon_program_alu.h" #include "radeon_program_tex.h" #include "radeon_rename_regs.h" +#include "radeon_remove_constants.h" #include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" #include "r500_fragprog.h" @@ -180,6 +181,13 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "after dataflow passes"); + if (c->Base.remove_unused_constants) { + rc_remove_unused_constants(&c->Base, + &c->code->constants_remap_table); + + debug_program_log(c, "after constants cleanup"); + } + if(!c->Base.is_r500) { /* This pass makes it easier for the scheduler to group TEX * instructions and reduces the chances of creating too @@ -224,4 +232,14 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) r300FragmentProgramDump(c->code); } } + + /* Check the number of constants. */ + if (!c->Base.Error) { + unsigned max = c->Base.is_r500 ? R500_PFS_NUM_CONST_REGS : R300_PFS_NUM_CONST_REGS; + + if (c->Base.Program.Constants.Count > max) { + rc_error(&c->Base, "Too many constants. Max: %i, Got: %i\n", + max, c->Base.Program.Constants.Count); + } + } } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 666c9c2a7a..b05b3aabf3 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -31,6 +31,7 @@ #include "radeon_swizzle.h" #include "radeon_emulate_branches.h" #include "radeon_emulate_loops.h" +#include "radeon_remove_constants.h" struct loop { int BgnLoop; @@ -465,7 +466,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi { struct rc_instruction *rci; - struct loop * loops; + struct loop * loops = NULL; int current_loop_depth = 0; int loops_reserved = 0; @@ -484,6 +485,21 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi if (!valid_dst(compiler->code, &vpi->DstReg)) continue; + if (rc_get_opcode_info(vpi->Opcode)->HasDstReg) { + /* Relative addressing of destination operands is not supported yet. */ + if (vpi->DstReg.RelAddr) { + rc_error(&compiler->Base, "Vertex program does not support relative " + "addressing of destination operands (yet).\n"); + return; + } + + /* Neither is Saturate. */ + if (vpi->SaturateMode != RC_SATURATE_NONE) { + rc_error(&compiler->Base, "Vertex program does not support the Saturate " + "modifier (yet).\n"); + } + } + if (compiler->code->length >= R500_VS_MAX_ALU_DWORDS || (compiler->code->length >= R300_VS_MAX_ALU_DWORDS && !compiler->Base.is_r500)) { rc_error(&compiler->Base, "Vertex program has too many instructions\n"); @@ -543,10 +559,16 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi } case RC_OPCODE_ENDLOOP: { - struct loop * l = &loops[current_loop_depth - 1]; - unsigned int act_addr = l->BgnLoop - 1; - unsigned int last_addr = (compiler->code->length / 4) - 1; - unsigned int ret_addr = l->BgnLoop; + struct loop * l; + unsigned int act_addr; + unsigned int last_addr; + unsigned int ret_addr; + + assert(loops); + l = &loops[current_loop_depth - 1]; + act_addr = l->BgnLoop - 1; + last_addr = (compiler->code->length / 4) - 1; + ret_addr = l->BgnLoop; if (loops_reserved >= R300_VS_MAX_FC_OPS) { rc_error(&compiler->Base, @@ -624,10 +646,9 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c struct temporary_allocation * ta; unsigned int i, j; - compiler->code->num_temporaries = 0; memset(hwtemps, 0, sizeof(hwtemps)); - /* Pass 1: Count original temporaries and allocate structures */ + /* Pass 1: Count original temporaries. */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); @@ -645,12 +666,30 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c } } } + compiler->code->num_temporaries = num_orig_temps; + + /* Pass 2: If there is relative addressing of temporaries, we cannot change register indices. Give up. */ + for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasDstReg) + if (inst->U.I.DstReg.RelAddr) + return; + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[i].RelAddr) { + return; + } + } + } + + compiler->code->num_temporaries = 0; ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, sizeof(struct temporary_allocation) * num_orig_temps); memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); - /* Pass 2: Determine original temporary lifetimes */ + /* Pass 3: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); /* Instructions inside of loops need to use the ENDLOOP @@ -685,7 +724,7 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c } } - /* Pass 3: Register allocation */ + /* Pass 4: Register allocation */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); @@ -853,6 +892,76 @@ static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) return 1; } +static void transform_negative_addressing(struct r300_vertex_program_compiler *c, + struct rc_instruction *arl, + struct rc_instruction *end, + int min_offset) +{ + struct rc_instruction *inst, *add; + unsigned const_swizzle; + + /* Transform ARL */ + add = rc_insert_new_instruction(&c->Base, arl->Prev); + add->U.I.Opcode = RC_OPCODE_ADD; + add->U.I.DstReg.File = RC_FILE_TEMPORARY; + add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base); + add->U.I.DstReg.WriteMask = RC_MASK_X; + add->U.I.SrcReg[0] = arl->U.I.SrcReg[0]; + add->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants, + min_offset, &const_swizzle); + add->U.I.SrcReg[1].Swizzle = const_swizzle; + + arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index; + arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX; + + /* Rewrite offsets up to and excluding inst. */ + for (inst = arl->Next; inst != end; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) + if (inst->U.I.SrcReg[i].RelAddr) + inst->U.I.SrcReg[i].Index -= min_offset; + } +} + +static void rc_emulate_negative_addressing(struct r300_vertex_program_compiler *c) +{ + struct rc_instruction *inst, *lastARL = NULL; + int min_offset = 0; + + for (inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (inst->U.I.Opcode == RC_OPCODE_ARL) { + if (lastARL != NULL && min_offset < 0) + transform_negative_addressing(c, lastARL, inst, min_offset); + + lastARL = inst; + min_offset = 0; + continue; + } + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].RelAddr && + inst->U.I.SrcReg[i].Index < 0) { + /* ARL must precede any indirect addressing. */ + if (lastARL == NULL) { + rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL."); + return; + } + + if (inst->U.I.SrcReg[i].Index < min_offset) + min_offset = inst->U.I.SrcReg[i].Index; + } + } + } + + if (lastARL != NULL && min_offset < 0) + transform_negative_addressing(c, lastARL, inst, min_offset); +} + static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where) { if (c->Base.Debug) { @@ -868,44 +977,56 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { }; -void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) +void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) { struct emulate_loop_state loop_state; - compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; + c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; - addArtificialOutputs(compiler); + addArtificialOutputs(c); - debug_program_log(compiler, "before compilation"); + debug_program_log(c, "before compilation"); - if (compiler->Base.is_r500) - rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU); + if (c->Base.is_r500) + rc_transform_loops(&c->Base, &loop_state, R500_VS_MAX_ALU); else - rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU); + rc_transform_loops(&c->Base, &loop_state, R300_VS_MAX_ALU); + if (c->Base.Error) + return; - debug_program_log(compiler, "after emulate loops"); + debug_program_log(c, "after emulate loops"); - if (!compiler->Base.is_r500) { - rc_emulate_branches(&compiler->Base); - debug_program_log(compiler, "after emulate branches"); + if (!c->Base.is_r500) { + rc_emulate_branches(&c->Base); + if (c->Base.Error) + return; + debug_program_log(c, "after emulate branches"); } - if (compiler->Base.is_r500) { + rc_emulate_negative_addressing(c); + + debug_program_log(c, "after negative addressing emulation"); + + if (c->Base.is_r500) { struct radeon_program_transformation transformations[] = { { &r300_transform_vertex_alu, 0 }, { &r300_transform_trig_scale_vertex, 0 } }; - radeonLocalTransform(&compiler->Base, 2, transformations); + radeonLocalTransform(&c->Base, 2, transformations); + if (c->Base.Error) + return; - debug_program_log(compiler, "after native rewrite"); + debug_program_log(c, "after native rewrite"); } else { struct radeon_program_transformation transformations[] = { { &r300_transform_vertex_alu, 0 }, { &radeonTransformTrigSimple, 0 } }; - radeonLocalTransform(&compiler->Base, 2, transformations); + radeonLocalTransform(&c->Base, 2, transformations); + if (c->Base.Error) + return; - debug_program_log(compiler, "after native rewrite"); + debug_program_log(c, "after native rewrite"); /* Note: This pass has to be done seperately from ALU rewrite, * because it needs to check every instruction. @@ -913,9 +1034,11 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) struct radeon_program_transformation transformations2[] = { { &transform_nonnative_modifiers, 0 }, }; - radeonLocalTransform(&compiler->Base, 1, transformations2); + radeonLocalTransform(&c->Base, 1, transformations2); + if (c->Base.Error) + return; - debug_program_log(compiler, "after emulate modifiers"); + debug_program_log(c, "after emulate modifiers"); } { @@ -926,30 +1049,58 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) struct radeon_program_transformation transformations[] = { { &transform_source_conflicts, 0 }, }; - radeonLocalTransform(&compiler->Base, 1, transformations); + radeonLocalTransform(&c->Base, 1, transformations); + if (c->Base.Error) + return; } - debug_program_log(compiler, "after source conflict resolve"); + debug_program_log(c, "after source conflict resolve"); - rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler); + rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_used, c); + if (c->Base.Error) + return; - debug_program_log(compiler, "after deadcode"); + debug_program_log(c, "after deadcode"); - rc_dataflow_swizzles(&compiler->Base); + rc_dataflow_swizzles(&c->Base); + if (c->Base.Error) + return; - allocate_temporary_registers(compiler); + debug_program_log(c, "after dataflow"); - debug_program_log(compiler, "after dataflow"); + allocate_temporary_registers(c); + if (c->Base.Error) + return; - translate_vertex_program(compiler); + debug_program_log(c, "after register allocation"); - rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants); + if (c->Base.remove_unused_constants) { + rc_remove_unused_constants(&c->Base, + &c->code->constants_remap_table); + if (c->Base.Error) + return; - compiler->code->InputsRead = compiler->Base.Program.InputsRead; - compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten; + debug_program_log(c, "after constants cleanup"); + } - if (compiler->Base.Debug) { + translate_vertex_program(c); + if (c->Base.Error) + return; + + rc_constants_copy(&c->code->constants, &c->Base.Program.Constants); + + c->code->InputsRead = c->Base.Program.InputsRead; + c->code->OutputsWritten = c->Base.Program.OutputsWritten; + + if (c->Base.Debug) { fprintf(stderr, "Final vertex program code:\n"); - r300_vertex_program_dump(compiler); + r300_vertex_program_dump(c); + } + + /* Check the number of constants. */ + if (!c->Base.Error && + c->Base.Program.Constants.Count > 256) { + rc_error(&c->Base, "Too many constants. Max: 256, Got: %i\n", + c->Base.Program.Constants.Count); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index 896246d203..f76676fae8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -235,6 +235,7 @@ struct rX00_fragment_program_code { unsigned writes_depth:1; struct rc_constant_list constants; + unsigned *constants_remap_table; }; @@ -266,6 +267,7 @@ struct r300_vertex_program_code { int outputs[VSF_MAX_OUTPUTS]; struct rc_constant_list constants; + unsigned *constants_remap_table; uint32_t InputsRead; uint32_t OutputsWritten; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 7c42eb3ae5..5155b912e1 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -39,9 +39,12 @@ struct radeon_compiler { char * ErrorMsg; /* Hardware specification. */ - unsigned is_r500; + unsigned is_r500:1; unsigned max_temp_regs; + /* Whether to remove unused constants and empty holes in constant space. */ + unsigned remove_unused_constants:1; + /** * Variables used internally, not be touched by callers * of the compiler diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index faf531b412..acdb371de9 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -157,8 +157,12 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); if (pused) { usedmask = *pused & inst->U.I.DstReg.WriteMask; - *pused &= ~usedmask; + if (!inst->U.I.DstReg.RelAddr) + *pused &= ~usedmask; } + + if (inst->U.I.DstReg.RelAddr) + mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); } insts->WriteMask |= usedmask; @@ -213,6 +217,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f { struct deadcode_state s; unsigned int nr_instructions; + unsigned has_temp_reladdr_src = 0; memset(&s, 0, sizeof(s)); s.C = c; @@ -300,6 +305,30 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); } } + + if (!has_temp_reladdr_src) { + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[i].RelAddr) { + /* If there is a register read from a temporary file with relative addressing, + * mark all preceding written registers as used. */ + for (struct rc_instruction *ptr = inst->Prev; + ptr != &c->Program.Instructions; + ptr = ptr->Prev) { + if (opcode->HasDstReg && + ptr->U.I.DstReg.File == RC_FILE_TEMPORARY && + ptr->U.I.DstReg.WriteMask) { + mark_used(&s, + ptr->U.I.DstReg.File, + ptr->U.I.DstReg.Index, + ptr->U.I.DstReg.WriteMask); + } + } + + has_temp_reladdr_src = 1; + } + } + } } update_instruction(&s, inst); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index 2ea830be7f..da495a3afa 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -95,6 +95,12 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .IsComponentwise = 1 }, { + .Opcode = RC_OPCODE_DP2, + .Name = "DP2", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { .Opcode = RC_OPCODE_DP3, .Name = "DP3", .NumSrcRegs = 2, @@ -295,6 +301,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .IsComponentwise = 1 }, { + .Opcode = RC_OPCODE_SSG, + .Name = "SSG", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { .Opcode = RC_OPCODE_SUB, .Name = "SUB", .NumSrcRegs = 2, @@ -435,6 +448,10 @@ void rc_compute_sources_for_writemask( case RC_OPCODE_ARL: srcmasks[0] |= RC_MASK_X; break; + case RC_OPCODE_DP2: + srcmasks[0] |= RC_MASK_XY; + srcmasks[1] |= RC_MASK_XY; + break; case RC_OPCODE_DP3: srcmasks[0] |= RC_MASK_XYZ; srcmasks[1] |= RC_MASK_XYZ; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index 6e18d6eb3f..d3f639c870 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -64,6 +64,9 @@ typedef enum { * dst.c = d src0.c / dy */ RC_OPCODE_DDY, + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */ + RC_OPCODE_DP2, + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ RC_OPCODE_DP3, @@ -154,6 +157,9 @@ typedef enum { /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ RC_OPCODE_SNE, + /** vec4 instruction: dst.c = (src0.c < 0 ?) -1 : ((src0.c > 0) : 1 : 0) */ + RC_OPCODE_SSG, + /** vec4 instruction: dst.c = src0.c - src1.c */ RC_OPCODE_SUB, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index 407a0a55ee..8327e9aced 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -230,6 +230,34 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, } +static void check_opcode_support(struct r300_fragment_program_compiler *c, + struct rc_sub_instruction *inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg) { + if (inst->DstReg.RelAddr) { + rc_error(&c->Base, "Fragment program does not support relative addressing " + "of destination operands.\n"); + return; + } + + if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) { + rc_error(&c->Base, "Fragment program does not support signed Saturate.\n"); + return; + } + } + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->SrcReg[i].RelAddr) { + rc_error(&c->Base, "Fragment program does not support relative addressing " + " of source operands.\n"); + return; + } + } +} + + /** * Translate all ALU instructions into corresponding pair instructions, * performing no other changes. @@ -249,6 +277,8 @@ void rc_pair_translate(struct r300_fragment_program_compiler *c) struct rc_sub_instruction copy = inst->U.I; + check_opcode_support(c, ©); + final_rewrite(©); inst->Type = RC_INSTRUCTION_PAIR; set_pair_instruction(c, &inst->U.P, ©); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 857aae5514..704a7bb2d2 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -216,18 +216,18 @@ static void transform_CEIL(struct radeon_compiler* c, rc_remove_instruction(inst); } -static void transform_DP3(struct radeon_compiler* c, +static void transform_DP2(struct radeon_compiler* c, struct rc_instruction* inst) { struct rc_src_register src0 = inst->U.I.SrcReg[0]; struct rc_src_register src1 = inst->U.I.SrcReg[1]; - src0.Negate &= ~RC_MASK_W; - src0.Swizzle &= ~(7 << (3 * 3)); - src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); - src1.Negate &= ~RC_MASK_W; - src1.Swizzle &= ~(7 << (3 * 3)); - src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); - emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + src0.Negate &= ~(RC_MASK_Z | RC_MASK_W); + src0.Swizzle &= ~(63 << (3 * 2)); + src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); + src1.Negate &= ~(RC_MASK_Z | RC_MASK_W); + src1.Swizzle &= ~(63 << (3 * 2)); + src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); + emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); rc_remove_instruction(inst); } @@ -464,6 +464,43 @@ static void transform_SNE(struct radeon_compiler* c, rc_remove_instruction(inst); } +static void transform_SSG(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* result = sign(x) + * + * CMP tmp0, -x, 1, 0 + * CMP tmp1, x, 1, 0 + * ADD result, tmp0, -tmp1; + */ + unsigned tmp0, tmp1; + + /* 0 < x */ + tmp0 = rc_find_free_temporary(c); + emit3(c, inst->Prev, RC_OPCODE_CMP, 0, + dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + negate(inst->U.I.SrcReg[0]), + builtin_one, + builtin_zero); + + /* x < 0 */ + tmp1 = rc_find_free_temporary(c); + emit3(c, inst->Prev, RC_OPCODE_CMP, 0, + dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + builtin_one, + builtin_zero); + + /* Either both are zero, or one of them is one and the other is zero. */ + /* result = tmp0 - tmp1 */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp0), + negate(srcreg(RC_FILE_TEMPORARY, tmp1))); + + rc_remove_instruction(inst); +} + static void transform_SUB(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -516,6 +553,7 @@ int radeonTransformALU( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; @@ -530,6 +568,7 @@ int radeonTransformALU( case RC_OPCODE_SLE: transform_SLE(c, inst); return 1; case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; + case RC_OPCODE_SSG: transform_SSG(c, inst); return 1; case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; @@ -577,6 +616,29 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, rc_remove_instruction(inst); } +static void transform_r300_vertex_DP2(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_instruction *next_inst = inst->Next; + transform_DP2(c, inst); + next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4; +} + +static void transform_r300_vertex_DP3(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~RC_MASK_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~RC_MASK_W; + src1.Swizzle &= ~(7 << (3 * 3)); + src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -672,6 +734,41 @@ static void transform_r300_vertex_SLE(struct radeon_compiler* c, inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; } +static void transform_r300_vertex_SSG(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* result = sign(x) + * + * SLT tmp0, 0, x; + * SLT tmp1, x, 0; + * ADD result, tmp0, -tmp1; + */ + unsigned tmp0, tmp1; + + /* 0 < x */ + tmp0 = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + builtin_zero, + inst->U.I.SrcReg[0]); + + /* x < 0 */ + tmp1 = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + builtin_zero); + + /* Either both are zero, or one of them is one and the other is zero. */ + /* result = tmp0 - tmp1 */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp0), + negate(srcreg(RC_FILE_TEMPORARY, tmp1))); + + rc_remove_instruction(inst); +} + /** * For use with radeonLocalTransform, this transforms non-native ALU * instructions of the r300 up to r500 vertex engine. @@ -685,7 +782,8 @@ int r300_transform_vertex_alu( case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; - case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; + case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; + case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1; @@ -705,6 +803,7 @@ int r300_transform_vertex_alu( return 1; } return 0; + case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1; case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index 9c4b65f4c0..ddce590ee6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -117,8 +117,8 @@ int radeonTransformTEX( struct rc_instruction * inst_rcp = NULL; struct rc_instruction * inst_mad; struct rc_instruction * inst_cmp; - unsigned tmp_texsample = rc_find_free_temporary(c); - unsigned tmp_sum = rc_find_free_temporary(c); + unsigned tmp_texsample; + unsigned tmp_sum; unsigned tmp_recip_w = 0; int pass, fail, tex; @@ -126,6 +126,7 @@ int radeonTransformTEX( struct rc_dst_register output_reg = inst->U.I.DstReg; /* Redirect TEX to a new temp. */ + tmp_texsample = rc_find_free_temporary(c); inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = tmp_texsample; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; @@ -144,6 +145,7 @@ int radeonTransformTEX( } /* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). */ + tmp_sum = rc_find_free_temporary(c); inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mad->U.I.DstReg.Index = tmp_sum; @@ -199,6 +201,8 @@ int radeonTransformTEX( inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); + + assert(tmp_texsample != tmp_sum && tmp_sum != tmp_recip_w); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c new file mode 100644 index 0000000000..be89e9fa5b --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_remove_constants.h" + +void rc_remove_unused_constants(struct radeon_compiler *c, + unsigned **out_remap_table) +{ + unsigned char *const_used; + unsigned *remap_table; + unsigned *inv_remap_table; + unsigned has_rel_addr = 0; + unsigned is_identity = 1; + unsigned are_externals_remapped = 0; + struct rc_constant *constants = c->Program.Constants.Constants; + + if (!c->Program.Constants.Count) { + *out_remap_table = NULL; + return; + } + + const_used = malloc(c->Program.Constants.Count); + memset(const_used, 0, c->Program.Constants.Count); + + /* Pass 1: Mark used constants. */ + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) { + if (inst->U.I.SrcReg[i].RelAddr) { + has_rel_addr = 1; + } else { + const_used[inst->U.I.SrcReg[i].Index] = 1; + } + } + } + } + + /* Pass 2: If there is relative addressing, mark all externals as used. */ + if (has_rel_addr) { + for (unsigned i = 0; i < c->Program.Constants.Count; i++) + if (constants[i].Type == RC_CONSTANT_EXTERNAL) + const_used[i] = 1; + } + + /* Pass 3: Make the remapping table and remap constants. + * This pass removes unused constants simply by overwriting them by other constants. */ + remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); + inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned)); + unsigned new_count = 0; + + for (unsigned i = 0; i < c->Program.Constants.Count; i++) { + if (const_used[i]) { + remap_table[new_count] = i; + inv_remap_table[i] = new_count; + + if (i != new_count) { + if (constants[i].Type == RC_CONSTANT_EXTERNAL) + are_externals_remapped = 1; + + constants[new_count] = constants[i]; + is_identity = 0; + } + new_count++; + } + } + + /* is_identity ==> new_count == old_count + * !is_identity ==> new_count < old_count */ + assert( is_identity || new_count < c->Program.Constants.Count); + assert(!(has_rel_addr && are_externals_remapped)); + + /* Pass 4: Redirect reads of all constants to their new locations. */ + if (!is_identity) { + for (struct rc_instruction *inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) { + inst->U.I.SrcReg[i].Index = inv_remap_table[inst->U.I.SrcReg[i].Index]; + } + } + } + + } + + /* Set the new constant count. Note that new_count may be less than + * Count even though the remapping function is identity. In that case, + * the constants have been removed at the end of the array. */ + c->Program.Constants.Count = new_count; + + if (are_externals_remapped) { + *out_remap_table = remap_table; + } else { + *out_remap_table = NULL; + free(remap_table); + } + + free(const_used); + free(inv_remap_table); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h new file mode 100644 index 0000000000..0d3a26ca1c --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_REMOVE_CONSTANTS_H +#define RADEON_REMOVE_CONSTANTS_H + +#include "radeon_compiler.h" + +void rc_remove_unused_constants(struct radeon_compiler *c, + unsigned **out_remap_table); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index 95f4306f60..7b6521c748 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -38,7 +38,6 @@ #include "r300_fragprog_common.h" -#include "program/prog_parameter.h" #include "program/prog_print.h" #include "compiler/radeon_compiler.h" |