diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300')
32 files changed, 407 insertions, 292 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c index 782671bac0..deba9ca834 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -49,6 +49,11 @@ static void presub_string(char out[10], unsigned int inst) } } +static int get_msb(unsigned int bit, unsigned int r400_ext_addr) +{ + return (r400_ext_addr & bit) ? 1 << 5 : 0; +} + /* just some random things... */ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) { @@ -61,16 +66,21 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) fprintf(stderr, "Hardware program\n"); fprintf(stderr, "----------------\n"); + if (c->is_r400) { + fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext); + } for (n = 0; n <= (code->config & 3); n++) { uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; - int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT; - int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT; + unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) + + (((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6); + unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) + + (((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6); int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; - fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " - "alu_end: %d, tex_end: %d (code_addr: %08x)\n", n, + fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, " + "alu_end: %u, tex_end: %d (code_addr: %08x)\n", n, alu_offset, tex_offset, alu_end, tex_end, code_addr); if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { @@ -125,11 +135,15 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) for (j = 0; j < 3; ++j) { int regc = code->alu.inst[i].rgb_addr >> (j * 6); int rega = code->alu.inst[i].alpha_addr >> (j * 6); + int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j), + code->alu.inst[i].r400_ext_addr); + int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j), + code->alu.inst[i].r400_ext_addr); sprintf(srcc[j], "%c%i", - (regc & 32) ? 'c' : 't', regc & 31); + (regc & 32) ? 'c' : 't', (regc & 31) | msbc); sprintf(srca[j], "%c%i", - (rega & 32) ? 'c' : 't', rega & 31); + (rega & 32) ? 'c' : 't', (rega & 31) | msba); } dstc[0] = 0; @@ -141,9 +155,14 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) (code->alu.inst[i]. rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); if (flags[0] != 0) { + unsigned int msb = get_msb( + R400_ADDRD_EXT_RGB_MSB_BIT, + code->alu.inst[i].r400_ext_addr); + sprintf(dstc, "t%i.%s ", - (code->alu.inst[i]. - rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + ((code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) + & 31) | msb, flags); } sprintf(flags, "%s%s%s", @@ -166,9 +185,13 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) dsta[0] = 0; if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { + unsigned int msb = get_msb( + R400_ADDRD_EXT_A_MSB_BIT, + code->alu.inst[i].r400_ext_addr); sprintf(dsta, "t%i.w ", - (code->alu.inst[i]. - alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + ((code->alu.inst[i]. + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31) + | msb); } if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { sprintf(tmp, "o%i.w ", diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 1db8678e89..28d132a5fe 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -64,6 +64,20 @@ struct r300_emit_state { __FILE__, __FUNCTION__, ##args); \ } while(0) +static unsigned int get_msbs_alu(unsigned int bits) +{ + return (bits >> 6) & 0x7; +} + +/** + * @param lsbs The number of least significant bits + */ +static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs) +{ + return (bits >> lsbs) & 0x15; +} + +#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask) /** * Mark a temporary register as used. @@ -83,7 +97,7 @@ static unsigned int use_source(struct r300_fragment_program_code* code, struct r return src.Index | (1 << 5); } else if (src.File == RC_FILE_TEMPORARY) { use_temporary(code, src.Index); - return src.Index; + return src.Index & 0x1f; } return 0; @@ -151,11 +165,19 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); for(j = 0; j < 3; ++j) { + /* Set the RGB address */ unsigned int src = use_source(code, inst->RGB.Src[j]); unsigned int arg; + if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); + code->alu.inst[ip].rgb_addr |= src << (6*j); + /* Set the Alpha address */ src = use_source(code, inst->Alpha.Src[j]); + if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); + code->alu.inst[ip].alpha_addr |= src << (6*j); arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); @@ -223,8 +245,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i if (inst->RGB.WriteMask) { use_temporary(code, inst->RGB.DestIndex); + if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT; code->alu.inst[ip].rgb_addr |= - (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | + ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) | (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); } if (inst->RGB.OutputWriteMask) { @@ -236,8 +260,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i if (inst->Alpha.WriteMask) { use_temporary(code, inst->Alpha.DestIndex); + if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT; code->alu.inst[ip].alpha_addr |= - (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | + ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_REG; } if (inst->Alpha.OutputWriteMask) { @@ -269,6 +295,8 @@ static int finish_node(struct r300_emit_state * emit) unsigned tex_offset; unsigned tex_end; + unsigned int alu_offset_msbs, alu_end_msbs; + if (code->alu.length == emit->node_first_alu) { /* Generate a single NOP for this node */ struct rc_pair_instruction inst; @@ -301,13 +329,48 @@ static int finish_node(struct r300_emit_state * emit) * * Also note that the register specification from AMD is slightly * incorrect in its description of this register. */ - code->code_addr[emit->current_node] = - (alu_offset << R300_ALU_START_SHIFT) | - (alu_end << R300_ALU_SIZE_SHIFT) | - (tex_offset << R300_TEX_START_SHIFT) | - (tex_end << R300_TEX_SIZE_SHIFT) | - emit->node_flags; - + code->code_addr[emit->current_node] = + ((alu_offset << R300_ALU_START_SHIFT) + & R300_ALU_START_MASK) + | ((alu_end << R300_ALU_SIZE_SHIFT) + & R300_ALU_SIZE_MASK) + | ((tex_offset << R300_TEX_START_SHIFT) + & R300_TEX_START_MASK) + | ((tex_end << R300_TEX_SIZE_SHIFT) + & R300_TEX_SIZE_MASK) + | emit->node_flags + | (get_msbs_tex(tex_offset, 5) + << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 5) + << R400_TEX_SIZE_MSB_SHIFT) + ; + + /* Write r400 extended instruction fields. These will be ignored on + * r300 cards. */ + alu_offset_msbs = get_msbs_alu(alu_offset); + alu_end_msbs = get_msbs_alu(alu_end); + switch(emit->current_node) { + case 0: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START3_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; + break; + case 1: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START2_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; + break; + case 2: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START1_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; + break; + case 3: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START0_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; + break; + } return 1; } @@ -348,7 +411,7 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) unsigned int opcode; PROG_CODE; - if (code->tex.length >= R300_PFS_MAX_TEX_INST) { + if (code->tex.length >= emit->compiler->Base.max_tex_insts) { error("Too many TEX instructions"); return 0; } @@ -376,10 +439,17 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) use_temporary(code, inst->U.I.SrcReg[0].Index); code->tex.inst[code->tex.length++] = - (inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | - (dest << R300_DST_ADDR_SHIFT) | - (unit << R300_TEX_ID_SHIFT) | - (opcode << R300_TEX_INST_SHIFT); + ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) + & R300_SRC_ADDR_MASK) + | ((dest << R300_DST_ADDR_SHIFT) + & R300_DST_ADDR_MASK) + | (unit << R300_TEX_ID_SHIFT) + | (opcode << R300_TEX_INST_SHIFT) + | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? + R400_SRC_ADDR_EXT_BIT : 0) + | (dest >= R300_PFS_NUM_TEMP_REGS ? + R400_DST_ADDR_EXT_BIT : 0) + ; return 1; } @@ -393,6 +463,7 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; struct r300_emit_state emit; struct r300_fragment_program_code *code = &compiler->code->code.r300; + unsigned int tex_end; memset(&emit, 0, sizeof(emit)); emit.compiler = compiler; @@ -424,11 +495,28 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) finish_node(&emit); code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + + /* Set r400 extended instruction fields. These values will be ignored + * on r300 cards. */ + code->r400_code_offset_ext |= + (get_msbs_alu(0) + << R400_ALU_OFFSET_MSB_SHIFT) + | (get_msbs_alu(code->alu.length - 1) + << R400_ALU_SIZE_MSB_SHIFT); + + tex_end = code->tex.length ? code->tex.length - 1 : 0; code->code_offset = - (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | - ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | - (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | - ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); + ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) + & R300_PFS_CNTL_ALU_OFFSET_MASK) + | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) + & R300_PFS_CNTL_ALU_END_MASK) + | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) + & R300_PFS_CNTL_TEX_OFFSET_MASK) + | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) + & R300_PFS_CNTL_TEX_END_MASK) + | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT) + ; if (emit.current_node < 3) { int shift = 3 - emit.current_node; @@ -438,4 +526,11 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) for(i = 0; i < shift; ++i) code->code_addr[i] = 0; } + + if (code->pixsize >= R300_PFS_NUM_TEMP_REGS + || code->alu.length > R300_PFS_MAX_ALU_INST + || code->tex.length > R300_PFS_MAX_TEX_INST) { + + code->r390_mode = 1; + } } diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index 05d3da8a10..5223aaa71a 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -98,9 +98,6 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) unsigned int relevant; int j; - if (reg.Abs) - reg.Negate = RC_MASK_NONE; - if (opcode == RC_OPCODE_KIL || opcode == RC_OPCODE_TEX || opcode == RC_OPCODE_TXB || @@ -140,9 +137,6 @@ static void r300_swizzle_split( struct rc_src_register src, unsigned int mask, struct rc_swizzle_split * split) { - if (src.Abs) - src.Negate = RC_MASK_NONE; - split->NumPhases = 0; while(mask) { @@ -222,13 +216,14 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) */ unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) { + unsigned int swz = GET_SWZ(swizzle, 0); if (src == RC_PAIR_PRESUB_SRC) { - return R300_ALU_ARGA_SRCP_X + swizzle; + return R300_ALU_ARGA_SRCP_X + swz; } - if (swizzle < 3) - return swizzle + 3*src; + if (swz < 3) + return swz + 3*src; - switch(swizzle) { + switch(swz) { case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index e0d349b98c..9286733635 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -78,12 +78,32 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) } } +static int radeon_saturate_output( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + + if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT) + return 0; + + inst->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + return 1; +} + void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { int is_r500 = c->Base.is_r500; int opt = !c->Base.disable_optimizations; + int sat_out = c->state.frag_clamp; /* Lists of instruction transformations. */ + struct radeon_program_transformation saturate_output[] = { + { &radeon_saturate_output, c }, + { 0, 0 } + }; + struct radeon_program_transformation rewrite_tex[] = { { &radeonTransformTEX, c }, { 0, 0 } @@ -113,6 +133,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"unroll loops", 1, is_r500, rc_unroll_loops, NULL}, {"transform loops", 1, !is_r500, rc_transform_loops, NULL}, {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, + {"saturate output writes", 1, sat_out, rc_local_transform, saturate_output}, {"transform TEX", 1, 1, rc_local_transform, rewrite_tex}, {"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500}, {"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300}, @@ -124,7 +145,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) /* This pass makes it easier for the scheduler to group TEX * instructions and reduces the chances of creating too * many texture indirections.*/ - {"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL}, + {"register rename", 1, !is_r500, rc_rename_regs, NULL}, {"pair translate", 1, 1, rc_pair_translate, NULL}, {"pair scheduling", 1, 1, rc_pair_schedule, NULL}, {"register allocation", 1, opt, rc_pair_regalloc, NULL}, diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 472029f63d..8ad2175ead 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -490,13 +490,6 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user) continue; if (info->HasDstReg) { - /* Relative addressing of destination operands is not supported yet. */ - if (vpi->DstReg.RelAddr) { - rc_error(&compiler->Base, "Vertex program does not support relative " - "addressing of destination operands (yet).\n"); - return; - } - /* Neither is Saturate. */ if (vpi->SaturateMode != RC_SATURATE_NONE) { rc_error(&compiler->Base, "Vertex program does not support the Saturate " @@ -668,7 +661,6 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) char hwtemps[RC_REGISTER_MAX_INDEX]; struct temporary_allocation * ta; unsigned int i, j; - struct rc_instruction *last_inst_src_reladdr = NULL; memset(hwtemps, 0, sizeof(hwtemps)); @@ -693,28 +685,11 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) } } - /* Pass 2: If there is relative addressing of dst temporaries, we cannot change register indices. Give up. - * For src temporaries, save the last instruction which uses relative addressing. */ - for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); - - if (opcode->HasDstReg) - if (inst->U.I.DstReg.RelAddr) - return; - - for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && - inst->U.I.SrcReg[i].RelAddr) { - last_inst_src_reladdr = inst; - } - } - } - ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, sizeof(struct temporary_allocation) * num_orig_temps); memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); - /* Pass 3: Determine original temporary lifetimes */ + /* Pass 2: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); /* Instructions inside of loops need to use the ENDLOOP @@ -744,41 +719,22 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { - struct rc_instruction *last_read; - - /* From "last_inst_src_reladdr", "end_loop", and "inst", - * select the instruction with the highest instruction index (IP). - * Note that "end_loop", if available, has always a higher index than "inst". */ - if (last_inst_src_reladdr) { - if (end_loop) { - last_read = last_inst_src_reladdr->IP > end_loop->IP ? - last_inst_src_reladdr : end_loop; - } else { - last_read = last_inst_src_reladdr->IP > inst->IP ? - last_inst_src_reladdr : inst; - } - } else { - last_read = end_loop ? end_loop : inst; - } - - ta[inst->U.I.SrcReg[i].Index].LastRead = last_read; + ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst; } } } - /* Pass 4: Register allocation */ + /* Pass 3: Register allocation */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (!last_inst_src_reladdr || last_inst_src_reladdr->IP < inst->IP) { - for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { - unsigned int orig = inst->U.I.SrcReg[i].Index; - inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.SrcReg[i].Index; + inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; - if (ta[orig].Allocated && inst == ta[orig].LastRead) - hwtemps[ta[orig].HwTemp] = 0; - } + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = 0; } } @@ -792,12 +748,7 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) break; } ta[orig].Allocated = 1; - if (last_inst_src_reladdr && - last_inst_src_reladdr->IP > inst->IP) { - ta[orig].HwTemp = orig; - } else { - ta[orig].HwTemp = j; - } + ta[orig].HwTemp = j; hwtemps[ta[orig].HwTemp] = 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index ef81be48f7..140eeed3de 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -77,9 +77,6 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE)) return 0; - if (reg.Negate) - reg.Negate ^= RC_MASK_XYZW; - for(i = 0; i < 4; ++i) { unsigned int swz = GET_SWZ(reg.Swizzle, i); if (swz == RC_SWIZZLE_UNUSED) { @@ -103,9 +100,6 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) return 0; } else { /* ALU instructions support almost everything */ - if (reg.Abs) - return 1; - relevant = 0; for(i = 0; i < 3; ++i) { unsigned int swz = GET_SWZ(reg.Swizzle, i); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 5da82d90f6..301b444669 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -170,7 +170,7 @@ static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) { unsigned int t = inst->Alpha.Arg[i].Source; - t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; + t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2; t |= inst->Alpha.Arg[i].Negate << 5; t |= inst->Alpha.Arg[i].Abs << 6; return t; @@ -372,7 +372,7 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst | (inst->DstReg.WriteMask << 11) | R500_INST_TEX_SEM_WAIT; code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) - | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + | R500_TEX_SEM_ACQUIRE; if (inst->TexSrcTarget == RC_TEXTURE_RECT) code->inst[ip].inst1 |= R500_TEX_UNSCALED; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index b69e81698a..35360aa70f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -31,6 +31,9 @@ #define R300_PFS_NUM_TEMP_REGS 32 #define R300_PFS_NUM_CONST_REGS 32 +#define R400_PFS_MAX_ALU_INST 512 +#define R400_PFS_MAX_TEX_INST 512 + #define R500_PFS_MAX_INST 512 #define R500_PFS_NUM_TEMP_REGS 128 #define R500_PFS_NUM_CONST_REGS 256 @@ -170,6 +173,8 @@ struct r300_fragment_program_external_state { * RC_STATE_R300_TEXSCALE_FACTOR. */ unsigned clamp_and_scale_before_fetch : 1; } unit[16]; + + unsigned frag_clamp:1; }; @@ -187,24 +192,29 @@ struct r300_fragment_program_node { */ struct r300_fragment_program_code { struct { - int length; /**< total # of texture instructions used */ - uint32_t inst[R300_PFS_MAX_TEX_INST]; + unsigned int length; /**< total # of texture instructions used */ + uint32_t inst[R400_PFS_MAX_TEX_INST]; } tex; struct { - int length; /**< total # of ALU instructions used */ + unsigned int length; /**< total # of ALU instructions used */ struct { uint32_t rgb_inst; uint32_t rgb_addr; uint32_t alpha_inst; uint32_t alpha_addr; - } inst[R300_PFS_MAX_ALU_INST]; + uint32_t r400_ext_addr; + } inst[R400_PFS_MAX_ALU_INST]; } alu; uint32_t config; /* US_CONFIG */ uint32_t pixsize; /* US_PIXSIZE */ uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t r400_code_offset_ext; /* US_CODE_EXT */ uint32_t code_addr[4]; /* US_CODE_ADDR */ + /*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries + * for r400 cards */ + unsigned int r390_mode:1; }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 65548604bc..79cd7996f7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -373,9 +373,11 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) const struct rc_opcode_info * info; rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); if (tmp->Type == RC_INSTRUCTION_NORMAL) { + info = rc_get_opcode_info(tmp->U.I.Opcode); + if (info->Opcode == RC_OPCODE_BEGIN_TEX) + continue; if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) s->num_presub_ops++; - info = rc_get_opcode_info(tmp->U.I.Opcode); } else { if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) s->num_presub_ops++; @@ -402,11 +404,11 @@ static void print_stats(struct radeon_compiler * c) { struct rc_program_stats s; - rc_get_stats(c, &s); - - if (s.num_insts < 4) + if (c->initial_num_insts <= 5) return; + rc_get_stats(c, &s); + switch (c->type) { case RC_VERTEX_PROGRAM: fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" @@ -461,6 +463,11 @@ void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pa /* Executes a list of compiler passes given in the parameter 'list'. */ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) { + struct rc_program_stats s; + + rc_get_stats(c, &s); + c->initial_num_insts = s.num_insts; + if (c->Debug & RC_DBG_LOG) { fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); rc_print_program(&c->Program); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index e663339589..2d8e415f35 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -50,6 +50,7 @@ struct radeon_compiler { char * ErrorMsg; /* Hardware specification. */ + unsigned is_r400:1; unsigned is_r500:1; unsigned has_half_swizzles:1; unsigned has_presub:1; @@ -57,6 +58,7 @@ struct radeon_compiler { unsigned max_temp_regs; unsigned max_constants; int max_alu_insts; + unsigned max_tex_insts; /* Whether to remove unused constants and empty holes in constant space. */ unsigned remove_unused_constants:1; @@ -70,6 +72,8 @@ struct radeon_compiler { /*@}*/ struct emulate_loop_state loop_state; + + unsigned initial_num_insts; /* Number of instructions at start. */ }; void rc_init(struct radeon_compiler * c); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c index bf393a9fb1..15ec4418cb 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c @@ -55,6 +55,24 @@ rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) return GET_SWZ(swz, idx); } +/** + * The purpose of this function is to standardize the number channels used by + * swizzles. All swizzles regardless of what instruction they are a part of + * should have 4 channels initialized with values. + * @param channels The number of channels in initial_value that have a + * meaningful value. + * @return An initialized swizzle that has all of the unused channels set to + * RC_SWIZZLE_UNUSED. + */ +unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels) +{ + unsigned int i; + for (i = channels; i < 4; i++) { + SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED); + } + return initial_value; +} + unsigned int combine_swizzles4(unsigned int src, rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) { @@ -147,13 +165,17 @@ unsigned int rc_src_reads_dst_mask( return dst_mask & rc_swizzle_to_writemask(src_swz); } -unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels) +/** + * @return A bit mask specifying whether this swizzle will select from an RGB + * source, an Alpha source, or both. + */ +unsigned int rc_source_type_swz(unsigned int swizzle) { unsigned int chan; unsigned int swz = RC_SWIZZLE_UNUSED; unsigned int ret = RC_SOURCE_NONE; - for(chan = 0; chan < channels; chan++) { + for(chan = 0; chan < 4; chan++) { swz = GET_SWZ(swizzle, chan); if (swz == RC_SWIZZLE_W) { ret |= RC_SOURCE_ALPHA; @@ -202,7 +224,7 @@ static void can_use_presub_read_cb( if (d->RemoveSrcs[i].File == file && d->RemoveSrcs[i].Index == index) { src_type &= - ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle, 4); + ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle); } } @@ -223,7 +245,6 @@ unsigned int rc_inst_can_use_presub( { struct can_use_presub_data d; unsigned int num_presub_srcs; - unsigned int presub_src_type = rc_source_type_mask(presub_writemask); const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); @@ -253,13 +274,7 @@ unsigned int rc_inst_can_use_presub( num_presub_srcs = rc_presubtract_src_reg_count(presub_op); - if ((presub_src_type & RC_SOURCE_RGB) - && d.RGBCount + num_presub_srcs > 3) { - return 0; - } - - if ((presub_src_type & RC_SOURCE_ALPHA) - && d.AlphaCount + num_presub_srcs > 3) { + if (d.RGBCount + num_presub_srcs > 3 || d.AlphaCount + num_presub_srcs > 3) { return 0; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h index 461ab9ffb1..dd0f6c6615 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h @@ -10,6 +10,8 @@ unsigned int rc_swizzle_to_writemask(unsigned int swz); rc_swizzle get_swz(unsigned int swz, rc_swizzle idx); +unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels); + unsigned int combine_swizzles4(unsigned int src, rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w); @@ -32,7 +34,7 @@ unsigned int rc_src_reads_dst_mask( unsigned int dst_idx, unsigned int dst_mask); -unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels); +unsigned int rc_source_type_swz(unsigned int swizzle); unsigned int rc_source_type_mask(unsigned int mask); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index d0a64d936e..c080d5aecc 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -140,14 +140,8 @@ static void pair_sub_for_all_args( for(i = 0; i < info->NumSrcRegs; i++) { unsigned int src_type; - unsigned int channels = 0; - if (&fullinst->U.P.RGB == sub) - channels = 3; - else if (&fullinst->U.P.Alpha == sub) - channels = 1; - - assert(channels > 0); - src_type = rc_source_type_swz(sub->Arg[i].Swizzle, channels); + + src_type = rc_source_type_swz(sub->Arg[i].Swizzle); if (src_type == RC_SOURCE_NONE) continue; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index 87906f37b1..678e147588 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -160,12 +160,8 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); if (pused) { usedmask = *pused & inst->U.I.DstReg.WriteMask; - if (!inst->U.I.DstReg.RelAddr) - *pused &= ~usedmask; + *pused &= ~usedmask; } - - if (inst->U.I.DstReg.RelAddr) - mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); } insts->WriteMask |= usedmask; @@ -219,22 +215,9 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) { struct deadcode_state s; unsigned int nr_instructions; - unsigned has_temp_reladdr_src = 0; rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user; unsigned int ip; - /* Give up if there is relative addressing of destination operands. */ - for(struct rc_instruction * inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (opcode->HasDstReg && - inst->U.I.DstReg.WriteMask && - inst->U.I.DstReg.RelAddr) { - return; - } - } - memset(&s, 0, sizeof(s)); s.C = c; @@ -321,32 +304,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); } } - - if (!has_temp_reladdr_src) { - for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && - inst->U.I.SrcReg[i].RelAddr) { - /* If there is a register read from a temporary file with relative addressing, - * mark all preceding written registers as used. */ - for (struct rc_instruction *ptr = inst->Prev; - ptr != &c->Program.Instructions; - ptr = ptr->Prev) { - opcode = rc_get_opcode_info(ptr->U.I.Opcode); - if (opcode->HasDstReg && - ptr->U.I.DstReg.File == RC_FILE_TEMPORARY && - ptr->U.I.DstReg.WriteMask) { - mark_used(&s, - ptr->U.I.DstReg.File, - ptr->U.I.DstReg.Index, - ptr->U.I.DstReg.WriteMask); - } - } - - has_temp_reladdr_src = 1; - break; - } - } - } } update_instruction(&s, inst); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c index a0f7bd8174..133a9f72ec 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c @@ -56,6 +56,7 @@ static void rewrite_source(struct radeon_compiler * c, mov->U.I.DstReg.Index = tempreg; mov->U.I.DstReg.WriteMask = split.Phase[phase]; mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; + mov->U.I.PreSub = inst->U.I.PreSub; phase_refmask = 0; for(unsigned int chan = 0; chan < 4; ++chan) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 44f4c0fbdc..c4e6a5e0a1 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -139,7 +139,6 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i unsigned int i; if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || - inst_mov->U.I.DstReg.RelAddr || inst_mov->U.I.WriteALUResult || inst_mov->U.I.SaturateMode) return; @@ -312,7 +311,18 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction * struct rc_constant * constant; struct rc_src_register newsrc; int have_real_reference; + unsigned int chan; + + /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ + for (chan = 0; chan < 4; ++chan) + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) + break; + if (chan == 4) { + inst->U.I.SrcReg[src].File = RC_FILE_NONE; + continue; + } + /* Convert immediates to swizzles. */ if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || inst->U.I.SrcReg[src].RelAddr || inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) @@ -326,7 +336,7 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction * newsrc = inst->U.I.SrcReg[src]; have_real_reference = 0; - for(unsigned int chan = 0; chan < 4; ++chan) { + for (chan = 0; chan < 4; ++chan) { unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); unsigned int newswz; float imm; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index 9beb5d6357..8e10813ff0 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -365,8 +365,8 @@ static int merge_presub_sources( for(arg = 0; arg < info->NumSrcRegs; arg++) { /*If this arg does not read from an rgb source, * do nothing. */ - if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle, - 3) & type)) { + if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) + & type)) { continue; } @@ -423,11 +423,11 @@ static int destructive_merge_instructions( unsigned int index = 0; int source; - if (alpha->Alpha.Arg[arg].Swizzle < 3) { + if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) { srcrgb = 1; file = alpha->RGB.Src[oldsrc].File; index = alpha->RGB.Src[oldsrc].Index; - } else if (alpha->Alpha.Arg[arg].Swizzle < 4) { + } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) { srcalpha = 1; file = alpha->Alpha.Src[oldsrc].File; index = alpha->Alpha.Src[oldsrc].Index; @@ -544,18 +544,12 @@ static void rgb_to_alpha_remap ( { int new_src_index; unsigned int i; - struct rc_pair_instruction_source * old_src = - rc_pair_get_src(&inst->U.P, arg); - if (!old_src) { - return; - } for (i = 0; i < 3; i++) { if (get_swz(arg->Swizzle, i) == old_swz) { SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); } } - memset(old_src, 0, sizeof(struct rc_pair_instruction_source)); new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, old_file, new_index); /* This conversion is not possible, we must have made a mistake in @@ -728,7 +722,8 @@ static int convert_rgb_to_alpha( for (j = 0; j < 3; j++) { unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); if (swz != RC_SWIZZLE_UNUSED) { - pair_inst->Alpha.Arg[i].Swizzle = swz; + pair_inst->Alpha.Arg[i].Swizzle = + rc_init_swizzle(swz, 1); break; } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index fc05366f50..9e03eb1aca 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -28,6 +28,7 @@ #include "radeon_program_pair.h" #include "radeon_compiler.h" +#include "radeon_compiler_util.h" /** @@ -213,16 +214,21 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, if (needrgb && !istranscendent) { unsigned int srcrgb = 0; unsigned int srcalpha = 0; + unsigned int srcmask = 0; int j; /* We don't care about the alpha channel here. We only * want the part of the swizzle that writes to rgb, * since we are creating an rgb instruction. */ for(j = 0; j < 3; ++j) { unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - if (swz < 3) + + if (swz < RC_SWIZZLE_W) srcrgb = 1; - else if (swz < 4) + else if (swz == RC_SWIZZLE_W) srcalpha = 1; + + if (swz < RC_SWIZZLE_UNUSED) + srcmask |= 1 << j; } source = rc_pair_alloc_source(pair, srcrgb, srcalpha, inst->SrcReg[i].File, inst->SrcReg[i].Index); @@ -232,9 +238,10 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, return; } pair->RGB.Arg[i].Source = source; - pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; + pair->RGB.Arg[i].Swizzle = + rc_init_swizzle(inst->SrcReg[i].Swizzle, 3); pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); + pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); } if (needalpha) { unsigned int srcrgb = 0; @@ -252,7 +259,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, return; } pair->Alpha.Arg[i].Source = source; - pair->Alpha.Arg[i].Swizzle = swz; + pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1); pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W); } @@ -302,12 +309,6 @@ static void check_opcode_support(struct r300_fragment_program_compiler *c, const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); if (opcode->HasDstReg) { - if (inst->DstReg.RelAddr) { - rc_error(&c->Base, "Fragment program does not support relative addressing " - "of destination operands.\n"); - return; - } - if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) { rc_error(&c->Base, "Fragment program does not support signed Saturate.\n"); return; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index df6c94b35f..a07f6b63c6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -56,11 +56,7 @@ struct rc_src_register { struct rc_dst_register { unsigned int File:3; - - /** Negative values may be used for relative addressing. */ - signed int Index:(RC_REGISTER_INDEX_BITS+1); - unsigned int RelAddr:1; - + unsigned int Index:RC_REGISTER_INDEX_BITS; unsigned int WriteMask:4; }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 58977a40c7..9fc991166a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -91,7 +91,6 @@ static struct rc_dst_register dstregtmpmask(int index, int mask) dst.File = RC_FILE_TEMPORARY; dst.Index = index; dst.WriteMask = mask; - dst.RelAddr = 0; return dst; } @@ -689,11 +688,12 @@ static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, &constant_swizzle); /* MOV dst, src */ + dst.WriteMask = RC_MASK_XYZW; emit1(c, inst->Prev, RC_OPCODE_MOV, 0, dst, inst->U.I.SrcReg[0]); - /* MAX dst.z, src, 0.00...001 */ + /* MAX dst.y, src, 0.00...001 */ emit2(c, inst->Prev, RC_OPCODE_MAX, 0, dstregtmpmask(dst.Index, RC_MASK_Y), srcreg(RC_FILE_TEMPORARY, dst.Index), diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index 5905d26e52..68874795b8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -211,27 +211,9 @@ struct rc_pair_instruction_source * rc_pair_get_src( struct rc_pair_instruction * pair_inst, struct rc_pair_instruction_arg * arg) { - unsigned int i, type; - unsigned int channels = 0; + unsigned int type; - for(i = 0; i < 3; i++) { - if (arg == pair_inst->RGB.Arg + i) { - channels = 3; - break; - } - } - - if (channels == 0) { - for (i = 0; i < 3; i++) { - if (arg == pair_inst->Alpha.Arg + i) { - channels = 1; - break; - } - } - } - - assert(channels > 0); - type = rc_source_type_swz(arg->Swizzle, channels); + type = rc_source_type_swz(arg->Swizzle); if (type & RC_SOURCE_RGB) { return &pair_inst->RGB.Src[arg->Source]; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index ccf7a0070c..6708b16d29 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -63,7 +63,7 @@ struct rc_pair_instruction_source { struct rc_pair_instruction_arg { unsigned int Source:2; - unsigned int Swizzle:9; + unsigned int Swizzle:12; unsigned int Abs:1; unsigned int Negate:1; }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c index ae13f6742f..390d131946 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -110,7 +110,7 @@ static void rc_print_mask(FILE * f, unsigned int mask) static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) { - rc_print_register(f, dst.File, dst.Index, dst.RelAddr); + rc_print_register(f, dst.File, dst.Index, 0); if (dst.WriteMask != RC_MASK_XYZW) { fprintf(f, "."); rc_print_mask(f, dst.WriteMask); @@ -379,7 +379,7 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst else fprintf(f,"%d", inst->Alpha.Arg[arg].Source); fprintf(f,".%c%s", - rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs); + rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs); } fprintf(f, "\n"); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index f9d9f34b6a..1cf77d9cf7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -33,14 +33,14 @@ /* Series of transformations to be done on textures. */ static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler, - int tmu) + int tmu) { struct rc_src_register reg = { 0, }; if (compiler->enable_shadow_ambient) { reg.File = RC_FILE_CONSTANT; reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants, - RC_STATE_SHADOW_AMBIENT, tmu); + RC_STATE_SHADOW_AMBIENT, tmu); reg.Swizzle = RC_SWIZZLE_WWWW; } else { reg.File = RC_FILE_NONE; @@ -149,14 +149,11 @@ int radeonTransformTEX( return 1; } else { - rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; struct rc_instruction * inst_rcp = NULL; - struct rc_instruction * inst_mad; - struct rc_instruction * inst_cmp; + struct rc_instruction *inst_mul, *inst_add, *inst_cmp; unsigned tmp_texsample; unsigned tmp_sum; - unsigned tmp_recip_w = 0; - int pass, fail, tex; + int pass, fail; /* Save the output register. */ struct rc_dst_register output_reg = inst->U.I.DstReg; @@ -167,63 +164,68 @@ int radeonTransformTEX( inst->U.I.DstReg.Index = tmp_texsample; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - if (inst->U.I.Opcode == RC_OPCODE_TXP) { - tmp_recip_w = rc_find_free_temporary(c); + tmp_sum = rc_find_free_temporary(c); + if (inst->U.I.Opcode == RC_OPCODE_TXP) { /* Compute 1/W. */ inst_rcp = rc_insert_new_instruction(c, inst); inst_rcp->U.I.Opcode = RC_OPCODE_RCP; inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->U.I.DstReg.Index = tmp_recip_w; + inst_rcp->U.I.DstReg.Index = tmp_sum; inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_rcp->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); } - /* Perspective-divide Z by W (if it's TXP) and add the texture sample (see below). */ - tmp_sum = rc_find_free_temporary(c); - inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); - inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->U.I.DstReg.Index = tmp_sum; - inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mad->U.I.SrcReg[0].Swizzle = + /* Divide Z by W (if it's TXP) and saturate. */ + inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); + inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tmp_sum; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2)); if (inst->U.I.Opcode == RC_OPCODE_TXP) { - inst_mad->U.I.Opcode = RC_OPCODE_MAD; - inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[1].Index = tmp_recip_w; - inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - tex = 2; - } else { - inst_mad->U.I.Opcode = RC_OPCODE_ADD; - tex = 1; - } - inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[tex].Index = tmp_texsample; - inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle; - - /* Fake EQUAL/NOTEQUAL, it seems to pass some tests suprisingly. */ - if (comparefunc == RC_COMPARE_FUNC_EQUAL) { - comparefunc = RC_COMPARE_FUNC_GEQUAL; - } else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { - comparefunc = RC_COMPARE_FUNC_LESS; + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tmp_sum; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; } - /* Recall that SrcReg[0] is r, SrcReg[tex] is tex and: + /* Add the depth texture value. */ + inst_add = rc_insert_new_instruction(c, inst_mul); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tmp_sum; + inst_add->U.I.DstReg.WriteMask = RC_MASK_W; + inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[0].Index = tmp_sum; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = tmp_texsample; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + + /* Note that SrcReg[0] is r, SrcReg[1] is tex and: * LESS: r < tex <=> -tex+r < 0 * GEQUAL: r >= tex <=> not (-tex+r < 0) * GREATER: r > tex <=> tex-r < 0 * LEQUAL: r <= tex <=> not ( tex-r < 0) - * - * This negates either r or tex: */ - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) - inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW; + * EQUAL: GEQUAL + * NOTEQUAL:LESS + */ + + /* This negates either r or tex: */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL || + comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) + inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW; else - inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; + inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; /* This negates the whole expresion: */ - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER || + comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { pass = 1; fail = 2; } else { @@ -231,16 +233,19 @@ int radeonTransformTEX( fail = 1; } - inst_cmp = rc_insert_new_instruction(c, inst_mad); + inst_cmp = rc_insert_new_instruction(c, inst_add); inst_cmp->U.I.Opcode = RC_OPCODE_CMP; inst_cmp->U.I.DstReg = output_reg; inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_cmp->U.I.SrcReg[0].Index = tmp_sum; + inst_cmp->U.I.SrcReg[0].Swizzle = + combine_swizzles(RC_SWIZZLE_WWWW, + compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle); inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); - assert(tmp_texsample != tmp_sum && tmp_sum != tmp_recip_w); + assert(tmp_texsample != tmp_sum); } } @@ -420,17 +425,21 @@ int radeonTransformTEX( scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR); } - /* Cannot write texture to output registers (all chips) or with masks (non-r500) */ + /* Cannot write texture to output registers or with saturate (all chips), + * or with masks (non-r500). */ if (inst->U.I.Opcode != RC_OPCODE_KIL && (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || + inst->U.I.SaturateMode || (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode; inst_mov->U.I.DstReg = inst->U.I.DstReg; inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); + inst->U.I.SaturateMode = 0; inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c index 88165f7895..5bd19c0b9c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c @@ -51,6 +51,14 @@ void rc_rename_regs(struct radeon_compiler *c, void *user) struct rc_reader_data reader_data; unsigned char * used; + /* XXX Remove this once the register allocation works with flow control. */ + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) + return; + } + used_length = 2 * rc_recompute_ips(c); used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length); memset(used, 0, sizeof(unsigned char) * used_length); diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c index 9fd8e8fde5..14e60866d9 100644 --- a/src/mesa/drivers/dri/r300/r300_blit.c +++ b/src/mesa/drivers/dri/r300/r300_blit.c @@ -63,7 +63,6 @@ static void create_vertex_program(struct r300_context *r300) inst->U.I.Opcode = RC_OPCODE_MOV; inst->U.I.DstReg.File = RC_FILE_OUTPUT; inst->U.I.DstReg.Index = VERT_RESULT_HPOS; - inst->U.I.DstReg.RelAddr = 0; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; inst->U.I.SrcReg[0].Abs = 0; inst->U.I.SrcReg[0].File = RC_FILE_INPUT; @@ -76,7 +75,6 @@ static void create_vertex_program(struct r300_context *r300) inst->U.I.Opcode = RC_OPCODE_MOV; inst->U.I.DstReg.File = RC_FILE_OUTPUT; inst->U.I.DstReg.Index = VERT_RESULT_TEX0; - inst->U.I.DstReg.RelAddr = 0; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; inst->U.I.SrcReg[0].Abs = 0; inst->U.I.SrcReg[0].File = RC_FILE_INPUT; @@ -131,6 +129,7 @@ static void create_fragment_program(struct r300_context *r300) compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32; compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32; compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64; + compiler.Base.max_tex_insts = compiler.Base.is_r500 ? 512 : 32; compiler.code = &r300->blit.fp_code; compiler.AllocateHwInputs = fp_allocate_hw_inputs; diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index c288834d24..0d8bd4fc70 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/extensions.h" #include "main/bufferobj.h" #include "main/texobj.h" +#include "main/mfeatures.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index 4e457b51eb..a0a26f1b38 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -226,6 +226,7 @@ static void translate_fragment_program(struct gl_context *ctx, struct r300_fragm compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32; compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32; compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64; + compiler.Base.max_tex_insts = compiler.Base.is_r500 ? 512 : 32; compiler.OutputDepth = FRAG_RESULT_DEPTH; memset(compiler.OutputColor, 0, 4 * sizeof(unsigned)); compiler.OutputColor[0] = FRAG_RESULT_COLOR; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index f7705b0f6f..2b9d85fae8 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1658,6 +1658,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13) # define R300_PFS_CNTL_TEX_END_SHIFT 18 # define R300_PFS_CNTL_TEX_END_MASK (31 << 18) +# define R400_PFS_CNTL_TEX_OFFSET_MSB_SHIFT 24 +# define R400_PFS_CNTL_TEX_OFFSET_MSB_MASK (0xf << 24) +# define R400_PFS_CNTL_TEX_END_MSB_SHIFT 28 +# define R400_PFS_CNTL_TEX_END_MSB_MASK (0xf << 28) /* gap */ @@ -1682,6 +1686,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TEX_SIZE_MASK (31 << 17) # define R300_RGBA_OUT (1 << 22) # define R300_W_OUT (1 << 23) +# define R400_TEX_START_MSB_SHIFT 24 +# define R400_TEX_START_MSG_MASK (0xf << 24) +# define R400_TEX_SIZE_MSB_SHIFT 28 +# define R400_TEX_SIZE_MSG_MASK (0xf << 28) /* TEX * As far as I can tell, texture instructions cannot write into output @@ -1702,6 +1710,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TEX_OP_TXP 3 # define R300_TEX_OP_TXB 4 # define R300_TEX_INST_MASK (7 << 15) +# define R400_SRC_ADDR_EXT_BIT (1 << 19) +# define R400_DST_ADDR_EXT_BIT (1 << 20) /* Output format from the unfied shader */ #define R300_US_OUT_FMT 0x46A4 @@ -1979,6 +1989,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_OUTA_CLAMP (1 << 30) /* END: Fragment program instruction set */ +/* R4xx extended fragment shader registers. */ +#define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */ +# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x)) +# define R400_ADDRD_EXT_RGB_MSB_BIT 0x08 +# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4)) +# define R400_ADDRD_EXT_A_MSB_BIT 0x80 + +#define R400_US_CODE_BANK 0x46b8 +# define R400_BANK_SHIFT 0 +# define R400_BANK_MASK 0xf +# define R400_R390_MODE_ENABLE (1 << 4) +#define R400_US_CODE_EXT 0x46bc +# define R400_ALU_OFFSET_MSB_SHIFT 0 +# define R400_ALU_OFFSET_MSB_MASK (0x7 << 0) +# define R400_ALU_SIZE_MSB_SHIFT 3 +# define R400_ALU_SIZE_MSB_MASK (0x7 << 3) +# define R400_ALU_START0_MSB_SHIFT 6 +# define R400_ALU_START0_MSB_MASK (0x7 << 6) +# define R400_ALU_SIZE0_MSB_SHIFT 9 +# define R400_ALU_SIZE0_MSB_MASK (0x7 << 9) +# define R400_ALU_START1_MSB_SHIFT 12 +# define R400_ALU_START1_MSB_MASK (0x7 << 12) +# define R400_ALU_SIZE1_MSB_SHIFT 15 +# define R400_ALU_SIZE1_MSB_MASK (0x7 << 15) +# define R400_ALU_START2_MSB_SHIFT 18 +# define R400_ALU_START2_MSB_MASK (0x7 << 18) +# define R400_ALU_SIZE2_MSB_SHIFT 21 +# define R400_ALU_SIZE2_MSB_MASK (0x7 << 21) +# define R400_ALU_START3_MSB_SHIFT 24 +# define R400_ALU_START3_MSB_MASK (0x7 << 24) +# define R400_ALU_SIZE3_MSB_SHIFT 27 +# define R400_ALU_SIZE3_MSB_MASK (0x7 << 27) +/* END: R4xx extended fragment shader registers. */ + /* Fog: Fog Blending Enable */ #define R300_FG_FOG_BLEND 0x4bc0 # define R300_FG_FOG_BLEND_DISABLE (0 << 0) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index ab8c1df5f7..51989c6b22 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -214,18 +214,18 @@ static void r300SetBlendState(struct gl_context * ctx) (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); int eqnA = R300_COMB_FCN_ADD_CLAMP; - if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) { + if (_mesa_rgba_logicop_enabled(ctx) || !ctx->Color.BlendEnabled) { r300SetBlendCntl(r300, func, eqn, 0, func, eqn); return; } func = - (blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE) << - R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstRGB, + (blend_factor(ctx->Color.Blend[0].SrcRGB, GL_TRUE) << + R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.Blend[0].DstRGB, GL_FALSE) << R300_DST_BLEND_SHIFT); - switch (ctx->Color.BlendEquationRGB) { + switch (ctx->Color.Blend[0].EquationRGB) { case GL_FUNC_ADD: eqn = R300_COMB_FCN_ADD_CLAMP; break; @@ -253,17 +253,17 @@ static void r300SetBlendState(struct gl_context * ctx) default: fprintf(stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB); + __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationRGB); return; } funcA = - (blend_factor(ctx->Color.BlendSrcA, GL_TRUE) << - R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstA, + (blend_factor(ctx->Color.Blend[0].SrcA, GL_TRUE) << + R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.Blend[0].DstA, GL_FALSE) << R300_DST_BLEND_SHIFT); - switch (ctx->Color.BlendEquationA) { + switch (ctx->Color.Blend[0].EquationA) { case GL_FUNC_ADD: eqnA = R300_COMB_FCN_ADD_CLAMP; break; @@ -291,7 +291,7 @@ static void r300SetBlendState(struct gl_context * ctx) default: fprintf(stderr, "[%s:%u] Invalid A blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.BlendEquationA); + __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationA); return; } @@ -335,7 +335,7 @@ static void r300SetLogicOpState(struct gl_context *ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); R300_STATECHANGE(r300, rop); - if (RGBA_LOGICOP_ENABLED(ctx)) { + if (_mesa_rgba_logicop_enabled(ctx)) { r300->hw.rop.cmd[1] = R300_RB3D_ROPCNTL_ROP_ENABLE | translate_logicop(ctx->Color.LogicOp); } else { @@ -349,7 +349,7 @@ static void r300SetLogicOpState(struct gl_context *ctx) */ static void r300LogicOpcode(struct gl_context *ctx, GLenum logicop) { - if (RGBA_LOGICOP_ENABLED(ctx)) + if (_mesa_rgba_logicop_enabled(ctx)) r300SetLogicOpState(ctx); } diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index de66293999..f930b4d06b 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/enums.h" #include "main/image.h" +#include "main/mfeatures.h" #include "main/mipmap.h" #include "main/simple_list.h" #include "main/texstore.h" diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c index 471a3723cb..232603ece5 100644 --- a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c +++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c @@ -128,7 +128,6 @@ static void translate_dstreg(struct rc_dst_register * dest, struct prog_dst_regi { dest->File = translate_register_file(src->File); dest->Index = src->Index; - dest->RelAddr = src->RelAddr; dest->WriteMask = src->WriteMask; } |