summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300')
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog.c43
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c133
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c15
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c23
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c69
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c6
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h18
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c15
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c37
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c10
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c45
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c14
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c17
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c23
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.h6
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c22
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_print.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c99
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c8
-rw-r--r--src/mesa/drivers/dri/r300/r300_blit.c3
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c1
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_common.c1
-rw-r--r--src/mesa/drivers/dri/r300/r300_reg.h44
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.c22
-rw-r--r--src/mesa/drivers/dri/r300/r300_tex.c1
-rw-r--r--src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c1
32 files changed, 407 insertions, 292 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
index 782671bac0..deba9ca834 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
@@ -49,6 +49,11 @@ static void presub_string(char out[10], unsigned int inst)
}
}
+/* Map one r400 extended-address flag to register-index bit 5: yields 32 when
+ * any bit of `bit` is set in `r400_ext_addr`, and 0 otherwise. */
+static int get_msb(unsigned int bit, unsigned int r400_ext_addr)
+{
+	if ((r400_ext_addr & bit) == 0)
+		return 0;
+
+	return 1 << 5;
+}
+
/* just some random things... */
void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
{
@@ -61,16 +66,21 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
fprintf(stderr, "Hardware program\n");
fprintf(stderr, "----------------\n");
+ if (c->is_r400) {
+ fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext);
+ }
for (n = 0; n <= (code->config & 3); n++) {
uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
- int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT;
- int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT;
+ unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) +
+ (((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6);
+ unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) +
+ (((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6);
int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
- fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
- "alu_end: %d, tex_end: %d (code_addr: %08x)\n", n,
+ fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, "
+ "alu_end: %u, tex_end: %d (code_addr: %08x)\n", n,
alu_offset, tex_offset, alu_end, tex_end, code_addr);
if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
@@ -125,11 +135,15 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
for (j = 0; j < 3; ++j) {
int regc = code->alu.inst[i].rgb_addr >> (j * 6);
int rega = code->alu.inst[i].alpha_addr >> (j * 6);
+ int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j),
+ code->alu.inst[i].r400_ext_addr);
+ int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j),
+ code->alu.inst[i].r400_ext_addr);
sprintf(srcc[j], "%c%i",
- (regc & 32) ? 'c' : 't', regc & 31);
+ (regc & 32) ? 'c' : 't', (regc & 31) | msbc);
sprintf(srca[j], "%c%i",
- (rega & 32) ? 'c' : 't', rega & 31);
+ (rega & 32) ? 'c' : 't', (rega & 31) | msba);
}
dstc[0] = 0;
@@ -141,9 +155,14 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
(code->alu.inst[i].
rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
if (flags[0] != 0) {
+ unsigned int msb = get_msb(
+ R400_ADDRD_EXT_RGB_MSB_BIT,
+ code->alu.inst[i].r400_ext_addr);
+
sprintf(dstc, "t%i.%s ",
- (code->alu.inst[i].
- rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
+ ((code->alu.inst[i].
+ rgb_addr >> R300_ALU_DSTC_SHIFT)
+ & 31) | msb,
flags);
}
sprintf(flags, "%s%s%s",
@@ -166,9 +185,13 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
dsta[0] = 0;
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
+ unsigned int msb = get_msb(
+ R400_ADDRD_EXT_A_MSB_BIT,
+ code->alu.inst[i].r400_ext_addr);
sprintf(dsta, "t%i.w ",
- (code->alu.inst[i].
- alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
+ ((code->alu.inst[i].
+ alpha_addr >> R300_ALU_DSTA_SHIFT) & 31)
+ | msb);
}
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
sprintf(tmp, "o%i.w ",
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 1db8678e89..28d132a5fe 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -64,6 +64,20 @@ struct r300_emit_state {
__FILE__, __FUNCTION__, ##args); \
} while(0)
+/* Return bits 6..8 of an ALU offset or end value, i.e. the three bits that
+ * lie above the low six bits. */
+static unsigned int get_msbs_alu(unsigned int bits)
+{
+	const unsigned int upper = bits >> 6;
+
+	return upper & 0x7;
+}
+
+/**
+ * Extract the upper bits of a texture instruction offset or end value: the
+ * part that remains after the low `lsbs` bits have been discarded.
+ *
+ * The result is limited to four contiguous bits. The previous mask, 0x15
+ * (binary 10101), silently dropped bits 1 and 3 of the result, so values such
+ * as 64 (msbs == 2) were reported as 0.
+ * NOTE(review): the 4-bit width is derived from R400_PFS_MAX_TEX_INST == 512
+ * (9-bit indices) minus the 5 low bits kept in the legacy field - confirm
+ * against the R400_TEX_*_MSB field definitions.
+ *
+ * @param bits The full offset or end value.
+ * @param lsbs The number of least significant bits
+ * @return Bits [lsbs, lsbs + 3] of `bits`, shifted down to bit 0.
+ */
+static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
+{
+	return (bits >> lsbs) & 0xf;
+}
+
+/* Shift x right by lsbs bits and keep only the bits selected by mask.
+ * Every argument is parenthesized so that compound expressions (for example
+ * a mask written as `a | b`) expand with the intended precedence. */
+#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
/**
* Mark a temporary register as used.
@@ -83,7 +97,7 @@ static unsigned int use_source(struct r300_fragment_program_code* code, struct r
return src.Index | (1 << 5);
} else if (src.File == RC_FILE_TEMPORARY) {
use_temporary(code, src.Index);
- return src.Index;
+ return src.Index & 0x1f;
}
return 0;
@@ -151,11 +165,19 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
for(j = 0; j < 3; ++j) {
+ /* Set the RGB address */
unsigned int src = use_source(code, inst->RGB.Src[j]);
unsigned int arg;
+ if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
+ code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
+
code->alu.inst[ip].rgb_addr |= src << (6*j);
+ /* Set the Alpha address */
src = use_source(code, inst->Alpha.Src[j]);
+ if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
+ code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
+
code->alu.inst[ip].alpha_addr |= src << (6*j);
arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
@@ -223,8 +245,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
if (inst->RGB.WriteMask) {
use_temporary(code, inst->RGB.DestIndex);
+ if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
+ code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
code->alu.inst[ip].rgb_addr |=
- (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
+ ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
}
if (inst->RGB.OutputWriteMask) {
@@ -236,8 +260,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
if (inst->Alpha.WriteMask) {
use_temporary(code, inst->Alpha.DestIndex);
+ if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
+ code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
code->alu.inst[ip].alpha_addr |=
- (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
+ ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
R300_ALU_DSTA_REG;
}
if (inst->Alpha.OutputWriteMask) {
@@ -269,6 +295,8 @@ static int finish_node(struct r300_emit_state * emit)
unsigned tex_offset;
unsigned tex_end;
+ unsigned int alu_offset_msbs, alu_end_msbs;
+
if (code->alu.length == emit->node_first_alu) {
/* Generate a single NOP for this node */
struct rc_pair_instruction inst;
@@ -301,13 +329,48 @@ static int finish_node(struct r300_emit_state * emit)
*
* Also note that the register specification from AMD is slightly
* incorrect in its description of this register. */
- code->code_addr[emit->current_node] =
- (alu_offset << R300_ALU_START_SHIFT) |
- (alu_end << R300_ALU_SIZE_SHIFT) |
- (tex_offset << R300_TEX_START_SHIFT) |
- (tex_end << R300_TEX_SIZE_SHIFT) |
- emit->node_flags;
-
+ code->code_addr[emit->current_node] =
+ ((alu_offset << R300_ALU_START_SHIFT)
+ & R300_ALU_START_MASK)
+ | ((alu_end << R300_ALU_SIZE_SHIFT)
+ & R300_ALU_SIZE_MASK)
+ | ((tex_offset << R300_TEX_START_SHIFT)
+ & R300_TEX_START_MASK)
+ | ((tex_end << R300_TEX_SIZE_SHIFT)
+ & R300_TEX_SIZE_MASK)
+ | emit->node_flags
+ | (get_msbs_tex(tex_offset, 5)
+ << R400_TEX_START_MSB_SHIFT)
+ | (get_msbs_tex(tex_end, 5)
+ << R400_TEX_SIZE_MSB_SHIFT)
+ ;
+
+ /* Write r400 extended instruction fields. These will be ignored on
+ * r300 cards. */
+ alu_offset_msbs = get_msbs_alu(alu_offset);
+ alu_end_msbs = get_msbs_alu(alu_end);
+ switch(emit->current_node) {
+ case 0:
+ code->r400_code_offset_ext |=
+ alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
+ | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
+ break;
+ case 1:
+ code->r400_code_offset_ext |=
+ alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
+ | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
+ break;
+ case 2:
+ code->r400_code_offset_ext |=
+ alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
+ | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
+ break;
+ case 3:
+ code->r400_code_offset_ext |=
+ alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
+ | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
+ break;
+ }
return 1;
}
@@ -348,7 +411,7 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
unsigned int opcode;
PROG_CODE;
- if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
+ if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
error("Too many TEX instructions");
return 0;
}
@@ -376,10 +439,17 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
use_temporary(code, inst->U.I.SrcReg[0].Index);
code->tex.inst[code->tex.length++] =
- (inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
- (dest << R300_DST_ADDR_SHIFT) |
- (unit << R300_TEX_ID_SHIFT) |
- (opcode << R300_TEX_INST_SHIFT);
+ ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
+ & R300_SRC_ADDR_MASK)
+ | ((dest << R300_DST_ADDR_SHIFT)
+ & R300_DST_ADDR_MASK)
+ | (unit << R300_TEX_ID_SHIFT)
+ | (opcode << R300_TEX_INST_SHIFT)
+ | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
+ R400_SRC_ADDR_EXT_BIT : 0)
+ | (dest >= R300_PFS_NUM_TEMP_REGS ?
+ R400_DST_ADDR_EXT_BIT : 0)
+ ;
return 1;
}
@@ -393,6 +463,7 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
struct r300_emit_state emit;
struct r300_fragment_program_code *code = &compiler->code->code.r300;
+ unsigned int tex_end;
memset(&emit, 0, sizeof(emit));
emit.compiler = compiler;
@@ -424,11 +495,28 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
finish_node(&emit);
code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
+
+ /* Set r400 extended instruction fields. These values will be ignored
+ * on r300 cards. */
+ code->r400_code_offset_ext |=
+ (get_msbs_alu(0)
+ << R400_ALU_OFFSET_MSB_SHIFT)
+ | (get_msbs_alu(code->alu.length - 1)
+ << R400_ALU_SIZE_MSB_SHIFT);
+
+ tex_end = code->tex.length ? code->tex.length - 1 : 0;
code->code_offset =
- (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
- ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
- (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
- ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
+ ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
+ & R300_PFS_CNTL_ALU_OFFSET_MASK)
+ | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
+ & R300_PFS_CNTL_ALU_END_MASK)
+ | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
+ & R300_PFS_CNTL_TEX_OFFSET_MASK)
+ | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
+ & R300_PFS_CNTL_TEX_END_MASK)
+ | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
+ | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
+ ;
if (emit.current_node < 3) {
int shift = 3 - emit.current_node;
@@ -438,4 +526,11 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
for(i = 0; i < shift; ++i)
code->code_addr[i] = 0;
}
+
+ if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
+ || code->alu.length > R300_PFS_MAX_ALU_INST
+ || code->tex.length > R300_PFS_MAX_TEX_INST) {
+
+ code->r390_mode = 1;
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index 05d3da8a10..5223aaa71a 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -98,9 +98,6 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
unsigned int relevant;
int j;
- if (reg.Abs)
- reg.Negate = RC_MASK_NONE;
-
if (opcode == RC_OPCODE_KIL ||
opcode == RC_OPCODE_TEX ||
opcode == RC_OPCODE_TXB ||
@@ -140,9 +137,6 @@ static void r300_swizzle_split(
struct rc_src_register src, unsigned int mask,
struct rc_swizzle_split * split)
{
- if (src.Abs)
- src.Negate = RC_MASK_NONE;
-
split->NumPhases = 0;
while(mask) {
@@ -222,13 +216,14 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
*/
unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
{
+ unsigned int swz = GET_SWZ(swizzle, 0);
if (src == RC_PAIR_PRESUB_SRC) {
- return R300_ALU_ARGA_SRCP_X + swizzle;
+ return R300_ALU_ARGA_SRCP_X + swz;
}
- if (swizzle < 3)
- return swizzle + 3*src;
+ if (swz < 3)
+ return swz + 3*src;
- switch(swizzle) {
+ switch(swz) {
case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index e0d349b98c..9286733635 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -78,12 +78,32 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user)
}
}
+/**
+ * Instruction callback that sets the saturate mode of an instruction to
+ * RC_SATURATE_ZERO_ONE when the instruction has a destination register in
+ * the RC_FILE_OUTPUT file.
+ *
+ * @param c Unused.
+ * @param inst The instruction to inspect and possibly modify.
+ * @param data Unused.
+ * @return 1 if the saturate mode was set, 0 if the instruction was left alone.
+ */
+static int radeon_saturate_output(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void* data)
+{
+	const struct rc_opcode_info *opcode_info = rc_get_opcode_info(inst->U.I.Opcode);
+	int writes_output = opcode_info->HasDstReg
+		&& inst->U.I.DstReg.File == RC_FILE_OUTPUT;
+
+	if (writes_output) {
+		inst->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
+		return 1;
+	}
+
+	return 0;
+}
+
void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{
int is_r500 = c->Base.is_r500;
int opt = !c->Base.disable_optimizations;
+ int sat_out = c->state.frag_clamp;
/* Lists of instruction transformations. */
+ struct radeon_program_transformation saturate_output[] = {
+ { &radeon_saturate_output, c },
+ { 0, 0 }
+ };
+
struct radeon_program_transformation rewrite_tex[] = {
{ &radeonTransformTEX, c },
{ 0, 0 }
@@ -113,6 +133,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{"unroll loops", 1, is_r500, rc_unroll_loops, NULL},
{"transform loops", 1, !is_r500, rc_transform_loops, NULL},
{"emulate branches", 1, !is_r500, rc_emulate_branches, NULL},
+ {"saturate output writes", 1, sat_out, rc_local_transform, saturate_output},
{"transform TEX", 1, 1, rc_local_transform, rewrite_tex},
{"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500},
{"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300},
@@ -124,7 +145,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
/* This pass makes it easier for the scheduler to group TEX
* instructions and reduces the chances of creating too
* many texture indirections.*/
- {"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL},
+ {"register rename", 1, !is_r500, rc_rename_regs, NULL},
{"pair translate", 1, 1, rc_pair_translate, NULL},
{"pair scheduling", 1, 1, rc_pair_schedule, NULL},
{"register allocation", 1, opt, rc_pair_regalloc, NULL},
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 472029f63d..8ad2175ead 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -490,13 +490,6 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
continue;
if (info->HasDstReg) {
- /* Relative addressing of destination operands is not supported yet. */
- if (vpi->DstReg.RelAddr) {
- rc_error(&compiler->Base, "Vertex program does not support relative "
- "addressing of destination operands (yet).\n");
- return;
- }
-
/* Neither is Saturate. */
if (vpi->SaturateMode != RC_SATURATE_NONE) {
rc_error(&compiler->Base, "Vertex program does not support the Saturate "
@@ -668,7 +661,6 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
char hwtemps[RC_REGISTER_MAX_INDEX];
struct temporary_allocation * ta;
unsigned int i, j;
- struct rc_instruction *last_inst_src_reladdr = NULL;
memset(hwtemps, 0, sizeof(hwtemps));
@@ -693,28 +685,11 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
}
}
- /* Pass 2: If there is relative addressing of dst temporaries, we cannot change register indices. Give up.
- * For src temporaries, save the last instruction which uses relative addressing. */
- for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
- const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
-
- if (opcode->HasDstReg)
- if (inst->U.I.DstReg.RelAddr)
- return;
-
- for (i = 0; i < opcode->NumSrcRegs; ++i) {
- if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
- inst->U.I.SrcReg[i].RelAddr) {
- last_inst_src_reladdr = inst;
- }
- }
- }
-
ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
sizeof(struct temporary_allocation) * num_orig_temps);
memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
- /* Pass 3: Determine original temporary lifetimes */
+ /* Pass 2: Determine original temporary lifetimes */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
/* Instructions inside of loops need to use the ENDLOOP
@@ -744,41 +719,22 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
for (i = 0; i < opcode->NumSrcRegs; ++i) {
if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
- struct rc_instruction *last_read;
-
- /* From "last_inst_src_reladdr", "end_loop", and "inst",
- * select the instruction with the highest instruction index (IP).
- * Note that "end_loop", if available, has always a higher index than "inst". */
- if (last_inst_src_reladdr) {
- if (end_loop) {
- last_read = last_inst_src_reladdr->IP > end_loop->IP ?
- last_inst_src_reladdr : end_loop;
- } else {
- last_read = last_inst_src_reladdr->IP > inst->IP ?
- last_inst_src_reladdr : inst;
- }
- } else {
- last_read = end_loop ? end_loop : inst;
- }
-
- ta[inst->U.I.SrcReg[i].Index].LastRead = last_read;
+ ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst;
}
}
}
- /* Pass 4: Register allocation */
+ /* Pass 3: Register allocation */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- if (!last_inst_src_reladdr || last_inst_src_reladdr->IP < inst->IP) {
- for (i = 0; i < opcode->NumSrcRegs; ++i) {
- if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
- unsigned int orig = inst->U.I.SrcReg[i].Index;
- inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.SrcReg[i].Index;
+ inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
- if (ta[orig].Allocated && inst == ta[orig].LastRead)
- hwtemps[ta[orig].HwTemp] = 0;
- }
+ if (ta[orig].Allocated && inst == ta[orig].LastRead)
+ hwtemps[ta[orig].HwTemp] = 0;
}
}
@@ -792,12 +748,7 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user)
break;
}
ta[orig].Allocated = 1;
- if (last_inst_src_reladdr &&
- last_inst_src_reladdr->IP > inst->IP) {
- ta[orig].HwTemp = orig;
- } else {
- ta[orig].HwTemp = j;
- }
+ ta[orig].HwTemp = j;
hwtemps[ta[orig].HwTemp] = 1;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index ef81be48f7..140eeed3de 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -77,9 +77,6 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE))
return 0;
- if (reg.Negate)
- reg.Negate ^= RC_MASK_XYZW;
-
for(i = 0; i < 4; ++i) {
unsigned int swz = GET_SWZ(reg.Swizzle, i);
if (swz == RC_SWIZZLE_UNUSED) {
@@ -103,9 +100,6 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
return 0;
} else {
/* ALU instructions support almost everything */
- if (reg.Abs)
- return 1;
-
relevant = 0;
for(i = 0; i < 3; ++i) {
unsigned int swz = GET_SWZ(reg.Swizzle, i);
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 5da82d90f6..301b444669 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -170,7 +170,7 @@ static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
{
unsigned int t = inst->Alpha.Arg[i].Source;
- t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
+ t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
t |= inst->Alpha.Arg[i].Negate << 5;
t |= inst->Alpha.Arg[i].Abs << 6;
return t;
@@ -372,7 +372,7 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
| (inst->DstReg.WriteMask << 11)
| R500_INST_TEX_SEM_WAIT;
code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
- | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
+ | R500_TEX_SEM_ACQUIRE;
if (inst->TexSrcTarget == RC_TEXTURE_RECT)
code->inst[ip].inst1 |= R500_TEX_UNSCALED;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index b69e81698a..35360aa70f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -31,6 +31,9 @@
#define R300_PFS_NUM_TEMP_REGS 32
#define R300_PFS_NUM_CONST_REGS 32
+#define R400_PFS_MAX_ALU_INST 512
+#define R400_PFS_MAX_TEX_INST 512
+
#define R500_PFS_MAX_INST 512
#define R500_PFS_NUM_TEMP_REGS 128
#define R500_PFS_NUM_CONST_REGS 256
@@ -170,6 +173,8 @@ struct r300_fragment_program_external_state {
* RC_STATE_R300_TEXSCALE_FACTOR. */
unsigned clamp_and_scale_before_fetch : 1;
} unit[16];
+
+ unsigned frag_clamp:1;
};
@@ -187,24 +192,29 @@ struct r300_fragment_program_node {
*/
struct r300_fragment_program_code {
struct {
- int length; /**< total # of texture instructions used */
- uint32_t inst[R300_PFS_MAX_TEX_INST];
+ unsigned int length; /**< total # of texture instructions used */
+ uint32_t inst[R400_PFS_MAX_TEX_INST];
} tex;
struct {
- int length; /**< total # of ALU instructions used */
+ unsigned int length; /**< total # of ALU instructions used */
struct {
uint32_t rgb_inst;
uint32_t rgb_addr;
uint32_t alpha_inst;
uint32_t alpha_addr;
- } inst[R300_PFS_MAX_ALU_INST];
+ uint32_t r400_ext_addr;
+ } inst[R400_PFS_MAX_ALU_INST];
} alu;
uint32_t config; /* US_CONFIG */
uint32_t pixsize; /* US_PIXSIZE */
uint32_t code_offset; /* US_CODE_OFFSET */
+ uint32_t r400_code_offset_ext; /* US_CODE_EXT */
uint32_t code_addr[4]; /* US_CODE_ADDR */
+ /*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries
+ * for r400 cards */
+ unsigned int r390_mode:1;
};
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 65548604bc..79cd7996f7 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -373,9 +373,11 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
const struct rc_opcode_info * info;
rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg);
if (tmp->Type == RC_INSTRUCTION_NORMAL) {
+ info = rc_get_opcode_info(tmp->U.I.Opcode);
+ if (info->Opcode == RC_OPCODE_BEGIN_TEX)
+ continue;
if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
s->num_presub_ops++;
- info = rc_get_opcode_info(tmp->U.I.Opcode);
} else {
if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
s->num_presub_ops++;
@@ -402,11 +404,11 @@ static void print_stats(struct radeon_compiler * c)
{
struct rc_program_stats s;
- rc_get_stats(c, &s);
-
- if (s.num_insts < 4)
+ if (c->initial_num_insts <= 5)
return;
+ rc_get_stats(c, &s);
+
switch (c->type) {
case RC_VERTEX_PROGRAM:
fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n"
@@ -461,6 +463,11 @@ void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pa
/* Executes a list of compiler passes given in the parameter 'list'. */
void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
{
+ struct rc_program_stats s;
+
+ rc_get_stats(c, &s);
+ c->initial_num_insts = s.num_insts;
+
if (c->Debug & RC_DBG_LOG) {
fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
rc_print_program(&c->Program);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index e663339589..2d8e415f35 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -50,6 +50,7 @@ struct radeon_compiler {
char * ErrorMsg;
/* Hardware specification. */
+ unsigned is_r400:1;
unsigned is_r500:1;
unsigned has_half_swizzles:1;
unsigned has_presub:1;
@@ -57,6 +58,7 @@ struct radeon_compiler {
unsigned max_temp_regs;
unsigned max_constants;
int max_alu_insts;
+ unsigned max_tex_insts;
/* Whether to remove unused constants and empty holes in constant space. */
unsigned remove_unused_constants:1;
@@ -70,6 +72,8 @@ struct radeon_compiler {
/*@}*/
struct emulate_loop_state loop_state;
+
+ unsigned initial_num_insts; /* Number of instructions at start. */
};
void rc_init(struct radeon_compiler * c);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
index bf393a9fb1..15ec4418cb 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
@@ -55,6 +55,24 @@ rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
return GET_SWZ(swz, idx);
}
+/**
+ * Standardize the number of channels used by a swizzle. Every swizzle,
+ * regardless of which instruction it belongs to, should have all 4 channels
+ * initialized with a value.
+ * @param initial_value The swizzle whose first `channels` channels are
+ * already meaningful.
+ * @param channels The number of channels in initial_value that have a
+ * meaningful value; channels from this index up to 3 are overwritten.
+ * @return The swizzle with every channel at index >= channels set to
+ * RC_SWIZZLE_UNUSED. If channels >= 4 the value is returned unchanged.
+ */
+unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels)
+{
+	unsigned int chan = channels;
+
+	while (chan < 4) {
+		SET_SWZ(initial_value, chan, RC_SWIZZLE_UNUSED);
+		chan++;
+	}
+
+	return initial_value;
+}
+
unsigned int combine_swizzles4(unsigned int src,
rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
{
@@ -147,13 +165,17 @@ unsigned int rc_src_reads_dst_mask(
return dst_mask & rc_swizzle_to_writemask(src_swz);
}
-unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels)
+/**
+ * @return A bit mask specifying whether this swizzle will select from an RGB
+ * source, an Alpha source, or both.
+ */
+unsigned int rc_source_type_swz(unsigned int swizzle)
{
unsigned int chan;
unsigned int swz = RC_SWIZZLE_UNUSED;
unsigned int ret = RC_SOURCE_NONE;
- for(chan = 0; chan < channels; chan++) {
+ for(chan = 0; chan < 4; chan++) {
swz = GET_SWZ(swizzle, chan);
if (swz == RC_SWIZZLE_W) {
ret |= RC_SOURCE_ALPHA;
@@ -202,7 +224,7 @@ static void can_use_presub_read_cb(
if (d->RemoveSrcs[i].File == file
&& d->RemoveSrcs[i].Index == index) {
src_type &=
- ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle, 4);
+ ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle);
}
}
@@ -223,7 +245,6 @@ unsigned int rc_inst_can_use_presub(
{
struct can_use_presub_data d;
unsigned int num_presub_srcs;
- unsigned int presub_src_type = rc_source_type_mask(presub_writemask);
const struct rc_opcode_info * info =
rc_get_opcode_info(inst->U.I.Opcode);
@@ -253,13 +274,7 @@ unsigned int rc_inst_can_use_presub(
num_presub_srcs = rc_presubtract_src_reg_count(presub_op);
- if ((presub_src_type & RC_SOURCE_RGB)
- && d.RGBCount + num_presub_srcs > 3) {
- return 0;
- }
-
- if ((presub_src_type & RC_SOURCE_ALPHA)
- && d.AlphaCount + num_presub_srcs > 3) {
+ if (d.RGBCount + num_presub_srcs > 3 || d.AlphaCount + num_presub_srcs > 3) {
return 0;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
index 461ab9ffb1..dd0f6c6615 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
@@ -10,6 +10,8 @@ unsigned int rc_swizzle_to_writemask(unsigned int swz);
rc_swizzle get_swz(unsigned int swz, rc_swizzle idx);
+unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels);
+
unsigned int combine_swizzles4(unsigned int src,
rc_swizzle swz_x, rc_swizzle swz_y,
rc_swizzle swz_z, rc_swizzle swz_w);
@@ -32,7 +34,7 @@ unsigned int rc_src_reads_dst_mask(
unsigned int dst_idx,
unsigned int dst_mask);
-unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels);
+unsigned int rc_source_type_swz(unsigned int swizzle);
unsigned int rc_source_type_mask(unsigned int mask);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
index d0a64d936e..c080d5aecc 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -140,14 +140,8 @@ static void pair_sub_for_all_args(
for(i = 0; i < info->NumSrcRegs; i++) {
unsigned int src_type;
- unsigned int channels = 0;
- if (&fullinst->U.P.RGB == sub)
- channels = 3;
- else if (&fullinst->U.P.Alpha == sub)
- channels = 1;
-
- assert(channels > 0);
- src_type = rc_source_type_swz(sub->Arg[i].Swizzle, channels);
+
+ src_type = rc_source_type_swz(sub->Arg[i].Swizzle);
if (src_type == RC_SOURCE_NONE)
continue;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index 87906f37b1..678e147588 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -160,12 +160,8 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction
unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
if (pused) {
usedmask = *pused & inst->U.I.DstReg.WriteMask;
- if (!inst->U.I.DstReg.RelAddr)
- *pused &= ~usedmask;
+ *pused &= ~usedmask;
}
-
- if (inst->U.I.DstReg.RelAddr)
- mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
}
insts->WriteMask |= usedmask;
@@ -219,22 +215,9 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
{
struct deadcode_state s;
unsigned int nr_instructions;
- unsigned has_temp_reladdr_src = 0;
rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
unsigned int ip;
- /* Give up if there is relative addressing of destination operands. */
- for(struct rc_instruction * inst = c->Program.Instructions.Next;
- inst != &c->Program.Instructions;
- inst = inst->Next) {
- const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
- if (opcode->HasDstReg &&
- inst->U.I.DstReg.WriteMask &&
- inst->U.I.DstReg.RelAddr) {
- return;
- }
- }
-
memset(&s, 0, sizeof(s));
s.C = c;
@@ -321,32 +304,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
}
}
-
- if (!has_temp_reladdr_src) {
- for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
- if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
- inst->U.I.SrcReg[i].RelAddr) {
- /* If there is a register read from a temporary file with relative addressing,
- * mark all preceding written registers as used. */
- for (struct rc_instruction *ptr = inst->Prev;
- ptr != &c->Program.Instructions;
- ptr = ptr->Prev) {
- opcode = rc_get_opcode_info(ptr->U.I.Opcode);
- if (opcode->HasDstReg &&
- ptr->U.I.DstReg.File == RC_FILE_TEMPORARY &&
- ptr->U.I.DstReg.WriteMask) {
- mark_used(&s,
- ptr->U.I.DstReg.File,
- ptr->U.I.DstReg.Index,
- ptr->U.I.DstReg.WriteMask);
- }
- }
-
- has_temp_reladdr_src = 1;
- break;
- }
- }
- }
}
update_instruction(&s, inst);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
index a0f7bd8174..133a9f72ec 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
@@ -56,6 +56,7 @@ static void rewrite_source(struct radeon_compiler * c,
mov->U.I.DstReg.Index = tempreg;
mov->U.I.DstReg.WriteMask = split.Phase[phase];
mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
+ mov->U.I.PreSub = inst->U.I.PreSub;
phase_refmask = 0;
for(unsigned int chan = 0; chan < 4; ++chan) {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index 44f4c0fbdc..c4e6a5e0a1 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -139,7 +139,6 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
unsigned int i;
if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
- inst_mov->U.I.DstReg.RelAddr ||
inst_mov->U.I.WriteALUResult ||
inst_mov->U.I.SaturateMode)
return;
@@ -312,7 +311,18 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction *
struct rc_constant * constant;
struct rc_src_register newsrc;
int have_real_reference;
+ unsigned int chan;
+
+ /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
+ for (chan = 0; chan < 4; ++chan)
+ if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
+ break;
+ if (chan == 4) {
+ inst->U.I.SrcReg[src].File = RC_FILE_NONE;
+ continue;
+ }
+ /* Convert immediates to swizzles. */
if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
inst->U.I.SrcReg[src].RelAddr ||
inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
@@ -326,7 +336,7 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction *
newsrc = inst->U.I.SrcReg[src];
have_real_reference = 0;
- for(unsigned int chan = 0; chan < 4; ++chan) {
+ for (chan = 0; chan < 4; ++chan) {
unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
unsigned int newswz;
float imm;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index 9beb5d6357..8e10813ff0 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -365,8 +365,8 @@ static int merge_presub_sources(
for(arg = 0; arg < info->NumSrcRegs; arg++) {
/*If this arg does not read from an rgb source,
* do nothing. */
- if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle,
- 3) & type)) {
+ if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle)
+ & type)) {
continue;
}
@@ -423,11 +423,11 @@ static int destructive_merge_instructions(
unsigned int index = 0;
int source;
- if (alpha->Alpha.Arg[arg].Swizzle < 3) {
+ if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) {
srcrgb = 1;
file = alpha->RGB.Src[oldsrc].File;
index = alpha->RGB.Src[oldsrc].Index;
- } else if (alpha->Alpha.Arg[arg].Swizzle < 4) {
+ } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) {
srcalpha = 1;
file = alpha->Alpha.Src[oldsrc].File;
index = alpha->Alpha.Src[oldsrc].Index;
@@ -544,18 +544,12 @@ static void rgb_to_alpha_remap (
{
int new_src_index;
unsigned int i;
- struct rc_pair_instruction_source * old_src =
- rc_pair_get_src(&inst->U.P, arg);
- if (!old_src) {
- return;
- }
for (i = 0; i < 3; i++) {
if (get_swz(arg->Swizzle, i) == old_swz) {
SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
}
}
- memset(old_src, 0, sizeof(struct rc_pair_instruction_source));
new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
old_file, new_index);
/* This conversion is not possible, we must have made a mistake in
@@ -728,7 +722,8 @@ static int convert_rgb_to_alpha(
for (j = 0; j < 3; j++) {
unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
if (swz != RC_SWIZZLE_UNUSED) {
- pair_inst->Alpha.Arg[i].Swizzle = swz;
+ pair_inst->Alpha.Arg[i].Swizzle =
+ rc_init_swizzle(swz, 1);
break;
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
index fc05366f50..9e03eb1aca 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -28,6 +28,7 @@
#include "radeon_program_pair.h"
#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
/**
@@ -213,16 +214,21 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
if (needrgb && !istranscendent) {
unsigned int srcrgb = 0;
unsigned int srcalpha = 0;
+ unsigned int srcmask = 0;
int j;
/* We don't care about the alpha channel here. We only
* want the part of the swizzle that writes to rgb,
* since we are creating an rgb instruction. */
for(j = 0; j < 3; ++j) {
unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
- if (swz < 3)
+
+ if (swz < RC_SWIZZLE_W)
srcrgb = 1;
- else if (swz < 4)
+ else if (swz == RC_SWIZZLE_W)
srcalpha = 1;
+
+ if (swz < RC_SWIZZLE_UNUSED)
+ srcmask |= 1 << j;
}
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
inst->SrcReg[i].File, inst->SrcReg[i].Index);
@@ -232,9 +238,10 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
return;
}
pair->RGB.Arg[i].Source = source;
- pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
+ pair->RGB.Arg[i].Swizzle =
+ rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
- pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
+ pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
}
if (needalpha) {
unsigned int srcrgb = 0;
@@ -252,7 +259,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
return;
}
pair->Alpha.Arg[i].Source = source;
- pair->Alpha.Arg[i].Swizzle = swz;
+ pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
}
@@ -302,12 +309,6 @@ static void check_opcode_support(struct r300_fragment_program_compiler *c,
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
if (opcode->HasDstReg) {
- if (inst->DstReg.RelAddr) {
- rc_error(&c->Base, "Fragment program does not support relative addressing "
- "of destination operands.\n");
- return;
- }
-
if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
return;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index df6c94b35f..a07f6b63c6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -56,11 +56,7 @@ struct rc_src_register {
struct rc_dst_register {
unsigned int File:3;
-
- /** Negative values may be used for relative addressing. */
- signed int Index:(RC_REGISTER_INDEX_BITS+1);
- unsigned int RelAddr:1;
-
+ unsigned int Index:RC_REGISTER_INDEX_BITS;
unsigned int WriteMask:4;
};
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 58977a40c7..9fc991166a 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -91,7 +91,6 @@ static struct rc_dst_register dstregtmpmask(int index, int mask)
dst.File = RC_FILE_TEMPORARY;
dst.Index = index;
dst.WriteMask = mask;
- dst.RelAddr = 0;
return dst;
}
@@ -689,11 +688,12 @@ static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
&constant_swizzle);
/* MOV dst, src */
+ dst.WriteMask = RC_MASK_XYZW;
emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
dst,
inst->U.I.SrcReg[0]);
- /* MAX dst.z, src, 0.00...001 */
+ /* MAX dst.y, src, 0.00...001 */
emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
dstregtmpmask(dst.Index, RC_MASK_Y),
srcreg(RC_FILE_TEMPORARY, dst.Index),
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index 5905d26e52..68874795b8 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -211,27 +211,9 @@ struct rc_pair_instruction_source * rc_pair_get_src(
struct rc_pair_instruction * pair_inst,
struct rc_pair_instruction_arg * arg)
{
- unsigned int i, type;
- unsigned int channels = 0;
+ unsigned int type;
- for(i = 0; i < 3; i++) {
- if (arg == pair_inst->RGB.Arg + i) {
- channels = 3;
- break;
- }
- }
-
- if (channels == 0) {
- for (i = 0; i < 3; i++) {
- if (arg == pair_inst->Alpha.Arg + i) {
- channels = 1;
- break;
- }
- }
- }
-
- assert(channels > 0);
- type = rc_source_type_swz(arg->Swizzle, channels);
+ type = rc_source_type_swz(arg->Swizzle);
if (type & RC_SOURCE_RGB) {
return &pair_inst->RGB.Src[arg->Source];
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index ccf7a0070c..6708b16d29 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -63,7 +63,7 @@ struct rc_pair_instruction_source {
struct rc_pair_instruction_arg {
unsigned int Source:2;
- unsigned int Swizzle:9;
+ unsigned int Swizzle:12;
unsigned int Abs:1;
unsigned int Negate:1;
};
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
index ae13f6742f..390d131946 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
@@ -110,7 +110,7 @@ static void rc_print_mask(FILE * f, unsigned int mask)
static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
{
- rc_print_register(f, dst.File, dst.Index, dst.RelAddr);
+ rc_print_register(f, dst.File, dst.Index, 0);
if (dst.WriteMask != RC_MASK_XYZW) {
fprintf(f, ".");
rc_print_mask(f, dst.WriteMask);
@@ -379,7 +379,7 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
else
fprintf(f,"%d", inst->Alpha.Arg[arg].Source);
fprintf(f,".%c%s",
- rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs);
+ rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs);
}
fprintf(f, "\n");
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
index f9d9f34b6a..1cf77d9cf7 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -33,14 +33,14 @@
/* Series of transformations to be done on textures. */
static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler,
- int tmu)
+ int tmu)
{
struct rc_src_register reg = { 0, };
if (compiler->enable_shadow_ambient) {
reg.File = RC_FILE_CONSTANT;
reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants,
- RC_STATE_SHADOW_AMBIENT, tmu);
+ RC_STATE_SHADOW_AMBIENT, tmu);
reg.Swizzle = RC_SWIZZLE_WWWW;
} else {
reg.File = RC_FILE_NONE;
@@ -149,14 +149,11 @@ int radeonTransformTEX(
return 1;
} else {
- rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
struct rc_instruction * inst_rcp = NULL;
- struct rc_instruction * inst_mad;
- struct rc_instruction * inst_cmp;
+ struct rc_instruction *inst_mul, *inst_add, *inst_cmp;
unsigned tmp_texsample;
unsigned tmp_sum;
- unsigned tmp_recip_w = 0;
- int pass, fail, tex;
+ int pass, fail;
/* Save the output register. */
struct rc_dst_register output_reg = inst->U.I.DstReg;
@@ -167,63 +164,68 @@ int radeonTransformTEX(
inst->U.I.DstReg.Index = tmp_texsample;
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
- if (inst->U.I.Opcode == RC_OPCODE_TXP) {
- tmp_recip_w = rc_find_free_temporary(c);
+ tmp_sum = rc_find_free_temporary(c);
+ if (inst->U.I.Opcode == RC_OPCODE_TXP) {
/* Compute 1/W. */
inst_rcp = rc_insert_new_instruction(c, inst);
inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_rcp->U.I.DstReg.Index = tmp_recip_w;
+ inst_rcp->U.I.DstReg.Index = tmp_sum;
inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
inst_rcp->U.I.SrcReg[0].Swizzle =
RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3));
}
- /* Perspective-divide Z by W (if it's TXP) and add the texture sample (see below). */
- tmp_sum = rc_find_free_temporary(c);
- inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
- inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_mad->U.I.DstReg.Index = tmp_sum;
- inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- inst_mad->U.I.SrcReg[0].Swizzle =
+ /* Divide Z by W (if it's TXP) and saturate. */
+ inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
+ inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV;
+ inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.DstReg.Index = tmp_sum;
+ inst_mul->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE;
+ inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mul->U.I.SrcReg[0].Swizzle =
RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2));
if (inst->U.I.Opcode == RC_OPCODE_TXP) {
- inst_mad->U.I.Opcode = RC_OPCODE_MAD;
- inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
- inst_mad->U.I.SrcReg[1].Index = tmp_recip_w;
- inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
- tex = 2;
- } else {
- inst_mad->U.I.Opcode = RC_OPCODE_ADD;
- tex = 1;
- }
- inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY;
- inst_mad->U.I.SrcReg[tex].Index = tmp_texsample;
- inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle;
-
- /* Fake EQUAL/NOTEQUAL, it seems to pass some tests suprisingly. */
- if (comparefunc == RC_COMPARE_FUNC_EQUAL) {
- comparefunc = RC_COMPARE_FUNC_GEQUAL;
- } else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
- comparefunc = RC_COMPARE_FUNC_LESS;
+ inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.SrcReg[1].Index = tmp_sum;
+ inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
}
- /* Recall that SrcReg[0] is r, SrcReg[tex] is tex and:
+ /* Add the depth texture value. */
+ inst_add = rc_insert_new_instruction(c, inst_mul);
+ inst_add->U.I.Opcode = RC_OPCODE_ADD;
+ inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_add->U.I.DstReg.Index = tmp_sum;
+ inst_add->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_add->U.I.SrcReg[0].Index = tmp_sum;
+ inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+ inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_add->U.I.SrcReg[1].Index = tmp_texsample;
+ inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
+
+ /* Note that SrcReg[0] is r, SrcReg[1] is tex and:
* LESS: r < tex <=> -tex+r < 0
* GEQUAL: r >= tex <=> not (-tex+r < 0)
* GREATER: r > tex <=> tex-r < 0
* LEQUAL: r <= tex <=> not ( tex-r < 0)
- *
- * This negates either r or tex: */
- if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
- inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW;
+ * EQUAL: GEQUAL
+ * NOTEQUAL:LESS
+ */
+
+ /* This negates either r or tex: */
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL ||
+ comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL)
+ inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW;
else
- inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
+ inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
/* This negates the whole expresion: */
- if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER ||
+ comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
pass = 1;
fail = 2;
} else {
@@ -231,16 +233,19 @@ int radeonTransformTEX(
fail = 1;
}
- inst_cmp = rc_insert_new_instruction(c, inst_mad);
+ inst_cmp = rc_insert_new_instruction(c, inst_add);
inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
inst_cmp->U.I.DstReg = output_reg;
inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
+ inst_cmp->U.I.SrcReg[0].Swizzle =
+ combine_swizzles(RC_SWIZZLE_WWWW,
+ compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle);
inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
- assert(tmp_texsample != tmp_sum && tmp_sum != tmp_recip_w);
+ assert(tmp_texsample != tmp_sum);
}
}
@@ -420,17 +425,21 @@ int radeonTransformTEX(
scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR);
}
- /* Cannot write texture to output registers (all chips) or with masks (non-r500) */
+ /* Cannot write texture to output registers or with saturate (all chips),
+ * or with masks (non-r500). */
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
(inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
+ inst->U.I.SaturateMode ||
(!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode;
inst_mov->U.I.DstReg = inst->U.I.DstReg;
inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
+ inst->U.I.SaturateMode = 0;
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
index 88165f7895..5bd19c0b9c 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
@@ -51,6 +51,14 @@ void rc_rename_regs(struct radeon_compiler *c, void *user)
struct rc_reader_data reader_data;
unsigned char * used;
+ /* XXX Remove this once the register allocation works with flow control. */
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
+ return;
+ }
+
used_length = 2 * rc_recompute_ips(c);
used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length);
memset(used, 0, sizeof(unsigned char) * used_length);
diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c
index 9fd8e8fde5..14e60866d9 100644
--- a/src/mesa/drivers/dri/r300/r300_blit.c
+++ b/src/mesa/drivers/dri/r300/r300_blit.c
@@ -63,7 +63,6 @@ static void create_vertex_program(struct r300_context *r300)
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.DstReg.File = RC_FILE_OUTPUT;
inst->U.I.DstReg.Index = VERT_RESULT_HPOS;
- inst->U.I.DstReg.RelAddr = 0;
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
inst->U.I.SrcReg[0].Abs = 0;
inst->U.I.SrcReg[0].File = RC_FILE_INPUT;
@@ -76,7 +75,6 @@ static void create_vertex_program(struct r300_context *r300)
inst->U.I.Opcode = RC_OPCODE_MOV;
inst->U.I.DstReg.File = RC_FILE_OUTPUT;
inst->U.I.DstReg.Index = VERT_RESULT_TEX0;
- inst->U.I.DstReg.RelAddr = 0;
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
inst->U.I.SrcReg[0].Abs = 0;
inst->U.I.SrcReg[0].File = RC_FILE_INPUT;
@@ -131,6 +129,7 @@ static void create_fragment_program(struct r300_context *r300)
compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32;
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
+ compiler.Base.max_tex_insts = compiler.Base.is_r500 ? 512 : 32;
compiler.code = &r300->blit.fp_code;
compiler.AllocateHwInputs = fp_allocate_hw_inputs;
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index c288834d24..0d8bd4fc70 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/extensions.h"
#include "main/bufferobj.h"
#include "main/texobj.h"
+#include "main/mfeatures.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 4e457b51eb..a0a26f1b38 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -226,6 +226,7 @@ static void translate_fragment_program(struct gl_context *ctx, struct r300_fragm
compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32;
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
+ compiler.Base.max_tex_insts = compiler.Base.is_r500 ? 512 : 32;
compiler.OutputDepth = FRAG_RESULT_DEPTH;
memset(compiler.OutputColor, 0, 4 * sizeof(unsigned));
compiler.OutputColor[0] = FRAG_RESULT_COLOR;
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index f7705b0f6f..2b9d85fae8 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -1658,6 +1658,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13)
# define R300_PFS_CNTL_TEX_END_SHIFT 18
# define R300_PFS_CNTL_TEX_END_MASK (31 << 18)
+# define R400_PFS_CNTL_TEX_OFFSET_MSB_SHIFT 24
+# define R400_PFS_CNTL_TEX_OFFSET_MSB_MASK (0xf << 24)
+# define R400_PFS_CNTL_TEX_END_MSB_SHIFT 28
+# define R400_PFS_CNTL_TEX_END_MSB_MASK (0xf << 28)
/* gap */
@@ -1682,6 +1686,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TEX_SIZE_MASK (31 << 17)
# define R300_RGBA_OUT (1 << 22)
# define R300_W_OUT (1 << 23)
+# define R400_TEX_START_MSB_SHIFT 24
+# define R400_TEX_START_MSG_MASK (0xf << 24)
+# define R400_TEX_SIZE_MSB_SHIFT 28
+# define R400_TEX_SIZE_MSG_MASK (0xf << 28)
/* TEX
* As far as I can tell, texture instructions cannot write into output
@@ -1702,6 +1710,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_TEX_OP_TXP 3
# define R300_TEX_OP_TXB 4
# define R300_TEX_INST_MASK (7 << 15)
+# define R400_SRC_ADDR_EXT_BIT (1 << 19)
+# define R400_DST_ADDR_EXT_BIT (1 << 20)
/* Output format from the unfied shader */
#define R300_US_OUT_FMT 0x46A4
@@ -1979,6 +1989,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_ALU_OUTA_CLAMP (1 << 30)
/* END: Fragment program instruction set */
+/* R4xx extended fragment shader registers. */
+#define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */
+# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x))
+# define R400_ADDRD_EXT_RGB_MSB_BIT 0x08
+# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4))
+# define R400_ADDRD_EXT_A_MSB_BIT 0x80
+
+#define R400_US_CODE_BANK 0x46b8
+# define R400_BANK_SHIFT 0
+# define R400_BANK_MASK 0xf
+# define R400_R390_MODE_ENABLE (1 << 4)
+#define R400_US_CODE_EXT 0x46bc
+# define R400_ALU_OFFSET_MSB_SHIFT 0
+# define R400_ALU_OFFSET_MSB_MASK (0x7 << 0)
+# define R400_ALU_SIZE_MSB_SHIFT 3
+# define R400_ALU_SIZE_MSB_MASK (0x7 << 3)
+# define R400_ALU_START0_MSB_SHIFT 6
+# define R400_ALU_START0_MSB_MASK (0x7 << 6)
+# define R400_ALU_SIZE0_MSB_SHIFT 9
+# define R400_ALU_SIZE0_MSB_MASK (0x7 << 9)
+# define R400_ALU_START1_MSB_SHIFT 12
+# define R400_ALU_START1_MSB_MASK (0x7 << 12)
+# define R400_ALU_SIZE1_MSB_SHIFT 15
+# define R400_ALU_SIZE1_MSB_MASK (0x7 << 15)
+# define R400_ALU_START2_MSB_SHIFT 18
+# define R400_ALU_START2_MSB_MASK (0x7 << 18)
+# define R400_ALU_SIZE2_MSB_SHIFT 21
+# define R400_ALU_SIZE2_MSB_MASK (0x7 << 21)
+# define R400_ALU_START3_MSB_SHIFT 24
+# define R400_ALU_START3_MSB_MASK (0x7 << 24)
+# define R400_ALU_SIZE3_MSB_SHIFT 27
+# define R400_ALU_SIZE3_MSB_MASK (0x7 << 27)
+/* END: R4xx extended fragment shader registers. */
+
/* Fog: Fog Blending Enable */
#define R300_FG_FOG_BLEND 0x4bc0
# define R300_FG_FOG_BLEND_DISABLE (0 << 0)
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index ab8c1df5f7..51989c6b22 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -214,18 +214,18 @@ static void r300SetBlendState(struct gl_context * ctx)
(R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT);
int eqnA = R300_COMB_FCN_ADD_CLAMP;
- if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) {
+ if (_mesa_rgba_logicop_enabled(ctx) || !ctx->Color.BlendEnabled) {
r300SetBlendCntl(r300, func, eqn, 0, func, eqn);
return;
}
func =
- (blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE) <<
- R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstRGB,
+ (blend_factor(ctx->Color.Blend[0].SrcRGB, GL_TRUE) <<
+ R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.Blend[0].DstRGB,
GL_FALSE) <<
R300_DST_BLEND_SHIFT);
- switch (ctx->Color.BlendEquationRGB) {
+ switch (ctx->Color.Blend[0].EquationRGB) {
case GL_FUNC_ADD:
eqn = R300_COMB_FCN_ADD_CLAMP;
break;
@@ -253,17 +253,17 @@ static void r300SetBlendState(struct gl_context * ctx)
default:
fprintf(stderr,
"[%s:%u] Invalid RGB blend equation (0x%04x).\n",
- __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB);
+ __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationRGB);
return;
}
funcA =
- (blend_factor(ctx->Color.BlendSrcA, GL_TRUE) <<
- R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstA,
+ (blend_factor(ctx->Color.Blend[0].SrcA, GL_TRUE) <<
+ R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.Blend[0].DstA,
GL_FALSE) <<
R300_DST_BLEND_SHIFT);
- switch (ctx->Color.BlendEquationA) {
+ switch (ctx->Color.Blend[0].EquationA) {
case GL_FUNC_ADD:
eqnA = R300_COMB_FCN_ADD_CLAMP;
break;
@@ -291,7 +291,7 @@ static void r300SetBlendState(struct gl_context * ctx)
default:
fprintf(stderr,
"[%s:%u] Invalid A blend equation (0x%04x).\n",
- __FUNCTION__, __LINE__, ctx->Color.BlendEquationA);
+ __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationA);
return;
}
@@ -335,7 +335,7 @@ static void r300SetLogicOpState(struct gl_context *ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
R300_STATECHANGE(r300, rop);
- if (RGBA_LOGICOP_ENABLED(ctx)) {
+ if (_mesa_rgba_logicop_enabled(ctx)) {
r300->hw.rop.cmd[1] = R300_RB3D_ROPCNTL_ROP_ENABLE |
translate_logicop(ctx->Color.LogicOp);
} else {
@@ -349,7 +349,7 @@ static void r300SetLogicOpState(struct gl_context *ctx)
*/
static void r300LogicOpcode(struct gl_context *ctx, GLenum logicop)
{
- if (RGBA_LOGICOP_ENABLED(ctx))
+ if (_mesa_rgba_logicop_enabled(ctx))
r300SetLogicOpState(ctx);
}
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
index de66293999..f930b4d06b 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.c
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/context.h"
#include "main/enums.h"
#include "main/image.h"
+#include "main/mfeatures.h"
#include "main/mipmap.h"
#include "main/simple_list.h"
#include "main/texstore.h"
diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c
index 471a3723cb..232603ece5 100644
--- a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c
+++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c
@@ -128,7 +128,6 @@ static void translate_dstreg(struct rc_dst_register * dest, struct prog_dst_regi
{
dest->File = translate_register_file(src->File);
dest->Index = src->Index;
- dest->RelAddr = src->RelAddr;
dest->WriteMask = src->WriteMask;
}