summary refs log tree commit diff
path: root/src/mesa/drivers/dri/r300
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300')
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c 3
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c 6
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c 2
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c 8
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_code.h 2
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c 122
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h 15
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c 370
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h 18
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_optimize.c 331
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c 31
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c 369
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c 5
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_program.c 115
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_program.h 26
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h 5
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c 50
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h 10
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_program_print.c 1
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c 103
-rw-r--r-- src/mesa/drivers/dri/r300/r300_context.c 4
-rw-r--r-- src/mesa/drivers/dri/r300/r300_draw.c 4
-rw-r--r-- src/mesa/drivers/dri/r300/r300_render.c 2
-rw-r--r-- src/mesa/drivers/dri/r300/r300_tex.c 4
-rw-r--r-- src/mesa/drivers/dri/r300/r300_texstate.c 21
25 files changed, 1128 insertions, 499 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index 8be32ea91f..1db8678e89 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -76,6 +76,9 @@ static void use_temporary(struct r300_fragment_program_code *code, unsigned int
static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
{
+ if (!src.Used)
+ return 0;
+
if (src.File == RC_FILE_CONSTANT) {
return src.Index | (1 << 5);
} else if (src.File == RC_FILE_TEMPORARY) {
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index 2d28b06539..05d3da8a10 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -94,6 +94,7 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
*/
static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
+ const struct swizzle_data* sd;
unsigned int relevant;
int j;
@@ -127,7 +128,8 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
return 0;
- if (!lookup_native_swizzle(reg.Swizzle))
+ sd = lookup_native_swizzle(reg.Swizzle);
+ if (!sd || (reg.File == RC_FILE_PRESUB && sd->srcp_stride == 0))
return 0;
return 1;
@@ -201,7 +203,7 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
{
const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
- if (!sd) {
+ if (!sd || (src == RC_PAIR_PRESUB_SRC && sd->srcp_stride == 0)) {
fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
return 0;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 2f130198d3..7b9c316794 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -137,7 +137,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
/* This pass makes it easier for the scheduler to group TEX
* instructions and reduces the chances of creating too
* many texture indirections.*/
- {"register rename", 1, !is_r500, rc_rename_regs, NULL},
+ {"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL},
{"pair translate", 1, 1, rc_pair_translate, NULL},
{"pair scheduling", 1, 1, rc_pair_schedule, NULL},
{"register allocation", 1, opt, rc_pair_regalloc, NULL},
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 6f101c68eb..5da82d90f6 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -45,9 +45,6 @@
#include "radeon_program_pair.h"
-#define MAX_BRANCH_DEPTH_FULL 32
-#define MAX_BRANCH_DEPTH_PARTIAL 4
-
#define PROG_CODE \
struct r500_fragment_program_code *code = &c->code->code.r500
@@ -200,6 +197,9 @@ static void use_temporary(struct r500_fragment_program_code* code, unsigned int
static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
{
+ if (!src.Used)
+ return 0;
+
if (src.File == RC_FILE_CONSTANT) {
return src.Index | 0x100;
} else if (src.File == RC_FILE_TEMPORARY) {
@@ -506,7 +506,7 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
break;
}
case RC_OPCODE_IF:
- if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) {
+ if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
rc_error(s->C, "Branch depth exceeds hardware limit");
return;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index cfb6df2cd7..b69e81698a 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -34,6 +34,8 @@
#define R500_PFS_MAX_INST 512
#define R500_PFS_NUM_TEMP_REGS 128
#define R500_PFS_NUM_CONST_REGS 256
+#define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
+#define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
index 97f4c75849..2b8d284ce9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
@@ -31,6 +31,8 @@
#include "radeon_compiler_util.h"
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
/**
*/
unsigned int rc_swizzle_to_writemask(unsigned int swz)
@@ -59,3 +61,123 @@ unsigned int rc_src_reads_dst_mask(
}
return dst_mask & rc_swizzle_to_writemask(src_swz);
}
+
+unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels)
+{
+ unsigned int chan;
+ unsigned int swz = RC_SWIZZLE_UNUSED;
+ unsigned int ret = RC_SOURCE_NONE;
+
+ for(chan = 0; chan < channels; chan++) {
+ swz = GET_SWZ(swizzle, chan);
+ if (swz == RC_SWIZZLE_W) {
+ ret |= RC_SOURCE_ALPHA;
+ } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
+ || swz == RC_SWIZZLE_Z) {
+ ret |= RC_SOURCE_RGB;
+ }
+ }
+ return ret;
+}
+
+unsigned int rc_source_type_mask(unsigned int mask)
+{
+ unsigned int ret = RC_SOURCE_NONE;
+
+ if (mask & RC_MASK_XYZ)
+ ret |= RC_SOURCE_RGB;
+
+ if (mask & RC_MASK_W)
+ ret |= RC_SOURCE_ALPHA;
+
+ return ret;
+}
+
+struct can_use_presub_data {
+ struct rc_src_register RemoveSrcs[3];
+ unsigned int RGBCount;
+ unsigned int AlphaCount;
+};
+
+static void can_use_presub_read_cb(
+ void * userdata,
+ struct rc_instruction * inst,
+ rc_register_file file,
+ unsigned int index,
+ unsigned int mask)
+{
+ struct can_use_presub_data * d = userdata;
+ unsigned int src_type = rc_source_type_mask(mask);
+ unsigned int i;
+
+ if (file == RC_FILE_NONE)
+ return;
+
+ for(i = 0; i < 3; i++) {
+ if (d->RemoveSrcs[i].File == file
+ && d->RemoveSrcs[i].Index == index) {
+ src_type &=
+ ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle, 4);
+ }
+ }
+
+ if (src_type & RC_SOURCE_RGB)
+ d->RGBCount++;
+
+ if (src_type & RC_SOURCE_ALPHA)
+ d->AlphaCount++;
+}
+
+unsigned int rc_inst_can_use_presub(
+ struct rc_instruction * inst,
+ rc_presubtract_op presub_op,
+ unsigned int presub_writemask,
+ struct rc_src_register replace_reg,
+ struct rc_src_register presub_src0,
+ struct rc_src_register presub_src1)
+{
+ struct can_use_presub_data d;
+ unsigned int num_presub_srcs;
+ unsigned int presub_src_type = rc_source_type_mask(presub_writemask);
+ const struct rc_opcode_info * info =
+ rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (presub_op == RC_PRESUB_NONE) {
+ return 1;
+ }
+
+ if (info->HasTexture) {
+ return 0;
+ }
+
+ /* We can't use more than one presubtract value in an
+ * instruction, unless the two presubtract operations
+ * are the same and read from the same registers.
+ * XXX For now we will limit instructions to only one presubtract
+ * value.*/
+ if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+ return 0;
+ }
+
+ memset(&d, 0, sizeof(d));
+ d.RemoveSrcs[0] = replace_reg;
+ d.RemoveSrcs[1] = presub_src0;
+ d.RemoveSrcs[2] = presub_src1;
+
+ rc_for_all_reads_mask(inst, can_use_presub_read_cb, &d);
+
+ num_presub_srcs = rc_presubtract_src_reg_count(presub_op);
+
+ if ((presub_src_type & RC_SOURCE_RGB)
+ && d.RGBCount + num_presub_srcs > 3) {
+ return 0;
+ }
+
+ if ((presub_src_type & RC_SOURCE_ALPHA)
+ && d.AlphaCount + num_presub_srcs > 3) {
+ return 0;
+ }
+
+ return 1;
+}
+
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
index 1a14e7cb0e..e50dfbd4fb 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h
@@ -3,6 +3,9 @@
#ifndef RADEON_PROGRAM_UTIL_H
#define RADEON_PROGRAM_UTIL_H
+struct rc_instruction;
+struct rc_src_register;
+
unsigned int rc_swizzle_to_writemask(unsigned int swz);
unsigned int rc_src_reads_dst_mask(
@@ -13,4 +16,16 @@ unsigned int rc_src_reads_dst_mask(
unsigned int dst_idx,
unsigned int dst_mask);
+unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels);
+
+unsigned int rc_source_type_mask(unsigned int mask);
+
+unsigned int rc_inst_can_use_presub(
+ struct rc_instruction * inst,
+ rc_presubtract_op presub_op,
+ unsigned int presub_writemask,
+ struct rc_src_register replace_reg,
+ struct rc_src_register presub_src0,
+ struct rc_src_register presub_src1);
+
#endif /* RADEON_PROGRAM_UTIL_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
index 5927498818..9df07edf2b 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -139,7 +139,46 @@ static void pair_sub_for_all_args(
const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
for(i = 0; i < info->NumSrcRegs; i++) {
- cb(userdata, fullinst, &sub->Arg[i]);
+ unsigned int src_type;
+ unsigned int channels = 0;
+ if (&fullinst->U.P.RGB == sub)
+ channels = 3;
+ else if (&fullinst->U.P.Alpha == sub)
+ channels = 1;
+
+ assert(channels > 0);
+ src_type = rc_source_type_swz(sub->Arg[i].Swizzle, channels);
+
+ if (src_type == RC_SOURCE_NONE)
+ continue;
+
+ if (sub->Arg[i].Source == RC_PAIR_PRESUB_SRC) {
+ unsigned int presub_type;
+ unsigned int presub_src_count;
+ struct rc_pair_instruction_source * src_array;
+ unsigned int j;
+ if (src_type & RC_SOURCE_RGB) {
+ presub_type = fullinst->
+ U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index;
+ src_array = fullinst->U.P.RGB.Src;
+ } else {
+ presub_type = fullinst->
+ U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index;
+ src_array = fullinst->U.P.Alpha.Src;
+ }
+ presub_src_count
+ = rc_presubtract_src_reg_count(presub_type);
+ for(j = 0; j < presub_src_count; j++) {
+ cb(userdata, fullinst, &sub->Arg[i],
+ &src_array[j]);
+ }
+ } else {
+ struct rc_pair_instruction_source * src =
+ rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]);
+ if (src) {
+ cb(userdata, fullinst, &sub->Arg[i], src);
+ }
+ }
}
}
@@ -430,12 +469,29 @@ static rc_opcode get_flow_control_inst(struct rc_instruction * inst)
}
+struct branch_write_mask {
+ unsigned int IfWriteMask:4;
+ unsigned int ElseWriteMask:4;
+ unsigned int HasElse:1;
+};
+
+union get_readers_read_cb {
+ rc_read_src_fn I;
+ rc_pair_read_arg_fn P;
+};
+
struct get_readers_callback_data {
struct radeon_compiler * C;
struct rc_reader_data * ReaderData;
- rc_read_src_fn ReadCB;
+ rc_read_src_fn ReadNormalCB;
+ rc_pair_read_arg_fn ReadPairCB;
rc_read_write_mask_fn WriteCB;
+ rc_register_file DstFile;
+ unsigned int DstIndex;
+ unsigned int DstMask;
unsigned int AliveWriteMask;
+ /* For convenience, this is indexed starting at 1 */
+ struct branch_write_mask BranchMasks[R500_PFS_MAX_BRANCH_DEPTH_FULL + 1];
};
static void add_reader(
@@ -443,7 +499,7 @@ static void add_reader(
struct rc_reader_data * data,
struct rc_instruction * inst,
unsigned int mask,
- struct rc_src_register * src)
+ void * arg_or_src)
{
struct rc_reader * new;
memory_pool_array_reserve(pool, struct rc_reader, data->Readers,
@@ -451,7 +507,74 @@ static void add_reader(
new = &data->Readers[data->ReaderCount++];
new->Inst = inst;
new->WriteMask = mask;
- new->Src = src;
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ new->U.Src = arg_or_src;
+ } else {
+ new->U.Arg = arg_or_src;
+ }
+}
+
+static unsigned int get_readers_read_callback(
+ struct get_readers_callback_data * cb_data,
+ unsigned int has_rel_addr,
+ rc_register_file file,
+ unsigned int index,
+ unsigned int swizzle)
+{
+ unsigned int shared_mask, read_mask;
+
+ if (has_rel_addr) {
+ cb_data->ReaderData->Abort = 1;
+ return RC_MASK_NONE;
+ }
+
+ shared_mask = rc_src_reads_dst_mask(file, index, swizzle,
+ cb_data->DstFile, cb_data->DstIndex, cb_data->AliveWriteMask);
+
+ if (shared_mask == RC_MASK_NONE)
+ return shared_mask;
+
+ /* If we make it this far, it means that this source reads from the
+ * same register written to by cb_data->ReaderData->Writer. */
+
+ read_mask = rc_swizzle_to_writemask(swizzle);
+ if (cb_data->ReaderData->AbortOnRead & read_mask) {
+ cb_data->ReaderData->Abort = 1;
+ return shared_mask;
+ }
+
+ /* XXX The behavior in this case should be configurable. */
+ if ((read_mask & cb_data->AliveWriteMask) != read_mask) {
+ cb_data->ReaderData->Abort = 1;
+ return shared_mask;
+ }
+
+ return shared_mask;
+}
+
+static void get_readers_pair_read_callback(
+ void * userdata,
+ struct rc_instruction * inst,
+ struct rc_pair_instruction_arg * arg,
+ struct rc_pair_instruction_source * src)
+{
+ unsigned int shared_mask;
+ struct get_readers_callback_data * d = userdata;
+
+ shared_mask = get_readers_read_callback(d,
+ 0 /*Pair Instructions don't use RelAddr*/,
+ src->File, src->Index, arg->Swizzle);
+
+ if (shared_mask == RC_MASK_NONE)
+ return;
+
+ if (d->ReadPairCB)
+ d->ReadPairCB(d->ReaderData, inst, arg, src);
+
+ if (d->ReaderData->Abort)
+ return;
+
+ add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, arg);
}
/**
@@ -464,36 +587,18 @@ static void get_readers_normal_read_callback(
struct rc_src_register * src)
{
struct get_readers_callback_data * d = userdata;
- unsigned int read_mask;
-
- if (src->RelAddr)
- d->ReaderData->Abort = 1;
+ unsigned int shared_mask;
- unsigned int shared_mask = rc_src_reads_dst_mask(src->File, src->Index,
- src->Swizzle,
- d->ReaderData->Writer->U.I.DstReg.File,
- d->ReaderData->Writer->U.I.DstReg.Index,
- d->AliveWriteMask);
+ shared_mask = get_readers_read_callback(d,
+ src->RelAddr, src->File, src->Index, src->Swizzle);
if (shared_mask == RC_MASK_NONE)
return;
+ /* The callback function could potentially clear d->ReaderData->Abort,
+ * so we need to call it before we return. */
+ if (d->ReadNormalCB)
+ d->ReadNormalCB(d->ReaderData, inst, src);
- /* If we make it this far, it means that this source reads from the
- * same register written to by d->ReaderData->Writer. */
-
- if (d->ReaderData->AbortOnRead) {
- d->ReaderData->Abort = 1;
- return;
- }
-
- read_mask = rc_swizzle_to_writemask(src->Swizzle);
- /* XXX The behavior in this case should be configurable. */
- if ((read_mask & d->AliveWriteMask) != read_mask) {
- d->ReaderData->Abort = 1;
- return;
- }
-
- d->ReadCB(d->ReaderData, inst, src);
if (d->ReaderData->Abort)
return;
@@ -514,29 +619,132 @@ static void get_readers_write_callback(
{
struct get_readers_callback_data * d = userdata;
- if (index == d->ReaderData->Writer->U.I.DstReg.Index
- && file == d->ReaderData->Writer->U.I.DstReg.File) {
- unsigned int shared_mask = mask
- & d->ReaderData->Writer->U.I.DstReg.WriteMask;
- if (d->ReaderData->InElse) {
- if (shared_mask & d->AliveWriteMask) {
- /* We set AbortOnRead here because the
- * destination register of d->ReaderData->Writer
- * is written to in both the IF and the
- * ELSE block of this IF/ELSE statement.
- * This means that readers of this
- * destination register that follow this IF/ELSE
- * statement use the value of different
- * instructions depending on the control flow
- * decisions made by the program. */
- d->ReaderData->AbortOnRead = 1;
+ if (index == d->DstIndex && file == d->DstFile) {
+ unsigned int shared_mask = mask & d->DstMask;
+ d->ReaderData->AbortOnRead &= ~shared_mask;
+ d->AliveWriteMask &= ~shared_mask;
+ }
+
+ if(d->WriteCB)
+ d->WriteCB(d->ReaderData, inst, file, index, mask);
+}
+
+static void get_readers_for_single_write(
+ void * userdata,
+ struct rc_instruction * writer,
+ rc_register_file dst_file,
+ unsigned int dst_index,
+ unsigned int dst_mask)
+{
+ struct rc_instruction * tmp;
+ unsigned int branch_depth = 0;
+ struct get_readers_callback_data * d = userdata;
+
+ d->ReaderData->Writer = writer;
+ d->ReaderData->AbortOnRead = 0;
+ d->ReaderData->InElse = 0;
+ d->DstFile = dst_file;
+ d->DstIndex = dst_index;
+ d->DstMask = dst_mask;
+ d->AliveWriteMask = dst_mask;
+ memset(d->BranchMasks, 0, sizeof(d->BranchMasks));
+
+ if (!dst_mask)
+ return;
+
+ for(tmp = writer->Next; tmp != &d->C->Program.Instructions;
+ tmp = tmp->Next){
+ rc_opcode opcode = get_flow_control_inst(tmp);
+ switch(opcode) {
+ case RC_OPCODE_BGNLOOP:
+ /* XXX We can do better when we see a BGNLOOP if we
+ * add a flag called AbortOnWrite to struct
+ * rc_reader_data and leave it set until the next
+ * ENDLOOP. */
+ case RC_OPCODE_ENDLOOP:
+ /* XXX We can do better when we see an ENDLOOP by
+ * searching backwards from writer and looking for
+ * readers of writer's destination index. If we find a
+ * reader before we get to the BGNLOOP, we must abort
+ * unless there is another writer between that reader
+ * and the BGNLOOP. */
+ case RC_OPCODE_BRK:
+ case RC_OPCODE_CONT:
+ d->ReaderData->Abort = 1;
+ return;
+ case RC_OPCODE_IF:
+ branch_depth++;
+ if (branch_depth > R500_PFS_MAX_BRANCH_DEPTH_FULL) {
+ d->ReaderData->Abort = 1;
+ return;
+ }
+ d->BranchMasks[branch_depth].IfWriteMask =
+ d->AliveWriteMask;
+ break;
+ case RC_OPCODE_ELSE:
+ if (branch_depth == 0) {
+ d->ReaderData->InElse = 1;
+ } else {
+ unsigned int temp_mask = d->AliveWriteMask;
+ d->AliveWriteMask =
+ d->BranchMasks[branch_depth].IfWriteMask;
+ d->BranchMasks[branch_depth].ElseWriteMask =
+ temp_mask;
+ d->BranchMasks[branch_depth].HasElse = 1;
+ }
+ break;
+ case RC_OPCODE_ENDIF:
+ if (branch_depth == 0) {
+ d->ReaderData->AbortOnRead = d->AliveWriteMask;
+ d->ReaderData->InElse = 0;
+ }
+ else {
+ struct branch_write_mask * masks =
+ &d->BranchMasks[branch_depth];
+
+ if (masks->HasElse) {
+ d->ReaderData->AbortOnRead |=
+ masks->IfWriteMask
+ & ~masks->ElseWriteMask;
+ d->AliveWriteMask = masks->IfWriteMask
+ ^ ((masks->IfWriteMask ^
+ masks->ElseWriteMask)
+ & (masks->IfWriteMask
+ ^ d->AliveWriteMask));
+ } else {
+ d->ReaderData->AbortOnRead |=
+ masks->IfWriteMask
+ & ~d->AliveWriteMask;
+ d->AliveWriteMask = masks->IfWriteMask;
+
+ }
+ memset(masks, 0,
+ sizeof(struct branch_write_mask));
+ branch_depth--;
}
+ break;
+ default:
+ break;
+ }
+
+ if (d->ReaderData->InElse)
+ continue;
+
+ if (tmp->Type == RC_INSTRUCTION_NORMAL) {
+ rc_for_all_reads_src(tmp,
+ get_readers_normal_read_callback, d);
} else {
- d->AliveWriteMask &= ~shared_mask;
+ rc_pair_for_all_reads_arg(tmp,
+ get_readers_pair_read_callback, d);
}
- }
+ rc_for_all_writes_mask(tmp, get_readers_write_callback, d);
+
+ if (d->ReaderData->Abort)
+ return;
- d->WriteCB(d->ReaderData, inst, file, index, mask);
+ if (branch_depth == 0 && !d->AliveWriteMask)
+ return;
+ }
}
/**
@@ -577,80 +785,26 @@ static void get_readers_write_callback(
* @param write_cb This function will be called for every instruction after
* writer.
*/
-void rc_get_readers_normal(
+void rc_get_readers(
struct radeon_compiler * c,
struct rc_instruction * writer,
struct rc_reader_data * data,
- rc_read_src_fn read_cb,
+ rc_read_src_fn read_normal_cb,
+ rc_pair_read_arg_fn read_pair_cb,
rc_read_write_mask_fn write_cb)
{
- struct rc_instruction * tmp;
struct get_readers_callback_data d;
- unsigned int branch_depth = 0;
- data->Writer = writer;
data->Abort = 0;
- data->AbortOnRead = 0;
- data->InElse = 0;
data->ReaderCount = 0;
data->ReadersReserved = 0;
data->Readers = NULL;
d.C = c;
- d.AliveWriteMask = writer->U.I.DstReg.WriteMask;
d.ReaderData = data;
- d.ReadCB = read_cb;
+ d.ReadNormalCB = read_normal_cb;
+ d.ReadPairCB = read_pair_cb;
d.WriteCB = write_cb;
- if (!writer->U.I.DstReg.WriteMask)
- return;
-
- for(tmp = writer->Next; tmp != &c->Program.Instructions;
- tmp = tmp->Next){
- rc_opcode opcode = get_flow_control_inst(tmp);
- switch(opcode) {
- case RC_OPCODE_BGNLOOP:
- /* XXX We can do better when we see a BGNLOOP if we
- * add a flag called AbortOnWrite to struct
- * rc_reader_data and leave it set until the next
- * ENDLOOP. */
- case RC_OPCODE_ENDLOOP:
- /* XXX We can do better when we see an ENDLOOP by
- * searching backwards from writer and looking for
- * readers of writer's destination index. If we find a
- * reader before we get to the BGNLOOP, we must abort
- * unless there is another writer between that reader
- * and the BGNLOOP. */
- data->Abort = 1;
- return;
- case RC_OPCODE_IF:
- branch_depth++;
- break;
- case RC_OPCODE_ELSE:
- if (branch_depth == 0)
- data->InElse = 1;
- break;
- case RC_OPCODE_ENDIF:
- if (branch_depth == 0) {
- data->AbortOnRead = 1;
- data->InElse = 0;
- }
- else {
- branch_depth--;
- }
- break;
- default:
- break;
- }
-
- if (!data->InElse)
- rc_for_all_reads_src(tmp, get_readers_normal_read_callback, &d);
- rc_for_all_writes_mask(tmp, get_readers_write_callback, &d);
-
- if (data->Abort)
- return;
-
- if (!d.AliveWriteMask)
- return;
- }
+ rc_for_all_writes_mask(writer, get_readers_for_single_write, &d);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
index 7de6b98f76..ef971c5b23 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -36,6 +36,7 @@ struct rc_instruction;
struct rc_swizzle_caps;
struct rc_src_register;
struct rc_pair_instruction_arg;
+struct rc_pair_instruction_source;
struct rc_compiler;
@@ -59,7 +60,8 @@ void rc_for_all_reads_src(struct rc_instruction * inst, rc_read_src_fn cb,
void * userdata);
typedef void (*rc_pair_read_arg_fn)(void * userdata,
- struct rc_instruction * inst, struct rc_pair_instruction_arg * arg);
+ struct rc_instruction * inst, struct rc_pair_instruction_arg * arg,
+ struct rc_pair_instruction_source * src);
void rc_pair_for_all_reads_arg(struct rc_instruction * inst,
rc_pair_read_arg_fn cb, void * userdata);
@@ -71,7 +73,10 @@ void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, v
struct rc_reader {
struct rc_instruction * Inst;
unsigned int WriteMask;
- struct rc_src_register * Src;
+ union {
+ struct rc_src_register * Src;
+ struct rc_pair_instruction_arg * Arg;
+ } U;
};
struct rc_reader_data {
@@ -87,14 +92,13 @@ struct rc_reader_data {
void * CbData;
};
-void rc_get_readers_normal(
+void rc_get_readers(
struct radeon_compiler * c,
- struct rc_instruction * inst,
+ struct rc_instruction * writer,
struct rc_reader_data * data,
- /*XXX: These should be their own function types. */
- rc_read_src_fn read_cb,
+ rc_read_src_fn read_normal_cb,
+ rc_pair_read_arg_fn read_pair_cb,
rc_read_write_mask_fn write_cb);
-
/**
* Compiler passes based on dataflow analysis.
*/
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index 4d9120ffd0..27b10ffbd6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -32,15 +32,14 @@
#include "radeon_compiler_util.h"
#include "radeon_swizzle.h"
-struct peephole_state {
- struct rc_instruction * Inst;
- /** Stores a bitmask of the components that are still "alive" (i.e.
- * they have not been written to since Inst was executed.)
- */
- unsigned int WriteMask;
+struct src_clobbered_reads_cb_data {
+ rc_register_file File;
+ unsigned int Index;
+ unsigned int Mask;
+ struct rc_reader_data * ReaderData;
};
-typedef void (*rc_presub_replace_fn)(struct peephole_state *,
+typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
struct rc_instruction *,
unsigned int);
@@ -67,38 +66,18 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct
return combine;
}
-struct copy_propagate_state {
- struct radeon_compiler * C;
- struct rc_instruction * Mov;
- unsigned int Conflict:1;
-
- /** Whether Mov's source has been clobbered */
- unsigned int SourceClobbered:1;
-
- /** Which components of Mov's destination register are still from that Mov? */
- unsigned int MovMask:4;
-
- /** Which components of Mov's destination register are clearly *not* from that Mov */
- unsigned int DefinedMask:4;
-
- /** Which components of Mov's source register are sourced */
- unsigned int SourcedMask:4;
-
- /** Branch depth beyond Mov; negative value indicates we left the Mov's block */
- int BranchDepth;
-};
-
static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
struct rc_src_register * src)
{
rc_register_file file = src->File;
struct rc_reader_data * reader_data = data;
- const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
- /* It is possible to do copy propigation in this situation,
- * just not right now, see peephole_add_presub_inv() */
- if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
- (info->NumSrcRegs > 2 || info->HasTexture)) {
+ if(!rc_inst_can_use_presub(inst,
+ reader_data->Writer->U.I.PreSub.Opcode,
+ rc_swizzle_to_writemask(src->Swizzle),
+ *src,
+ reader_data->Writer->U.I.PreSub.SrcReg[0],
+ reader_data->Writer->U.I.PreSub.SrcReg[1])) {
reader_data->Abort = 1;
return;
}
@@ -123,23 +102,44 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
}
}
-static void copy_propagate_scan_write(void * data, struct rc_instruction * inst,
- rc_register_file file, unsigned int index, unsigned int mask)
+static void src_clobbered_reads_cb(
+ void * data,
+ struct rc_instruction * inst,
+ struct rc_src_register * src)
{
- struct rc_reader_data * reader_data = data;
- struct copy_propagate_state * s = reader_data->CbData;
+ struct src_clobbered_reads_cb_data * sc_data = data;
+
+ if (src->File == sc_data->File
+ && src->Index == sc_data->Index
+ && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
- if (file == reader_data->Writer->U.I.SrcReg[0].File && index == reader_data->Writer->U.I.SrcReg[0].Index) {
- if (mask & s->SourcedMask)
- reader_data->AbortOnRead = 1;
- } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
- reader_data->AbortOnRead = 1;
+ sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
}
+
+ if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
+ sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
+ }
+}
+
+static void is_src_clobbered_scan_write(
+ void * data,
+ struct rc_instruction * inst,
+ rc_register_file file,
+ unsigned int index,
+ unsigned int mask)
+{
+ struct src_clobbered_reads_cb_data sc_data;
+ struct rc_reader_data * reader_data = data;
+ sc_data.File = file;
+ sc_data.Index = index;
+ sc_data.Mask = mask;
+ sc_data.ReaderData = reader_data;
+ rc_for_all_reads_src(reader_data->Writer,
+ src_clobbered_reads_cb, &sc_data);
}
static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
{
- struct copy_propagate_state s;
struct rc_reader_data reader_data;
unsigned int i;
@@ -149,22 +149,10 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
inst_mov->U.I.SaturateMode)
return;
- memset(&s, 0, sizeof(s));
- s.C = c;
- s.Mov = inst_mov;
- s.MovMask = inst_mov->U.I.DstReg.WriteMask;
- s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
-
- reader_data.CbData = &s;
-
- for(unsigned int chan = 0; chan < 4; ++chan) {
- unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
- s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
- }
-
/* Get a list of all the readers of this MOV instruction. */
- rc_get_readers_normal(c, inst_mov, &reader_data,
- copy_propagate_scan_read, copy_propagate_scan_write);
+ rc_get_readers(c, inst_mov, &reader_data,
+ copy_propagate_scan_read, NULL,
+ is_src_clobbered_scan_write);
if (reader_data.Abort || reader_data.ReaderCount == 0)
return;
@@ -172,10 +160,10 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
/* Propagate the MOV instruction. */
for (i = 0; i < reader_data.ReaderCount; i++) {
struct rc_instruction * inst = reader_data.Readers[i].Inst;
- *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, s.Mov->U.I.SrcReg[0]);
+ *reader_data.Readers[i].U.Src = chain_srcregs(*reader_data.Readers[i].U.Src, inst_mov->U.I.SrcReg[0]);
- if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
- inst->U.I.PreSub = s.Mov->U.I.PreSub;
+ if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
+ inst->U.I.PreSub = inst_mov->U.I.PreSub;
}
/* Finally, remove the original MOV instruction */
@@ -222,8 +210,8 @@ static int is_src_uniform_constant(struct rc_src_register src,
static void constant_folding_mad(struct rc_instruction * inst)
{
- rc_swizzle swz;
- unsigned int negate;
+ rc_swizzle swz = 0;
+ unsigned int negate= 0;
if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
if (swz == RC_SWIZZLE_ZERO) {
@@ -263,8 +251,8 @@ static void constant_folding_mad(struct rc_instruction * inst)
static void constant_folding_mul(struct rc_instruction * inst)
{
- rc_swizzle swz;
- unsigned int negate;
+ rc_swizzle swz = 0;
+ unsigned int negate = 0;
if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
if (swz == RC_SWIZZLE_ONE) {
@@ -296,8 +284,8 @@ static void constant_folding_mul(struct rc_instruction * inst)
static void constant_folding_add(struct rc_instruction * inst)
{
- rc_swizzle swz;
- unsigned int negate;
+ rc_swizzle swz = 0;
+ unsigned int negate = 0;
if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
if (swz == RC_SWIZZLE_ZERO) {
@@ -431,132 +419,88 @@ static int src_has_const_swz(struct rc_src_register src) {
return 0;
}
-static void peephole_scan_write(void * data, struct rc_instruction * inst,
- rc_register_file file, unsigned int index, unsigned int mask)
+static void presub_scan_read(
+ void * data,
+ struct rc_instruction * inst,
+ struct rc_src_register * src)
{
- struct peephole_state * s = data;
- if(s->Inst->U.I.DstReg.File == file
- && s->Inst->U.I.DstReg.Index == index) {
- unsigned int common_mask = s->WriteMask & mask;
- s->WriteMask &= ~common_mask;
+ struct rc_reader_data * reader_data = data;
+ rc_presubtract_op * presub_opcode = reader_data->CbData;
+
+ if (!rc_inst_can_use_presub(inst, *presub_opcode,
+ reader_data->Writer->U.I.DstReg.WriteMask,
+ *src,
+ reader_data->Writer->U.I.SrcReg[0],
+ reader_data->Writer->U.I.SrcReg[1])) {
+ reader_data->Abort = 1;
+ return;
}
}
static int presub_helper(
struct radeon_compiler * c,
- struct peephole_state * s,
+ struct rc_instruction * inst_add,
rc_presubtract_op presub_opcode,
rc_presub_replace_fn presub_replace)
{
- struct rc_instruction * inst;
- unsigned int can_remove = 0;
- unsigned int cant_sub = 0;
-
- for(inst = s->Inst->Next; inst != &c->Program.Instructions;
- inst = inst->Next) {
- unsigned int i;
- unsigned char can_use_presub = 1;
- const struct rc_opcode_info * info =
- rc_get_opcode_info(inst->U.I.Opcode);
- /* XXX: There are some situations where instructions
- * with more than 2 src registers can use the
- * presubtract select, but to keep things simple we
- * will disable presubtract on these instructions for
- * now. */
- if (info->NumSrcRegs > 2 || info->HasTexture) {
- can_use_presub = 0;
- }
+ struct rc_reader_data reader_data;
+ unsigned int i;
+ rc_presubtract_op cb_op = presub_opcode;
- /* We can't use more than one presubtract value in an
- * instruction, unless the two prsubtract operations
- * are the same and read from the same registers. */
- if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
- if (inst->U.I.PreSub.Opcode != presub_opcode
- || inst->U.I.PreSub.SrcReg[0].File !=
- s->Inst->U.I.SrcReg[1].File
- || inst->U.I.PreSub.SrcReg[0].Index !=
- s->Inst->U.I.SrcReg[1].Index) {
- can_use_presub = 0;
- }
- }
+ reader_data.CbData = &cb_op;
+ rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
+ is_src_clobbered_scan_write);
- /* Even if the instruction can't use a presubtract operation
- * we still need to check if the instruction reads from
- * s->Inst->U.I.DstReg, because if it does we must not
- * remove s->Inst. */
- for(i = 0; i < info->NumSrcRegs; i++) {
- unsigned int mask = src_reads_dst_mask(
- inst->U.I.SrcReg[i], s->Inst->U.I.DstReg);
- /* XXX We could be more aggressive here using
- * presubtract. It is okay if SrcReg[i] only reads
- * from some of the mask components. */
- if(s->Inst->U.I.DstReg.WriteMask != mask) {
- if (s->Inst->U.I.DstReg.WriteMask & mask) {
- can_remove = 0;
- break;
- } else {
- continue;
- }
- }
- if (cant_sub || !can_use_presub) {
- can_remove = 0;
- break;
- }
- presub_replace(s, inst, i);
- can_remove = 1;
- }
- if(!can_remove)
- break;
- rc_for_all_writes_mask(inst, peephole_scan_write, s);
- /* If all components of inst_add's destination register have
- * been written to by subsequent instructions, the original
- * value of the destination register is no longer valid and
- * we can't keep doing substitutions. */
- if (!s->WriteMask){
- break;
- }
- /* Make this instruction doesn't write to the presubtract source. */
- if (inst->U.I.DstReg.WriteMask &
- src_reads_dst_mask(s->Inst->U.I.SrcReg[1],
- inst->U.I.DstReg)
- || src_reads_dst_mask(s->Inst->U.I.SrcReg[0],
- inst->U.I.DstReg)
- || info->IsFlowControl) {
- cant_sub = 1;
+ if (reader_data.Abort || reader_data.ReaderCount == 0)
+ return 0;
+
+ for(i = 0; i < reader_data.ReaderCount; i++) {
+ unsigned int src_index;
+ struct rc_reader reader = reader_data.Readers[i];
+ const struct rc_opcode_info * info =
+ rc_get_opcode_info(reader.Inst->U.I.Opcode);
+
+ for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
+ if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.Src)
+ presub_replace(inst_add, reader.Inst, src_index);
}
}
- return can_remove;
+ return 1;
}
-/* This function assumes that s->Inst->U.I.SrcReg[0] and
- * s->Inst->U.I.SrcReg[1] aren't both negative. */
-static void presub_replace_add(struct peephole_state *s,
- struct rc_instruction * inst,
- unsigned int src_index)
+/* This function assumes that inst_add->U.I.SrcReg[0] and
+ * inst_add->U.I.SrcReg[1] aren't both negative. */
+static void presub_replace_add(
+ struct rc_instruction * inst_add,
+ struct rc_instruction * inst_reader,
+ unsigned int src_index)
{
rc_presubtract_op presub_opcode;
- if (s->Inst->U.I.SrcReg[1].Negate || s->Inst->U.I.SrcReg[0].Negate)
+ if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
presub_opcode = RC_PRESUB_SUB;
else
presub_opcode = RC_PRESUB_ADD;
- if (s->Inst->U.I.SrcReg[1].Negate) {
- inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
- inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[0];
+ if (inst_add->U.I.SrcReg[1].Negate) {
+ inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+ inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
} else {
- inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0];
- inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1];
+ inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
+ inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
}
- inst->U.I.PreSub.SrcReg[0].Negate = 0;
- inst->U.I.PreSub.SrcReg[1].Negate = 0;
- inst->U.I.PreSub.Opcode = presub_opcode;
- inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
- inst->U.I.PreSub.SrcReg[0]);
- inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
- inst->U.I.SrcReg[src_index].Index = presub_opcode;
+ inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
+ inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
+ inst_reader->U.I.PreSub.Opcode = presub_opcode;
+ inst_reader->U.I.SrcReg[src_index] =
+ chain_srcregs(inst_reader->U.I.SrcReg[src_index],
+ inst_reader->U.I.PreSub.SrcReg[0]);
+ inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+ inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
}
-static int is_presub_candidate(struct rc_instruction * inst)
+static int is_presub_candidate(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst)
{
const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
unsigned int i;
@@ -565,7 +509,12 @@ static int is_presub_candidate(struct rc_instruction * inst)
return 0;
for(i = 0; i < info->NumSrcRegs; i++) {
- if (src_reads_dst_mask(inst->U.I.SrcReg[i], inst->U.I.DstReg))
+ struct rc_src_register src = inst->U.I.SrcReg[i];
+ if (src_reads_dst_mask(src, inst->U.I.DstReg))
+ return 0;
+
+ src.File = RC_FILE_PRESUB;
+ if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
return 0;
}
return 1;
@@ -578,9 +527,8 @@ static int peephole_add_presub_add(
struct rc_src_register * src0 = NULL;
struct rc_src_register * src1 = NULL;
unsigned int i;
- struct peephole_state s;
- if (!is_presub_candidate(inst_add))
+ if (!is_presub_candidate(c, inst_add))
return 0;
if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
@@ -604,30 +552,28 @@ static int peephole_add_presub_add(
if (!src1)
return 0;
- s.Inst = inst_add;
- s.WriteMask = inst_add->U.I.DstReg.WriteMask;
- if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) {
+ if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
rc_remove_instruction(inst_add);
return 1;
}
return 0;
}
-static void presub_replace_inv(struct peephole_state * s,
- struct rc_instruction * inst,
- unsigned int src_index)
+static void presub_replace_inv(
+ struct rc_instruction * inst_add,
+ struct rc_instruction * inst_reader,
+ unsigned int src_index)
{
- /* We must be careful not to modify s->Inst, since it
- * is possible it will remain part of the program.
- * XXX Maybe pass a struct instead of a pointer for s->Inst.*/
- inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
- inst->U.I.PreSub.SrcReg[0].Negate = 0;
- inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
- inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
- inst->U.I.PreSub.SrcReg[0]);
-
- inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
- inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
+ /* We must be careful not to modify inst_add, since it
+ * is possible it will remain part of the program.*/
+ inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+ inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
+ inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
+ inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
+ inst_reader->U.I.PreSub.SrcReg[0]);
+
+ inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+ inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
}
/**
@@ -645,9 +591,8 @@ static int peephole_add_presub_inv(
struct rc_instruction * inst_add)
{
unsigned int i, swz, mask;
- struct peephole_state s;
- if (!is_presub_candidate(inst_add))
+ if (!is_presub_candidate(c, inst_add))
return 0;
mask = inst_add->U.I.DstReg.WriteMask;
@@ -674,11 +619,7 @@ static int peephole_add_presub_inv(
return 0;
}
- /* Setup the peephole_state information. */
- s.Inst = inst_add;
- s.WriteMask = inst_add->U.I.DstReg.WriteMask;
-
- if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) {
+ if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
rc_remove_instruction(inst_add);
return 1;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index 91524f5ec6..3f880c88fa 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -66,10 +66,13 @@ struct regalloc_state {
struct hardware_register * HwTemporary;
unsigned int NumHwTemporaries;
/**
- * If an instruction is inside of a loop, end_loop will be the
- * IP of the ENDLOOP instruction, otherwise end_loop will be 0
+ * If an instruction is inside of a loop, EndLoop will be the
+ * IP of the ENDLOOP instruction, and BeginLoop will be the IP
+ * of the BGNLOOP instruction. Otherwise, EndLoop and BeginLoop
+ * will be -1.
*/
- int end_loop;
+ int EndLoop;
+ int BeginLoop;
};
static void print_live_intervals(struct live_intervals * src)
@@ -180,11 +183,13 @@ static void scan_callback(void * data, struct rc_instruction * inst,
reg->Used = 1;
if (file == RC_FILE_INPUT)
reg->Live.Start = -1;
+ else if (s->BeginLoop >= 0)
+ reg->Live.Start = s->BeginLoop;
else
reg->Live.Start = inst->IP;
reg->Live.End = inst->IP;
- } else if (s->end_loop)
- reg->Live.End = s->end_loop;
+ } else if (s->EndLoop >= 0)
+ reg->Live.End = s->EndLoop;
else if (inst->IP > reg->Live.End)
reg->Live.End = inst->IP;
}
@@ -195,6 +200,8 @@ static void compute_live_intervals(struct radeon_compiler *c,
memset(s, 0, sizeof(*s));
s->C = c;
s->NumHwTemporaries = c->max_temp_regs;
+ s->BeginLoop = -1;
+ s->EndLoop = -1;
s->HwTemporary =
memory_pool_malloc(&c->Pool,
s->NumHwTemporaries * sizeof(struct hardware_register));
@@ -207,8 +214,10 @@ static void compute_live_intervals(struct radeon_compiler *c,
inst = inst->Next) {
/* For all instructions inside of a loop, the ENDLOOP
- * instruction is used as the end of the live interval. */
- if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) {
+ * instruction is used as the end of the live interval and
+ * the BGNLOOP instruction is used as the beginning. */
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) {
+ s->BeginLoop = inst->IP;
int loops = 1;
struct rc_instruction * tmp;
for(tmp = inst->Next;
@@ -219,15 +228,17 @@ static void compute_live_intervals(struct radeon_compiler *c,
} else if (tmp->U.I.Opcode
== RC_OPCODE_ENDLOOP) {
if(!--loops) {
- s->end_loop = tmp->IP;
+ s->EndLoop = tmp->IP;
break;
}
}
}
}
- if (inst->IP == s->end_loop)
- s->end_loop = 0;
+ if (inst->IP == s->EndLoop) {
+ s->EndLoop = -1;
+ s->BeginLoop = -1;
+ }
rc_for_all_reads_mask(inst, scan_callback, s);
rc_for_all_writes_mask(inst, scan_callback, s);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index d4a38607d9..cbb5ef6237 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -30,6 +30,7 @@
#include <stdio.h>
#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
@@ -54,6 +55,11 @@ struct schedule_instruction {
* this instruction can be scheduled.
*/
unsigned int NumDependencies:5;
+
+ /** List of all readers (see rc_get_readers() for the definition of
+ * "all readers"), even those outside the basic block this instruction
+ * lives in. */
+ struct rc_reader_data GlobalReaders;
};
@@ -94,6 +100,16 @@ struct register_state {
struct reg_value * Values[4];
};
+struct remap_reg {
+ struct rc_instruciont * Inst;
+ unsigned int OldIndex:(RC_REGISTER_INDEX_BITS+1);
+ unsigned int OldSwizzle:3;
+ unsigned int NewIndex:(RC_REGISTER_INDEX_BITS+1);
+ unsigned int NewSwizzle:3;
+ unsigned int OnlyTexReads:1;
+ struct remap_reg * Next;
+};
+
struct schedule_state {
struct radeon_compiler * C;
struct schedule_instruction * Current;
@@ -126,15 +142,6 @@ static struct reg_value ** get_reg_valuep(struct schedule_state * s,
return &s->Temporary[index].Values[chan];
}
-static struct reg_value * get_reg_value(struct schedule_state * s,
- rc_register_file file, unsigned int index, unsigned int chan)
-{
- struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
- if (!pv)
- return 0;
- return *pv;
-}
-
static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
{
inst->NextReady = *list;
@@ -290,16 +297,17 @@ static int merge_presub_sources(
{
unsigned int srcp_src, srcp_regs, is_rgb, is_alpha;
struct rc_pair_sub_instruction * dst_sub;
+ const struct rc_opcode_info * info;
assert(dst_full->Alpha.Opcode == RC_OPCODE_NOP);
switch(type) {
- case RC_PAIR_SOURCE_RGB:
+ case RC_SOURCE_RGB:
is_rgb = 1;
is_alpha = 0;
dst_sub = &dst_full->RGB;
break;
- case RC_PAIR_SOURCE_ALPHA:
+ case RC_SOURCE_ALPHA:
is_rgb = 0;
is_alpha = 1;
dst_sub = &dst_full->Alpha;
@@ -309,8 +317,8 @@ static int merge_presub_sources(
return 0;
}
- const struct rc_opcode_info * info =
- rc_get_opcode_info(dst_full->RGB.Opcode);
+ info = rc_get_opcode_info(dst_full->RGB.Opcode);
+
if (dst_sub->Src[RC_PAIR_PRESUB_SRC].Used)
return 0;
@@ -340,6 +348,8 @@ static int merge_presub_sources(
continue;
free_source = rc_pair_alloc_source(dst_full, is_rgb,
is_alpha, temp.File, temp.Index);
+ if (free_source < 0)
+ return 0;
one_way = 1;
} else {
dst_sub->Src[free_source] = temp;
@@ -355,11 +365,11 @@ static int merge_presub_sources(
for(arg = 0; arg < info->NumSrcRegs; arg++) {
/*If this arg does not read from an rgb source,
* do nothing. */
- if (!(rc_source_type_that_arg_reads(
- dst_full->RGB.Arg[arg].Source,
- dst_full->RGB.Arg[arg].Swizzle) & type)) {
+ if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle,
+ 3) & type)) {
continue;
}
+
if (dst_full->RGB.Arg[arg].Source == srcp_src)
dst_full->RGB.Arg[arg].Source = free_source;
/* We need to do this just in case register
@@ -391,13 +401,13 @@ static int destructive_merge_instructions(
/* Merge the rgb presubtract registers. */
if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
- if (!merge_presub_sources(rgb, alpha->RGB, RC_PAIR_SOURCE_RGB)) {
+ if (!merge_presub_sources(rgb, alpha->RGB, RC_SOURCE_RGB)) {
return 0;
}
}
/* Merge the alpha presubtract registers */
if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
- if(!merge_presub_sources(rgb, alpha->Alpha, RC_PAIR_SOURCE_ALPHA)){
+ if(!merge_presub_sources(rgb, alpha->Alpha, RC_SOURCE_ALPHA)){
return 0;
}
}
@@ -524,6 +534,222 @@ static void presub_nop(struct rc_instruction * emitted) {
}
}
}
+
+static void rgb_to_alpha_remap (
+ struct rc_instruction * inst,
+ struct rc_pair_instruction_arg * arg,
+ rc_register_file old_file,
+ rc_swizzle old_swz,
+ unsigned int new_index)
+{
+ int new_src_index;
+ unsigned int i;
+ struct rc_pair_instruction_source * old_src =
+ rc_pair_get_src(&inst->U.P, arg);
+ if (!old_src) {
+ return;
+ }
+
+ for (i = 0; i < 3; i++) {
+ if (get_swz(arg->Swizzle, i) == old_swz) {
+ SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W);
+ }
+ }
+ memset(old_src, 0, sizeof(struct rc_pair_instruction_source));
+ new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1,
+ old_file, new_index);
+ /* This conversion is not possible, we must have made a mistake in
+ * is_rgb_to_alpha_possible. */
+ if (new_src_index < 0) {
+ assert(0);
+ return;
+ }
+
+ arg->Source = new_src_index;
+}
+
+static int can_remap(unsigned int opcode)
+{
+ switch(opcode) {
+ case RC_OPCODE_DDX:
+ case RC_OPCODE_DDY:
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static int can_convert_opcode_to_alpha(unsigned int opcode)
+{
+ switch(opcode) {
+ case RC_OPCODE_DDX:
+ case RC_OPCODE_DDY:
+ case RC_OPCODE_DP2:
+ case RC_OPCODE_DP3:
+ case RC_OPCODE_DP4:
+ case RC_OPCODE_DPH:
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void is_rgb_to_alpha_possible(
+ void * userdata,
+ struct rc_instruction * inst,
+ struct rc_pair_instruction_arg * arg,
+ struct rc_pair_instruction_source * src)
+{
+ unsigned int chan_count = 0;
+ unsigned int alpha_sources = 0;
+ unsigned int i;
+ struct rc_reader_data * reader_data = userdata;
+
+ if (!can_remap(inst->U.P.RGB.Opcode)
+ || !can_remap(inst->U.P.Alpha.Opcode)) {
+ reader_data->Abort = 1;
+ return;
+ }
+
+ if (!src)
+ return;
+
+ /* XXX There are some cases where we can still do the conversion if
+ * a reader reads from a presubtract source, but for now we'll prevent
+ * it. */
+ if (arg->Source == RC_PAIR_PRESUB_SRC) {
+ reader_data->Abort = 1;
+ return;
+ }
+
+ /* Make sure the source only reads from one component.
+ * XXX We should allow the source to read from the same component twice.
+ * XXX If the index we will be converting to is the same as the
+ * current index, then it is OK to read from more than one component.
+ */
+ for (i = 0; i < 3; i++) {
+ rc_swizzle swz = get_swz(arg->Swizzle, i);
+ switch(swz) {
+ case RC_SWIZZLE_X:
+ case RC_SWIZZLE_Y:
+ case RC_SWIZZLE_Z:
+ case RC_SWIZZLE_W:
+ chan_count++;
+ break;
+ default:
+ break;
+ }
+ }
+ if (chan_count > 1) {
+ reader_data->Abort = 1;
+ return;
+ }
+
+ /* Make sure there are enough alpha sources.
+ * XXX If we know what register all the readers are going
+ * to be remapped to, then in some situations we can still do
+ * the substitution, even if all 3 alpha sources are being used.*/
+ for (i = 0; i < 3; i++) {
+ if (inst->U.P.Alpha.Src[i].Used) {
+ alpha_sources++;
+ }
+ }
+ if (alpha_sources > 2) {
+ reader_data->Abort = 1;
+ return;
+ }
+}
+
+static int convert_rgb_to_alpha(
+ struct schedule_state * s,
+ struct schedule_instruction * sched_inst)
+{
+ struct rc_pair_instruction * pair_inst = &sched_inst->Instruction->U.P;
+ unsigned int old_mask = pair_inst->RGB.WriteMask;
+ unsigned int old_swz = rc_mask_to_swizzle(old_mask);
+ const struct rc_opcode_info * info =
+ rc_get_opcode_info(pair_inst->RGB.Opcode);
+ int new_index = -1;
+ unsigned int i;
+
+ if (sched_inst->GlobalReaders.Abort)
+ return 0;
+
+ if (!pair_inst->RGB.WriteMask)
+ return 0;
+
+ if (!can_convert_opcode_to_alpha(pair_inst->RGB.Opcode)
+ || !can_convert_opcode_to_alpha(pair_inst->Alpha.Opcode)) {
+ return 0;
+ }
+
+ assert(sched_inst->NumWriteValues == 1);
+
+ if (!sched_inst->WriteValues[0]) {
+ assert(0);
+ return 0;
+ }
+
+ /* We start at the old index, because if we can reuse the same
+ * register and just change the swizzle then it is more likely we
+ * will be able to convert all the readers. */
+ for (i = pair_inst->RGB.DestIndex; i < RC_REGISTER_MAX_INDEX; i++) {
+ struct reg_value ** new_regvalp = get_reg_valuep(
+ s, RC_FILE_TEMPORARY, i, 3);
+ if (!*new_regvalp) {
+ struct reg_value ** old_regvalp =
+ get_reg_valuep(s,
+ RC_FILE_TEMPORARY,
+ pair_inst->RGB.DestIndex,
+ rc_mask_to_swz(old_mask));
+ new_index = i;
+ *new_regvalp = *old_regvalp;
+ *old_regvalp = NULL;
+ new_regvalp = get_reg_valuep(s, RC_FILE_TEMPORARY, i, 3);
+ break;
+ }
+ }
+ if (new_index < 0) {
+ return 0;
+ }
+
+ pair_inst->Alpha.Opcode = pair_inst->RGB.Opcode;
+ pair_inst->Alpha.DestIndex = new_index;
+ pair_inst->Alpha.WriteMask = 1;
+ pair_inst->Alpha.Target = pair_inst->RGB.Target;
+ pair_inst->Alpha.OutputWriteMask = pair_inst->RGB.OutputWriteMask;
+ pair_inst->Alpha.DepthWriteMask = pair_inst->RGB.DepthWriteMask;
+ pair_inst->Alpha.Saturate = pair_inst->RGB.Saturate;
+ memcpy(pair_inst->Alpha.Arg, pair_inst->RGB.Arg,
+ sizeof(pair_inst->Alpha.Arg));
+ /* Move the swizzles into the first chan */
+ for (i = 0; i < info->NumSrcRegs; i++) {
+ unsigned int j;
+ for (j = 0; j < 3; j++) {
+ unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j);
+ if (swz != RC_SWIZZLE_UNUSED) {
+ pair_inst->Alpha.Arg[i].Swizzle = swz;
+ break;
+ }
+ }
+ }
+ pair_inst->RGB.Opcode = RC_OPCODE_NOP;
+ pair_inst->RGB.DestIndex = 0;
+ pair_inst->RGB.WriteMask = 0;
+ pair_inst->RGB.Target = 0;
+ pair_inst->RGB.OutputWriteMask = 0;
+ pair_inst->RGB.DepthWriteMask = 0;
+ pair_inst->RGB.Saturate = 0;
+ memset(pair_inst->RGB.Arg, 0, sizeof(pair_inst->RGB.Arg));
+
+ for(i = 0; i < sched_inst->GlobalReaders.ReaderCount; i++) {
+ struct rc_reader reader = sched_inst->GlobalReaders.Readers[i];
+ rgb_to_alpha_remap(reader.Inst, reader.U.Arg,
+ RC_FILE_TEMPORARY, old_swz, new_index);
+ }
+ return 1;
+}
+
/**
* Find a good ALU instruction or pair of ALU instruction and emit it.
*
@@ -535,24 +761,16 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
{
struct schedule_instruction * sinst;
- if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
- if (s->ReadyFullALU) {
- sinst = s->ReadyFullALU;
- s->ReadyFullALU = s->ReadyFullALU->NextReady;
- } else if (s->ReadyRGB) {
- sinst = s->ReadyRGB;
- s->ReadyRGB = s->ReadyRGB->NextReady;
- } else {
- sinst = s->ReadyAlpha;
- s->ReadyAlpha = s->ReadyAlpha->NextReady;
- }
-
+ if (s->ReadyFullALU) {
+ sinst = s->ReadyFullALU;
+ s->ReadyFullALU = s->ReadyFullALU->NextReady;
rc_insert_instruction(before->Prev, sinst->Instruction);
commit_alu_instruction(s, sinst);
} else {
struct schedule_instruction **prgb;
struct schedule_instruction **palpha;
-
+ struct schedule_instruction *prev;
+pair:
/* Some pairings might fail because they require too
* many source slots; try all possible pairings if necessary */
for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
@@ -571,10 +789,43 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
goto success;
}
}
-
- /* No success in pairing; just take the first RGB instruction */
- sinst = s->ReadyRGB;
- s->ReadyRGB = s->ReadyRGB->NextReady;
+ prev = NULL;
+ /* No success in pairing, now try to convert one of the RGB
+ * instructions to an Alpha so we can pair it with another RGB.
+ */
+ if (s->ReadyRGB && s->ReadyRGB->NextReady) {
+ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+ if ((*prgb)->NumWriteValues == 1) {
+ struct schedule_instruction * prgb_next;
+ if (!convert_rgb_to_alpha(s, *prgb))
+ goto cont_loop;
+ prgb_next = (*prgb)->NextReady;
+ /* Add instruction to the Alpha ready list. */
+ (*prgb)->NextReady = s->ReadyAlpha;
+ s->ReadyAlpha = *prgb;
+ /* Remove instruction from the RGB ready list.*/
+ if (prev)
+ prev->NextReady = prgb_next;
+ else
+ s->ReadyRGB = prgb_next;
+ goto pair;
+ }
+cont_loop:
+ prev = *prgb;
+ }
+ }
+ /* Still no success in pairing, just take the first RGB
+ * or alpha instruction. */
+ if (s->ReadyRGB) {
+ sinst = s->ReadyRGB;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+ } else if (s->ReadyAlpha) {
+ sinst = s->ReadyAlpha;
+ s->ReadyAlpha = s->ReadyAlpha->NextReady;
+ } else {
+ /*XXX Something real bad has happened. */
+ assert(0);
+ }
rc_insert_instruction(before->Prev, sinst->Instruction);
commit_alu_instruction(s, sinst);
@@ -590,13 +841,13 @@ static void scan_read(void * data, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int chan)
{
struct schedule_state * s = data;
- struct reg_value * v = get_reg_value(s, file, index, chan);
+ struct reg_value ** v = get_reg_valuep(s, file, index, chan);
struct reg_value_reader * reader;
if (!v)
return;
- if (v->Writer == s->Current) {
+ if (*v && (*v)->Writer == s->Current) {
/* The instruction reads and writes to a register component.
* In this case, we only want to increment dependencies by one.
*/
@@ -607,16 +858,28 @@ static void scan_read(void * data, struct rc_instruction * inst,
reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
reader->Reader = s->Current;
- reader->Next = v->Readers;
- v->Readers = reader;
- v->NumReaders++;
-
- s->Current->NumDependencies++;
+ if (!*v) {
+ /* In this situation, the instruction reads from a register
+ * that hasn't been written to or read from in the current
+ * block. */
+ *v = memory_pool_malloc(&s->C->Pool, sizeof(struct reg_value));
+ memset(*v, 0, sizeof(struct reg_value));
+ (*v)->Readers = reader;
+ } else {
+ reader->Next = (*v)->Readers;
+ (*v)->Readers = reader;
+ /* Only update the current instruction's dependencies if the
+ * register it reads from has been written to in this block. */
+ if ((*v)->Writer) {
+ s->Current->NumDependencies++;
+ }
+ }
+ (*v)->NumReaders++;
if (s->Current->NumReadValues >= 12) {
rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
} else {
- s->Current->ReadValues[s->Current->NumReadValues++] = v;
+ s->Current->ReadValues[s->Current->NumReadValues++] = *v;
}
}
@@ -651,6 +914,16 @@ static void scan_write(void * data, struct rc_instruction * inst,
}
}
+static void is_rgb_to_alpha_possible_normal(
+ void * userdata,
+ struct rc_instruction * inst,
+ struct rc_src_register * src)
+{
+ struct rc_reader_data * reader_data = userdata;
+ reader_data->Abort = 1;
+
+}
+
static void schedule_block(struct r300_fragment_program_compiler * c,
struct rc_instruction * begin, struct rc_instruction * end)
{
@@ -682,6 +955,11 @@ static void schedule_block(struct r300_fragment_program_compiler * c,
if (!s.Current->NumDependencies)
instruction_ready(&s, s.Current);
+
+ /* Get global readers for possible RGB->Alpha conversion. */
+ rc_get_readers(s.C, inst, &s.Current->GlobalReaders,
+ is_rgb_to_alpha_possible_normal,
+ is_rgb_to_alpha_possible, NULL);
}
/* Temporarily unlink all instructions */
@@ -710,8 +988,13 @@ static int is_controlflow(struct rc_instruction * inst)
void rc_pair_schedule(struct radeon_compiler *cc, void *user)
{
+ struct schedule_state s;
+
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
struct rc_instruction * inst = c->Base.Program.Instructions.Next;
+
+ memset(&s, 0, sizeof(s));
+ s.C = &c->Base;
while(inst != &c->Base.Program.Instructions) {
struct rc_instruction * first;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
index c549be5218..fc05366f50 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -280,9 +280,12 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
pair->RGB.DestIndex = inst->DstReg.Index;
pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
}
+
if (needalpha) {
- pair->Alpha.DestIndex = inst->DstReg.Index;
pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ if (pair->Alpha.WriteMask) {
+ pair->Alpha.DestIndex = inst->DstReg.Index;
+ }
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
index 24b685fbeb..d7bedc5729 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -30,6 +30,7 @@
#include <stdio.h>
#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
/**
@@ -91,37 +92,98 @@ struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register
return tmp;
}
-unsigned int rc_find_free_temporary(struct radeon_compiler * c)
+struct get_used_temporaries_data {
+ unsigned char * Used;
+ unsigned int UsedLength;
+};
+
+static void get_used_temporaries_cb(
+ void * userdata,
+ struct rc_instruction * inst,
+ rc_register_file file,
+ unsigned int index,
+ unsigned int mask)
{
- char used[RC_REGISTER_MAX_INDEX];
- unsigned int i;
- struct rc_instruction * rcinst;
+ struct get_used_temporaries_data * d = userdata;
- memset(used, 0, sizeof(used));
+ if (file != RC_FILE_TEMPORARY)
+ return;
- for (rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) {
- const struct rc_sub_instruction *inst = &rcinst->U.I;
- const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode);
- unsigned int k;
+ if (index >= d->UsedLength)
+ return;
- for (k = 0; k < opcode->NumSrcRegs; k++) {
- if (inst->SrcReg[k].File == RC_FILE_TEMPORARY)
- used[inst->SrcReg[k].Index] = 1;
- }
+ d->Used[index] |= mask;
+}
- if (opcode->HasDstReg) {
- if (inst->DstReg.File == RC_FILE_TEMPORARY)
- used[inst->DstReg.Index] = 1;
- }
+/**
+ * This function fills in the parameter 'used' with a writemask that
+ * represents which components of each temporary register are used by the
+ * program. This is meant to be combined with rc_find_free_temporary_list as a
+ * more efficient version of rc_find_free_temporary.
+ * @param used The function does not initialize this parameter.
+ */
+void rc_get_used_temporaries(
+ struct radeon_compiler * c,
+ unsigned char * used,
+ unsigned int used_length)
+{
+ struct rc_instruction * inst;
+ struct get_used_temporaries_data d;
+ d.Used = used;
+ d.UsedLength = used_length;
+
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+
+ rc_for_all_reads_mask(inst, get_used_temporaries_cb, &d);
+ rc_for_all_writes_mask(inst, get_used_temporaries_cb, &d);
}
+}
- for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
- if (!used[i])
+/* Search a list of used temporaries for a free one
+ * \sa rc_get_used_temporaries
+ * @note If this function finds a free temporary, it will mark it as used
+ * in the used temporary list (param 'used')
+ * @param used list of used temporaries
+ * @param used_length number of items in param 'used'
+ * @param mask which components must be free in the temporary index that is
+ * returned.
+ * @return -1 If there are no more free temporaries, otherwise the index of
+ * a temporary register where the components specified in param 'mask' are
+ * not being used.
+ */
+int rc_find_free_temporary_list(
+ struct radeon_compiler * c,
+ unsigned char * used,
+ unsigned int used_length,
+ unsigned int mask)
+{
+ int i;
+ for(i = 0; i < used_length; i++) {
+ if ((~used[i] & mask) == mask) {
+ used[i] |= mask;
return i;
+ }
}
+ return -1;
+}
- rc_error(c, "Ran out of temporary registers\n");
- return 0;
+unsigned int rc_find_free_temporary(struct radeon_compiler * c)
+{
+ unsigned char used[RC_REGISTER_MAX_INDEX];
+ int free;
+
+ memset(used, 0, sizeof(used));
+
+ rc_get_used_temporaries(c, used, RC_REGISTER_MAX_INDEX);
+
+ free = rc_find_free_temporary_list(c, used, RC_REGISTER_MAX_INDEX,
+ RC_MASK_XYZW);
+ if (free < 0) {
+ rc_error(c, "Ran out of temporary registers\n");
+ return 0;
+ }
+ return free;
}
@@ -182,3 +244,14 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c)
return ip;
}
+
+rc_swizzle rc_mask_to_swizzle(unsigned int mask)
+{
+ switch(mask) {
+ case RC_MASK_X: return RC_SWIZZLE_X;
+ case RC_MASK_Y: return RC_SWIZZLE_Y;
+ case RC_MASK_Z: return RC_SWIZZLE_Z;
+ case RC_MASK_W: return RC_SWIZZLE_W;
+ default: return RC_SWIZZLE_UNUSED;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index f0a77d7b53..be078b4f4f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -191,6 +191,20 @@ static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz)
return ret;
}
+/**
+ * @param mask Must be either RC_MASK_X, RC_MASK_Y, RC_MASK_Z, or RC_MASK_W
+ */
+static inline rc_swizzle rc_mask_to_swz(unsigned int mask)
+{
+ switch (mask) {
+ case RC_MASK_X: return RC_SWIZZLE_X;
+ case RC_MASK_Y: return RC_SWIZZLE_Y;
+ case RC_MASK_Z: return RC_SWIZZLE_Z;
+ case RC_MASK_W: return RC_SWIZZLE_W;
+ default: assert(0);
+ }
+ return RC_SWIZZLE_UNUSED;
+}
struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
static inline void reset_srcreg(struct rc_src_register* reg)
@@ -222,6 +236,17 @@ void rc_local_transform(
struct radeon_compiler *c,
void *user);
+void rc_get_used_temporaries(
+ struct radeon_compiler * c,
+ unsigned char * used,
+ unsigned int used_length);
+
+int rc_find_free_temporary_list(
+ struct radeon_compiler * c,
+ unsigned char * used,
+ unsigned int used_length,
+ unsigned int mask);
+
unsigned int rc_find_free_temporary(struct radeon_compiler * c);
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
@@ -233,4 +258,5 @@ unsigned int rc_recompute_ips(struct radeon_compiler * c);
void rc_print_program(const struct rc_program *prog);
+rc_swizzle rc_mask_to_swizzle(unsigned int mask);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
index 9dcd44c522..45f79ece5b 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
@@ -181,4 +181,9 @@ static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
return 0;
}
}
+
+#define RC_SOURCE_NONE 0x0
+#define RC_SOURCE_RGB 0x1
+#define RC_SOURCE_ALPHA 0x2
+
#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index a21fe8d3df..5905d26e52 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -27,6 +27,9 @@
#include "radeon_program_pair.h"
+#include "radeon_compiler_util.h"
+
+#include <stdlib.h>
/**
* Return the source slot where we installed the given register access,
@@ -204,24 +207,37 @@ void rc_pair_foreach_source_that_rgb_reads(
}
}
-/*return 0 for rgb, 1 for alpha -1 for error. */
-
-unsigned int rc_source_type_that_arg_reads(
- unsigned int source,
- unsigned int swizzle)
+/**
+ * Return the source slot of a pair instruction that the given argument
+ * reads.
+ *
+ * @param pair_inst Instruction that owns the argument.
+ * @param arg Must point at one of the first three entries of
+ * pair_inst->RGB.Arg or pair_inst->Alpha.Arg (asserted).
+ * @return &pair_inst->RGB.Src[arg->Source] when the type bits returned by
+ * rc_source_type_swz() include RC_SOURCE_RGB, otherwise
+ * &pair_inst->Alpha.Src[arg->Source] when they include RC_SOURCE_ALPHA,
+ * otherwise NULL.
+ */
+struct rc_pair_instruction_source * rc_pair_get_src(
+ struct rc_pair_instruction * pair_inst,
+ struct rc_pair_instruction_arg * arg)
{
- unsigned int chan;
- unsigned int swz = RC_SWIZZLE_UNUSED;
- unsigned int ret = RC_PAIR_SOURCE_NONE;
-
- for(chan = 0; chan < 3; chan++) {
- swz = GET_SWZ(swizzle, chan);
- if (swz == RC_SWIZZLE_W) {
- ret |= RC_PAIR_SOURCE_ALPHA;
- } else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
- || swz == RC_SWIZZLE_Z) {
- ret |= RC_PAIR_SOURCE_RGB;
+ unsigned int i, type;
+ unsigned int channels = 0;
+
+ /* An argument of the RGB half is examined over three swizzle channels. */
+ for(i = 0; i < 3; i++) {
+ if (arg == pair_inst->RGB.Arg + i) {
+ channels = 3;
+ break;
}
}
- return ret;
+
+ /* Otherwise it must belong to the alpha half, which uses one channel. */
+ if (channels == 0) {
+ for (i = 0; i < 3; i++) {
+ if (arg == pair_inst->Alpha.Arg + i) {
+ channels = 1;
+ break;
+ }
+ }
+ }
+
+ assert(channels > 0);
+ type = rc_source_type_swz(arg->Swizzle, channels);
+
+ /* The RGB bit is tested first, so it wins when both bits are set. */
+ if (type & RC_SOURCE_RGB) {
+ return &pair_inst->RGB.Src[arg->Source];
+ } else if (type & RC_SOURCE_ALPHA) {
+ return &pair_inst->Alpha.Src[arg->Source];
+ } else {
+ return NULL;
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index 54d44a2098..ccf7a0070c 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -55,10 +55,6 @@ struct radeon_compiler;
*/
#define RC_PAIR_PRESUB_SRC 3
-#define RC_PAIR_SOURCE_NONE 0x0
-#define RC_PAIR_SOURCE_RGB 0x1
-#define RC_PAIR_SOURCE_ALPHA 0x2
-
struct rc_pair_instruction_source {
unsigned int Used:1;
unsigned int File:3;
@@ -115,9 +111,9 @@ void rc_pair_foreach_source_that_rgb_reads(
void * data,
rc_pair_foreach_src_fn cb);
-unsigned int rc_source_type_that_arg_reads(
- unsigned int source,
- unsigned int swizzle);
+struct rc_pair_instruction_source * rc_pair_get_src(
+ struct rc_pair_instruction * pair_inst,
+ struct rc_pair_instruction_arg * arg);
/*@}*/
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
index 618ab5a099..ae13f6742f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
@@ -129,6 +129,7 @@ static char rc_swizzle_char(unsigned int swz)
case RC_SWIZZLE_HALF: return 'H';
case RC_SWIZZLE_UNUSED: return '_';
}
+ fprintf(stderr, "bad swz: %u\n", swz);
return '?';
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
index 60e228be5b..88165f7895 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c
@@ -33,100 +33,51 @@
#include "radeon_compiler.h"
#include "radeon_dataflow.h"
-
-struct reg_rename {
- int old_index;
- int new_index;
- int temp_index;
-};
-
-static void rename_reg(void * data, struct rc_instruction * inst,
- rc_register_file * file, unsigned int * index)
-{
- struct reg_rename *r = data;
-
- if(r->old_index == *index && *file == RC_FILE_TEMPORARY) {
- *index = r->new_index;
- }
- else if(r->new_index == *index && *file == RC_FILE_TEMPORARY) {
- *index = r->temp_index;
- }
-}
-
-static void rename_all(
- struct radeon_compiler *c,
- struct rc_instruction * start,
- unsigned int old,
- unsigned int new,
- unsigned int temp)
-{
- struct rc_instruction * inst;
- struct reg_rename r;
- r.old_index = old;
- r.new_index = new;
- r.temp_index = temp;
- for(inst = start; inst != &c->Program.Instructions;
- inst = inst->Next) {
- rc_remap_registers(inst, rename_reg, &r);
- }
-}
+#include "radeon_program.h"
/**
* This function renames registers in an attempt to get the code close to
* SSA form. After this function has completed, most of the register are only
- * written to one time, with a few exceptions. For example, this block of code
- * will not be modified by this function:
- * Mov Temp[0].x Const[0].x
- * Mov Temp[0].y Const[0].y
- * Basically, destination registers will be renamed if:
- * 1. There have been no previous writes to that register
- * or
- * 2. If the instruction is writting to the exact components (no more, no less)
- * of a register that has been written to by previous instructions.
+ * written to one time, with a few exceptions.
*
* This function assumes all the instructions are still of type
* RC_INSTRUCTION_NORMAL.
*/
void rc_rename_regs(struct radeon_compiler *c, void *user)
{
- unsigned int cur_index = 0;
- unsigned int icount;
+ unsigned int i, used_length;
+ int new_index;
struct rc_instruction * inst;
- unsigned int * masks;
+ struct rc_reader_data reader_data;
+ unsigned char * used;
- /* The number of instructions in the program is also the maximum
- * number of temp registers that could potentially be used. */
- icount = rc_recompute_ips(c);
- masks = memory_pool_malloc(&c->Pool, icount * sizeof(unsigned int));
- memset(masks, 0, icount * sizeof(unsigned int));
+ /* Build a per-index usage table sized at twice the value returned by
+ * rc_recompute_ips(), then fill it via rc_get_used_temporaries().
+ * NOTE(review): the reason for the factor of two is not stated here;
+ * presumably it leaves room for the newly assigned indices -- confirm. */
+ used_length = 2 * rc_recompute_ips(c);
+ used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length);
+ memset(used, 0, sizeof(unsigned char) * used_length);
+ rc_get_used_temporaries(c, used, used_length);
for(inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions;
inst = inst->Next) {
- const struct rc_opcode_info * info;
- unsigned int old_index, temp_index;
- struct rc_dst_register * dst;
- if(inst->Type != RC_INSTRUCTION_NORMAL) {
- rc_error(c, "%s only works with normal instructions.",
- __FUNCTION__);
- return;
- }
- dst = &inst->U.I.DstReg;
- info = rc_get_opcode_info(inst->U.I.Opcode);
- if(!info->HasDstReg || dst->File != RC_FILE_TEMPORARY) {
+
+ /* Only writes to temporary registers are candidates for renaming.
+ * NOTE(review): the removed code checked inst->Type and
+ * info->HasDstReg before looking at DstReg; this version reads
+ * U.I.DstReg.File directly -- confirm that is safe for every
+ * instruction reaching this pass. */
+ if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
continue;
+
+ rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
+
+ /* Leave the register untouched when the reader lookup set Abort or
+ * recorded no readers. */
+ if (reader_data.Abort || reader_data.ReaderCount == 0)
+ continue;
+
+ /* Claim an index whose four channels are all still unmarked in the
+ * usage table; the helper marks it as used. */
+ new_index = rc_find_free_temporary_list(c, used, used_length,
+ RC_MASK_XYZW);
+ if (new_index < 0) {
+ rc_error(c, "Ran out of temporary registers\n");
+ return;
}
- if(dst->Index >= icount || !masks[dst->Index] ||
- masks[dst->Index] == dst->WriteMask) {
- old_index = dst->Index;
- /* We need to set dst->Index here so get free temporary
- * will work. */
- dst->Index = cur_index++;
- temp_index = rc_find_free_temporary(c);
- rename_all(c, inst->Next, old_index,
- dst->Index, temp_index);
+
+ /* Redirect the writer and every recorded reader to the new index. */
+ reader_data.Writer->U.I.DstReg.Index = new_index;
+ for(i = 0; i < reader_data.ReaderCount; i++) {
+ reader_data.Readers[i].U.Src->Index = new_index;
}
- assert(dst->Index < icount);
- masks[dst->Index] |= dst->WriteMask;
}
}
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index 9fbd36bfe6..c288834d24 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -86,6 +86,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define need_GL_EXT_stencil_two_side
#define need_GL_ATI_separate_stencil
#define need_GL_NV_vertex_program
+#define need_GL_OES_EGL_image
#include "main/remap_helper.h"
@@ -134,6 +135,9 @@ static const struct dri_extension card_extensions[] = {
{"GL_MESAX_texture_float", NULL},
{"GL_NV_blend_square", NULL},
{"GL_NV_vertex_program", GL_NV_vertex_program_functions},
+#if FEATURE_OES_EGL_image
+ {"GL_OES_EGL_image", GL_OES_EGL_image_functions },
+#endif
{NULL, NULL}
/* *INDENT-ON* */
};
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
index 81769e1ee5..0c4d8537c6 100644
--- a/src/mesa/drivers/dri/r300/r300_draw.c
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -717,6 +717,10 @@ static void r300DrawPrims(struct gl_context *ctx,
GLuint max_index)
{
GLboolean retval;
+ struct r300_context *r300 = R300_CONTEXT(ctx);
+ radeonContextPtr radeon = &r300->radeon;
+
+ radeon_prepare_render(radeon);
/* This check should get folded into just the places that
* min/max index are really needed.
diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
index 821318e7a5..44090ec289 100644
--- a/src/mesa/drivers/dri/r300/r300_render.c
+++ b/src/mesa/drivers/dri/r300/r300_render.c
@@ -327,8 +327,6 @@ void r300RunRenderPrimitive(struct gl_context * ctx, int start, int end, int pri
BATCH_LOCALS(&rmesa->radeon);
int type, num_verts;
- radeon_prepare_render(&rmesa->radeon);
-
type = r300PrimitiveType(rmesa, prim);
num_verts = r300NumVerts(rmesa, end - start, prim);
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
index a6bda0e499..de66293999 100644
--- a/src/mesa/drivers/dri/r300/r300_tex.c
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -382,5 +382,9 @@ void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *fun
functions->GenerateMipmap = radeonGenerateMipmap;
+#if FEATURE_OES_EGL_image
+ functions->EGLImageTargetTexture2D = radeon_image_target_texture_2d;
+#endif
+
driInitTextureFormats();
}
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index 0116c5d2fa..ed9955b05d 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -428,6 +428,7 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
radeonTexObjPtr t;
uint32_t pitch_val;
uint32_t internalFormat, type, format;
+ gl_format texFormat;
type = GL_BGRA;
format = GL_UNSIGNED_BYTE;
@@ -467,9 +468,6 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
radeon_miptree_unreference(&t->mt);
radeon_miptree_unreference(&rImage->mt);
- _mesa_init_teximage_fields(radeon->glCtx, target, texImage,
- rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
- texImage->RowStride = rb->pitch / rb->cpp;
rImage->bo = rb->bo;
radeon_bo_ref(rImage->bo);
t->bo = rb->bo;
@@ -481,22 +479,35 @@ void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format
pitch_val = rb->pitch;
switch (rb->cpp) {
case 4:
- if (texture_format == __DRI_TEXTURE_FORMAT_RGB)
+ if (texture_format == __DRI_TEXTURE_FORMAT_RGB) {
+ texFormat = MESA_FORMAT_RGB888;
t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
- else
+ }
+ else {
+ texFormat = MESA_FORMAT_ARGB8888;
t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+ }
pitch_val /= 4;
break;
case 3:
default:
+ texFormat = MESA_FORMAT_RGB888;
t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
pitch_val /= 4;
break;
case 2:
+ texFormat = MESA_FORMAT_RGB565;
t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
pitch_val /= 2;
break;
}
+
+ _mesa_init_teximage_fields(radeon->glCtx, target, texImage,
+ rb->base.Width, rb->base.Height, 1, 0,
+ rb->cpp, texFormat);
+ texImage->RowStride = rb->pitch / rb->cpp;
+
+
pitch_val--;
t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((rb->base.Width - 1) << R300_TX_WIDTHMASK_SHIFT)))
| ((R300_TX_HEIGHTMASK_MASK & ((rb->base.Height - 1) << R300_TX_HEIGHTMASK_SHIFT))));