summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/nv50/nv50_pc.c 18
-rw-r--r-- src/gallium/drivers/nv50/nv50_pc.h 10
-rw-r--r-- src/gallium/drivers/nv50/nv50_pc_optimize.c 168
-rw-r--r-- src/gallium/drivers/nv50/nv50_pc_print.c 6
4 files changed, 178 insertions, 24 deletions
diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c
index 89dbc7aa20..e09f94074d 100644
--- a/src/gallium/drivers/nv50/nv50_pc.c
+++ b/src/gallium/drivers/nv50/nv50_pc.c
@@ -163,6 +163,24 @@ nv_nvi_refcount(struct nv_instruction *nvi)
return rc;
}
+/* Redirect every reference to old_val so that it refers to new_val.
+ *
+ * Scans the reference table of pc (pc->refs[0] .. pc->refs[num_refs - 1])
+ * and hands each entry whose value is old_val to nv_reference() together
+ * with new_val.
+ *
+ * Returns the number of table entries that were redirected. If old_val and
+ * new_val are the same value nothing is touched and old_val->refc is
+ * returned instead. If either value is NULL nothing is touched and 0 is
+ * returned.
+ */
+int
+nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
+                   struct nv_value *new_val)
+{
+   int i, n = 0;
+
+   /* The identity shortcut below dereferences old_val, so a NULL/NULL pair
+    * must be rejected first; a replacement from or by "no value" is treated
+    * as "nothing replaced".
+    */
+   if (!old_val || !new_val)
+      return 0;
+
+   if (old_val == new_val)
+      return old_val->refc;
+
+   for (i = 0; i < pc->num_refs; ++i) {
+      if (pc->refs[i]->value != old_val)
+         continue;
+      nv_reference(pc, &pc->refs[i], new_val);
+      ++n;
+   }
+   return n;
+}
+
static void
nv_pc_free_refs(struct nv_pc *pc)
{
diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h
index 3db300dabb..ffcdaf44af 100644
--- a/src/gallium/drivers/nv50/nv50_pc.h
+++ b/src/gallium/drivers/nv50/nv50_pc.h
@@ -363,11 +363,11 @@ new_ref(struct nv_pc *pc, struct nv_value *val)
const unsigned old_size = pc->num_refs * sizeof(struct nv_ref *);
const unsigned new_size = (pc->num_refs + 64) * sizeof(struct nv_ref *);
- pc->refs = REALLOC(pc->refs, old_size, new_size);
+ pc->refs = REALLOC(pc->refs, old_size, new_size);
- ref = CALLOC(64, sizeof(struct nv_ref));
- for (i = 0; i < 64; ++i)
- pc->refs[pc->num_refs + i] = &ref[i];
+ ref = CALLOC(64, sizeof(struct nv_ref));
+ for (i = 0; i < 64; ++i)
+ pc->refs[pc->num_refs + i] = &ref[i];
}
ref = pc->refs[pc->num_refs++];
@@ -426,6 +426,8 @@ int nv_nvi_refcount(struct nv_instruction *);
void nv_nvi_delete(struct nv_instruction *);
void nv_nvi_permute(struct nv_instruction *, struct nv_instruction *);
void nvbb_attach_block(struct nv_basic_block *parent, struct nv_basic_block *);
+/* Re-point all references to old_val at new_val; returns the number of
+ * references changed (or old_val->refc if both are the same value). */
+int nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
+ struct nv_value *new_val);
int nv_pc_exec_pass0(struct nv_pc *pc);
int nv_pc_exec_pass1(struct nv_pc *pc);
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index a514c59e6a..0018131fb5 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -570,31 +570,99 @@ nv_pass_lower_cond(struct nv_pass *ctx, struct nv_basic_block *b)
}
#endif
-/* TODO: reload elimination, redundant store elimination */
+/* TODO: redundant store elimination */
-struct nv_pass_reldelim {
+/* One remembered load, kept in a singly linked list per source category
+ * by the reload elimination pass. */
+struct load_record {
+ /* next record of the same list, NULL at the end */
+ struct load_record *next;
+ /* lookup key: the source's reg.id, or the 32 bit immediate for MOVs */
+ uint64_t data;
+ /* result (def[0]) of the load that was recorded first for this key */
+ struct nv_value *value;
+};
+
+#define LOAD_RECORD_POOL_SIZE 1024
+
+/* State of the reload elimination pass. All list heads and the pool
+ * counter are reset after each basic block has been processed. */
+struct nv_pass_reld_elim {
struct nv_pc *pc;
+
+ /* MOVs whose source lives in NV_FILE_IMM */
+ struct load_record *imm;
+ /* LDAs from NV_FILE_MEM_S */
+ struct load_record *mem_s;
+ /* LINTERP / PINTERP instructions */
+ struct load_record *mem_v;
+ /* LDAs from NV_FILE_MEM_C(0) .. NV_FILE_MEM_C(15), one list per file */
+ struct load_record *mem_c[16];
+ /* LDAs from NV_FILE_MEM_L */
+ struct load_record *mem_l;
+
+ /* fixed storage the records above are taken from */
+ struct load_record pool[LOAD_RECORD_POOL_SIZE];
+ /* number of pool entries handed out so far; no new loads are recorded
+ * once it reaches LOAD_RECORD_POOL_SIZE */
+ int alloc;
};
static int
-nv_pass_reload_elim(struct nv_pass_reldelim *ctx, struct nv_basic_block *b)
+nv_pass_reload_elim(struct nv_pass_reld_elim *ctx, struct nv_basic_block *b)
{
- int j;
+ struct load_record **rec, *it;
struct nv_instruction *ld, *next;
+ uint64_t data;
+ struct nv_value *val;
+ int j;
for (ld = b->entry; ld; ld = next) {
next = ld->next;
+ if (!ld->src[0])
+ continue;
+ val = ld->src[0]->value;
+ rec = NULL;
if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
-
+ data = val->reg.id;
+ rec = &ctx->mem_v;
} else
if (ld->opcode == NV_OP_LDA) {
-
+ data = val->reg.id;
+ if (val->reg.file >= NV_FILE_MEM_C(0) &&
+ val->reg.file <= NV_FILE_MEM_C(15))
+ rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
+ else
+ if (val->reg.file == NV_FILE_MEM_S)
+ rec = &ctx->mem_s;
+ else
+ if (val->reg.file == NV_FILE_MEM_L)
+ rec = &ctx->mem_l;
} else
- if (ld->opcode == NV_OP_MOV) {
-
+ if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
+ data = val->reg.imm.u32;
+ rec = &ctx->imm;
+ }
+
+ if (!rec || !ld->def[0]->refc)
+ continue;
+
+ for (it = *rec; it; it = it->next)
+ if (it->data == data)
+ break;
+
+ if (it) {
+#if 1
+ nvcg_replace_value(ctx->pc, ld->def[0], it->value);
+#else
+ ld->opcode = NV_OP_MOV;
+ nv_reference(ctx->pc, &ld->src[0], it->value);
+#endif
+ } else {
+ if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
+ continue;
+ it = &ctx->pool[ctx->alloc++];
+ it->next = *rec;
+ it->data = data;
+ it->value = ld->def[0];
+ *rec = it;
}
}
+
+ ctx->imm = NULL;
+ ctx->mem_s = NULL;
+ ctx->mem_v = NULL;
+ for (j = 0; j < 16; ++j)
+ ctx->mem_c[j] = NULL;
+ ctx->mem_l = NULL;
+ ctx->alloc = 0;
+
DESCEND_ARBITRARY(j, nv_pass_reload_elim);
return 0;
@@ -678,23 +746,74 @@ nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
return 0;
}
+/* Local common subexpression elimination, simple O(n^2) implementation.
+ *
+ * For each instruction ir of block b (the scan starts at b->phi if the block
+ * has one, otherwise at b->entry) every earlier instruction ik of the scan
+ * is checked for the same opcode and equal sources 0..2 (same modifier and
+ * either the identical value or the same register file with the same
+ * non-negative id). On a match the uses of ir's result are redirected to
+ * ik's result and ir is deleted. The sweep is repeated until it removes
+ * nothing, then the pass descends via DESCEND_ARBITRARY.
+ *
+ * Never considered: LDA, STA, MOV and vector ops, instructions without a
+ * result in def[0], instructions using an address register (src[4]) and
+ * instructions reading a condition code (flags_src).
+ *
+ * Always returns 0.
+ */
+static int
+nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
+{
+   struct nv_instruction *ir, *ik, *next;
+   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
+   int s;
+   unsigned int reps;
+
+   do {
+      reps = 0;
+      for (ir = entry; ir; ir = next) {
+         next = ir->next; /* saved first, ir may be deleted below */
+         for (ik = entry; ik != ir; ik = ik->next) {
+            if (ir->opcode != ik->opcode)
+               continue;
+
+            if (ik->opcode == NV_OP_LDA ||
+                ik->opcode == NV_OP_STA ||
+                ik->opcode == NV_OP_MOV ||
+                nv_is_vector_op(ik->opcode))
+               continue; /* ignore loads, stores & moves */
+
+            if (!ir->def[0] || !ik->def[0])
+               continue; /* no result to forward, nothing to eliminate */
+
+            if (ik->src[4] || ir->src[4])
+               continue; /* don't mess with address registers */
+
+            /* The source comparison below does not look at the predicate,
+             * so conditionally executed instructions are left alone.
+             */
+            if (ik->flags_src || ir->flags_src)
+               continue;
+
+            for (s = 0; s < 3; ++s) {
+               struct nv_value *va, *vb;
+
+               if (!ik->src[s] || !ir->src[s]) {
+                  if (ik->src[s] != ir->src[s])
+                     break; /* only one of the two has this source */
+                  continue;
+               }
+               if (ik->src[s]->mod != ir->src[s]->mod)
+                  break;
+               va = ik->src[s]->value;
+               vb = ir->src[s]->value;
+               if (va == vb)
+                  continue;
+               if (va->reg.file != vb->reg.file ||
+                   va->reg.id < 0 ||
+                   va->reg.id != vb->reg.id)
+                  break;
+            }
+            if (s == 3) {
+               /* Redirect the uses while ir is still intact; what
+                * nv_nvi_delete() leaves behind in ir is not relied upon.
+                */
+               nvcg_replace_value(ctx->pc, ir->def[0], ik->def[0]);
+               nv_nvi_delete(ir);
+               ++reps;
+               break;
+            }
+         }
+      }
+   } while (reps);
+
+   DESCEND_ARBITRARY(s, nv_pass_cse);
+
+   return 0;
+}
+
int
nv_pc_exec_pass0(struct nv_pc *pc)
{
- struct nv_pass_reldelim *reldelim;
+ struct nv_pass_reld_elim *reldelim;
struct nv_pass pass;
struct nv_pass_dce dce;
int ret;
- reldelim = CALLOC_STRUCT(nv_pass_reldelim);
- reldelim->pc = pc;
-
- ret = nv_pass_reload_elim(reldelim, pc->root);
-
- FREE(reldelim);
- if (ret)
- return ret;
-
pass.pc = pc;
pc->pass_seq++;
@@ -720,6 +839,19 @@ nv_pc_exec_pass0(struct nv_pc *pc)
if (ret)
return ret;
+ reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
+ reldelim->pc = pc;
+ pc->pass_seq++;
+ ret = nv_pass_reload_elim(reldelim, pc->root);
+ FREE(reldelim);
+ if (ret)
+ return ret;
+
+ pc->pass_seq++;
+ ret = nv_pass_cse(&pass, pc->root);
+ if (ret)
+ return ret;
+
pc->pass_seq++;
ret = nv_pass_lower_mods(&pass, pc->root);
if (ret)
diff --git a/src/gallium/drivers/nv50/nv50_pc_print.c b/src/gallium/drivers/nv50/nv50_pc_print.c
index 00b50b4edc..82080779c3 100644
--- a/src/gallium/drivers/nv50/nv50_pc_print.c
+++ b/src/gallium/drivers/nv50/nv50_pc_print.c
@@ -181,7 +181,7 @@ nv_print_address(const char c, int buf, struct nv_value *a, int offset)
static INLINE void
nv_print_cond(struct nv_instruction *nvi)
{
- PRINT("%s%s%s$c%i ",
+ PRINT("%s%s %s$c%i ",
gree, nv_cond_name(nvi->cc),
mgta, nv_value_id(nvi->flags_src->value));
}
@@ -198,7 +198,7 @@ nv_print_value(struct nv_value *value, struct nv_value *ind, ubyte type)
PRINT(" %s%s", gree, nv_type_name(type));
if (!nv_value_allocated(value))
- reg_pfx = '%';
+ reg_pfx = nv_value_allocated(value->join) ? '&' : '%';
switch (value->reg.file) {
case NV_FILE_GPR:
@@ -268,6 +268,8 @@ nv_print_instruction(struct nv_instruction *i)
{
int j;
+ PRINT("%i: ", i->serial);
+
if (i->flags_src)
nv_print_cond(i);