From 5944cda6ed1182f8dc45452708df5fde2474d437 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 19 Nov 2010 17:44:35 +0800 Subject: i965: Just use memset() to clear most members in FS constructors. This should make it a lot harder to forget to zero things. --- src/mesa/drivers/dri/i965/brw_fs.h | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_fs.h') diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 9b7fcde858..896dc57705 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -96,10 +96,7 @@ public: void init() { - this->reg = 0; - this->reg_offset = 0; - this->negate = 0; - this->abs = 0; + memset(this, 0, sizeof(*this)); this->hw_reg = -1; this->smear = -1; } @@ -174,6 +171,10 @@ public: } imm; }; +static const fs_reg reg_undef; +static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); +static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); + class fs_inst : public exec_node { public: /* Callers of this talloc-based new need not call delete. 
It's @@ -190,18 +191,14 @@ public: void init() { + memset(this, 0, sizeof(*this)); this->opcode = BRW_OPCODE_NOP; - this->saturate = false; this->conditional_mod = BRW_CONDITIONAL_NONE; - this->predicated = false; - this->sampler = 0; - this->target = 0; - this->eot = false; - this->header_present = false; - this->shadow_compare = false; - this->mlen = 0; - this->base_mrf = 0; - this->offset = 0; + + this->dst = reg_undef; + this->src[0] = reg_undef; + this->src[1] = reg_undef; + this->src[2] = reg_undef; } fs_inst() @@ -454,9 +451,5 @@ public: int grf_used; }; -static const fs_reg reg_undef; -static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); -static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); - GLboolean brw_do_channel_expressions(struct exec_list *instructions); GLboolean brw_do_vector_splitting(struct exec_list *instructions); -- cgit v1.2.3 From 19631fab35ca4d5ca64d606922f3f20774b27645 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 19 Nov 2010 10:36:06 +0800 Subject: i965: Recognize saturates and turn them into a saturated mov. On pre-gen6, this turns 4 instructions into 1. We could still do better by folding the saturate into the instruction generating the value if nobody else uses it, but that should be a separate pass. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 26 ++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 1 + 2 files changed, 27 insertions(+) (limited to 'src/mesa/drivers/dri/i965/brw_fs.h') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 34f978435a..1b2989f46e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -704,6 +704,27 @@ fs_visitor::visit(ir_dereference_array *ir) } } +/* Instruction selection: Produce a MOV.sat instead of + * MIN(MAX(val, 0), 1) when possible. 
+ */ +bool +fs_visitor::try_emit_saturate(ir_expression *ir) +{ + ir_rvalue *sat_val = ir->as_rvalue_to_saturate(); + + if (!sat_val) + return false; + + sat_val->accept(this); + fs_reg src = this->result; + + this->result = fs_reg(this, ir->type); + fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, src)); + inst->saturate = true; + + return true; +} + void fs_visitor::visit(ir_expression *ir) { @@ -712,6 +733,10 @@ fs_visitor::visit(ir_expression *ir) fs_inst *inst; assert(ir->get_num_operands() <= 2); + + if (try_emit_saturate(ir)) + return; + for (operand = 0; operand < ir->get_num_operands(); operand++) { ir->operands[operand]->accept(this); if (this->result.file == BAD_FILE) { @@ -3162,6 +3187,7 @@ fs_visitor::generate_code() brw_set_conditionalmod(p, inst->conditional_mod); brw_set_predicate_control(p, inst->predicated); + brw_set_saturate(p, inst->saturate); switch (inst->opcode) { case BRW_OPCODE_MOV: diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 896dc57705..f546fabd55 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -397,6 +397,7 @@ public: fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate); fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0); fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1); + bool try_emit_saturate(ir_expression *ir); void emit_bool_to_cond_code(ir_rvalue *condition); void emit_if_gen6(ir_if *ir); void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); -- cgit v1.2.3 From b6b91fa02911f5dfc5d528d822674ee5557800d9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 19 Nov 2010 15:57:05 +0800 Subject: i965: Remove duplicate MRF writes in the FS backend. This is quite common for multitexture sampling, and not only cuts down on the second and later set of MOVs, but typically also allows compute-to-MRF on the first set. 
No statistically significant performance difference in nexuiz (n=3), but it reduces instruction count in one of its shaders and seems like a good idea. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 115 +++++++++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 37 +++++++++++ 2 files changed, 152 insertions(+) (limited to 'src/mesa/drivers/dri/i965/brw_fs.h') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 76794fa414..edb02fabb2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -177,6 +177,46 @@ type_size(const struct glsl_type *type) } } +/** + * Returns how many MRFs an FS opcode will write over. + * + * Note that this is not the 0 or 1 implied writes in an actual gen + * instruction -- the FS opcodes often generate MOVs in addition. + */ +int +fs_visitor::implied_mrf_writes(fs_inst *inst) +{ + if (inst->mlen == 0) + return 0; + + switch (inst->opcode) { + case FS_OPCODE_RCP: + case FS_OPCODE_RSQ: + case FS_OPCODE_SQRT: + case FS_OPCODE_EXP2: + case FS_OPCODE_LOG2: + case FS_OPCODE_SIN: + case FS_OPCODE_COS: + return 1; + case FS_OPCODE_POW: + return 2; + case FS_OPCODE_TEX: + case FS_OPCODE_TXB: + case FS_OPCODE_TXL: + return 1; + case FS_OPCODE_FB_WRITE: + return 2; + case FS_OPCODE_PULL_CONSTANT_LOAD: + case FS_OPCODE_UNSPILL: + return 1; + case FS_OPCODE_SPILL: + return 2; + default: + assert(!"not reached"); + return inst->mlen; + } +} + int fs_visitor::virtual_grf_alloc(int size) { @@ -3066,6 +3106,78 @@ fs_visitor::compute_to_mrf() return progress; } +/** + * Walks through basic blocks, looking for repeated MRF writes and + * removing the later ones. 
+ */ +bool +fs_visitor::remove_duplicate_mrf_writes() +{ + fs_inst *last_mrf_move[16]; + bool progress = false; + + memset(last_mrf_move, 0, sizeof(last_mrf_move)); + + foreach_iter(exec_list_iterator, iter, this->instructions) { + fs_inst *inst = (fs_inst *)iter.get(); + + switch (inst->opcode) { + case BRW_OPCODE_DO: + case BRW_OPCODE_WHILE: + case BRW_OPCODE_IF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_ENDIF: + memset(last_mrf_move, 0, sizeof(last_mrf_move)); + continue; + default: + break; + } + + if (inst->opcode == BRW_OPCODE_MOV && + inst->dst.file == MRF) { + fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg]; + if (prev_inst && inst->equals(prev_inst)) { + inst->remove(); + progress = true; + continue; + } + } + + /* Clear out the last-write records for MRFs that were overwritten. */ + if (inst->dst.file == MRF) { + last_mrf_move[inst->dst.hw_reg] = NULL; + } + + if (inst->mlen > 0) { + /* Found a SEND instruction, which will include two or fewer + * implied MRF writes. We could do better here. + */ + for (int i = 0; i < implied_mrf_writes(inst); i++) { + last_mrf_move[inst->base_mrf + i] = NULL; + } + } + + /* Clear out any MRF move records whose sources got overwritten. 
*/ + if (inst->dst.file == GRF) { + for (unsigned int i = 0; i < Elements(last_mrf_move); i++) { + if (last_mrf_move[i] && + last_mrf_move[i]->src[0].reg == inst->dst.reg) { + last_mrf_move[i] = NULL; + } + } + } + + if (inst->opcode == BRW_OPCODE_MOV && + inst->dst.file == MRF && + inst->src[0].file == GRF && + !inst->predicated) { + last_mrf_move[inst->dst.hw_reg] = inst; + } + } + + return progress; +} + bool fs_visitor::virtual_grf_interferes(int a, int b) { @@ -3438,6 +3550,9 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) bool progress; do { progress = false; + + progress = v.remove_duplicate_mrf_writes() || progress; + v.calculate_live_intervals(); progress = v.propagate_constants() || progress; progress = v.register_coalesce() || progress; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index f546fabd55..de7b15312a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -148,6 +148,21 @@ public: fs_reg(enum register_file file, int hw_reg, uint32_t type); fs_reg(class fs_visitor *v, const struct glsl_type *type); + bool equals(fs_reg *r) + { + return (file == r->file && + reg == r->reg && + reg_offset == r->reg_offset && + hw_reg == r->hw_reg && + type == r->type && + negate == r->negate && + abs == r->abs && + memcmp(&fixed_hw_reg, &r->fixed_hw_reg, + sizeof(fixed_hw_reg)) == 0 && + smear == r->smear && + imm.u == r->imm.u); + } + /** Register file: ARF, GRF, MRF, IMM. */ enum register_file file; /** virtual register number. 
0 = fixed hw reg */ @@ -270,6 +285,26 @@ public: assert(src[2].reg_offset >= 0); } + bool equals(fs_inst *inst) + { + return (opcode == inst->opcode && + dst.equals(&inst->dst) && + src[0].equals(&inst->src[0]) && + src[1].equals(&inst->src[1]) && + src[2].equals(&inst->src[2]) && + saturate == inst->saturate && + predicated == inst->predicated && + conditional_mod == inst->conditional_mod && + mlen == inst->mlen && + base_mrf == inst->base_mrf && + sampler == inst->sampler && + target == inst->target && + eot == inst->eot && + header_present == inst->header_present && + shadow_compare == inst->shadow_compare && + offset == inst->offset); + } + int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ fs_reg dst; fs_reg src[3]; @@ -372,6 +407,7 @@ public: bool register_coalesce(); bool compute_to_mrf(); bool dead_code_eliminate(); + bool remove_duplicate_mrf_writes(); bool virtual_grf_interferes(int a, int b); void generate_code(); void generate_fb_write(fs_inst *inst); @@ -409,6 +445,7 @@ public: struct brw_reg interp_reg(int location, int channel); int setup_uniform_values(int loc, const glsl_type *type); void setup_builtin_uniform_values(ir_variable *ir); + int implied_mrf_writes(fs_inst *inst); struct brw_context *brw; const struct gl_fragment_program *fp; -- cgit v1.2.3