diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 66 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 38 |
2 files changed, 74 insertions, 30 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 9cb941dacf..660f5b4845 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1058,10 +1058,26 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) } /* DO/WHILE loop: + * + * The DO/WHILE is just an unterminated loop -- break or continue are + * used for control within the loop. We have a few ways they can be + * done. + * + * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, + * jip and no DO instruction. + * + * For non-uniform control flow pre-gen6, there's a DO instruction to + * push the mask, and a WHILE to jump back, and BREAK to get out and + * pop the mask. + * + * For gen6, there's no more mask stack, so no need for DO. WHILE + * just points back to the first instruction of the loop. */ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) { - if (p->single_program_flow) { + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6 || p->single_program_flow) { return &p->store[p->nr_insn]; } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); @@ -1094,34 +1110,42 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, if (intel->gen >= 5) br = 2; - if (p->single_program_flow) - insn = next_insn(p, BRW_OPCODE_ADD); - else + if (intel->gen >= 6) { insn = next_insn(p, BRW_OPCODE_WHILE); - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + brw_set_dest(insn, brw_imm_w(0)); + insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = do_insn->header.execution_size; + assert(insn->header.execution_size == BRW_EXECUTE_8); + } else { + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); - if (p->single_program_flow) { - insn->header.execution_size = BRW_EXECUTE_1; + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16)); + insn->header.execution_size = BRW_EXECUTE_1; + } else { + insn = next_insn(p, BRW_OPCODE_WHILE); - insn->bits3.d = (do_insn - insn) * 16; - } else { - insn->header.execution_size = do_insn->header.execution_size; + assert(do_insn->header.opcode == BRW_OPCODE_DO); - assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); - insn->bits3.if_else.pop_count = 0; - insn->bits3.if_else.pad0 = 0; - } + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0)); -/* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.execution_size = do_insn->header.execution_size; + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - p->current->header.predicate_control = BRW_PREDICATE_NONE; return insn; } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7a8e981225..cf45fcaa06 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3402,16 +3402,18 @@ fs_visitor::generate_code() assert(loop_stack_depth > 0); loop_stack_depth--; inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 > loop_stack[loop_stack_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_stack[loop_stack_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } } } } @@ -3488,6 +3490,24 @@ fs_visitor::generate_code() last_native_inst = p->nr_insn; } + + /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS + * emit issues, it doesn't get the jump distances into the output, + * which is often something we want to debug. So this is here in + * case you're doing that. + */ + if (0) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { + for (unsigned int i = 0; i < p->nr_insn; i++) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + } } GLboolean |