diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 237 |
1 files changed, 197 insertions, 40 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index ddd3a94eb0..a1fead0a4f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -448,6 +448,7 @@ static void brw_set_dp_write_message( struct brw_context *brw, GLuint msg_control, GLuint msg_type, GLuint msg_length, + GLboolean header_present, GLuint pixel_scoreboard_clear, GLuint response_length, GLuint end_of_thread, @@ -462,7 +463,7 @@ static void brw_set_dp_write_message( struct brw_context *brw, insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear; insn->bits3.dp_render_cache.msg_type = msg_type; insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg; - insn->bits3.dp_render_cache.header_present = 0; /* XXX */ + insn->bits3.dp_render_cache.header_present = header_present; insn->bits3.dp_render_cache.response_length = response_length; insn->bits3.dp_render_cache.msg_length = msg_length; insn->bits3.dp_render_cache.end_of_thread = end_of_thread; @@ -476,7 +477,7 @@ static void brw_set_dp_write_message( struct brw_context *brw, insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; insn->bits3.dp_write_gen5.msg_type = msg_type; insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; - insn->bits3.dp_write_gen5.header_present = 1; + insn->bits3.dp_write_gen5.header_present = header_present; insn->bits3.dp_write_gen5.response_length = response_length; insn->bits3.dp_write_gen5.msg_length = msg_length; insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; @@ -548,7 +549,7 @@ static void brw_set_sampler_message(struct brw_context *brw, assert(eot == 0); brw_set_src1(insn, brw_imm_d(0)); - if (intel->gen == 5) { + if (intel->gen >= 5) { insn->bits3.sampler_gen5.binding_table_index = binding_table_index; insn->bits3.sampler_gen5.sampler = sampler; insn->bits3.sampler_gen5.msg_type = msg_type; @@ -557,8 +558,12 @@ static void brw_set_sampler_message(struct brw_context *brw, insn->bits3.sampler_gen5.response_length = response_length; insn->bits3.sampler_gen5.msg_length = msg_length; insn->bits3.sampler_gen5.end_of_thread = eot; - insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; - insn->bits2.send_gen5.end_of_thread = eot; + if (intel->gen >= 6) + insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER; + else { + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; + insn->bits2.send_gen5.end_of_thread = eot; + } } else if (intel->is_g4x) { insn->bits3.sampler_g4x.binding_table_index = binding_table_index; insn->bits3.sampler_g4x.sampler = sampler; @@ -649,6 +654,26 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \ return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ } +/* Rounding operations (other than RNDD) require two instructions - the first + * stores a rounded value (possibly the wrong way) in the dest register, but + * also sets a per-channel "increment bit" in the flag register. A predicated + * add of 1.0 fixes dest to contain the desired result. + */ +#define ROUND(OP) \ +void brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src) \ +{ \ + struct brw_instruction *rnd, *add; \ + rnd = next_insn(p, BRW_OPCODE_##OP); \ + brw_set_dest(rnd, dest); \ + brw_set_src0(rnd, src); \ + rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \ + \ + add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ + add->header.predicate_control = BRW_PREDICATE_NORMAL; \ +} + ALU1(MOV) ALU2(SEL) @@ -663,7 +688,6 @@ ALU2(RSL) ALU2(ASR) ALU1(FRC) ALU1(RNDD) -ALU1(RNDZ) ALU2(MAC) ALU2(MACH) ALU1(LZD) @@ -674,6 +698,11 @@ ALU2(DP2) ALU2(LINE) ALU2(PLN) + +ROUND(RNDZ) +ROUND(RNDE) + + struct brw_instruction *brw_ADD(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0, @@ -782,6 +811,7 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, */ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; if (p->single_program_flow) { @@ -795,9 +825,15 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) /* Override the defaults for this instruction: */ - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + if (intel->gen < 6) { + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + } else { + brw_set_dest(insn, brw_imm_w(0)); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + } insn->header.execution_size = execute_size; insn->header.compression_control = BRW_COMPRESSION_NONE; @@ -819,7 +855,9 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, struct brw_instruction *insn; GLuint br = 1; - if (intel->gen == 5) + /* jump count is for 64bit data chunk each, so one 128bit + instruction requires 2 chunks. */ + if (intel->gen >= 5) br = 2; if (p->single_program_flow) { @@ -828,9 +866,15 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, insn = next_insn(p, BRW_OPCODE_ELSE); } - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + if (intel->gen < 6) { + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + } else { + brw_set_dest(insn, brw_imm_w(0)); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + } insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = if_insn->header.execution_size; @@ -847,9 +891,13 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, } else { assert(if_insn->header.opcode == BRW_OPCODE_IF); - if_insn->bits3.if_else.jump_count = br * (insn - if_insn); - if_insn->bits3.if_else.pop_count = 0; - if_insn->bits3.if_else.pad0 = 0; + if (intel->gen < 6) { + if_insn->bits3.if_else.jump_count = br * (insn - if_insn); + if_insn->bits3.if_else.pop_count = 0; + if_insn->bits3.if_else.pad0 = 0; + } else { + if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1); + } } return insn; @@ -861,7 +909,7 @@ void brw_ENDIF(struct brw_compile *p, struct intel_context *intel = &p->brw->intel; GLuint br = 1; - if (intel->gen == 5) + if (intel->gen >= 5) br = 2; if (p->single_program_flow) { @@ -877,9 +925,15 @@ void brw_ENDIF(struct brw_compile *p, } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, brw_imm_d(0x0)); + if (intel->gen < 6) { + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_d(0x0)); + } else { + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_W)); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + } insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = patch_insn->header.execution_size; @@ -892,25 +946,42 @@ void brw_ENDIF(struct brw_compile *p, * instruction respectively. */ if (patch_insn->header.opcode == BRW_OPCODE_IF) { - /* Automagically turn it into an IFF: - */ - patch_insn->header.opcode = BRW_OPCODE_IFF; - patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); - patch_insn->bits3.if_else.pop_count = 0; - patch_insn->bits3.if_else.pad0 = 0; - } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { - patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); - patch_insn->bits3.if_else.pop_count = 1; - patch_insn->bits3.if_else.pad0 = 0; + if (intel->gen < 6) { + /* Turn it into an IFF, which means no mask stack operations for + * all-false and jumping past the ENDIF. + */ + patch_insn->header.opcode = BRW_OPCODE_IFF; + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); + patch_insn->bits3.if_else.pop_count = 0; + patch_insn->bits3.if_else.pad0 = 0; + } else { + /* As of gen6, there is no IFF and IF must point to the ENDIF. */ + patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn); + } } else { - assert(0); + assert(patch_insn->header.opcode == BRW_OPCODE_ELSE); + if (intel->gen < 6) { + /* BRW_OPCODE_ELSE pre-gen6 should point just past the + * matching ENDIF. + */ + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); + patch_insn->bits3.if_else.pop_count = 1; + patch_insn->bits3.if_else.pad0 = 0; + } else { + /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */ + patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn); + } } /* Also pop item off the stack in the endif instruction: */ - insn->bits3.if_else.jump_count = 0; - insn->bits3.if_else.pop_count = 1; - insn->bits3.if_else.pad0 = 0; + if (intel->gen < 6) { + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } else { + insn->bits1.branch_gen6.jump_count = 2; + } } } @@ -978,7 +1049,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *insn; GLuint br = 1; - if (intel->gen == 5) + if (intel->gen >= 5) br = 2; if (p->single_program_flow) @@ -1022,7 +1093,7 @@ void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *landing = &p->store[p->nr_insn]; GLuint jmpi = 1; - if (intel->gen == 5) + if (intel->gen >= 5) jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); @@ -1100,6 +1171,17 @@ void brw_math( struct brw_compile *p, if (intel->gen >= 6) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); + assert(dest.file == BRW_GENERAL_REGISTER_FILE); + assert(src.file == BRW_GENERAL_REGISTER_FILE); + + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + + if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && + function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { + assert(src.type == BRW_REGISTER_TYPE_F); + } + /* Math is the same ISA format as other opcodes, except that CondModifier * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ @@ -1131,6 +1213,45 @@ void brw_math( struct brw_compile *p, } } +/** Extended math function, float[8]. + */ +void brw_math2(struct brw_compile *p, + struct brw_reg dest, + GLuint function, + struct brw_reg src0, + struct brw_reg src1) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); + + assert(intel->gen >= 6); + (void) intel; + + + assert(dest.file == BRW_GENERAL_REGISTER_FILE); + assert(src0.file == BRW_GENERAL_REGISTER_FILE); + assert(src1.file == BRW_GENERAL_REGISTER_FILE); + + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); + + if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && + function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { + assert(src0.type == BRW_REGISTER_TYPE_F); + assert(src1.type == BRW_REGISTER_TYPE_F); + } + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); +} + /** * Extended math function, float[16]. * Use 2 send instructions. @@ -1264,6 +1385,7 @@ void brw_dp_WRITE_16( struct brw_compile *p, BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ msg_length, + GL_TRUE, /* header_present */ 0, /* pixel scoreboard */ send_commit_msg, /* response_length */ 0, /* eot */ @@ -1501,12 +1623,16 @@ void brw_fb_WRITE(struct brw_compile *p, struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_control, msg_type; + GLboolean header_present = GL_TRUE; insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; if (intel->gen >= 6) { + if (msg_length == 4) + header_present = GL_FALSE; + /* headerless version, just submit color payload */ src0 = brw_message_reg(msg_reg_nr); @@ -1530,6 +1656,7 @@ void brw_fb_WRITE(struct brw_compile *p, msg_control, msg_type, msg_length, + header_present, 1, /* pixel scoreboard */ response_length, eot, @@ -1556,6 +1683,7 @@ void brw_SAMPLE(struct brw_compile *p, GLuint header_present, GLuint simd_mode) { + struct intel_context *intel = &p->brw->intel; GLboolean need_stall = 0; if (writemask == 0) { @@ -1627,11 +1755,25 @@ void brw_SAMPLE(struct brw_compile *p, } { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + struct brw_instruction *insn; + /* Sandybridge doesn't have the implied move for SENDs, + * and the first message register index comes from src0. + */ + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + /* m1 contains header? */ + brw_MOV(p, brw_message_reg(msg_reg_nr), src0); + brw_pop_insn_state(p); + src0 = brw_message_reg(msg_reg_nr); + } + + insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditionalmod = msg_reg_nr; + if (intel->gen < 6) + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); @@ -1721,13 +1863,28 @@ void brw_ff_sync(struct brw_compile *p, GLuint response_length, GLboolean eot) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn; + /* Sandybridge doesn't have the implied move for SENDs, + * and the first message register index comes from src0. + */ + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), + retype(src0, BRW_REGISTER_TYPE_UD)); + brw_pop_insn_state(p); + src0 = brw_message_reg(msg_reg_nr); + } + + insn = next_insn(p, BRW_OPCODE_SEND); brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditionalmod = msg_reg_nr; + if (intel->gen < 6) + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_ff_sync_message(p->brw, insn, |