diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_wm_emit.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_emit.c | 172 |
1 files changed, 76 insertions, 96 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 96fecc97ee..be86e0e128 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -219,43 +219,45 @@ void emit_wpos_xy(struct brw_wm_compile *c, const struct brw_reg *arg0) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; + struct brw_reg delta_x = retype(arg0[0], BRW_REGISTER_TYPE_W); + struct brw_reg delta_y = retype(arg0[1], BRW_REGISTER_TYPE_W); if (mask & WRITEMASK_X) { + if (intel->gen >= 6) { + struct brw_reg delta_x_f = retype(delta_x, BRW_REGISTER_TYPE_F); + brw_MOV(p, delta_x_f, delta_x); + delta_x = delta_x_f; + } + if (c->fp->program.PixelCenterInteger) { /* X' = X */ - brw_MOV(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_W)); + brw_MOV(p, dst[0], delta_x); } else { /* X' = X + 0.5 */ - brw_ADD(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_W), - brw_imm_f(0.5)); + brw_ADD(p, dst[0], delta_x, brw_imm_f(0.5)); } } if (mask & WRITEMASK_Y) { + if (intel->gen >= 6) { + struct brw_reg delta_y_f = retype(delta_y, BRW_REGISTER_TYPE_F); + brw_MOV(p, delta_y_f, delta_y); + delta_y = delta_y_f; + } + if (c->fp->program.OriginUpperLeft) { if (c->fp->program.PixelCenterInteger) { /* Y' = Y */ - brw_MOV(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_W)); + brw_MOV(p, dst[1], delta_y); } else { - /* Y' = Y + 0.5 */ - brw_ADD(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_W), - brw_imm_f(0.5)); + brw_ADD(p, dst[1], delta_y, brw_imm_f(0.5)); } } else { float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5; /* Y' = (height - 1) - Y + center */ - brw_ADD(p, - dst[1], - negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), + brw_ADD(p, dst[1], negate(delta_y), brw_imm_f(c->key.drawable_height - 1 + center_offset)); } } @@ -896,10 +898,14 @@ void emit_math1(struct brw_wm_compile *c, BRW_MATH_SATURATE_NONE); struct brw_reg src; - if (intel->gen >= 6 && (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 || - arg0[0].file != BRW_GENERAL_REGISTER_FILE)) { + if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 || + arg0[0].file != BRW_GENERAL_REGISTER_FILE) || + arg0[0].negate || arg0[0].abs)) { /* Gen6 math requires that source and dst horizontal stride be 1, * and that the argument be in the GRF. + * + * The hardware ignores source modifiers (negate and abs) on math + * instructions, so we also move to a temp to set those up. */ src = dst[dst_chan]; brw_MOV(p, src, arg0[0]); @@ -967,34 +973,23 @@ void emit_math2(struct brw_wm_compile *c, struct brw_reg temp_dst = dst[dst_chan]; if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - /* Both scalar arguments. Do scalar calc. */ - src0.hstride = BRW_HORIZONTAL_STRIDE_1; - src1.hstride = BRW_HORIZONTAL_STRIDE_1; - temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1; - temp_dst.width = BRW_WIDTH_1; - - if (arg0[0].subnr != 0) { - brw_MOV(p, temp_dst, src0); - src0 = temp_dst; - - /* Ouch. We've used the temp as a dst, and we still - * need a temp to store arg1 in, because src and dst - * offsets have to be equal. Leaving this up to - * glsl2-965 to handle correctly. - */ - assert(arg1[0].subnr == 0); - } else if (arg1[0].subnr != 0) { - brw_MOV(p, temp_dst, src1); - src1 = temp_dst; - } - } else { - brw_MOV(p, temp_dst, src0); - src0 = temp_dst; - } - } else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - brw_MOV(p, temp_dst, src1); - src1 = temp_dst; + brw_MOV(p, temp_dst, src0); + src0 = temp_dst; + } + + if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { + /* This is a heinous hack to get a temporary register for use + * in case both arg0 and arg1 are constants. Why you're + * doing exponentiation on constant values in the shader, we + * don't know. + * + * max_wm_grf is almost surely less than the maximum GRF, and + * gen6 doesn't care about the number of GRFs used in a + * shader like pre-gen6 did. + */ + struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0); + brw_MOV(p, temp, src1); + src1 = temp; } brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); @@ -1012,14 +1007,6 @@ void emit_math2(struct brw_wm_compile *c, sechalf(src0), sechalf(src1)); } - - /* Splat a scalar result into all the channels. */ - if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 && - arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0; - temp_dst.vstride = BRW_VERTICAL_STRIDE_0; - brw_MOV(p, dst[dst_chan], temp_dst); - } } else { GLuint saturate = ((mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : @@ -1301,9 +1288,15 @@ static void emit_kil( struct brw_wm_compile *c, struct brw_reg *arg0) { struct brw_compile *p = &c->func; - struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + struct intel_context *intel = &p->brw->intel; + struct brw_reg pixelmask; GLuint i, j; + if (intel->gen >= 6) + pixelmask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); + else + pixelmask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + for (i = 0; i < 4; i++) { /* Check if we've already done the comparison for this reg * -- common when someone does KIL TEMP.wwww. @@ -1319,26 +1312,11 @@ static void emit_kil( struct brw_wm_compile *c, brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); brw_set_predicate_control_flag_value(p, 0xff); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_AND(p, r0uw, brw_flag_reg(), r0uw); + brw_AND(p, pixelmask, brw_flag_reg(), pixelmask); brw_pop_insn_state(p); } } -/* KIL_NV kills the pixels that are currently executing, not based on a test - * of the arguments. - */ -void emit_kil_nv( struct brw_wm_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */ - brw_AND(p, r0uw, c->emit_mask_reg, r0uw); - brw_pop_insn_state(p); -} - static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, GLuint nr, @@ -1387,8 +1365,8 @@ static void emit_aa( struct brw_wm_compile *c, GLuint reg ) { struct brw_compile *p = &c->func; - GLuint comp = c->key.aa_dest_stencil_reg / 2; - GLuint off = c->key.aa_dest_stencil_reg % 2; + GLuint comp = c->aa_dest_stencil_reg / 2; + GLuint off = c->aa_dest_stencil_reg % 2; struct brw_reg aa = offset(arg1[comp], off); brw_push_insn_state(p); @@ -1416,11 +1394,10 @@ void emit_fb_write(struct brw_wm_compile *c, struct intel_context *intel = &brw->intel; GLuint nr = 2; GLuint channel; - int base_reg; /* For gen6 fb write with no header, starting from color payload directly!. */ /* Reserve a space for AA - may not be needed: */ - if (c->key.aa_dest_stencil_reg) + if (c->aa_dest_stencil_reg) nr += 1; /* I don't really understand how this achieves the color interleave @@ -1428,11 +1405,6 @@ void emit_fb_write(struct brw_wm_compile *c, */ brw_push_insn_state(p); - if (intel->gen >= 6) - base_reg = nr; - else - base_reg = 0; - for (channel = 0; channel < 4; channel++) { if (intel->gen >= 6) { /* gen6 SIMD16 single source DP write looks like: @@ -1493,9 +1465,9 @@ void emit_fb_write(struct brw_wm_compile *c, brw_pop_insn_state(p); - if (c->key.source_depth_to_render_target) + if (c->source_depth_to_render_target) { - if (c->key.computes_depth) + if (c->computes_depth) brw_MOV(p, brw_message_reg(nr), arg2[2]); else brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */ @@ -1503,10 +1475,10 @@ void emit_fb_write(struct brw_wm_compile *c, nr += 2; } - if (c->key.dest_depth_reg) + if (c->dest_depth_reg) { - GLuint comp = c->key.dest_depth_reg / 2; - GLuint off = c->key.dest_depth_reg % 2; + GLuint comp = c->dest_depth_reg / 2; + GLuint off = c->dest_depth_reg % 2; if (off != 0) { brw_push_insn_state(p); @@ -1524,15 +1496,27 @@ void emit_fb_write(struct brw_wm_compile *c, } if (intel->gen >= 6) { - /* Subtract off the message header, since we send headerless. */ - nr -= 2; + /* Load the message header. There's no implied move from src0 + * to the base mrf on gen6. + */ + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, brw_message_reg(0), brw_vec8_grf(0, 0)); + brw_pop_insn_state(p); + + if (target != 0) { + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + 0, + 2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(target)); + } } - if (!c->key.runtime_check_aads_emit) { - if (c->key.aa_dest_stencil_reg) + if (!c->runtime_check_aads_emit) { + if (c->aa_dest_stencil_reg) emit_aa(c, arg1, 2); - fire_fb_write(c, base_reg, nr, target, eot); + fire_fb_write(c, 0, nr, target, eot); } else { struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); @@ -1897,10 +1881,6 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_kil(c, args[0]); break; - case OPCODE_KIL_NV: - emit_kil_nv(c); - break; - default: printf("Unsupported opcode %i (%s) in fragment shader\n", inst->opcode, inst->opcode < MAX_OPCODE ? |