summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965/brw_wm_emit.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_wm_emit.c')
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c181
1 files changed, 145 insertions, 36 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index f3ad01b3fe..cb71c665b4 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -173,6 +173,7 @@ void emit_delta_xy(struct brw_compile *p,
GLuint mask,
const struct brw_reg *arg0)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg r1 = brw_vec1_grf(1, 0);
if (mask == 0)
@@ -180,6 +181,21 @@ void emit_delta_xy(struct brw_compile *p,
assert(mask == WRITEMASK_XY);
+ if (intel->gen >= 6) {
+ /* XXX Gen6 WM doesn't have Xstart/Ystart in payload r1.0/r1.1.
+ Just add them with 0.0 for dst reg.. */
+ r1 = brw_imm_v(0x00000000);
+ brw_ADD(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_UW),
+ r1);
+ brw_ADD(p,
+ dst[1],
+ retype(arg0[1], BRW_REGISTER_TYPE_UW),
+ r1);
+ return;
+ }
+
/* Calc delta X,Y by subtracting origin in r1 from the pixel
* centers produced by emit_pixel_xy().
*/
@@ -253,6 +269,15 @@ void emit_pixel_w(struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
struct intel_context *intel = &p->brw->intel;
+ struct brw_reg src;
+ struct brw_reg temp_dst;
+
+ if (intel->gen >= 6)
+ temp_dst = dst[3];
+ else
+ temp_dst = brw_message_reg(2);
+
+ assert(intel->gen < 6);
/* Don't need this if all you are doing is interpolating color, for
* instance.
@@ -264,31 +289,35 @@ void emit_pixel_w(struct brw_wm_compile *c,
* result straight into a message reg.
*/
if (can_do_pln(intel, deltas)) {
- brw_PLN(p, brw_message_reg(2), interp3, deltas[0]);
+ brw_PLN(p, temp_dst, interp3, deltas[0]);
} else {
brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
- brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+ brw_MAC(p, temp_dst, suboffset(interp3, 1), deltas[1]);
}
/* Calc w */
+ if (intel->gen >= 6)
+ src = temp_dst;
+ else
+ src = brw_null_reg();
+
if (c->dispatch_width == 16) {
brw_math_16(p, dst[3],
BRW_MATH_FUNCTION_INV,
BRW_MATH_SATURATE_NONE,
- 2, brw_null_reg(),
+ 2, src,
BRW_MATH_PRECISION_FULL);
} else {
brw_math(p, dst[3],
BRW_MATH_FUNCTION_INV,
BRW_MATH_SATURATE_NONE,
- 2, brw_null_reg(),
+ 2, src,
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
}
}
-
void emit_linterp(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
@@ -307,7 +336,9 @@ void emit_linterp(struct brw_compile *p,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- if (can_do_pln(intel, deltas)) {
+ if (intel->gen >= 6) {
+ brw_PLN(p, dst[i], interp[i], brw_vec8_grf(2, 0));
+ } else if (can_do_pln(intel, deltas)) {
brw_PLN(p, dst[i], interp[i], deltas[0]);
} else {
brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
@@ -330,6 +361,11 @@ void emit_pinterp(struct brw_compile *p,
GLuint nr = arg0[0].nr;
GLuint i;
+ if (intel->gen >= 6) {
+ emit_linterp(p, dst, mask, arg0, interp);
+ return;
+ }
+
interp[0] = brw_vec1_grf(nr, 0);
interp[1] = brw_vec1_grf(nr, 4);
interp[2] = brw_vec1_grf(nr+1, 0);
@@ -910,10 +946,8 @@ void emit_math2(struct brw_wm_compile *c,
const struct brw_reg *arg1)
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
- GLuint saturate = ((mask & SATURATE) ?
- BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE);
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
@@ -922,35 +956,103 @@ void emit_math2(struct brw_wm_compile *c,
brw_push_insn_state(p);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, brw_message_reg(3), arg1[0]);
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
- }
+ /* math can only operate on up to a vec8 at a time, so in
+ * dispatch_width==16 we have to do the second half manually.
+ */
+ if (intel->gen >= 6) {
+ struct brw_reg src0 = arg0[0];
+ struct brw_reg src1 = arg1[0];
+ struct brw_reg temp_dst = dst[dst_chan];
+
+ if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
+ if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
+ /* Both scalar arguments. Do scalar calc. */
+ src0.hstride = BRW_HORIZONTAL_STRIDE_1;
+ src1.hstride = BRW_HORIZONTAL_STRIDE_1;
+ temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1;
+ temp_dst.width = BRW_WIDTH_1;
+
+ if (arg0[0].subnr != 0) {
+ brw_MOV(p, temp_dst, src0);
+ src0 = temp_dst;
+
+ /* Ouch. We've used the temp as a dst, and we still
+ * need a temp to store arg1 in, because src and dst
+ * offsets have to be equal. Leaving this up to
+ * glsl2-965 to handle correctly.
+ */
+ assert(arg1[0].subnr == 0);
+ } else if (arg1[0].subnr != 0) {
+ brw_MOV(p, temp_dst, src1);
+ src1 = temp_dst;
+ }
+ } else {
+ brw_MOV(p, temp_dst, src0);
+ src0 = temp_dst;
+ }
+ } else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
+ brw_MOV(p, temp_dst, src1);
+ src1 = temp_dst;
+ }
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_math(p,
- dst[dst_chan],
- function,
- saturate,
- 2,
- arg0[0],
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_math2(p,
+ temp_dst,
+ function,
+ src0,
+ src1);
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math2(p,
+ sechalf(temp_dst),
+ function,
+ sechalf(src0),
+ sechalf(src1));
+ }
- /* Send two messages to perform all 16 operations:
- */
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ /* Splat a scalar result into all the channels. */
+ if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 &&
+ arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
+ temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0;
+ temp_dst.vstride = BRW_VERTICAL_STRIDE_0;
+ brw_MOV(p, dst[dst_chan], temp_dst);
+ }
+ } else {
+ GLuint saturate = ((mask & SATURATE) ?
+ BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, brw_message_reg(3), arg1[0]);
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
+ }
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p,
- offset(dst[dst_chan],1),
+ dst[dst_chan],
function,
saturate,
- 4,
- sechalf(arg0[0]),
+ 2,
+ arg0[0],
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+
+ /* Send two messages to perform all 16 operations:
+ */
+ if (c->dispatch_width == 16) {
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p,
+ offset(dst[dst_chan],1),
+ function,
+ saturate,
+ 4,
+ sechalf(arg0[0]),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+ }
}
brw_pop_insn_state(p);
}
@@ -1028,7 +1130,7 @@ void emit_tex(struct brw_wm_compile *c,
/* Fill in the shadow comparison reference value. */
if (shadow) {
- if (intel->gen == 5) {
+ if (intel->gen >= 5) {
/* Fill in the cube map array index value. */
brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
cur_mrf += mrf_per_channel;
@@ -1041,7 +1143,7 @@ void emit_tex(struct brw_wm_compile *c,
cur_mrf += mrf_per_channel;
}
- if (intel->gen == 5) {
+ if (intel->gen >= 5) {
if (shadow)
msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
else
@@ -1094,7 +1196,7 @@ void emit_txb(struct brw_wm_compile *c,
* from mattering.
*/
if (c->dispatch_width == 16 || intel->gen < 5) {
- if (intel->gen == 5)
+ if (intel->gen >= 5)
msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
else
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
@@ -1223,7 +1325,7 @@ static void emit_kil( struct brw_wm_compile *c,
/* KIL_NV kills the pixels that are currently executing, not based on a test
* of the arguments.
*/
-static void emit_kil_nv( struct brw_wm_compile *c )
+void emit_kil_nv( struct brw_wm_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
@@ -1553,9 +1655,12 @@ static void spill_values( struct brw_wm_compile *c,
void brw_wm_emit( struct brw_wm_compile *c )
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
GLuint insn;
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+ if (intel->gen >= 6)
+ brw_set_acc_write_control(p, 1);
/* Check if any of the payload regs need to be spilled:
*/
@@ -1667,7 +1772,11 @@ void brw_wm_emit( struct brw_wm_compile *c )
break;
case OPCODE_TRUNC:
- emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
+ for (i = 0; i < 4; i++) {
+ if (dst_flags & (1<<i)) {
+ brw_RNDZ(p, dst[i], args[0][i]);
+ }
+ }
break;
case OPCODE_LRP: