summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965/brw_wm_emit.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_wm_emit.c')
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c171
1 files changed, 124 insertions, 47 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index c7d87b9d94..f79de5dbff 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -34,6 +34,23 @@
#include "brw_context.h"
#include "brw_wm.h"
+static GLboolean can_do_pln(struct intel_context *intel,
+ const struct brw_reg *deltas)
+{
+ struct brw_context *brw = brw_context(&intel->ctx);
+
+ if (!brw->has_pln)
+ return GL_FALSE;
+
+ if (deltas[1].nr != deltas[0].nr + 1)
+ return GL_FALSE;
+
+ if (intel->gen < 6 && ((deltas[0].nr & 1) != 0))
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
/* Not quite sure how correct this is - need to understand horiz
* vs. vertical strides a little better.
*/
@@ -44,8 +61,52 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
return reg;
}
+/* Return the SrcReg index of the channels that can be immediate float operands
+ * instead of usage of PROGRAM_CONSTANT values through push/pull.
+ */
+GLboolean
+brw_wm_arg_can_be_immediate(enum prog_opcode opcode, int arg)
+{
+ int opcode_array[] = {
+ [OPCODE_ADD] = 2,
+ [OPCODE_CMP] = 3,
+ [OPCODE_DP3] = 2,
+ [OPCODE_DP4] = 2,
+ [OPCODE_DPH] = 2,
+ [OPCODE_MAX] = 2,
+ [OPCODE_MIN] = 2,
+ [OPCODE_MOV] = 1,
+ [OPCODE_MUL] = 2,
+ [OPCODE_SEQ] = 2,
+ [OPCODE_SGE] = 2,
+ [OPCODE_SGT] = 2,
+ [OPCODE_SLE] = 2,
+ [OPCODE_SLT] = 2,
+ [OPCODE_SNE] = 2,
+ [OPCODE_XPD] = 2,
+ };
+
+ /* These opcodes get broken down in a way that allow two
+ * args to be immediates.
+ */
+ if (opcode == OPCODE_MAD || opcode == OPCODE_LRP) {
+ if (arg == 1 || arg == 2)
+ return GL_TRUE;
+ }
+
+ if (opcode > ARRAY_SIZE(opcode_array))
+ return GL_FALSE;
-/* Payload R0:
+ return arg == opcode_array[opcode] - 1;
+}
+
+/**
+ * Computes the screen-space x,y position of the pixels.
+ *
+ * This will be used by emit_delta_xy() or emit_wpos_xy() for
+ * interpolation of attributes..
+ *
+ * Payload R0:
*
* R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
* corresponding to each of the 16 execution channels.
@@ -60,7 +121,6 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
* R1.7 -- ?
* R1.8 -- ?
*/
-
void emit_pixel_xy(struct brw_wm_compile *c,
const struct brw_reg *dst,
GLuint mask)
@@ -100,7 +160,14 @@ void emit_pixel_xy(struct brw_wm_compile *c,
brw_pop_insn_state(p);
}
-
+/**
+ * Computes the screen-space x,y distance of the pixels from the start
+ * vertex.
+ *
+ * This will be used in linterp or pinterp with the start vertex value
+ * and the Cx, Cy, and C0 coefficients passed in from the setup engine
+ * to produce interpolated attribute values.
+ */
void emit_delta_xy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
@@ -108,25 +175,27 @@ void emit_delta_xy(struct brw_compile *p,
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
- /* Calc delta X,Y by subtracting origin in r1 from the pixel
- * centers.
- */
- if (mask & WRITEMASK_X) {
- brw_ADD(p,
- dst[0],
- retype(arg0[0], BRW_REGISTER_TYPE_UW),
- negate(r1));
- }
+ if (mask == 0)
+ return;
- if (mask & WRITEMASK_Y) {
- brw_ADD(p,
- dst[1],
- retype(arg0[1], BRW_REGISTER_TYPE_UW),
- negate(suboffset(r1,1)));
+ assert(mask == WRITEMASK_XY);
- }
+ /* Calc delta X,Y by subtracting origin in r1 from the pixel
+ * centers produced by emit_pixel_xy().
+ */
+ brw_ADD(p,
+ dst[0],
+ retype(arg0[0], BRW_REGISTER_TYPE_UW),
+ negate(r1));
+ brw_ADD(p,
+ dst[1],
+ retype(arg0[1], BRW_REGISTER_TYPE_UW),
+ negate(suboffset(r1,1)));
}
+/**
+ * Computes the pixel offset from the window origin for gl_FragCoord().
+ */
void emit_wpos_xy(struct brw_wm_compile *c,
const struct brw_reg *dst,
GLuint mask,
@@ -134,9 +203,6 @@ void emit_wpos_xy(struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
- /* Calculate the pixel offset from window bottom left into destination
- * X and Y channels.
- */
if (mask & WRITEMASK_X) {
if (c->fp->program.PixelCenterInteger) {
/* X' = X */
@@ -186,6 +252,7 @@ void emit_pixel_w(struct brw_wm_compile *c,
const struct brw_reg *deltas)
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
/* Don't need this if all you are doing is interpolating color, for
* instance.
@@ -196,8 +263,12 @@ void emit_pixel_w(struct brw_wm_compile *c,
/* Calc 1/w - just linterp wpos[3] optimized by putting the
* result straight into a message reg.
*/
- brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
- brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+ if (can_do_pln(intel, deltas)) {
+ brw_PLN(p, brw_message_reg(2), interp3, deltas[0]);
+ } else {
+ brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
+ brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+ }
/* Calc w */
if (c->dispatch_width == 16) {
@@ -224,6 +295,7 @@ void emit_linterp(struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *deltas)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
GLuint i;
@@ -235,8 +307,12 @@ void emit_linterp(struct brw_compile *p,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
- brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ if (can_do_pln(intel, deltas)) {
+ brw_PLN(p, dst[i], interp[i], deltas[0]);
+ } else {
+ brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+ brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ }
}
}
}
@@ -249,6 +325,7 @@ void emit_pinterp(struct brw_compile *p,
const struct brw_reg *deltas,
const struct brw_reg *w)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg interp[4];
GLuint nr = arg0[0].nr;
GLuint i;
@@ -260,8 +337,12 @@ void emit_pinterp(struct brw_compile *p,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
- brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ if (can_do_pln(intel, deltas)) {
+ brw_PLN(p, dst[i], interp[i], deltas[0]);
+ } else {
+ brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+ brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+ }
}
}
for (i = 0; i < 4; i++) {
@@ -577,14 +658,10 @@ void emit_cmp(struct brw_compile *p,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_MOV(p, dst[i], arg2[i]);
- brw_set_saturate(p, 0);
-
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_MOV(p, dst[i], arg1[i]);
+ brw_SEL(p, dst[i], arg1[i], arg2[i]);
brw_set_saturate(p, 0);
brw_set_predicate_control_flag_value(p, 0xff);
}
@@ -601,14 +678,10 @@ void emit_max(struct brw_compile *p,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_MOV(p, dst[i], arg0[i]);
- brw_set_saturate(p, 0);
-
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]);
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_MOV(p, dst[i], arg1[i]);
+ brw_SEL(p, dst[i], arg0[i], arg1[i]);
brw_set_saturate(p, 0);
brw_set_predicate_control_flag_value(p, 0xff);
}
@@ -625,14 +698,10 @@ void emit_min(struct brw_compile *p,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_MOV(p, dst[i], arg1[i]);
- brw_set_saturate(p, 0);
-
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
- brw_MOV(p, dst[i], arg0[i]);
+ brw_SEL(p, dst[i], arg0[i], arg1[i]);
brw_set_saturate(p, 0);
brw_set_predicate_control_flag_value(p, 0xff);
}
@@ -1086,11 +1155,19 @@ static void emit_kil( struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
- GLuint i;
-
- /* XXX - usually won't need 4 compares!
- */
+ GLuint i, j;
+
for (i = 0; i < 4; i++) {
+ /* Check if we've already done the comparison for this reg
+ * -- common when someone does KIL TEMP.wwww.
+ */
+ for (j = 0; j < i; j++) {
+ if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0)
+ break;
+ }
+ if (j != i)
+ continue;
+
brw_push_insn_state(p);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
brw_set_predicate_control_flag_value(p, 0xff);