summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/i965/brw_wm.h63
-rw-r--r--src/gallium/drivers/i965/brw_wm_fp.c871
2 files changed, 698 insertions, 236 deletions
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index 2cd5bb7081..8ee99420aa 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -74,6 +74,7 @@ struct brw_wm_prog_key {
GLuint vp_nr_outputs:6;
GLuint nr_cbufs:3;
+ GLuint has_flow_control:1;
GLuint program_string_id;
};
@@ -176,9 +177,36 @@ struct brw_wm_instruction {
#define MAX_WM_OPCODE (MAX_OPCODE + 9)
#define BRW_FILE_PAYLOAD (TGSI_FILE_COUNT)
-#define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) /* ?? */
+#define PAYLOAD_DEPTH (PIPE_MAX_SHADER_INPUTS) /* ?? */
+
+
+/* Source operand of an internal fragment-program instruction, packed
+ * into bitfields.
+ */
+struct brw_fp_src {
+ unsigned file:4;      /* register file (TGSI_FILE_* or BRW_FILE_PAYLOAD) */
+ unsigned index:16;    /* register index within that file */
+ unsigned swizzle:8;   /* four 2-bit component selectors, X in the low bits */
+ unsigned indirect:1;  /* non-zero for indirect addressing */
+ unsigned negate:1;    /* negate modifier */
+ unsigned abs:1;       /* absolute-value modifier */
+};
+
+/* Destination operand of an internal fragment-program instruction,
+ * packed into bitfields.
+ */
+struct brw_fp_dst {
+ unsigned file:4;      /* register file (TGSI_FILE_*) */
+ unsigned index:16;    /* register index within that file */
+ unsigned writemask:4; /* one enable bit per channel */
+ unsigned indirect:1;  /* non-zero for indirect addressing */
+ unsigned saturate:1;  /* saturate flag for the written result */
+};
+
+/* One instruction of the internal fragment-program representation:
+ * a single destination, up to three sources, and the opcode plus
+ * texture / framebuffer-write side information.
+ */
+struct brw_fp_instruction {
+ struct brw_fp_dst dst;
+ struct brw_fp_src src[3];
+ unsigned opcode:8;
+ unsigned tex_unit:4;    /* texture unit for texture opcodes */
+ unsigned tex_target:4;  /* texture target for texture opcodes */
+ unsigned target:10; /* destination surface for FB_WRITE */
+ unsigned eot:1; /* mark last instruction (usually FB_WRITE) */
+};
-struct brw_passfp_program;
struct brw_wm_compile {
struct brw_compile func;
@@ -198,9 +226,26 @@ struct brw_wm_compile {
* simplifying and adding instructions for interpolation and
* framebuffer writes.
*/
- struct brw_passfp_program *pass_fp;
-
-
+ struct {
+ GLfloat v[4];
+ unsigned nr;
+ } immediate[BRW_WM_MAX_CONST+3];
+ GLuint nr_immediates;
+
+ struct brw_fp_instruction fp_instructions[BRW_WM_MAX_INSN];
+ GLuint nr_fp_insns;
+ GLuint fp_temp;
+ GLuint fp_interp_emitted;
+ GLuint fp_fragcolor_emitted;
+ GLuint fp_first_internal_temp;
+
+ struct brw_fp_src fp_pixel_xy;
+ struct brw_fp_src fp_delta_xy;
+ struct brw_fp_src fp_pixel_w;
+
+
+ /* Subsequent passes using SSA representation:
+ */
struct brw_wm_value vreg[BRW_WM_MAX_VREG];
GLuint nr_vreg;
@@ -213,7 +258,7 @@ struct brw_wm_compile {
} payload;
- const struct brw_wm_ref *pass0_fp_reg[PROGRAM_PAYLOAD+1][256][4];
+ const struct brw_wm_ref *pass0_fp_reg[BRW_FILE_PAYLOAD+1][256][4];
struct brw_wm_ref undef_ref;
struct brw_wm_value undef_value;
@@ -241,7 +286,7 @@ struct brw_wm_compile {
struct {
GLboolean inited;
struct brw_reg reg;
- } wm_regs[PROGRAM_PAYLOAD+1][256][4];
+ } wm_regs[BRW_FILE_PAYLOAD+1][256][4];
GLboolean used_grf[BRW_WM_MAX_GRF];
GLuint first_free_grf;
@@ -258,13 +303,15 @@ struct brw_wm_compile {
GLint index;
struct brw_reg reg;
} current_const[3];
+
+ GLuint error;
};
GLuint brw_wm_nr_args( GLuint opcode );
GLuint brw_wm_is_scalar_result( GLuint opcode );
-void brw_wm_pass_fp( struct brw_wm_compile *c );
+int brw_wm_pass_fp( struct brw_wm_compile *c );
void brw_wm_pass0( struct brw_wm_compile *c );
void brw_wm_pass1( struct brw_wm_compile *c );
void brw_wm_pass2( struct brw_wm_compile *c );
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index 8ba037cdae..57933afbbe 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -31,15 +31,26 @@
#include "pipe/p_shader_tokens.h"
+#include "pipe/p_error.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_util.h"
#include "brw_wm.h"
#include "brw_util.h"
+#include "brw_debug.h"
#define X 0
#define Y 1
#define Z 2
#define W 3
+#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3)
static const char *wm_opcode_strings[] = {
@@ -54,7 +65,294 @@ static const char *wm_opcode_strings[] = {
"FRONTFACING",
};
+/***********************************************************************
+ * Source regs
+ */
+
+/* Build a plain source operand for register 'idx' of 'file':
+ * identity swizzle, no indirection, no negate/abs modifiers.
+ */
+static struct brw_fp_src src_reg(GLuint file, GLuint idx)
+{
+   struct brw_fp_src src;
+
+   /* Modifiers and addressing start out cleared. */
+   src.abs = 0;
+   src.negate = 0;
+   src.indirect = 0;
+
+   src.swizzle = BRW_SWIZZLE_XYZW;
+   src.index = idx;
+   src.file = file;
+
+   return src;
+}
+
+/* Source operand naming the same register (file, index) as 'dst'.
+ * The writemask and saturate flag of 'dst' are not carried over.
+ */
+static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst)
+{
+   struct brw_fp_src same_reg = src_reg(dst.file, dst.index);
+
+   return same_reg;
+}
+
+/* The "no operand" source: register 0 of TGSI_FILE_NULL. */
+static struct brw_fp_src src_undef( void )
+{
+   const GLuint unused_index = 0;
+
+   return src_reg(TGSI_FILE_NULL, unused_index);
+}
+
+/* True when 'src' lives in TGSI_FILE_NULL, i.e. it is the value
+ * produced by src_undef().
+ */
+static GLboolean src_is_undef(struct brw_fp_src src)
+{
+   if (src.file != TGSI_FILE_NULL)
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+/* Compose a new swizzle on top of the one 'reg' already carries:
+ * output channel N selects whatever the existing swizzle selected
+ * for channel x/y/z/w respectively.
+ */
+static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w )
+{
+   const unsigned old = reg.swizzle;
+   const int pick[4] = { x, y, z, w };
+   unsigned composed = 0;
+   int chan;
+
+   /* Two bits per channel, X in the lowest bits. */
+   for (chan = 0; chan < 4; chan++)
+      composed |= GET_SWZ(old, pick[chan]) << (chan * 2);
+
+   reg.swizzle = composed;
+   return reg;
+}
+
+/* Replicate channel 'x' of 'reg' (after its current swizzle) into
+ * all four channels.
+ */
+static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x )
+{
+   const int chan = x;
+
+   return src_swizzle(reg, chan, chan, chan, chan);
+}
+
+/* Return 'src' with the absolute-value modifier set and any negate
+ * modifier cleared.
+ */
+static struct brw_fp_src src_abs( struct brw_fp_src src )
+{
+   struct brw_fp_src result = src;
+
+   result.abs = 1;
+   result.negate = 0;
+   return result;
+}
+
+/* Return 'src' with the negate modifier set and the abs modifier
+ * cleared.  Note this overrides, rather than toggles, any modifiers
+ * already present on 'src'.
+ */
+static struct brw_fp_src src_negate( struct brw_fp_src src )
+{
+   struct brw_fp_src result = src;
+
+   result.abs = 0;
+   result.negate = 1;
+   return result;
+}
+
+
+/* Try to express the 'nr' floats in 'v' as a swizzle of the immediate
+ * vector 'v2' (currently holding '*nr2' values, at most 4), appending
+ * values that are not already present.
+ *
+ * On success returns TRUE, updates v2 / *nr2 with any appended values
+ * and writes the 2-bit-per-channel selectors for channels 0..nr-1 to
+ * '*swizzle'.
+ *
+ * On failure (the merged vector would need more than four entries)
+ * returns FALSE and leaves v2 and *nr2 untouched, so a slot that did
+ * not match is not polluted with a partial expansion.
+ */
+static int match_or_expand_immediate( const float *v,
+                                      unsigned nr,
+                                      float *v2,
+                                      unsigned *nr2,
+                                      unsigned *swizzle )
+{
+   float merged[4];
+   unsigned nr_merged = *nr2;
+   unsigned swz = 0;
+   unsigned i, j;
+
+   *swizzle = 0;
+
+   if (nr_merged > 4)
+      return FALSE;
+
+   /* Work on a private copy; only commit once everything fits. */
+   for (j = 0; j < nr_merged; j++)
+      merged[j] = v2[j];
+
+   for (i = 0; i < nr; i++) {
+      /* First existing entry equal to v[i], if any. */
+      for (j = 0; j < nr_merged; j++) {
+         if (v[i] == merged[j])
+            break;
+      }
+
+      if (j == nr_merged) {
+         if (nr_merged >= 4)
+            return FALSE;
+
+         merged[nr_merged++] = v[i];
+      }
+
+      swz |= j << (i * 2);
+   }
+
+   for (j = *nr2; j < nr_merged; j++)
+      v2[j] = merged[j];
+
+   *nr2 = nr_merged;
+   *swizzle = swz;
+   return TRUE;
+}
+
+
+
+/* Internally generated immediates: overkill...
+ *
+ * Returns a source operand in TGSI_FILE_IMMEDIATE whose swizzle selects
+ * the 'nr' floats in 'v'.  Existing immediate slots are reused (and
+ * expanded) where possible; otherwise a new slot is claimed.
+ *
+ * If no slot can hold the values, sets c->error and returns
+ * src_undef().
+ */
+static struct brw_fp_src src_imm( struct brw_wm_compile *c,
+                                  const GLfloat *v,
+                                  unsigned nr)
+{
+   unsigned i, j;
+   unsigned swizzle;
+
+   /* Could do a first pass where we examine all existing immediates
+    * without expanding.
+    */
+
+   for (i = 0; i < c->nr_immediates; i++) {
+      if (match_or_expand_immediate( v,
+                                     nr,
+                                     c->immediate[i].v,
+                                     &c->immediate[i].nr,
+                                     &swizzle ))
+         goto out;
+   }
+
+   if (c->nr_immediates < Elements(c->immediate)) {
+      i = c->nr_immediates;
+
+      /* Start the fresh slot empty rather than relying on whatever
+       * the array happened to contain.
+       */
+      c->immediate[i].nr = 0;
+
+      if (match_or_expand_immediate( v,
+                                     nr,
+                                     c->immediate[i].v,
+                                     &c->immediate[i].nr,
+                                     &swizzle )) {
+         /* Only claim the slot once it is known to be usable. */
+         c->nr_immediates++;
+         goto out;
+      }
+   }
+
+   c->error = 1;
+   return src_undef();
+
+out:
+   /* Make sure that all referenced elements are from this immediate.
+    * Has the effect of making size-one immediates into scalars.
+    */
+   for (j = nr; j < 4; j++)
+      swizzle |= (swizzle & 0x3) << (j * 2);
+
+   return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ),
+                       GET_SWZ(swizzle, X),
+                       GET_SWZ(swizzle, Y),
+                       GET_SWZ(swizzle, Z),
+                       GET_SWZ(swizzle, W) );
+}
+
+
+
+/* Scalar immediate: a source operand reading the single float 'f'. */
+static struct brw_fp_src src_imm1f( struct brw_wm_compile *c,
+                                    GLfloat f )
+{
+   const GLfloat *value = &f;
+
+   return src_imm(c, value, 1);
+}
+
+/* Four-component immediate: a source operand reading (x, y, z, w). */
+static struct brw_fp_src src_imm4f( struct brw_wm_compile *c,
+                                    GLfloat x,
+                                    GLfloat y,
+                                    GLfloat z,
+                                    GLfloat w)
+{
+   GLfloat vec[4];
+
+   vec[0] = x;
+   vec[1] = y;
+   vec[2] = z;
+   vec[3] = w;
+
+   return src_imm(c, vec, 4);
+}
+
+
+
+/***********************************************************************
+ * Dest regs
+ */
+
+/* Build a plain destination operand for register 'idx' of 'file':
+ * all four channels enabled, no indirection, no saturation.
+ */
+static struct brw_fp_dst dst_reg(GLuint file, GLuint idx)
+{
+   struct brw_fp_dst reg;
+   reg.file = file;
+   reg.index = idx;
+   reg.writemask = BRW_WRITEMASK_XYZW;
+   reg.indirect = 0;
+   /* Every field must be set: the struct is returned by value and
+    * copied into instructions, so a stale saturate bit would leak
+    * into emitted code.
+    */
+   reg.saturate = 0;
+   return reg;
+}
+
+/* Restrict the writemask of 'reg' to the channels also set in 'mask'.
+ * Channels are only ever removed, never added.
+ */
+static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask )
+{
+   struct brw_fp_dst masked = reg;
+
+   masked.writemask = reg.writemask & mask;
+   return masked;
+}
+
+/* The "no destination" operand: register 0 of TGSI_FILE_NULL. */
+static struct brw_fp_dst dst_undef( void )
+{
+   const GLuint unused_index = 0;
+
+   return dst_reg(TGSI_FILE_NULL, unused_index);
+}
+
+/* True when 'dst' lives in TGSI_FILE_NULL, i.e. it is the value
+ * produced by dst_undef().
+ */
+static boolean dst_is_undef( struct brw_fp_dst dst )
+{
+   if (dst.file != TGSI_FILE_NULL)
+      return FALSE;
+
+   return TRUE;
+}
+
+/* Return 'reg' with its saturate flag replaced by 'flag'. */
+static struct brw_fp_dst dst_saturate( struct brw_fp_dst reg, boolean flag )
+{
+   struct brw_fp_dst result = reg;
+
+   result.saturate = flag;
+   return result;
+}
+
+/* Allocate one internal temporary.
+ *
+ * c->fp_temp is a bitmask of internal temporaries in use; the lowest
+ * clear bit is claimed and mapped to TGSI_FILE_TEMPORARY index
+ * fp_first_internal_temp + bit.
+ *
+ * When every bit is taken, ffs() returns 0.  In that case flag the
+ * compile as failed and hand back the undefined destination instead
+ * of shifting by -1 and returning an index below the internal range.
+ */
+static struct brw_fp_dst get_temp( struct brw_wm_compile *c )
+{
+   int bit = ffs( ~c->fp_temp );
+
+   if (!bit) {
+      debug_printf("%s: out of temporaries\n", __FILE__);
+      c->error = 1;
+      return dst_undef();
+   }
+
+   /* Unsigned constant: bit may be 32, and 1<<31 overflows a signed int. */
+   c->fp_temp |= 1u << (bit-1);
+   return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1));
+}
+
+
+/* Return an internal temporary obtained from get_temp() to the pool
+ * by clearing its bit in c->fp_temp.
+ *
+ * Registers that are not internal temporaries (wrong file, index
+ * below fp_first_internal_temp, or beyond the width of the bitmask)
+ * are ignored, so the shift amount is always in range.
+ */
+static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp )
+{
+   GLuint slot;
+
+   if (temp.file != TGSI_FILE_TEMPORARY ||
+       temp.index < c->fp_first_internal_temp)
+      return;
+
+   slot = temp.index - c->fp_first_internal_temp;
+   if (slot >= sizeof(c->fp_temp) * 8)
+      return;
+
+   c->fp_temp &= ~(1u << slot);
+}
+
+
+/***********************************************************************
+ * Instructions
+ */
+
+/* Claim the next free slot in c->fp_instructions.
+ *
+ * If the array is already full, flag the compile as failed via
+ * c->error and hand back the last slot again, so callers still get a
+ * valid pointer to fill in and nothing is written past the array.
+ */
+static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c)
+{
+   if (c->nr_fp_insns >= Elements(c->fp_instructions)) {
+      c->error = 1;
+      return &c->fp_instructions[Elements(c->fp_instructions) - 1];
+   }
+
+   return &c->fp_instructions[c->nr_fp_insns++];
+}
+
+/* Append one instruction to the internal program and fill in its
+ * opcode, destination, texture unit/target and three sources.
+ * Returns the new instruction so the caller can adjust it further.
+ */
+static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c,
+                                               GLuint op,
+                                               struct brw_fp_dst dest,
+                                               GLuint tex_src_unit,
+                                               GLuint tex_src_target,
+                                               struct brw_fp_src src0,
+                                               struct brw_fp_src src1,
+                                               struct brw_fp_src src2 )
+{
+   struct brw_fp_instruction *insn = get_fp_inst(c);
+
+   /* Operands */
+   insn->dst = dest;
+   insn->src[0] = src0;
+   insn->src[1] = src1;
+   insn->src[2] = src2;
+
+   /* Operation and texture info */
+   insn->opcode = op;
+   insn->tex_unit = tex_src_unit;
+   insn->tex_target = tex_src_target;
+
+   return insn;
+}
+
+
+/* Emit a non-texture instruction with three source operands. */
+static INLINE void emit_op3(struct brw_wm_compile *c,
+                            GLuint op,
+                            struct brw_fp_dst dest,
+                            struct brw_fp_src src0,
+                            struct brw_fp_src src1,
+                            struct brw_fp_src src2 )
+{
+   const GLuint no_unit = 0;
+   const GLuint no_target = 0;
+
+   (void) emit_tex_op(c, op, dest, no_unit, no_target, src0, src1, src2);
+}
+
+
+/* Emit a non-texture instruction with two source operands; the third
+ * source slot is filled with src_undef().
+ */
+static INLINE void emit_op2(struct brw_wm_compile *c,
+                            GLuint op,
+                            struct brw_fp_dst dest,
+                            struct brw_fp_src src0,
+                            struct brw_fp_src src1)
+{
+   const struct brw_fp_src unused = src_undef();
+
+   (void) emit_tex_op(c, op, dest, 0, 0, src0, src1, unused);
+}
+
+/* Emit a non-texture instruction with one source operand; the other
+ * two source slots are filled with src_undef().
+ */
+static INLINE void emit_op1(struct brw_wm_compile *c,
+                            GLuint op,
+                            struct brw_fp_dst dest,
+                            struct brw_fp_src src0)
+{
+   const struct brw_fp_src unused1 = src_undef();
+   const struct brw_fp_src unused2 = src_undef();
+
+   (void) emit_tex_op(c, op, dest, 0, 0, src0, unused1, unused2);
+}
+
+/* Emit a non-texture instruction that takes no source operands; all
+ * three source slots are filled with src_undef().
+ */
+static INLINE void emit_op0(struct brw_wm_compile *c,
+                            GLuint op,
+                            struct brw_fp_dst dest)
+{
+   const struct brw_fp_src unused0 = src_undef();
+   const struct brw_fp_src unused1 = src_undef();
+   const struct brw_fp_src unused2 = src_undef();
+
+   (void) emit_tex_op(c, op, dest, 0, 0, unused0, unused1, unused2);
+}
@@ -66,10 +364,10 @@ static const char *wm_opcode_strings[] = {
*/
static void emit_scalar_insn(struct brw_wm_compile *c,
unsigned opcode,
- struct brw_dst dst,
- struct brw_src src0,
- struct brw_src src1,
- struct brw_src src2 )
+ struct brw_fp_dst dst,
+ struct brw_fp_src src0,
+ struct brw_fp_src src1,
+ struct brw_fp_src src2 )
{
unsigned first_chan = ffs(dst.writemask) - 1;
unsigned first_mask = 1 << first_chan;
@@ -77,14 +375,14 @@ static void emit_scalar_insn(struct brw_wm_compile *c,
if (dst.writemask == 0)
return;
- emit_op( c, opcode,
- brw_writemask(dst, first_mask),
- src0, src1, src2 );
+ emit_op3( c, opcode,
+ dst_mask(dst, first_mask),
+ src0, src1, src2 );
if (dst.writemask != first_mask) {
emit_op1(c, TGSI_OPCODE_MOV,
- brw_writemask(dst, ~first_mask),
- src_swizzle1(brw_src(dst), first_chan));
+ dst_mask(dst, ~first_mask),
+ src_scalar(src_reg_from_dst(dst), first_chan));
}
}
@@ -93,11 +391,11 @@ static void emit_scalar_insn(struct brw_wm_compile *c,
* Special instructions for interpolation and other tasks
*/
-static struct ureg_src get_pixel_xy( struct brw_wm_compile *c )
+static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c )
{
- if (src_is_undef(c->pixel_xy)) {
- struct ureg_dst pixel_xy = get_temp(c);
- struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
+ if (src_is_undef(c->fp_pixel_xy)) {
+ struct brw_fp_dst pixel_xy = get_temp(c);
+ struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
/* Emit the out calculations, and hold onto the results. Use
@@ -105,79 +403,85 @@ static struct ureg_src get_pixel_xy( struct brw_wm_compile *c )
*/
/* pixel_xy.xy = PIXELXY payload[0];
*/
- emit_op(c,
- WM_PIXELXY,
- dst_mask(pixel_xy, BRW_WRITEMASK_XY),
- payload_r0_depth,
- src_undef(),
- src_undef());
+ emit_op1(c,
+ WM_PIXELXY,
+ dst_mask(pixel_xy, BRW_WRITEMASK_XY),
+ payload_r0_depth);
- c->pixel_xy = src_reg_from_dst(pixel_xy);
+ c->fp_pixel_xy = src_reg_from_dst(pixel_xy);
}
- return c->pixel_xy;
+ return c->fp_pixel_xy;
}
-static struct ureg_src get_delta_xy( struct brw_wm_compile *c )
+/* Return the cached delta_xy source, emitting the WM_DELTAXY
+ * instruction that computes it on first use.  The result is kept in
+ * c->fp_delta_xy; the temporary holding it is not released here.
+ */
+static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c )
{
- if (src_is_undef(c->delta_xy)) {
- struct ureg_dst delta_xy = get_temp(c);
- struct ureg_src pixel_xy = get_pixel_xy(c);
- struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
+ if (src_is_undef(c->fp_delta_xy)) {
+ struct brw_fp_dst delta_xy = get_temp(c);
+ struct brw_fp_src pixel_xy = get_pixel_xy(c);
+ struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
/* deltas.xy = DELTAXY pixel_xy, payload[0]
*/
- emit_op(c,
+ emit_op3(c,
WM_DELTAXY,
dst_mask(delta_xy, BRW_WRITEMASK_XY),
pixel_xy,
payload_r0_depth,
src_undef());
- c->delta_xy = src_reg_from_dst(delta_xy);
+ c->fp_delta_xy = src_reg_from_dst(delta_xy);
}
- return c->delta_xy;
+ return c->fp_delta_xy;
}
-static struct ureg_src get_pixel_w( struct brw_wm_compile *c )
+/* Return the cached pixel_w source, emitting the WM_PIXELW
+ * instruction that computes it on first use.  The result is kept in
+ * c->fp_pixel_w; the temporary holding it is not released here.
+ */
+static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c )
{
- if (src_is_undef(c->pixel_w)) {
- struct ureg_dst pixel_w = get_temp(c);
- struct ureg_src deltas = get_delta_xy(c);
- struct ureg_src interp_wpos = src_reg(TGSI_FILE_PAYLOAD, FRAG_ATTRIB_WPOS);
+ if (src_is_undef(c->fp_pixel_w)) {
+ struct brw_fp_dst pixel_w = get_temp(c);
+ struct brw_fp_src deltas = get_delta_xy(c);
+
+ /* XXX: assuming position is always first -- valid?
+ */
+ struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0);
/* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
*/
- emit_op(c,
- WM_PIXELW,
- dst_mask(pixel_w, BRW_WRITEMASK_W),
- interp_wpos,
- deltas,
- src_undef());
+ emit_op3(c,
+ WM_PIXELW,
+ dst_mask(pixel_w, BRW_WRITEMASK_W),
+ interp_wpos,
+ deltas,
+ src_undef());
- c->pixel_w = src_reg_from_dst(pixel_w);
+ c->fp_pixel_w = src_reg_from_dst(pixel_w);
}
- return c->pixel_w;
+ return c->fp_pixel_w;
}
+
+/***********************************************************************
+ * Emit INTERP instructions ahead of first use of each attrib.
+ */
+
static void emit_interp( struct brw_wm_compile *c,
+ GLuint idx,
GLuint semantic,
- GLuint semantic_index,
GLuint interp_mode )
{
- struct ureg_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
- struct ureg_src interp = src_reg(TGSI_FILE_PAYLOAD, idx);
- struct ureg_src deltas = get_delta_xy(c);
+ struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
+ struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx);
+ struct brw_fp_src deltas = get_delta_xy(c);
/* Need to use PINTERP on attributes which have been
* multiplied by 1/W in the SF program, and LINTERP on those
* which have not:
*/
switch (semantic) {
- case FRAG_ATTRIB_WPOS:
+ case TGSI_SEMANTIC_POSITION:
/* Have to treat wpos.xy specially:
*/
emit_op1(c,
@@ -218,7 +522,8 @@ static void emit_interp( struct brw_wm_compile *c,
}
break;
- case FRAG_ATTRIB_FOGC:
+
+ case TGSI_SEMANTIC_FOG:
/* Interpolate the fog coordinate */
emit_op3(c,
WM_PINTERP,
@@ -228,17 +533,17 @@ static void emit_interp( struct brw_wm_compile *c,
get_pixel_w(c));
emit_op1(c,
- TGSI_OPCODE_MOV,
- dst_mask(dst, BRW_WRITEMASK_YZ),
- brw_imm1f(0.0));
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_YZ),
+ src_imm1f(c, 0.0));
emit_op1(c,
- TGSI_OPCODE_MOV,
- dst_mask(dst, BRW_WRITEMASK_W),
- brw_imm1f(1.0));
+ TGSI_OPCODE_MOV,
+ dst_mask(dst, BRW_WRITEMASK_W),
+ src_imm1f(c, 1.0));
break;
- case FRAG_ATTRIB_FACE:
+ case TGSI_SEMANTIC_FACE:
/* XXX review/test this case */
emit_op0(c,
WM_FRONTFACING,
@@ -247,15 +552,15 @@ static void emit_interp( struct brw_wm_compile *c,
emit_op1(c,
TGSI_OPCODE_MOV,
dst_mask(dst, BRW_WRITEMASK_YZ),
- brw_imm1f(0.0));
+ src_imm1f(c, 0.0));
emit_op1(c,
TGSI_OPCODE_MOV,
dst_mask(dst, BRW_WRITEMASK_W),
- brw_imm1f(1.0));
+ src_imm1f(c, 1.0));
break;
- case FRAG_ATTRIB_PNTC:
+ case TGSI_SEMANTIC_PSIZE:
/* XXX review/test this case */
emit_op3(c,
WM_PINTERP,
@@ -267,12 +572,12 @@ static void emit_interp( struct brw_wm_compile *c,
emit_op1(c,
TGSI_OPCODE_MOV,
dst_mask(dst, BRW_WRITEMASK_Z),
- brw_imm1f(c->pass_fp, 0.0f));
+ src_imm1f(c, 0.0f));
emit_op1(c,
TGSI_OPCODE_MOV,
dst_mask(dst, BRW_WRITEMASK_W),
- brw_imm1f(c->pass_fp, 1.0f));
+ src_imm1f(c, 1.0f));
break;
default:
@@ -310,11 +615,11 @@ static void emit_interp( struct brw_wm_compile *c,
* Expand various instructions here to simpler forms.
*/
static void precalc_dst( struct brw_wm_compile *c,
- struct brw_dst dst,
- struct brw_src src0,
- struct brw_src src1 )
+ struct brw_fp_dst dst,
+ struct brw_fp_src src0,
+ struct brw_fp_src src1 )
{
- if (dst.WriteMask & BRW_WRITEMASK_Y) {
+ if (dst.writemask & BRW_WRITEMASK_Y) {
/* dst.y = mul src0.y, src1.y
*/
emit_op2(c,
@@ -324,25 +629,22 @@ static void precalc_dst( struct brw_wm_compile *c,
src1);
}
- if (dst.WriteMask & BRW_WRITEMASK_XZ) {
- struct prog_instruction *swz;
- GLuint z = GET_SWZ(src0.Swizzle, Z);
-
+ if (dst.writemask & BRW_WRITEMASK_XZ) {
/* dst.z = mov src0.zzzz
*/
emit_op1(c,
TGSI_OPCODE_MOV,
dst_mask(dst, BRW_WRITEMASK_Z),
- src_swizzle1(src0, Z));
+ src_scalar(src0, Z));
- /* dst.x = immf(1.0)
+ /* dst.x = imm1f(1.0)
*/
emit_op1(c,
TGSI_OPCODE_MOV,
- brw_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
- src_immf(c, 1.0));
+ dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
+ src_imm1f(c, 1.0));
}
- if (dst.WriteMask & BRW_WRITEMASK_W) {
+ if (dst.writemask & BRW_WRITEMASK_W) {
/* dst.w = mov src1.w
*/
emit_op1(c,
@@ -354,22 +656,22 @@ static void precalc_dst( struct brw_wm_compile *c,
+/* Expand LIT: the x and w channels are written directly as the
+ * constant 1.0 (with saturate cleared on that MOV), and only the y
+ * and z channels are left to the real LIT instruction.
+ */
static void precalc_lit( struct brw_wm_compile *c,
- struct ureg_dst dst,
- struct ureg_src src0 )
+ struct brw_fp_dst dst,
+ struct brw_fp_src src0 )
{
- if (dst.WriteMask & BRW_WRITEMASK_XW) {
+ if (dst.writemask & BRW_WRITEMASK_XW) {
/* dst.xw = imm(1.0f)
*/
emit_op1(c,
TGSI_OPCODE_MOV,
- brw_saturate(brw_writemask(dst, BRW_WRITEMASK_XW), 0),
- brw_imm1f(1.0f));
+ dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0),
+ src_imm1f(c, 1.0f));
}
- if (dst.WriteMask & BRW_WRITEMASK_YZ) {
+ if (dst.writemask & BRW_WRITEMASK_YZ) {
emit_op1(c,
TGSI_OPCODE_LIT,
- brw_writemask(dst, BRW_WRITEMASK_YZ),
+ dst_mask(dst, BRW_WRITEMASK_YZ),
src0);
}
}
@@ -382,41 +684,42 @@ static void precalc_lit( struct brw_wm_compile *c,
* instruction itself.
*/
static void precalc_tex( struct brw_wm_compile *c,
- struct brw_dst dst,
+ struct brw_fp_dst dst,
+ unsigned target,
unsigned unit,
- struct brw_src src0 )
+ struct brw_fp_src src0 )
{
- struct ureg_src coord = src_undef();
- struct ureg_dst tmp = dst_undef();
+ struct brw_fp_src coord = src_undef();
+ struct brw_fp_dst tmp = dst_undef();
assert(unit < BRW_MAX_TEX_UNIT);
/* Cubemap: find longest component of coord vector and normalize
* it.
*/
- if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
- struct ureg_src tmpsrc;
+ if (target == TGSI_TEXTURE_CUBE) {
+ struct brw_fp_src tmpsrc;
tmp = get_temp(c);
- tmpsrc = brw_src(tmpcoord)
+ tmpsrc = src_reg_from_dst(tmp);
/* tmp = abs(src0) */
emit_op1(c,
TGSI_OPCODE_MOV,
tmp,
- brw_abs(src0));
+ src_abs(src0));
/* tmp.X = MAX(tmp.X, tmp.Y) */
emit_op2(c, TGSI_OPCODE_MAX,
- brw_writemask(tmp, BRW_WRITEMASK_X),
- src_swizzle1(tmpsrc, X),
- src_swizzle1(tmpsrc, Y));
+ dst_mask(tmp, BRW_WRITEMASK_X),
+ src_scalar(tmpsrc, X),
+ src_scalar(tmpsrc, Y));
/* tmp.X = MAX(tmp.X, tmp.Z) */
emit_op2(c, TGSI_OPCODE_MAX,
- brw_writemask(tmp, BRW_WRITEMASK_X),
+ dst_mask(tmp, BRW_WRITEMASK_X),
tmpsrc,
- src_swizzle1(tmpsrc, Z));
+ src_scalar(tmpsrc, Z));
/* tmp.X = 1 / tmp.X */
emit_op1(c, TGSI_OPCODE_RCP,
@@ -427,11 +730,12 @@ static void precalc_tex( struct brw_wm_compile *c,
emit_op2(c, TGSI_OPCODE_MUL,
tmp,
src0,
- src_swizzle1(tmpsrc, SWIZZLE_X));
+ src_scalar(tmpsrc, X));
coord = tmpsrc;
}
- else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
+ else if (target == TGSI_TEXTURE_RECT ||
+ target == TGSI_TEXTURE_SHADOWRECT) {
/* XXX: need a mechanism for internally generated constants.
*/
coord = src0;
@@ -448,19 +752,18 @@ static void precalc_tex( struct brw_wm_compile *c,
if (c->key.yuvtex_mask & (1 << unit)) {
/* convert ycbcr to RGBA */
GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
- struct ureg_dst dst = inst->DstReg;
- struct ureg_dst tmp = get_temp(c);
- struct ureg_src tmpsrc = src_reg_from_dst(tmp);
- struct ureg_src C0 = ureg_imm4f( c->ureg, -.5, -.0625, -.5, 1.164 );
- struct ureg_src C1 = ureg_imm4f( c->ureg, 1.596, -0.813, 2.018, -.391 );
+ struct brw_fp_dst tmp = get_temp(c);
+ struct brw_fp_src tmpsrc = src_reg_from_dst(tmp);
+ struct brw_fp_src C0 = src_imm4f( c, -.5, -.0625, -.5, 1.164 );
+ struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 );
/* tmp = TEX ...
*/
emit_tex_op(c,
TGSI_OPCODE_TEX,
- brw_saturate(tmp, dst.Saturate),
+ dst_saturate(tmp, dst.saturate),
unit,
- inst->TexSrcTarget,
+ target,
coord,
src_undef(),
src_undef());
@@ -477,7 +780,7 @@ static void precalc_tex( struct brw_wm_compile *c,
emit_op2(c, TGSI_OPCODE_MUL,
dst_mask(tmp, BRW_WRITEMASK_Y),
tmpsrc,
- src_swizzle1(C0, W));
+ src_scalar(C0, W));
/*
* if (UV swaped)
@@ -492,16 +795,16 @@ static void precalc_tex( struct brw_wm_compile *c,
src_swizzle(tmpsrc, Z,Z,X,X) :
src_swizzle(tmpsrc, X,X,Z,Z)),
C1,
- src_swizzle1(tmpsrc, Y));
+ src_scalar(tmpsrc, Y));
/* RGB.y = MAD YUV.z, C1.w, RGB.y
*/
emit_op3(c,
TGSI_OPCODE_MAD,
dst_mask(dst, BRW_WRITEMASK_Y),
- src_swizzle1(tmpsrc, Z),
- src_swizzle1(C1, W),
- src_swizzle1(src_reg_from_dst(dst), Y));
+ src_scalar(tmpsrc, Z),
+ src_scalar(C1, W),
+ src_scalar(src_reg_from_dst(dst), Y));
release_temp(c, tmp);
}
@@ -509,9 +812,9 @@ static void precalc_tex( struct brw_wm_compile *c,
/* ordinary RGBA tex instruction */
emit_tex_op(c,
TGSI_OPCODE_TEX,
- inst->DstReg,
+ dst,
unit,
- inst->TexSrcTarget,
+ target,
coord,
src_undef(),
src_undef());
@@ -523,8 +826,8 @@ static void precalc_tex( struct brw_wm_compile *c,
/* Release this temp if we ended up allocating it:
*/
- if (!brw_dst_is_undef(tmpcoord))
- release_temp(c, tmpcoord);
+ if (!dst_is_undef(tmp))
+ release_temp(c, tmp);
}
@@ -532,13 +835,9 @@ static void precalc_tex( struct brw_wm_compile *c,
* Check if the given TXP instruction really needs the divide-by-W step.
*/
static GLboolean projtex( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
+ unsigned target,
+ struct brw_fp_src src )
{
- const struct ureg_src src = inst->SrcReg[0];
- GLboolean retVal;
-
- assert(inst->Opcode == TGSI_OPCODE_TXP);
-
/* Only try to detect the simplest cases. Could detect (later)
* cases where we are trying to emit code like RCP {1.0}, MUL x,
* {1.0}, and so on.
@@ -546,16 +845,15 @@ static GLboolean projtex( struct brw_wm_compile *c,
* More complex cases than this typically only arise from
* user-provided fragment programs anyway:
*/
- if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
- retVal = GL_FALSE; /* ut2004 gun rendering !?! */
- else if (src.File == TGSI_FILE_INPUT &&
- GET_SWZ(src.Swizzle, W) == W &&
- (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
- retVal = GL_FALSE;
- else
- retVal = GL_TRUE;
-
- return retVal;
+ if (target == TGSI_TEXTURE_CUBE)
+ return GL_FALSE; /* ut2004 gun rendering !?! */
+
+ if (src.file == TGSI_FILE_INPUT &&
+ GET_SWZ(src.swizzle, W) == W &&
+ (c->key.proj_attrib_mask & (1 << src.index)) == 0)
+ return GL_FALSE;
+
+ return GL_TRUE;
}
@@ -563,110 +861,168 @@ static GLboolean projtex( struct brw_wm_compile *c,
* Emit code for TXP.
*/
+/* Expand TXP.  When projtex() says the divide is needed, the
+ * coordinate is multiplied by 1/w into a temporary and a plain TEX
+ * is emitted on that; otherwise TEX is emitted on src0 unchanged.
+ */
static void precalc_txp( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
+ struct brw_fp_dst dst,
+ unsigned target,
+ unsigned unit,
+ struct brw_fp_src src0 )
{
- struct ureg_src src0 = inst->SrcReg[0];
-
- if (projtex(c, inst)) {
- struct ureg_dst tmp = get_temp(c);
- struct prog_instruction tmp_inst;
+ if (projtex(c, target, src0)) {
+ struct brw_fp_dst tmp = get_temp(c);
/* tmp0.w = RCP inst.arg[0][3]
*/
- emit_op(c,
+ emit_op1(c,
TGSI_OPCODE_RCP,
dst_mask(tmp, BRW_WRITEMASK_W),
- src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
- src_undef(),
- src_undef());
+ src_scalar(src0, W));
/* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
*/
- emit_op(c,
- TGSI_OPCODE_MUL,
- dst_mask(tmp, BRW_WRITEMASK_XYZ),
- src0,
- src_swizzle1(src_reg_from_dst(tmp), W),
- src_undef());
+ emit_op2(c,
+ TGSI_OPCODE_MUL,
+ dst_mask(tmp, BRW_WRITEMASK_XYZ),
+ src0,
+ src_scalar(src_reg_from_dst(tmp), W));
- /* dst = precalc(TEX tmp0)
+ /* dst = TEX tmp0
*/
- tmp_inst = *inst;
- tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
- precalc_tex(c, &tmp_inst);
+ precalc_tex(c,
+ dst,
+ target,
+ unit,
+ src_reg_from_dst(tmp));
release_temp(c, tmp);
}
else
{
- /* dst = precalc(TEX src0)
+ /* dst = TEX src0
*/
- precalc_tex(c, inst);
+ precalc_tex(c, dst, target, unit, src0);
}
}
+/* Look up the shader output whose semantic name and semantic index
+ * match, and return it as a source operand in TGSI_FILE_OUTPUT.
+ *
+ * XXX: note this returns a src_reg.
+ */
+static struct brw_fp_src
+find_output_by_semantic( struct brw_wm_compile *c,
+                         unsigned semantic,
+                         unsigned index )
+{
+   const struct tgsi_shader_info *info = &c->fp->info;
+   unsigned slot = 0;
+
+   while (slot < info->num_outputs) {
+      if (info->output_semantic_name[slot] != semantic ||
+          info->output_semantic_index[slot] != index) {
+         slot++;
+         continue;
+      }
+
+      return src_reg( TGSI_FILE_OUTPUT, slot );
+   }
+
+   /* If not found, return some arbitrary immediate value:
+    */
+   return src_imm1f(c, 1.0);
+}
+
+/* Emit one WM_FB_WRITE per colour buffer in c->key.nr_cbufs.  Each
+ * write takes the colour output for that buffer, the payload depth
+ * register and the shader's depth output (position.z).
+ *
+ * The render target index and the EOT marker are packed into one
+ * value: bit 0 is EOT, the bits above it are the target index.
+ */
static void emit_fb_write( struct brw_wm_compile *c )
{
- struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
- struct ureg_src outdepth = src_reg(TGSI_FILE_OUTPUT, FRAG_RESULT_DEPTH);
- struct ureg_src outcolor;
- struct prog_instruction *inst;
+ struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
+ struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0);
GLuint i;
- /* The inst->Aux field is used for FB write target and the EOT marker */
+ outdepth = src_scalar(outdepth, Z);
for (i = 0 ; i < c->key.nr_cbufs; i++) {
- outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
+ struct brw_fp_src outcolor;
+ /* Target index goes above the EOT bit.  (1<<i would set bit 0,
+ * i.e. EOT, on the very first write.)
+ */
+ unsigned target = i<<1;
- inst = emit_op(c, WM_FB_WRITE,
- dst_mask(dst_undef(), 0),
- outcolor,
- payload_r0_depth,
- outdepth);
+ /* Set EOT flag on last inst:
+ */
+ if (i == c->key.nr_cbufs - 1)
+ target |= 1;
+
+ outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
- inst->Aux = (i<<1);
+ /* Use emit_tex_op so that we can specify the inst->tex_target
+ * field, which is abused to contain the FB write target and the
+ * EOT marker
+ *
+ * NOTE(review): the value is passed in the tex_src_unit argument
+ * slot of emit_tex_op, not tex_src_target -- confirm which field
+ * the consumer of WM_FB_WRITE reads.
+ */
+ emit_tex_op(c, WM_FB_WRITE,
+ dst_undef(),
+ target,
+ 0,
+ outcolor,
+ payload_r0_depth,
+ outdepth);
}
-
- /* Set EOT flag on last inst:
- */
- inst->Aux |= 1; //eot
}
+/* Convert a TGSI destination register into the internal brw_fp_dst
+ * form.  'saturate' is the instruction's TGSI saturate mode; only
+ * TGSI_SAT_ZERO_ONE turns on the saturate flag.
+ */
+static struct brw_fp_dst translate_dst( struct brw_wm_compile *c,
+                                        const struct tgsi_full_dst_register *dst,
+                                        unsigned saturate )
+{
+   struct brw_fp_dst result;
+
+   result.saturate = (saturate == TGSI_SAT_ZERO_ONE);
+   result.indirect = dst->DstRegister.Indirect;
+   result.writemask = dst->DstRegister.WriteMask;
+   result.index = dst->DstRegister.Index;
+   result.file = dst->DstRegister.File;
+
+   /* Indirect addressing is only expected through ADDR[0]. */
+   if (result.indirect) {
+      assert(dst->DstRegisterInd.File == TGSI_FILE_ADDRESS);
+      assert(dst->DstRegisterInd.Index == 0);
+   }
+
+   return result;
+}
-/***********************************************************************
- * Emit INTERP instructions ahead of first use of each attrib.
- */
-
-static void validate_src_regs( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
+/* Convert a TGSI source register into the internal brw_fp_src form:
+ * file, index and indirect flag are copied, the four swizzle
+ * selectors are packed two bits each (X lowest), and the TGSI sign
+ * mode is mapped onto the abs/negate modifier pair.
+ */
+static struct brw_fp_src translate_src( struct brw_wm_compile *c,
+ const struct tgsi_full_src_register *src )
{
- GLuint nr_args = brw_wm_nr_args( inst->Opcode );
- GLuint i;
+ struct brw_fp_src out;
+
+ out.file = src->SrcRegister.File;
+ out.index = src->SrcRegister.Index;
+ out.indirect = src->SrcRegister.Indirect;
+
+ out.swizzle = ((src->SrcRegister.SwizzleX << 0) |
+ (src->SrcRegister.SwizzleY << 2) |
+ (src->SrcRegister.SwizzleZ << 4) |
+ (src->SrcRegister.SwizzleW << 6));
+
+ switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) {
+ case TGSI_UTIL_SIGN_CLEAR:
+ out.abs = 1;
+ out.negate = 0;
+ break;
- for (i = 0; i < nr_args; i++) {
- if (inst->SrcReg[i].File == TGSI_FILE_INPUT) {
- GLuint idx = inst->SrcReg[i].Index;
- if (!(c->fp_interp_emitted & (1<<idx))) {
- emit_interp(c, idx);
- c->fp_interp_emitted |= 1<<idx;
- }
- }
+ case TGSI_UTIL_SIGN_SET:
+ out.abs = 1;
+ out.negate = 1;
+ break;
+
+ case TGSI_UTIL_SIGN_TOGGLE:
+ out.abs = 0;
+ out.negate = 1;
+ break;
+
+ case TGSI_UTIL_SIGN_KEEP:
+ default:
+ out.abs = 0;
+ out.negate = 0;
+ break;
}
-}
-
-static void validate_dst_regs( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- if (inst->DstReg.File == TGSI_FILE_OUTPUT) {
- GLuint idx = inst->DstReg.Index;
- if (idx == FRAG_RESULT_COLOR)
- c->fp_fragcolor_emitted |= inst->DstReg.WriteMask;
+
+ /* Indirect addressing is only expected through ADDR[0]. */
+ if (out.indirect) {
+ assert(src->SrcRegisterInd.File == TGSI_FILE_ADDRESS);
+ assert(src->SrcRegisterInd.Index == 0);
}
+
+ return out;
}
@@ -674,59 +1030,78 @@ static void validate_dst_regs( struct brw_wm_compile *c,
+/* Translate one TGSI instruction into the internal representation,
+ * expanding or simplifying selected opcodes on the way.
+ *
+ * All three source slots start out as src_undef() so that opcodes
+ * with fewer operands never pass uninitialised structs on, and each
+ * TGSI source i is translated from FullSrcRegisters[i].
+ */
static void emit_insn( struct brw_wm_compile *c,
const struct tgsi_full_instruction *inst )
{
-
- switch (inst->Opcode) {
+ unsigned opcode = inst->Instruction.Opcode;
+ struct brw_fp_dst dst;
+ struct brw_fp_src src[3];
+ int i;
+
+ dst = translate_dst( c, &inst->FullDstRegisters[0],
+ inst->Instruction.Saturate );
+
+ for (i = 0; i < 3; i++)
+ src[i] = src_undef();
+
+ for (i = 0; i < inst->Instruction.NumSrcRegs && i < 3; i++)
+ src[i] = translate_src( c, &inst->FullSrcRegisters[i] );
+
+ switch (opcode) {
case TGSI_OPCODE_ABS:
emit_op1(c, TGSI_OPCODE_MOV,
dst,
- brw_abs(src[0]));
+ src_abs(src[0]));
break;
case TGSI_OPCODE_SUB:
emit_op2(c, TGSI_OPCODE_ADD,
dst,
src[0],
- brw_negate(src[1]));
+ src_negate(src[1]));
break;
case TGSI_OPCODE_SCS:
emit_op1(c, TGSI_OPCODE_SCS,
- brw_writemask(dst, BRW_WRITEMASK_XY),
+ dst_mask(dst, BRW_WRITEMASK_XY),
src[0]);
break;
case TGSI_OPCODE_DST:
- precalc_dst(c, inst);
+ precalc_dst(c, dst, src[0], src[1]);
break;
case TGSI_OPCODE_LIT:
- precalc_lit(c, inst);
+ precalc_lit(c, dst, src[0]);
break;
case TGSI_OPCODE_TEX:
- precalc_tex(c, inst);
+ /* TGSI texture ops: src[0] is the coordinate, src[1] names the
+ * sampler, whose register index is the sampler unit.
+ */
+ precalc_tex(c, dst,
+ inst->InstructionExtTexture.Texture,
+ src[1].index, /* sampler unit */
+ src[0] );
break;
case TGSI_OPCODE_TXP:
- precalc_txp(c, inst);
+ precalc_txp(c, dst,
+ inst->InstructionExtTexture.Texture,
+ src[1].index, /* sampler unit */
+ src[0] );
break;
case TGSI_OPCODE_TXB:
- out = emit_insn(c, inst);
- out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
- assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
+ /* XXX: TXB not done
+ */
+ precalc_tex(c, dst,
+ inst->InstructionExtTexture.Texture,
+ src[1].index, /* sampler unit */
+ src[0] );
break;
case TGSI_OPCODE_XPD:
emit_op2(c, TGSI_OPCODE_XPD,
- brw_writemask(dst, BRW_WRITEMASK_XYZ),
+ dst_mask(dst, BRW_WRITEMASK_XYZ),
src[0],
src[1]);
break;
case TGSI_OPCODE_KIL:
emit_op1(c, TGSI_OPCODE_KIL,
- brw_writemask(dst_undef(), 0),
+ dst_mask(dst_undef(), 0),
src[0]);
break;
@@ -734,10 +1109,11 @@ static void emit_insn( struct brw_wm_compile *c,
emit_fb_write(c);
break;
default:
- if (brw_wm_is_scalar_result(inst->Opcode))
+ if (!c->key.has_flow_control &&
+ brw_wm_is_scalar_result(opcode))
emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]);
else
- emit_op(c, opcode, dst, src[0], src[1], src[2]);
+ emit_op3(c, opcode, dst, src[0], src[1], src[2]);
break;
}
}
@@ -746,46 +1122,70 @@ static void emit_insn( struct brw_wm_compile *c,
* Initial pass for fragment program code generation.
* This function is used by both the GLSL and non-GLSL paths.
*/
-void brw_wm_pass_fp( struct brw_wm_compile *c )
+int brw_wm_pass_fp( struct brw_wm_compile *c )
{
- struct brw_fragment_program *fp = c->fp;
- GLuint insn;
+ struct brw_fragment_shader *fs = c->fp;
+ struct tgsi_parse_context parse;
+ struct tgsi_full_instruction *inst;
+ struct tgsi_full_declaration *decl;
+ const float *imm;
+ GLuint size;
+ GLuint i;
if (BRW_DEBUG & DEBUG_WM) {
debug_printf("pre-fp:\n");
- tgsi_dump(fp->tokens, 0);
+ tgsi_dump(fs->tokens, 0);
}
- c->pixel_xy = brw_src_undef();
- c->delta_xy = brw_src_undef();
- c->pixel_w = brw_src_undef();
+ c->fp_pixel_xy = src_undef();
+ c->fp_delta_xy = src_undef();
+ c->fp_pixel_w = src_undef();
c->nr_fp_insns = 0;
- c->fp->tex_units_used = 0x0;
+ c->nr_immediates = 0;
/* Loop over all instructions doing assorted simplifications and
* transformations.
*/
- tgsi_parse_init( &parse, tokens );
+ tgsi_parse_init( &parse, fs->tokens );
while( !tgsi_parse_end_of_tokens( &parse ) ) {
tgsi_parse_token( &parse );
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
- /* If branching shader, emit preamble instructions at decl time, as
- * instruction order in the shader does not correspond to the order
- * instructions are executed in the wild.
- *
- * This is where special instructions such as WM_CINTERP,
- * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to compute
- * shader inputs from varying vars.
+ /* Turn input declarations into special WM_* instructions.
*
* XXX: For non-branching shaders, consider deferring variable
* initialization as late as possible to minimize register
* usage. This is how the original BRW driver worked.
+ *
+ * In a branching shader, must emit preamble instructions at decl
+ * time, as instruction order in the shader does not
+ * correspond to the order instructions are executed in the
+ * wild.
+ *
+ * This is where special instructions such as WM_CINTERP,
+ * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
+ * compute shader inputs from the payload registers and pixel
+ * position.
*/
- validate_src_regs(c, inst);
- validate_dst_regs(c, inst);
+ decl = &parse.FullToken.FullDeclaration;
+ if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+ unsigned first, last, mask;
+ unsigned attrib;
+
+ first = decl->DeclarationRange.First;
+ last = decl->DeclarationRange.Last;
+ mask = decl->Declaration.UsageMask;
+
+ for (attrib = first; attrib <= last; attrib++) {
+ emit_interp(c,
+ attrib,
+ decl->Semantic.SemanticName,
+ decl->Declaration.Interpolate );
+ }
+ }
+
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
@@ -795,21 +1195,36 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
* float value per instruction. Just save the data for now
* and use directly later.
*/
+ /* The immediate's floats start at u[0]; the first token of the
+ * immediate is its header, hence NrTokens - 1 values.
+ */
+ imm = &parse.FullToken.FullImmediate.u[0].Float;
+ size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+
+ /* Shader immediates may use at most BRW_WM_MAX_CONST slots; the
+ * array has a few more for internally generated ones.
+ */
+ if (c->nr_immediates >= BRW_WM_MAX_CONST)
+ return PIPE_ERROR_OUT_OF_MEMORY;
+
+ /* Each slot holds four floats. */
+ if (size > 4)
+ size = 4;
+
+ for (i = 0; i < size; i++)
+ c->immediate[c->nr_immediates].v[i] = imm[i];
+
+ for (; i < 4; i++)
+ c->immediate[c->nr_immediates].v[i] = 0.0;
+
+ /* Claim the slot exactly once, after it has been filled. */
+ c->immediate[c->nr_immediates].nr = size;
+ c->nr_immediates++;
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
inst = &parse.FullToken.FullInstruction;
- emit_insn( c, inst );
+ emit_insn(c, inst);
break;
}
}
- c->brw_program = brw_finalize( c->builder );
-
if (BRW_DEBUG & DEBUG_WM) {
debug_printf("pass_fp:\n");
- brw_print_program( c->brw_program );
+ //brw_print_program( c->fp_brw_program );
debug_printf("\n");
}
+
+ return c->error;
}