summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/i965simple/brw_wm_decl.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/i965simple/brw_wm_decl.c')
-rw-r--r--src/gallium/drivers/i965simple/brw_wm_decl.c392
1 files changed, 392 insertions, 0 deletions
diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c
new file mode 100644
index 0000000000..d50e66f613
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_wm_decl.c
@@ -0,0 +1,392 @@
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+
+static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
+{
+ c->tmp_index++;
+ c->reg_index = MAX2(c->reg_index, c->tmp_start + c->tmp_index);
+ return brw_vec8_grf(c->tmp_start + c->tmp_index, 0);
+}
+
+static void release_tmps(struct brw_wm_compile *c)
+{
+ c->tmp_index = 0;
+}
+
+
+
+static int is_null( struct brw_reg reg )
+{
+ return (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+ reg.nr == BRW_ARF_NULL);
+}
+
+static void emit_pixel_xy( struct brw_wm_compile *c )
+{
+ if (is_null(c->pixel_xy[0])) {
+
+ struct brw_compile *p = &c->func;
+ struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
+
+ c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW));
+ c->pixel_xy[1] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW));
+
+ /* Calculate pixel centers by adding 1 or 0 to each of the
+ * micro-tile coordinates passed in r1.
+ */
+ brw_ADD(p,
+ c->pixel_xy[0],
+ stride(suboffset(r1_uw, 4), 2, 4, 0),
+ brw_imm_v(0x10101010));
+
+ brw_ADD(p,
+ c->pixel_xy[1],
+ stride(suboffset(r1_uw, 5), 2, 4, 0),
+ brw_imm_v(0x11001100));
+ }
+}
+
+
+
+
+
+
+static void emit_delta_xy( struct brw_wm_compile *c )
+{
+ if (is_null(c->delta_xy[0])) {
+ struct brw_compile *p = &c->func;
+ struct brw_reg r1 = brw_vec1_grf(1, 0);
+
+ emit_pixel_xy(c);
+
+ c->delta_xy[0] = alloc_tmp(c);
+ c->delta_xy[1] = alloc_tmp(c);
+
+ /* Calc delta X,Y by subtracting origin in r1 from the pixel
+ * centers.
+ */
+ brw_ADD(p,
+ c->delta_xy[0],
+ retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW),
+ negate(r1));
+
+ brw_ADD(p,
+ c->delta_xy[1],
+ retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW),
+ negate(suboffset(r1,1)));
+ }
+}
+
+
+
+#if 0
+static void emit_pixel_w( struct brw_wm_compile *c )
+{
+ if (is_null(c->pixel_w)) {
+ struct brw_compile *p = &c->func;
+
+ struct brw_reg interp_wpos = c->coef_wpos;
+
+ c->pixel_w = alloc_tmp(c);
+
+ emit_delta_xy(c);
+
+ /* Calc 1/w - just linterp wpos[3] optimized by putting the
+ * result straight into a message reg.
+ */
+ struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4);
+ brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]);
+ brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]);
+
+ /* Calc w */
+ brw_math_16( p,
+ c->pixel_w,
+ BRW_MATH_FUNCTION_INV,
+ BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_PRECISION_FULL);
+ }
+}
+#endif
+
+
+static void emit_cinterp(struct brw_wm_compile *c,
+ int idx,
+ int mask )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg interp[4];
+ struct brw_reg coef = c->payload_coef[idx];
+ int i;
+
+ interp[0] = brw_vec1_grf(coef.nr, 0);
+ interp[1] = brw_vec1_grf(coef.nr, 4);
+ interp[2] = brw_vec1_grf(coef.nr+1, 0);
+ interp[3] = brw_vec1_grf(coef.nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i];
+ brw_MOV(p, dst, suboffset(interp[i],3));
+ }
+ }
+}
+
+static void emit_linterp(struct brw_wm_compile *c,
+ int idx,
+ int mask )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg interp[4];
+ struct brw_reg coef = c->payload_coef[idx];
+ int i;
+
+ emit_delta_xy(c);
+
+ interp[0] = brw_vec1_grf(coef.nr, 0);
+ interp[1] = brw_vec1_grf(coef.nr, 4);
+ interp[2] = brw_vec1_grf(coef.nr+1, 0);
+ interp[3] = brw_vec1_grf(coef.nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i];
+ brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]);
+ brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]);
+ }
+ }
+}
+
+#if 0
+static void emit_pinterp(struct brw_wm_compile *c,
+ int idx,
+ int mask )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg interp[4];
+ struct brw_reg coef = c->payload_coef[idx];
+ int i;
+
+ get_delta_xy(c);
+ get_pixel_w(c);
+
+ interp[0] = brw_vec1_grf(coef.nr, 0);
+ interp[1] = brw_vec1_grf(coef.nr, 4);
+ interp[2] = brw_vec1_grf(coef.nr+1, 0);
+ interp[3] = brw_vec1_grf(coef.nr+1, 4);
+
+ for(i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ struct brw_reg dst = allocate_reg(c, TGSI_FILE_INPUT, idx, i);
+ brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]);
+ brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]);
+ brw_MUL(p, dst, dst, c->pixel_w);
+ }
+ }
+}
+#endif
+
+
+
+#if 0
+static void emit_wpos( )
+{
+ struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
+ struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
+ struct tgsi_full_src_register deltas = get_delta_xy(c);
+ struct tgsi_full_src_register arg2;
+ unsigned opcode;
+
+ opcode = WM_LINTERP;
+ arg2 = src_undef();
+
+ /* Have to treat wpos.xy specially:
+ */
+ emit_op(c,
+ WM_WPOSXY,
+ dst_mask(dst, WRITEMASK_XY),
+ 0, 0, 0,
+ get_pixel_xy(c),
+ src_undef(),
+ src_undef());
+
+ dst = dst_mask(dst, WRITEMASK_ZW);
+
+ /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
+ */
+ emit_op(c,
+ WM_LINTERP,
+ dst,
+ 0, 0, 0,
+ interp,
+ deltas,
+ arg2);
+}
+#endif
+
+
+
+
+/* Perform register allocation:
+ *
+ * -- r0???
+ * -- passthrough depth regs (and stencil/aa??)
+ * -- curbe ??
+ * -- inputs (coefficients)
+ *
+ * Use a totally static register allocation. This will perform poorly
+ * but is an easy way to get started (again).
+ */
+static void prealloc_reg(struct brw_wm_compile *c)
+{
+ int i, j;
+ int nr_curbe_regs = 0;
+
+ /* R0, then some depth related regs:
+ */
+ for (i = 0; i < c->key.nr_depth_regs; i++) {
+ c->payload_depth[i] = brw_vec8_grf(i*2, 0);
+ c->reg_index += 2;
+ }
+
+
+ /* Then a copy of our part of the CURBE entry:
+ */
+ {
+ int nr_constants = c->fp->info.file_max[TGSI_FILE_CONSTANT] + 1;
+ int index = 0;
+
+ /* XXX number of constants, or highest numbered constant? */
+ assert(nr_constants == c->fp->info.file_count[TGSI_FILE_CONSTANT]);
+
+ c->prog_data.max_const = 4*nr_constants;
+ for (i = 0; i < nr_constants; i++) {
+ for (j = 0; j < 4; j++, index++)
+ c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8,
+ index%8);
+ }
+
+ nr_curbe_regs = 2*((4*nr_constants+15)/16);
+ c->reg_index += nr_curbe_regs;
+ }
+
+ /* Adjust for parameter coefficients for position, which are
+ * currently always provided.
+ */
+// c->position_coef[i] = brw_vec8_grf(c->reg_index, 0);
+ c->reg_index += 2;
+
+ /* Next we receive the plane coefficients for parameter
+ * interpolation:
+ */
+ assert(c->fp->info.file_max[TGSI_FILE_INPUT] == c->fp->info.num_inputs);
+ for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
+ c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0);
+ c->reg_index += 2;
+ }
+
+ c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+ c->prog_data.urb_read_length = (c->fp->info.num_inputs + 1) * 2;
+ c->prog_data.curb_read_length = nr_curbe_regs;
+
+ /* That's the end of the payload, now we can start allocating registers.
+ */
+ c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
+ c->reg_index++;
+
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
+ c->reg_index += 2;
+
+ /* Now allocate room for the interpolated inputs and staging
+ * registers for the outputs:
+ */
+ /* XXX do we want to loop over the _number_ of inputs/outputs or loop
+ * to the highest input/output index that's used?
+ * Probably the same, actually.
+ */
+ assert(c->fp->info.file_max[TGSI_FILE_INPUT] + 1 == c->fp->info.num_inputs);
+ assert(c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1 == c->fp->info.num_outputs);
+ for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++)
+ for (j = 0; j < 4; j++)
+ c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 );
+
+ for (i = 0; i < c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1; i++)
+ for (j = 0; j < 4; j++)
+ c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 );
+
+ /* Beyond this we should only need registers for internal temporaries:
+ */
+ c->tmp_start = c->reg_index;
+}
+
+
+
+
+
+/* Need to interpolate fragment program inputs in as a preamble to the
+ * shader. A more sophisticated compiler would do this on demand, but
+ * we'll do it up front:
+ */
+void brw_wm_emit_decls(struct brw_wm_compile *c)
+{
+ struct tgsi_parse_context parse;
+ int done = 0;
+
+ prealloc_reg(c);
+
+ tgsi_parse_init( &parse, c->fp->program.tokens );
+
+ while( !done &&
+ !tgsi_parse_end_of_tokens( &parse ) )
+ {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
+ unsigned first = decl->DeclarationRange.First;
+ unsigned last = decl->DeclarationRange.Last;
+ unsigned mask = decl->Declaration.UsageMask; /* ? */
+ unsigned i;
+
+ if (decl->Declaration.File != TGSI_FILE_INPUT)
+ break;
+
+ for( i = first; i <= last; i++ ) {
+ switch (decl->Declaration.Interpolate) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ emit_cinterp(c, i, mask);
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ emit_linterp(c, i, mask);
+ break;
+
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ //emit_pinterp(c, i, mask);
+ emit_linterp(c, i, mask);
+ break;
+ }
+ }
+ break;
+ }
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ default:
+ done = 1;
+ break;
+ }
+ }
+
+ tgsi_parse_free (&parse);
+
+ release_tmps(c);
+}