#include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" #include "util/u_math.h" #include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { c->tmp_index++; c->reg_index = MAX2(c->reg_index, c->tmp_start + c->tmp_index); return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); } static void release_tmps(struct brw_wm_compile *c) { c->tmp_index = 0; } static int is_null( struct brw_reg reg ) { return (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && reg.nr == BRW_ARF_NULL); } static void emit_pixel_xy( struct brw_wm_compile *c ) { if (is_null(c->pixel_xy[0])) { struct brw_compile *p = &c->func; struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); c->pixel_xy[1] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ brw_ADD(p, c->pixel_xy[0], stride(suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); brw_ADD(p, c->pixel_xy[1], stride(suboffset(r1_uw, 5), 2, 4, 0), brw_imm_v(0x11001100)); } } static void emit_delta_xy( struct brw_wm_compile *c ) { if (is_null(c->delta_xy[0])) { struct brw_compile *p = &c->func; struct brw_reg r1 = brw_vec1_grf(1, 0); emit_pixel_xy(c); c->delta_xy[0] = alloc_tmp(c); c->delta_xy[1] = alloc_tmp(c); /* Calc delta X,Y by subtracting origin in r1 from the pixel * centers. */ brw_ADD(p, c->delta_xy[0], retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW), negate(r1)); brw_ADD(p, c->delta_xy[1], retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW), negate(suboffset(r1,1))); } } #if 0 static void emit_pixel_w( struct brw_wm_compile *c ) { if (is_null(c->pixel_w)) { struct brw_compile *p = &c->func; struct brw_reg interp_wpos = c->coef_wpos; c->pixel_w = alloc_tmp(c); emit_delta_xy(c); /* Calc 1/w - just linterp wpos[3] optimized by putting the * result straight into a message reg. */ struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4); brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]); brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]); /* Calc w */ brw_math_16( p, c->pixel_w, BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, 2, brw_null_reg(), BRW_MATH_PRECISION_FULL); } } #endif static void emit_cinterp(struct brw_wm_compile *c, int idx, int mask ) { struct brw_compile *p = &c->func; struct brw_reg interp[4]; struct brw_reg coef = c->payload_coef[idx]; int i; interp[0] = brw_vec1_grf(coef.nr, 0); interp[1] = brw_vec1_grf(coef.nr, 4); interp[2] = brw_vec1_grf(coef.nr+1, 0); interp[3] = brw_vec1_grf(coef.nr+1, 4); for(i = 0; i < 4; i++ ) { if (mask & (1<wm_regs[TGSI_FILE_INPUT][idx][i]; brw_MOV(p, dst, suboffset(interp[i],3)); } } } static void emit_linterp(struct brw_wm_compile *c, int idx, int mask ) { struct brw_compile *p = &c->func; struct brw_reg interp[4]; struct brw_reg coef = c->payload_coef[idx]; int i; emit_delta_xy(c); interp[0] = brw_vec1_grf(coef.nr, 0); interp[1] = brw_vec1_grf(coef.nr, 4); interp[2] = brw_vec1_grf(coef.nr+1, 0); interp[3] = brw_vec1_grf(coef.nr+1, 4); for(i = 0; i < 4; i++ ) { if (mask & (1<wm_regs[TGSI_FILE_INPUT][idx][i]; brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); } } } #if 0 static void emit_pinterp(struct brw_wm_compile *c, int idx, int mask ) { struct brw_compile *p = &c->func; struct brw_reg interp[4]; struct brw_reg coef = c->payload_coef[idx]; int i; get_delta_xy(c); get_pixel_w(c); interp[0] = brw_vec1_grf(coef.nr, 0); interp[1] = brw_vec1_grf(coef.nr, 4); interp[2] = brw_vec1_grf(coef.nr+1, 0); interp[3] = brw_vec1_grf(coef.nr+1, 4); for(i = 0; i < 4; i++ ) { if (mask & (1<delta_xy[0]); brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); brw_MUL(p, dst, dst, c->pixel_w); } } } #endif #if 0 static void emit_wpos( ) { struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); struct tgsi_full_src_register deltas = get_delta_xy(c); struct tgsi_full_src_register arg2; unsigned opcode; opcode = WM_LINTERP; arg2 = src_undef(); /* Have to treat wpos.xy specially: */ emit_op(c, WM_WPOSXY, dst_mask(dst, WRITEMASK_XY), 0, 0, 0, get_pixel_xy(c), src_undef(), src_undef()); dst = dst_mask(dst, WRITEMASK_ZW); /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw */ emit_op(c, WM_LINTERP, dst, 0, 0, 0, interp, deltas, arg2); } #endif /* Perform register allocation: * * -- r0??? * -- passthrough depth regs (and stencil/aa??) * -- curbe ?? * -- inputs (coefficients) * * Use a totally static register allocation. This will perform poorly * but is an easy way to get started (again). */ static void prealloc_reg(struct brw_wm_compile *c) { int i, j; int nr_curbe_regs = 0; /* R0, then some depth related regs: */ for (i = 0; i < c->key.nr_depth_regs; i++) { c->payload_depth[i] = brw_vec8_grf(i*2, 0); c->reg_index += 2; } /* Then a copy of our part of the CURBE entry: */ { int nr_constants = c->fp->info.file_max[TGSI_FILE_CONSTANT] + 1; int index = 0; /* XXX number of constants, or highest numbered constant? */ assert(nr_constants == c->fp->info.file_count[TGSI_FILE_CONSTANT]); c->prog_data.max_const = 4*nr_constants; for (i = 0; i < nr_constants; i++) { for (j = 0; j < 4; j++, index++) c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8, index%8); } nr_curbe_regs = 2*((4*nr_constants+15)/16); c->reg_index += nr_curbe_regs; } /* Adjust for parameter coefficients for position, which are * currently always provided. */ // c->position_coef[i] = brw_vec8_grf(c->reg_index, 0); c->reg_index += 2; /* Next we receive the plane coefficients for parameter * interpolation: */ assert(c->fp->info.file_max[TGSI_FILE_INPUT] == c->fp->info.num_inputs); for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) { c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0); c->reg_index += 2; } c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; c->prog_data.urb_read_length = (c->fp->info.num_inputs + 1) * 2; c->prog_data.curb_read_length = nr_curbe_regs; /* That's the end of the payload, now we can start allocating registers. */ c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); c->reg_index++; c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); c->reg_index += 2; /* Now allocate room for the interpolated inputs and staging * registers for the outputs: */ /* XXX do we want to loop over the _number_ of inputs/outputs or loop * to the highest input/output index that's used? * Probably the same, actually. */ assert(c->fp->info.file_max[TGSI_FILE_INPUT] + 1 == c->fp->info.num_inputs); assert(c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1 == c->fp->info.num_outputs); for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) for (j = 0; j < 4; j++) c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); for (i = 0; i < c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1; i++) for (j = 0; j < 4; j++) c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); /* Beyond this we should only need registers for internal temporaries: */ c->tmp_start = c->reg_index; } /* Need to interpolate fragment program inputs in as a preamble to the * shader. A more sophisticated compiler would do this on demand, but * we'll do it up front: */ void brw_wm_emit_decls(struct brw_wm_compile *c) { struct tgsi_parse_context parse; int done = 0; prealloc_reg(c); tgsi_parse_init( &parse, c->fp->program.tokens ); while( !done && !tgsi_parse_end_of_tokens( &parse ) ) { tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: { const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; unsigned first = decl->DeclarationRange.First; unsigned last = decl->DeclarationRange.Last; unsigned mask = decl->Declaration.UsageMask; /* ? */ unsigned i; if (decl->Declaration.File != TGSI_FILE_INPUT) break; for( i = first; i <= last; i++ ) { switch (decl->Declaration.Interpolate) { case TGSI_INTERPOLATE_CONSTANT: emit_cinterp(c, i, mask); break; case TGSI_INTERPOLATE_LINEAR: emit_linterp(c, i, mask); break; case TGSI_INTERPOLATE_PERSPECTIVE: //emit_pinterp(c, i, mask); emit_linterp(c, i, mask); break; } } break; } case TGSI_TOKEN_TYPE_IMMEDIATE: case TGSI_TOKEN_TYPE_INSTRUCTION: default: done = 1; break; } } tgsi_parse_free (&parse); release_tmps(c); }