diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_wm_glsl.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_glsl.c | 628 |
1 files changed, 428 insertions, 200 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 4cf092226c..575cd45d57 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -192,28 +192,42 @@ static void prealloc_reg(struct brw_wm_compile *c) /* constants */ { const int nr_params = c->fp->program.Base.Parameters->NumParameters; - const struct gl_program_parameter_list *plist = - c->fp->program.Base.Parameters; - int index = 0; - - /* number of float constants */ - c->prog_data.nr_params = 4 * nr_params; - - /* loop over program constants (float[4]) */ - for (i = 0; i < nr_params; i++) { - /* loop over XYZW channels */ - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); - /* Save pointer to parameter/constant value. - * Constants will be copied in prepare_constant_buffer() - */ - c->prog_data.param[index] = &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); - } - } - /* number of constant regs used (each reg is float[8]) */ - c->nr_creg = 2 * ((4 * nr_params + 15) / 16); - c->reg_index += c->nr_creg; + + /* use a real constant buffer, or just use a section of the GRF? */ + c->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/ + + if (c->use_const_buffer) { + /* We'll use a real constant buffer and fetch constants from + * it with a dataport read message. + */ + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 0; + } + else { + const struct gl_program_parameter_list *plist = + c->fp->program.Base.Parameters; + int index = 0; + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 4 * nr_params; + + /* loop over program constants (float[4]) */ + for (i = 0; i < nr_params; i++) { + /* loop over XYZW channels */ + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + /* Save pointer to parameter/constant value. + * Constants will be copied in prepare_constant_buffer() + */ + c->prog_data.param[index] = &plist->ParameterValues[i][j]; + set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + } + } + /* number of constant regs used (each reg is float[8]) */ + c->nr_creg = 2 * ((4 * nr_params + 15) / 16); + c->reg_index += c->nr_creg; + } } /* fragment shader inputs */ @@ -234,6 +248,81 @@ static void prealloc_reg(struct brw_wm_compile *c) c->reg_index++; c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); c->reg_index += 2; + + /* An instruction may reference up to three constants. + * They'll be found in these registers. + * XXX alloc these on demand! + */ + if (c->use_const_buffer) { + c->current_const[0].reg = alloc_tmp(c); + c->current_const[1].reg = alloc_tmp(c); + c->current_const[2].reg = alloc_tmp(c); + } + /* + printf("USE CONST BUFFER? %d\n", c->use_const_buffer); + printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); + */ +} + + +/** + * Check if any of the instruction's src registers are constants, uniforms, + * or statevars. If so, fetch any constants that we don't already have in + * the three GRF slots. + */ +static void fetch_constants(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint i; + + /* loop over instruction src regs */ + for (i = 0; i < 3; i++) { + const struct prog_src_register *src = &inst->SrcReg[i]; + if (src->File == PROGRAM_STATE_VAR || + src->File == PROGRAM_CONSTANT || + src->File == PROGRAM_UNIFORM) { + if (c->current_const[i].index != src->Index) { + + c->current_const[i].index = src->Index; + /*c->current_const[i].reg = alloc_tmp(c);*/ + + /* + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, i, c->current_const[i].reg.nr); + */ + + /* need to fetch the constant now */ + brw_dp_READ_4(p, + c->current_const[i].reg, /* writeback dest */ + 1, /* msg_reg */ + src->RelAddr, /* relative indexing? */ + 16 * src->Index, /* byte offset */ + BRW_WM_MAX_SURF - 1 /* binding table index */ + ); + +#if 0 + /* dependency stall */ + { + int response_length = 1; + int mark = mark_tmps( c ); + struct brw_reg src = c->current_const[i].reg; + struct brw_reg tmp = alloc_tmp(c); + + /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } + */ + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, tmp, src); + brw_MOV(p, src, tmp); + brw_pop_insn_state(p); + + release_tmps( c, mark ); + } +#endif + } + } + } } @@ -241,24 +330,112 @@ static void prealloc_reg(struct brw_wm_compile *c) * Convert Mesa dst register to brw register. */ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - struct prog_instruction *inst, int component, int nr) + const struct prog_instruction *inst, + GLuint component) { + const int nr = 1; return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, 0, 0); } +static struct brw_reg +get_src_reg_const(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint component) +{ + /* We should have already fetched the constant from the constant + * buffer in fetch_constants(). Now we just have to return a + * register description that extracts the needed component and + * smears it across all eight vector components. + */ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + struct brw_reg const_reg; + + assert(component < 4); + assert(srcRegIndex < 3); + assert(c->current_const[srcRegIndex].index != -1); + const_reg = c->current_const[srcRegIndex].reg; + + /* extract desired float from the const_reg, and smear */ + const_reg = stride(const_reg, 0, 1, 0); + const_reg.subnr = component * 4; + + if (src->NegateBase) + const_reg = negate(const_reg); + if (src->Abs) + const_reg = brw_abs(const_reg); + + /* + printf(" form const[%d] for arg %d, comp %d, reg %d\n", + c->current_const[srcRegIndex].index, + srcRegIndex, + component, + const_reg.nr); + */ + + return const_reg; +} + + /** * Convert Mesa src register to brw register. */ static struct brw_reg get_src_reg(struct brw_wm_compile *c, - struct prog_src_register *src, int index, int nr) -{ - int component = GET_SWZ(src->Swizzle, index); - return get_reg(c, src->File, src->Index, component, nr, - src->NegateBase, src->Abs); + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint channel) +{ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + const GLuint nr = 1; + const GLuint component = GET_SWZ(src->Swizzle, channel); + + if (c->use_const_buffer && + (src->File == PROGRAM_STATE_VAR || + src->File == PROGRAM_CONSTANT || + src->File == PROGRAM_UNIFORM)) { + return get_src_reg_const(c, inst, srcRegIndex, component); + } + else { + /* other type of source register */ + return get_reg(c, src->File, src->Index, component, nr, + src->NegateBase, src->Abs); + } +} + + +/** + * Same as \sa get_src_reg() but if the register is a literal, emit + * a brw_reg encoding the literal. + * Note that a brw instruction only allows one src operand to be a literal. + * For instructions with more than one operand, only the second can be a + * literal. This means that we treat some literals as constants/uniforms + * (which why PROGRAM_CONSTANT is checked in fetch_constants()). + * + */ +static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint channel) +{ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + if (src->File == PROGRAM_CONSTANT) { + /* a literal */ + const int component = GET_SWZ(src->Swizzle, channel); + const GLfloat *param = + c->fp->program.Base.Parameters->ParameterValues[src->Index]; + GLfloat value = param[component]; + if (src->NegateBase) + value = -value; + if (src->Abs) + value = FABSF(value); + /*printf(" form imm reg %f\n", value);*/ + return brw_imm_f(value); + } + else { + return get_src_reg(c, inst, srcRegIndex, channel); + } } + /** * Subroutines are minimal support for resusable instruction sequences. * They are implemented as simply as possible to minimise overhead: there @@ -332,8 +509,8 @@ static void emit_abs( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (inst->DstReg.WriteMask & (1<<i)) { struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1); - src = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, inst, 0, i); brw_MOV(p, dst, brw_abs(src)); } } @@ -350,8 +527,8 @@ static void emit_trunc( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1) ; - src = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, inst, 0, i); brw_RNDZ(p, dst, src); } } @@ -368,8 +545,8 @@ static void emit_mov( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1); - src = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src = get_src_reg_imm(c, inst, 0, i); brw_MOV(p, dst, src); } } @@ -386,8 +563,8 @@ static void emit_pixel_xy(struct brw_wm_compile *c, struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ @@ -414,10 +591,10 @@ static void emit_delta_xy(struct brw_wm_compile *c, struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1); + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); + src0 = get_src_reg(c, inst, 0, 0); + src1 = get_src_reg(c, inst, 0, 1); /* Calc delta X,Y by subtracting origin in r1 from the pixel * centers. */ @@ -482,7 +659,7 @@ static void emit_fb_write(struct brw_wm_compile *c, brw_push_insn_state(p); for (channel = 0; channel < 4; channel++) { - src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); + src0 = get_src_reg(c, inst, 0, channel); /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ brw_MOV(p, brw_message_reg(nr + channel), src0); @@ -493,11 +670,11 @@ static void emit_fb_write(struct brw_wm_compile *c, if (c->key.source_depth_to_render_target) { if (c->key.computes_depth) { - src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); + src0 = get_src_reg(c, inst, 2, 2); brw_MOV(p, brw_message_reg(nr), src0); } else { - src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + src0 = get_src_reg(c, inst, 1, 1); brw_MOV(p, brw_message_reg(nr), src0); } @@ -524,7 +701,7 @@ static void emit_fb_write(struct brw_wm_compile *c, else #endif { - struct brw_reg src = get_src_reg(c, &inst->SrcReg[1], 1, 1); + struct brw_reg src = get_src_reg(c, inst, 1, 1); brw_MOV(p, brw_message_reg(nr), src); } nr += 2; @@ -544,10 +721,10 @@ static void emit_pixel_w( struct brw_wm_compile *c, struct brw_reg dst, src0, delta0, delta1; struct brw_reg interp3; - dst = get_dst_reg(c, inst, 3, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + dst = get_dst_reg(c, inst, 3); + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); interp3 = brw_vec1_grf(src0.nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the @@ -575,9 +752,9 @@ static void emit_linterp(struct brw_wm_compile *c, struct brw_reg src0; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); @@ -587,7 +764,7 @@ static void emit_linterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_LINE(p, brw_null_reg(), interp[i], delta0); brw_MAC(p, dst, suboffset(interp[i],1), delta1); } @@ -604,7 +781,7 @@ static void emit_cinterp(struct brw_wm_compile *c, struct brw_reg dst, src0; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src0 = get_src_reg(c, inst, 0, 0); nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); @@ -614,7 +791,7 @@ static void emit_cinterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, suboffset(interp[i],3)); } } @@ -631,10 +808,10 @@ static void emit_pinterp(struct brw_wm_compile *c, struct brw_reg src0, w; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - w = get_src_reg(c, &inst->SrcReg[2], 3, 1); + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + w = get_src_reg(c, inst, 2, 3); nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); @@ -644,7 +821,7 @@ static void emit_pinterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_LINE(p, brw_null_reg(), interp[i], delta0); brw_MAC(p, dst, suboffset(interp[i],1), delta1); @@ -653,6 +830,36 @@ static void emit_pinterp(struct brw_wm_compile *c, } } +/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ +static void emit_frontfacing(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); + struct brw_reg dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, brw_imm_f(0.0)); + } + } + + /* bit 31 is "primitive is back face", so checking < (1 << 31) gives + * us front face + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31)); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, brw_imm_f(1.0)); + } + } + brw_set_predicate_control_flag_value(p, 0xff); +} + static void emit_xpd(struct brw_wm_compile *c, struct prog_instruction *inst) { @@ -664,12 +871,12 @@ static void emit_xpd(struct brw_wm_compile *c, GLuint i1 = (i+1)%3; if (mask & (1<<i)) { struct brw_reg src0, src1, dst; - dst = get_dst_reg(c, inst, i, 1); - src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1)); - src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1); + dst = get_dst_reg(c, inst, i); + src0 = negate(get_src_reg(c, inst, 0, i2)); + src1 = get_src_reg_imm(c, inst, 1, i1); brw_MUL(p, brw_null_reg(), src0, src1); - src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1); + src0 = get_src_reg(c, inst, 0, i1); + src1 = get_src_reg_imm(c, inst, 1, i2); brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); brw_MAC(p, dst, src0, src1); brw_set_saturate(p, 0); @@ -685,11 +892,11 @@ static void emit_dp3(struct brw_wm_compile *c, int i; struct brw_compile *p = &c->func; for (i = 0; i < 3; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); @@ -704,10 +911,10 @@ static void emit_dp4(struct brw_wm_compile *c, int i; struct brw_compile *p = &c->func; for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, brw_null_reg(), src0[2], src1[2]); @@ -723,10 +930,10 @@ static void emit_dph(struct brw_wm_compile *c, int i; struct brw_compile *p = &c->func; for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, dst, src0[2], src1[2]); @@ -751,7 +958,7 @@ static void emit_math1(struct brw_wm_compile *c, tmp = alloc_tmp(c); /* Get first component of source register */ - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src0 = get_src_reg(c, inst, 0, 0); /* tmp = func(src0) */ brw_MOV(p, brw_message_reg(2), src0); @@ -769,7 +976,7 @@ static void emit_math1(struct brw_wm_compile *c, /* replicate tmp value across enabled dest channels */ for (i = 0; i < 4; i++) { if (inst->DstReg.WriteMask & (1 << i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, tmp); } } @@ -823,15 +1030,28 @@ static void emit_add(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_ADD(p, dst, src0, src1); } } brw_set_saturate(p, 0); } +static void emit_arl(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, addr_reg; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, 0); + src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */ + brw_MOV(p, addr_reg, src0); + brw_set_saturate(p, 0); +} + static void emit_sub(struct brw_wm_compile *c, struct prog_instruction *inst) { @@ -842,9 +1062,9 @@ static void emit_sub(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_ADD(p, dst, src0, negate(src1)); } } @@ -861,9 +1081,9 @@ static void emit_mul(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_MUL(p, dst, src0, src1); } } @@ -880,8 +1100,8 @@ static void emit_frc(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); brw_FRC(p, dst, src0); } } @@ -899,68 +1119,61 @@ static void emit_flr(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); brw_RNDD(p, dst, src0); } } brw_set_saturate(p, 0); } -static void emit_max(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg src0, src1, dst; - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } - brw_pop_insn_state(p); -} -static void emit_min(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_min_max(struct brw_wm_compile *c, + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg src0, src1, dst; + const GLuint mask = inst->DstReg.WriteMask; + const int mark = mark_tmps(c); int i; brw_push_insn_state(p); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + struct brw_reg real_dst = get_dst_reg(c, inst, i); + struct brw_reg src0 = get_src_reg(c, inst, 0, i); + struct brw_reg src1 = get_src_reg(c, inst, 1, i); + struct brw_reg dst; + /* if dst==src0 or dst==src1 we need to use a temp reg */ + GLboolean use_temp = brw_same_reg(dst, src0) || + brw_same_reg(dst, src1); + if (use_temp) + dst = alloc_tmp(c); + else + dst = real_dst; + + /* + printf(" Min/max: dst %d src0 %d src1 %d\n", + dst.nr, src0.nr, src1.nr); + */ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_MOV(p, dst, src0); brw_set_saturate(p, 0); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + if (inst->Opcode == OPCODE_MIN) + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + else + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_MOV(p, dst, src1); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); + if (use_temp) + brw_MOV(p, real_dst, dst); } } brw_pop_insn_state(p); + release_tmps(c, mark); } static void emit_pow(struct brw_wm_compile *c, @@ -968,9 +1181,9 @@ static void emit_pow(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + src0 = get_src_reg_imm(c, inst, 0, 0); + src1 = get_src_reg_imm(c, inst, 1, 0); brw_MOV(p, brw_message_reg(2), src0); brw_MOV(p, brw_message_reg(3), src1); @@ -995,10 +1208,10 @@ static void emit_lrp(struct brw_wm_compile *c, int mark = mark_tmps(c); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + src1 = get_src_reg_imm(c, inst, 1, i); if (src1.nr == dst.nr) { tmp1 = alloc_tmp(c); @@ -1006,7 +1219,7 @@ static void emit_lrp(struct brw_wm_compile *c, } else tmp1 = src1; - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + src2 = get_src_reg(c, inst, 2, i); if (src2.nr == dst.nr) { tmp2 = alloc_tmp(c); brw_MOV(p, tmp2, src2); @@ -1048,10 +1261,10 @@ static void emit_mad(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); + src2 = get_src_reg_imm(c, inst, 2, i); brw_MUL(p, dst, src0, src1); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); @@ -1071,9 +1284,9 @@ static void emit_sop(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), cond, src0, src1); brw_set_predicate_control(p, BRW_PREDICATE_NONE); @@ -1130,8 +1343,8 @@ static void emit_ddx(struct brw_wm_compile *c, struct brw_reg dst; struct brw_reg src0, w; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + src0 = get_src_reg(c, inst, 0, 0); + w = get_src_reg(c, inst, 1, 3); nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -1140,7 +1353,7 @@ static void emit_ddx(struct brw_wm_compile *c, brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, interp[i]); brw_MUL(p, dst, dst, w); } @@ -1158,9 +1371,9 @@ static void emit_ddy(struct brw_wm_compile *c, struct brw_reg src0, w; GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); + src0 = get_src_reg(c, inst, 0, 0); nr = src0.nr; - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); + w = get_src_reg(c, inst, 1, 3); interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); interp[2] = brw_vec1_grf(nr+1, 0); @@ -1168,7 +1381,7 @@ static void emit_ddy(struct brw_wm_compile *c, brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, suboffset(interp[i], 1)); brw_MUL(p, dst, dst, w); } @@ -1302,7 +1515,7 @@ static void emit_noise1( struct brw_wm_compile *c, assert( mark == 0 ); - src = get_src_reg( c, inst->SrcReg, 0, 1 ); + src = get_src_reg( c, inst, 0, 0 ); param = alloc_tmp( c ); @@ -1314,7 +1527,7 @@ static void emit_noise1( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param ); } } @@ -1472,8 +1685,8 @@ static void emit_noise2( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -1487,7 +1700,7 @@ static void emit_noise2( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -1775,9 +1988,9 @@ static void emit_noise3( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); - src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + src2 = get_src_reg( c, inst, 0, 2 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -1793,7 +2006,7 @@ static void emit_noise3( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -2198,10 +2411,10 @@ static void emit_noise4( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); - src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); - src3 = get_src_reg( c, inst->SrcReg, 3, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + src2 = get_src_reg( c, inst, 0, 2 ); + src3 = get_src_reg( c, inst, 0, 3 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -2219,7 +2432,7 @@ static void emit_noise4( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -2236,11 +2449,11 @@ static void emit_wpos_xy(struct brw_wm_compile *c, GLuint mask = inst->DstReg.WriteMask; struct brw_reg src0[2], dst[2]; - dst[0] = get_dst_reg(c, inst, 0, 1); - dst[1] = get_dst_reg(c, inst, 1, 1); + dst[0] = get_dst_reg(c, inst, 0); + dst[1] = get_dst_reg(c, inst, 1); - src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1); + src0[0] = get_src_reg(c, inst, 0, 0); + src0[1] = get_src_reg(c, inst, 0, 1); /* Calculate the pixel offset from window bottom left into destination * X and Y channels. @@ -2263,7 +2476,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c, } /* TODO - BIAS on SIMD8 not workind yet... + BIAS on SIMD8 not working yet... */ static void emit_txb(struct brw_wm_compile *c, struct prog_instruction *inst) @@ -2271,19 +2484,20 @@ static void emit_txb(struct brw_wm_compile *c, struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - GLuint i; + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); + dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src[i] = get_src_reg(c, inst, 0, i); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */ + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */ + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */ break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: @@ -2297,28 +2511,28 @@ static void emit_txb(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(4), src[2]); break; } - brw_MOV(p, brw_message_reg(5), src[3]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - unit + MAX_DRAW_BUFFERS, /* surface */ - unit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, - 4, - 4, - 0); + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + unit + MAX_DRAW_BUFFERS, /* surface */ + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */ + 4, /* response_length */ + 4, /* msg_length */ + 0); /* eot */ } + static void emit_tex(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - GLuint msg_len; GLuint i, nr; GLuint emit; @@ -2327,10 +2541,9 @@ static void emit_tex(struct brw_wm_compile *c, payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); + dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - + src[i] = get_src_reg(c, inst, 0, i); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: @@ -2349,6 +2562,7 @@ static void emit_tex(struct brw_wm_compile *c, } msg_len = 1; + /* move/load S, T, R coords */ for (i = 0; i < nr; i++) { static const GLuint swz[4] = {0,1,2,2}; if (emit & (1<<i)) @@ -2359,26 +2573,27 @@ static void emit_tex(struct brw_wm_compile *c, } if (shadow) { - brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(6), src[2]); + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */ + brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ } brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - unit + MAX_DRAW_BUFFERS, /* surface */ - unit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, - 4, - shadow ? 6 : 4, - 0); + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + unit + MAX_DRAW_BUFFERS, /* surface */ + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */ + 4, /* response_length */ + shadow ? 6 : 4, /* msg_length */ + 0); /* eot */ if (shadow) brw_MOV(p, dst[3], brw_imm_f(1.0)); } + /** * Resolve subroutine calls after code emit is done. */ @@ -2410,6 +2625,15 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) else brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + /* + _mesa_printf("Inst %d: ", i); + _mesa_print_instruction(inst); + */ + + /* fetch any constants that this instruction needs */ + if (c->use_const_buffer) + fetch_constants(c, inst); + switch (inst->Opcode) { case WM_PIXELXY: emit_pixel_xy(c, inst); @@ -2435,12 +2659,18 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case WM_FB_WRITE: emit_fb_write(c, inst); break; + case WM_FRONTFACING: + emit_frontfacing(c, inst); + break; case OPCODE_ABS: emit_abs(c, inst); break; case OPCODE_ADD: emit_add(c, inst); break; + case OPCODE_ARL: + emit_arl(c, inst); + break; case OPCODE_SUB: emit_sub(c, inst); break; @@ -2489,11 +2719,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_LG2: emit_lg2(c, inst); break; - case OPCODE_MAX: - emit_max(c, inst); - break; case OPCODE_MIN: - emit_min(c, inst); + case OPCODE_MAX: + emit_min_max(c, inst); break; case OPCODE_DDX: emit_ddx(c, inst); |