summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965/brw_wm_glsl.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_wm_glsl.c')
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c1467
1 files changed, 995 insertions, 472 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 8fd776ac39..c9fe1dd8ad 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -1,5 +1,7 @@
#include "main/macros.h"
#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_optimize.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
@@ -8,21 +10,28 @@ enum _subroutine {
SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4
};
-/* Only guess, need a flag in gl_fragment_program later */
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ GLuint component);
+
+/**
+ * Determine if the given fragment program uses GLSL features such
+ * as flow conditionals, loops, subroutines.
+ * Some GLSL shaders may use these features, others might not.
+ */
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
{
int i;
+
for (i = 0; i < fp->Base.NumInstructions; i++) {
- struct prog_instruction *inst = &fp->Base.Instructions[i];
+ const struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
+ case OPCODE_ARL:
case OPCODE_IF:
- case OPCODE_TRUNC:
case OPCODE_ENDIF:
case OPCODE_CAL:
case OPCODE_BRK:
case OPCODE_RET:
- case OPCODE_DDX:
- case OPCODE_DDY:
case OPCODE_NOISE1:
case OPCODE_NOISE2:
case OPCODE_NOISE3:
@@ -36,6 +45,87 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
return GL_FALSE;
}
+
+
+static void
+reclaim_temps(struct brw_wm_compile *c);
+
+
+/** Mark GRF register as used. */
+static void
+prealloc_grf(struct brw_wm_compile *c, int r)
+{
+ c->used_grf[r] = GL_TRUE;
+}
+
+
+/** Mark given GRF register as not in use. */
+static void
+release_grf(struct brw_wm_compile *c, int r)
+{
+ /*assert(c->used_grf[r]);*/
+ c->used_grf[r] = GL_FALSE;
+ c->first_free_grf = MIN2(c->first_free_grf, r);
+}
+
+
+/** Return index of a free GRF, mark it as used. */
+static int
+alloc_grf(struct brw_wm_compile *c)
+{
+ GLuint r;
+ for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+ if (!c->used_grf[r]) {
+ c->used_grf[r] = GL_TRUE;
+ c->first_free_grf = r + 1; /* a guess */
+ return r;
+ }
+ }
+
+ /* no free temps, try to reclaim some */
+ reclaim_temps(c);
+ c->first_free_grf = 0;
+
+ /* try alloc again */
+ for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+ if (!c->used_grf[r]) {
+ c->used_grf[r] = GL_TRUE;
+ c->first_free_grf = r + 1; /* a guess */
+ return r;
+ }
+ }
+
+ for (r = 0; r < BRW_WM_MAX_GRF; r++) {
+ assert(c->used_grf[r]);
+ }
+
+ /* really, no free GRF regs found */
+ if (!c->out_of_regs) {
+ /* print warning once per compilation */
+ _mesa_warning(NULL, "i965: ran out of registers for fragment program");
+ c->out_of_regs = GL_TRUE;
+ }
+
+ return -1;
+}
+
+
+/** Return number of GRF registers used */
+static int
+num_grf_used(const struct brw_wm_compile *c)
+{
+ int r;
+ for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--)
+ if (c->used_grf[r])
+ return r + 1;
+ return 0;
+}
+
+
+
+/**
+ * Record the mapping of a Mesa register to a hardware register.
+ */
static void set_reg(struct brw_wm_compile *c, int file, int index,
int component, struct brw_reg reg)
{
@@ -43,25 +133,32 @@ static void set_reg(struct brw_wm_compile *c, int file, int index,
c->wm_regs[file][index][component].inited = GL_TRUE;
}
-static int get_scalar_dst_index(struct prog_instruction *inst)
-{
- int i;
- for (i = 0; i < 4; i++)
- if (inst->DstReg.WriteMask & (1<<i))
- break;
- return i;
-}
-
static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
{
struct brw_reg reg;
- if(c->tmp_index == c->tmp_max)
- c->tmp_regs[ c->tmp_max++ ] = c->reg_index++;
-
+
+ /* if we need to allocate another temp, grow the tmp_regs[] array */
+ if (c->tmp_index == c->tmp_max) {
+ int r = alloc_grf(c);
+ if (r < 0) {
+ /*printf("Out of temps in %s\n", __FUNCTION__);*/
+ r = 50; /* XXX random register! */
+ }
+ c->tmp_regs[ c->tmp_max++ ] = r;
+ }
+
+ /* form the GRF register */
reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
+ /*printf("alloc_temp %d\n", reg.nr);*/
+ assert(reg.nr < BRW_WM_MAX_GRF);
return reg;
+
}
+/**
+ * Save current temp register info.
+ * There must be a matching call to release_tmps().
+ */
static int mark_tmps(struct brw_wm_compile *c)
{
return c->tmp_index;
@@ -77,8 +174,22 @@ static void release_tmps(struct brw_wm_compile *c, int mark)
c->tmp_index = mark;
}
+/**
+ * Convert Mesa src register to brw register.
+ *
+ * Since we're running in SOA mode each Mesa register corresponds to four
+ * hardware registers. We allocate the hardware registers as needed here.
+ *
+ * \param file register file, one of PROGRAM_x
+ * \param index register number
+ * \param component src component (X=0, Y=1, Z=2, W=3)
+ * \param nr not used?!?
+ * \param neg negate value?
+ * \param abs take absolute value?
+ */
static struct brw_reg
-get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs)
+get_reg(struct brw_wm_compile *c, int file, int index, int component,
+ int nr, GLuint neg, GLuint abs)
{
struct brw_reg reg;
switch (file) {
@@ -89,21 +200,40 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL
break;
case PROGRAM_UNDEFINED:
return brw_null_reg();
- default:
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_INPUT:
+ case PROGRAM_OUTPUT:
+ case PROGRAM_PAYLOAD:
break;
+ default:
+ _mesa_problem(NULL, "Unexpected file in get_reg()");
+ return brw_null_reg();
}
- if(c->wm_regs[file][index][component].inited)
- reg = c->wm_regs[file][index][component].reg;
- else
- reg = brw_vec8_grf(c->reg_index, 0);
+ assert(index < 256);
+ assert(component < 4);
- if(!c->wm_regs[file][index][component].inited) {
- set_reg(c, file, index, component, reg);
- c->reg_index++;
+ /* see if we've already allocated a HW register for this Mesa register */
+ if (c->wm_regs[file][index][component].inited) {
+ /* yes, re-use */
+ reg = c->wm_regs[file][index][component].reg;
+ }
+ else {
+ /* no, allocate new register */
+ int grf = alloc_grf(c);
+ /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
+ if (grf < 0) {
+ /* totally out of temps */
+ grf = 51; /* XXX random register! */
+ }
+
+ reg = brw_vec8_grf(grf, 0);
+ /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
+
+ set_reg(c, file, index, component, reg);
}
- if (neg & (1<< component)) {
+ if (neg & (1 << component)) {
reg = negate(reg);
}
if (abs)
@@ -111,78 +241,360 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL
return reg;
}
+
+
+/**
+ * This is called if we run out of GRF registers. Examine the live intervals
+ * of temp regs in the program and free those which won't be used again.
+ */
+static void
+reclaim_temps(struct brw_wm_compile *c)
+{
+ GLint intBegin[MAX_PROGRAM_TEMPS];
+ GLint intEnd[MAX_PROGRAM_TEMPS];
+ int index;
+
+ /*printf("Reclaim temps:\n");*/
+
+ _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
+ intBegin, intEnd);
+
+ for (index = 0; index < MAX_PROGRAM_TEMPS; index++) {
+ if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) {
+ /* program temp[i] can be freed */
+ int component;
+ /*printf(" temp[%d] is dead\n", index);*/
+ for (component = 0; component < 4; component++) {
+ if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) {
+ int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr;
+ release_grf(c, r);
+ /*
+ printf(" Reclaim temp %d, reg %d at inst %d\n",
+ index, r, c->cur_inst);
+ */
+ c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE;
+ }
+ }
+ }
+ }
+}
+
+
+
+
+/**
+ * Preallocate registers. This sets up the Mesa to hardware register
+ * mapping for certain registers, such as constants (uniforms/state vars)
+ * and shader inputs.
+ */
static void prealloc_reg(struct brw_wm_compile *c)
{
int i, j;
struct brw_reg reg;
- int nr_interp_regs = 0;
- GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
+ int urb_read_length = 0;
+ GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
+ GLuint reg_index = 0;
+
+ memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
+ c->first_free_grf = 0;
for (i = 0; i < 4; i++) {
- reg = (i < c->key.nr_depth_regs)
- ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0);
+ if (i < c->key.nr_depth_regs)
+ reg = brw_vec8_grf(i * 2, 0);
+ else
+ reg = brw_vec8_grf(0, 0);
set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
}
- c->reg_index += 2*c->key.nr_depth_regs;
+ reg_index += 2 * c->key.nr_depth_regs;
+
+ /* constants */
{
- int nr_params = c->fp->program.Base.Parameters->NumParameters;
- struct gl_program_parameter_list *plist =
- c->fp->program.Base.Parameters;
- int index = 0;
- c->prog_data.nr_params = 4*nr_params;
- for (i = 0; i < nr_params; i++) {
- for (j = 0; j < 4; j++, index++) {
- reg = brw_vec1_grf(c->reg_index + index/8,
- index%8);
- c->prog_data.param[index] =
- &plist->ParameterValues[i][j];
- set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
- }
- }
- c->nr_creg = 2*((4*nr_params+15)/16);
- c->reg_index += c->nr_creg;
+ const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
+ const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
+
+ /* use a real constant buffer, or just use a section of the GRF? */
+ /* XXX this heuristic may need adjustment... */
+ if ((nr_params + nr_temps) * 4 + reg_index > 80)
+ c->fp->use_const_buffer = GL_TRUE;
+ else
+ c->fp->use_const_buffer = GL_FALSE;
+ /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
+
+ if (c->fp->use_const_buffer) {
+ /* We'll use a real constant buffer and fetch constants from
+ * it with a dataport read message.
+ */
+
+ /* number of float constants in CURBE */
+ c->prog_data.nr_params = 0;
+ }
+ else {
+ const struct gl_program_parameter_list *plist =
+ c->fp->program.Base.Parameters;
+ int index = 0;
+
+ /* number of float constants in CURBE */
+ c->prog_data.nr_params = 4 * nr_params;
+
+ /* loop over program constants (float[4]) */
+ for (i = 0; i < nr_params; i++) {
+ /* loop over XYZW channels */
+ for (j = 0; j < 4; j++, index++) {
+ reg = brw_vec1_grf(reg_index + index / 8, index % 8);
+ /* Save pointer to parameter/constant value.
+ * Constants will be copied in prepare_constant_buffer()
+ */
+ c->prog_data.param[index] = &plist->ParameterValues[i][j];
+ set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
+ }
+ }
+ /* number of constant regs used (each reg is float[8]) */
+ c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
+ reg_index += c->nr_creg;
+ }
}
- for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
- if (inputs & (1<<i)) {
- nr_interp_regs++;
- reg = brw_vec8_grf(c->reg_index, 0);
- for (j = 0; j < 4; j++)
- set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
- c->reg_index += 2;
- }
+ /* fragment shader inputs */
+ for (i = 0; i < VERT_RESULT_MAX; i++) {
+ int fp_input;
+
+ if (i >= VERT_RESULT_VAR0)
+ fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0;
+ else if (i <= VERT_RESULT_TEX7)
+ fp_input = i;
+ else
+ fp_input = -1;
+
+ if (fp_input >= 0 && inputs & (1 << fp_input)) {
+ urb_read_length = reg_index;
+ reg = brw_vec8_grf(reg_index, 0);
+ for (j = 0; j < 4; j++)
+ set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
+ }
+ if (c->key.vp_outputs_written & (1 << i)) {
+ reg_index += 2;
+ }
}
+
c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
- c->prog_data.urb_read_length = nr_interp_regs * 2;
+ c->prog_data.urb_read_length = urb_read_length;
c->prog_data.curb_read_length = c->nr_creg;
- c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
- c->reg_index++;
- c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
- c->reg_index += 2;
+ c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+ reg_index++;
+ c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+ reg_index += 2;
+
+ /* mark GRF regs [0..reg_index-1] as in-use */
+ for (i = 0; i < reg_index; i++)
+ prealloc_grf(c, i);
+
+ /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
+ prealloc_grf(c, 126);
+ prealloc_grf(c, 127);
+
+ for (i = 0; i < c->nr_fp_insns; i++) {
+ const struct prog_instruction *inst = &c->prog_instructions[i];
+ struct brw_reg dst[4];
+
+ switch (inst->Opcode) {
+ case OPCODE_TEX:
+ case OPCODE_TXB:
+ /* Allocate the channels of texture results contiguously,
+ * since they are written out that way by the sampler unit.
+ */
+ for (j = 0; j < 4; j++) {
+ dst[j] = get_dst_reg(c, inst, j);
+ if (j != 0)
+ assert(dst[j].nr == dst[j - 1].nr + 1);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* An instruction may reference up to three constants.
+ * They'll be found in these registers.
+ * XXX alloc these on demand!
+ */
+ if (c->fp->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
+ }
+ }
+#if 0
+ printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
+ printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
+#endif
+}
+
+
+/**
+ * Check if any of the instruction's src registers are constants, uniforms,
+ * or statevars. If so, fetch any constants that we don't already have in
+ * the three GRF slots.
+ */
+static void fetch_constants(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ GLuint i;
+
+ /* loop over instruction src regs */
+ for (i = 0; i < 3; i++) {
+ const struct prog_src_register *src = &inst->SrcReg[i];
+ if (src->File == PROGRAM_STATE_VAR ||
+ src->File == PROGRAM_CONSTANT ||
+ src->File == PROGRAM_UNIFORM) {
+ c->current_const[i].index = src->Index;
+
+#if 0
+ printf(" fetch const[%d] for arg %d into reg %d\n",
+ src->Index, i, c->current_const[i].reg.nr);
+#endif
+
+ /* need to fetch the constant now */
+ brw_dp_READ_4(p,
+ c->current_const[i].reg, /* writeback dest */
+ src->RelAddr, /* relative indexing? */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */
+ );
+ }
+ }
}
+
+/**
+ * Convert Mesa dst register to brw register.
+ */
static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
- struct prog_instruction *inst, int component, int nr)
+ const struct prog_instruction *inst,
+ GLuint component)
{
+ const int nr = 1;
return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr,
0, 0);
}
+
+static struct brw_reg
+get_src_reg_const(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ GLuint srcRegIndex, GLuint component)
+{
+ /* We should have already fetched the constant from the constant
+ * buffer in fetch_constants(). Now we just have to return a
+ * register description that extracts the needed component and
+ * smears it across all eight vector components.
+ */
+ const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+ struct brw_reg const_reg;
+
+ assert(component < 4);
+ assert(srcRegIndex < 3);
+ assert(c->current_const[srcRegIndex].index != -1);
+ const_reg = c->current_const[srcRegIndex].reg;
+
+ /* extract desired float from the const_reg, and smear */
+ const_reg = stride(const_reg, 0, 1, 0);
+ const_reg.subnr = component * 4;
+
+ if (src->Negate & (1 << component))
+ const_reg = negate(const_reg);
+ if (src->Abs)
+ const_reg = brw_abs(const_reg);
+
+#if 0
+ printf(" form const[%d].%d for arg %d, reg %d\n",
+ c->current_const[srcRegIndex].index,
+ component,
+ srcRegIndex,
+ const_reg.nr);
+#endif
+
+ return const_reg;
+}
+
+
+/**
+ * Convert Mesa src register to brw register.
+ */
static struct brw_reg get_src_reg(struct brw_wm_compile *c,
- struct prog_src_register *src, int index, int nr)
+ const struct prog_instruction *inst,
+ GLuint srcRegIndex, GLuint channel)
{
- int component = GET_SWZ(src->Swizzle, index);
- return get_reg(c, src->File, src->Index, component, nr,
- src->NegateBase, src->Abs);
+ const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+ const GLuint nr = 1;
+ const GLuint component = GET_SWZ(src->Swizzle, channel);
+
+ /* Extended swizzle terms */
+ if (component == SWIZZLE_ZERO) {
+ return brw_imm_f(0.0F);
+ }
+ else if (component == SWIZZLE_ONE) {
+ return brw_imm_f(1.0F);
+ }
+
+ if (c->fp->use_const_buffer &&
+ (src->File == PROGRAM_STATE_VAR ||
+ src->File == PROGRAM_CONSTANT ||
+ src->File == PROGRAM_UNIFORM)) {
+ return get_src_reg_const(c, inst, srcRegIndex, component);
+ }
+ else {
+ /* other type of source register */
+ return get_reg(c, src->File, src->Index, component, nr,
+ src->Negate, src->Abs);
+ }
}
-/* Subroutines are minimal support for resusable instruction sequences.
- They are implemented as simply as possible to minimise overhead: there
- is no explicit support for communication between the caller and callee
- other than saving the return address in a temporary register, nor is
- there any automatic local storage. This implies that great care is
- required before attempting reentrancy or any kind of nested
- subroutine invocations. */
+
+/**
+ * Same as \sa get_src_reg() but if the register is a literal, emit
+ * a brw_reg encoding the literal.
+ * Note that a brw instruction only allows one src operand to be a literal.
+ * For instructions with more than one operand, only the second can be a
+ * literal. This means that we treat some literals as constants/uniforms
+ * (which why PROGRAM_CONSTANT is checked in fetch_constants()).
+ *
+ */
+static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ GLuint srcRegIndex, GLuint channel)
+{
+ const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+ if (src->File == PROGRAM_CONSTANT) {
+ /* a literal */
+ const int component = GET_SWZ(src->Swizzle, channel);
+ const GLfloat *param =
+ c->fp->program.Base.Parameters->ParameterValues[src->Index];
+ GLfloat value = param[component];
+ if (src->Negate & (1 << channel))
+ value = -value;
+ if (src->Abs)
+ value = FABSF(value);
+#if 0
+ printf(" form immed value %f for chan %d\n", value, channel);
+#endif
+ return brw_imm_f(value);
+ }
+ else {
+ return get_src_reg(c, inst, srcRegIndex, channel);
+ }
+}
+
+
+/**
+ * Subroutines are minimal support for resusable instruction sequences.
+ * They are implemented as simply as possible to minimise overhead: there
+ * is no explicit support for communication between the caller and callee
+ * other than saving the return address in a temporary register, nor is
+ * there any automatic local storage. This implies that great care is
+ * required before attempting reentrancy or any kind of nested
+ * subroutine invocations.
+ */
static void invoke_subroutine( struct brw_wm_compile *c,
enum _subroutine subroutine,
void (*emit)( struct brw_wm_compile * ) )
@@ -238,25 +650,8 @@ static void invoke_subroutine( struct brw_wm_compile *c,
}
}
-static void emit_abs( struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- int i;
- struct brw_compile *p = &c->func;
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for (i = 0; i < 4; i++) {
- if (inst->DstReg.WriteMask & (1<<i)) {
- struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i, 1);
- src = get_src_reg(c, &inst->SrcReg[0], i, 1);
- brw_MOV(p, dst, brw_abs(src));
- }
- }
- brw_set_saturate(p, 0);
-}
-
static void emit_trunc( struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
int i;
struct brw_compile *p = &c->func;
@@ -265,8 +660,8 @@ static void emit_trunc( struct brw_wm_compile *c,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i, 1) ;
- src = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src = get_src_reg(c, inst, 0, i);
brw_RNDZ(p, dst, src);
}
}
@@ -274,7 +669,7 @@ static void emit_trunc( struct brw_wm_compile *c,
}
static void emit_mov( struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
int i;
struct brw_compile *p = &c->func;
@@ -283,8 +678,10 @@ static void emit_mov( struct brw_wm_compile *c,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
struct brw_reg src, dst;
- dst = get_dst_reg(c, inst, i, 1);
- src = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ /* XXX some moves from immediate value don't work reliably!!! */
+ /*src = get_src_reg_imm(c, inst, 0, i);*/
+ src = get_src_reg(c, inst, 0, i);
brw_MOV(p, dst, src);
}
}
@@ -292,7 +689,7 @@ static void emit_mov( struct brw_wm_compile *c,
}
static void emit_pixel_xy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
@@ -301,8 +698,8 @@ static void emit_pixel_xy(struct brw_wm_compile *c,
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
- dst0 = get_dst_reg(c, inst, 0, 1);
- dst1 = get_dst_reg(c, inst, 1, 1);
+ dst0 = get_dst_reg(c, inst, 0);
+ dst1 = get_dst_reg(c, inst, 1);
/* Calculate pixel centers by adding 1 or 0 to each of the
* micro-tile coordinates passed in r1.
*/
@@ -319,21 +716,20 @@ static void emit_pixel_xy(struct brw_wm_compile *c,
stride(suboffset(r1_uw, 5), 2, 4, 0),
brw_imm_v(0x11001100));
}
-
}
static void emit_delta_xy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg dst0, dst1, src0, src1;
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
- dst0 = get_dst_reg(c, inst, 0, 1);
- dst1 = get_dst_reg(c, inst, 1, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1);
+ dst0 = get_dst_reg(c, inst, 0);
+ dst1 = get_dst_reg(c, inst, 1);
+ src0 = get_src_reg(c, inst, 0, 0);
+ src1 = get_src_reg(c, inst, 0, 1);
/* Calc delta X,Y by subtracting origin in r1 from the pixel
* centers.
*/
@@ -351,10 +747,8 @@ static void emit_delta_xy(struct brw_wm_compile *c,
negate(suboffset(r1,1)));
}
-
}
-
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
GLuint nr,
@@ -385,7 +779,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
}
static void emit_fb_write(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
int nr = 2;
@@ -397,38 +791,63 @@ static void emit_fb_write(struct brw_wm_compile *c,
*/
if (c->key.aa_dest_stencil_reg)
nr += 1;
- {
- brw_push_insn_state(p);
- for (channel = 0; channel < 4; channel++) {
- src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1);
- /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
- /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
- brw_MOV(p, brw_message_reg(nr + channel), src0);
- }
- /* skip over the regs populated above: */
- nr += 8;
- brw_pop_insn_state(p);
+
+ brw_push_insn_state(p);
+ for (channel = 0; channel < 4; channel++) {
+ src0 = get_src_reg(c, inst, 0, channel);
+ /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
+ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
+ brw_MOV(p, brw_message_reg(nr + channel), src0);
}
+ /* skip over the regs populated above: */
+ nr += 8;
+ brw_pop_insn_state(p);
- if (c->key.source_depth_to_render_target)
- {
- if (c->key.computes_depth) {
- src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1);
- brw_MOV(p, brw_message_reg(nr), src0);
- } else {
- src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
- brw_MOV(p, brw_message_reg(nr), src0);
- }
+ if (c->key.source_depth_to_render_target) {
+ if (c->key.computes_depth) {
+ src0 = get_src_reg(c, inst, 2, 2);
+ brw_MOV(p, brw_message_reg(nr), src0);
+ }
+ else {
+ src0 = get_src_reg(c, inst, 1, 1);
+ brw_MOV(p, brw_message_reg(nr), src0);
+ }
+
+ nr += 2;
+ }
+
+ if (c->key.dest_depth_reg) {
+ const GLuint comp = c->key.dest_depth_reg / 2;
+ const GLuint off = c->key.dest_depth_reg % 2;
+
+ if (off != 0) {
+ /* XXX this code needs review/testing */
+ struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp);
+ struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1);
- nr += 2;
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1));
+ /* 2nd half? */
+ brw_MOV(p, brw_message_reg(nr+1), arg1_1);
+ brw_pop_insn_state(p);
+ }
+ else
+ {
+ struct brw_reg src = get_src_reg(c, inst, 1, 1);
+ brw_MOV(p, brw_message_reg(nr), src);
+ }
+ nr += 2;
}
- target = inst->Sampler >> 1;
- eot = inst->Sampler & 1;
+
+ target = inst->Aux >> 1;
+ eot = inst->Aux & 1;
fire_fb_write(c, 0, nr, target, eot);
}
static void emit_pixel_w( struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -436,10 +855,10 @@ static void emit_pixel_w( struct brw_wm_compile *c,
struct brw_reg dst, src0, delta0, delta1;
struct brw_reg interp3;
- dst = get_dst_reg(c, inst, 3, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
- delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
+ dst = get_dst_reg(c, inst, 3);
+ src0 = get_src_reg(c, inst, 0, 0);
+ delta0 = get_src_reg(c, inst, 1, 0);
+ delta1 = get_src_reg(c, inst, 1, 1);
interp3 = brw_vec1_grf(src0.nr+1, 4);
/* Calc 1/w - just linterp wpos[3] optimized by putting the
@@ -458,19 +877,19 @@ static void emit_pixel_w( struct brw_wm_compile *c,
}
static void emit_linterp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
struct brw_reg interp[4];
struct brw_reg dst, delta0, delta1;
struct brw_reg src0;
+ GLuint nr, i;
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
- delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
- GLuint nr = src0.nr;
- int i;
+ src0 = get_src_reg(c, inst, 0, 0);
+ delta0 = get_src_reg(c, inst, 1, 0);
+ delta1 = get_src_reg(c, inst, 1, 1);
+ nr = src0.nr;
interp[0] = brw_vec1_grf(nr, 0);
interp[1] = brw_vec1_grf(nr, 4);
@@ -479,7 +898,7 @@ static void emit_linterp(struct brw_wm_compile *c,
for(i = 0; i < 4; i++ ) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_LINE(p, brw_null_reg(), interp[i], delta0);
brw_MAC(p, dst, suboffset(interp[i],1), delta1);
}
@@ -487,17 +906,17 @@ static void emit_linterp(struct brw_wm_compile *c,
}
static void emit_cinterp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
struct brw_reg interp[4];
struct brw_reg dst, src0;
+ GLuint nr, i;
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- GLuint nr = src0.nr;
- int i;
+ src0 = get_src_reg(c, inst, 0, 0);
+ nr = src0.nr;
interp[0] = brw_vec1_grf(nr, 0);
interp[1] = brw_vec1_grf(nr, 4);
@@ -506,14 +925,14 @@ static void emit_cinterp(struct brw_wm_compile *c,
for(i = 0; i < 4; i++ ) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV(p, dst, suboffset(interp[i],3));
}
}
}
static void emit_pinterp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -521,13 +940,13 @@ static void emit_pinterp(struct brw_wm_compile *c,
struct brw_reg interp[4];
struct brw_reg dst, delta0, delta1;
struct brw_reg src0, w;
+ GLuint nr, i;
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
- delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
- w = get_src_reg(c, &inst->SrcReg[2], 3, 1);
- GLuint nr = src0.nr;
- int i;
+ src0 = get_src_reg(c, inst, 0, 0);
+ delta0 = get_src_reg(c, inst, 1, 0);
+ delta1 = get_src_reg(c, inst, 1, 1);
+ w = get_src_reg(c, inst, 2, 3);
+ nr = src0.nr;
interp[0] = brw_vec1_grf(nr, 0);
interp[1] = brw_vec1_grf(nr, 4);
@@ -536,7 +955,7 @@ static void emit_pinterp(struct brw_wm_compile *c,
for(i = 0; i < 4; i++ ) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_LINE(p, brw_null_reg(), interp[i], delta0);
brw_MAC(p, dst, suboffset(interp[i],1),
delta1);
@@ -545,8 +964,38 @@ static void emit_pinterp(struct brw_wm_compile *c,
}
}
+/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
+static void emit_frontfacing(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
+ struct brw_reg dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV(p, dst, brw_imm_f(0.0));
+ }
+ }
+
+ /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
+ * us front face
+ */
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ dst = get_dst_reg(c, inst, i);
+ brw_MOV(p, dst, brw_imm_f(1.0));
+ }
+ }
+ brw_set_predicate_control_flag_value(p, 0xff);
+}
+
static void emit_xpd(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
int i;
struct brw_compile *p = &c->func;
@@ -556,12 +1005,12 @@ static void emit_xpd(struct brw_wm_compile *c,
GLuint i1 = (i+1)%3;
if (mask & (1<<i)) {
struct brw_reg src0, src1, dst;
- dst = get_dst_reg(c, inst, i, 1);
- src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1));
- src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = negate(get_src_reg(c, inst, 0, i2));
+ src1 = get_src_reg_imm(c, inst, 1, i1);
brw_MUL(p, brw_null_reg(), src0, src1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1);
+ src0 = get_src_reg(c, inst, 0, i1);
+ src1 = get_src_reg_imm(c, inst, 1, i2);
brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
brw_MAC(p, dst, src0, src1);
brw_set_saturate(p, 0);
@@ -571,17 +1020,25 @@ static void emit_xpd(struct brw_wm_compile *c,
}
static void emit_dp3(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg src0[3], src1[3], dst;
int i;
struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
for (i = 0; i < 3; i++) {
- src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ src0[i] = get_src_reg(c, inst, 0, i);
+ src1[i] = get_src_reg_imm(c, inst, 1, i);
}
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+ dst = get_dst_reg(c, inst, dst_chan);
brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
@@ -590,16 +1047,24 @@ static void emit_dp3(struct brw_wm_compile *c,
}
static void emit_dp4(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg src0[4], src1[4], dst;
int i;
struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
for (i = 0; i < 4; i++) {
- src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ src0[i] = get_src_reg(c, inst, 0, i);
+ src1[i] = get_src_reg_imm(c, inst, 1, i);
}
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+ dst = get_dst_reg(c, inst, dst_chan);
brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
@@ -609,16 +1074,24 @@ static void emit_dp4(struct brw_wm_compile *c,
}
static void emit_dph(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_reg src0[4], src1[4], dst;
int i;
struct brw_compile *p = &c->func;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
for (i = 0; i < 4; i++) {
- src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ src0[i] = get_src_reg(c, inst, 0, i);
+ src1[i] = get_src_reg_imm(c, inst, 1, i);
}
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
+ dst = get_dst_reg(c, inst, dst_chan);
brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
brw_MAC(p, dst, src0[2], src1[2]);
@@ -627,63 +1100,77 @@ static void emit_dph(struct brw_wm_compile *c,
brw_set_saturate(p, 0);
}
+/**
+ * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
+ * Note that the result of the function is smeared across the dest
+ * register's X, Y, Z and W channels (subject to writemasking of course).
+ */
static void emit_math1(struct brw_wm_compile *c,
- struct prog_instruction *inst, GLuint func)
+ const struct prog_instruction *inst, GLuint func)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, dst;
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ /* Get first component of source register */
+ dst = get_dst_reg(c, inst, dst_chan);
+ src0 = get_src_reg(c, inst, 0, 0);
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
brw_MOV(p, brw_message_reg(2), src0);
brw_math(p,
- dst,
- func,
- (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
- 2,
- brw_null_reg(),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
+ dst,
+ func,
+ (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+ 2,
+ brw_null_reg(),
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
}
static void emit_rcp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
}
static void emit_rsq(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
}
static void emit_sin(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
}
static void emit_cos(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
}
static void emit_ex2(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
}
static void emit_lg2(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
}
static void emit_add(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, dst;
@@ -692,36 +1179,31 @@ static void emit_add(struct brw_wm_compile *c,
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
brw_ADD(p, dst, src0, src1);
}
}
brw_set_saturate(p, 0);
}
-static void emit_sub(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+static void emit_arl(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
- struct brw_reg src0, src1, dst;
- GLuint mask = inst->DstReg.WriteMask;
- int i;
+ struct brw_reg src0, addr_reg;
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- for (i = 0 ; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
- brw_ADD(p, dst, src0, negate(src1));
- }
- }
+ addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_ADDRESS, 0);
+ src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */
+ brw_MOV(p, addr_reg, src0);
brw_set_saturate(p, 0);
}
+
static void emit_mul(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, dst;
@@ -730,9 +1212,9 @@ static void emit_mul(struct brw_wm_compile *c,
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
brw_MUL(p, dst, src0, src1);
}
}
@@ -740,7 +1222,7 @@ static void emit_mul(struct brw_wm_compile *c,
}
static void emit_frc(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, dst;
@@ -749,8 +1231,8 @@ static void emit_frc(struct brw_wm_compile *c,
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg_imm(c, inst, 0, i);
brw_FRC(p, dst, src0);
}
}
@@ -759,7 +1241,7 @@ static void emit_frc(struct brw_wm_compile *c,
}
static void emit_flr(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg src0, dst;
@@ -768,78 +1250,79 @@ static void emit_flr(struct brw_wm_compile *c,
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg_imm(c, inst, 0, i);
brw_RNDD(p, dst, src0);
}
}
brw_set_saturate(p, 0);
}
-static void emit_max(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg src0, src1, dst;
- int i;
- brw_push_insn_state(p);
- for (i = 0; i < 4; i++) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_MOV(p, dst, src0);
- brw_set_saturate(p, 0);
-
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1);
- brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
- brw_MOV(p, dst, src1);
- brw_set_saturate(p, 0);
- brw_set_predicate_control_flag_value(p, 0xff);
- }
- }
- brw_pop_insn_state(p);
-}
-static void emit_min(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+static void emit_min_max(struct brw_wm_compile *c,
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg src0, src1, dst;
+ const GLuint mask = inst->DstReg.WriteMask;
+ const int mark = mark_tmps(c);
int i;
brw_push_insn_state(p);
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ struct brw_reg real_dst = get_dst_reg(c, inst, i);
+ struct brw_reg src0 = get_src_reg(c, inst, 0, i);
+ struct brw_reg src1 = get_src_reg(c, inst, 1, i);
+ struct brw_reg dst;
+ /* if dst==src0 or dst==src1 we need to use a temp reg */
+ GLboolean use_temp = brw_same_reg(dst, src0) ||
+ brw_same_reg(dst, src1);
+ if (use_temp)
+ dst = alloc_tmp(c);
+ else
+ dst = real_dst;
+
+ /*
+ printf(" Min/max: dst %d src0 %d src1 %d\n",
+ dst.nr, src0.nr, src1.nr);
+ */
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
brw_MOV(p, dst, src0);
brw_set_saturate(p, 0);
- brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
+ if (inst->Opcode == OPCODE_MIN)
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
+ else
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0);
+
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
brw_MOV(p, dst, src1);
brw_set_saturate(p, 0);
brw_set_predicate_control_flag_value(p, 0xff);
+ if (use_temp)
+ brw_MOV(p, real_dst, dst);
}
}
brw_pop_insn_state(p);
+ release_tmps(c, mark);
}
static void emit_pow(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg dst, src0, src1;
- dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+ GLuint mask = inst->DstReg.WriteMask;
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return;
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ dst = get_dst_reg(c, inst, dst_chan);
+ src0 = get_src_reg_imm(c, inst, 0, 0);
+ src1 = get_src_reg_imm(c, inst, 1, 0);
brw_MOV(p, brw_message_reg(2), src0);
brw_MOV(p, brw_message_reg(3), src1);
@@ -855,7 +1338,7 @@ static void emit_pow(struct brw_wm_compile *c,
}
static void emit_lrp(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -864,10 +1347,10 @@ static void emit_lrp(struct brw_wm_compile *c,
int mark = mark_tmps(c);
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ src1 = get_src_reg_imm(c, inst, 1, i);
if (src1.nr == dst.nr) {
tmp1 = alloc_tmp(c);
@@ -875,7 +1358,7 @@ static void emit_lrp(struct brw_wm_compile *c,
} else
tmp1 = src1;
- src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
+ src2 = get_src_reg(c, inst, 2, i);
if (src2.nr == dst.nr) {
tmp2 = alloc_tmp(c);
brw_MOV(p, tmp2, src2);
@@ -908,7 +1391,7 @@ static void emit_kil(struct brw_wm_compile *c)
}
static void emit_mad(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -917,10 +1400,10 @@ static void emit_mad(struct brw_wm_compile *c,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
- src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
+ src2 = get_src_reg_imm(c, inst, 2, i);
brw_MUL(p, dst, src0, src1);
brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
@@ -931,7 +1414,7 @@ static void emit_mad(struct brw_wm_compile *c,
}
static void emit_sop(struct brw_wm_compile *c,
- struct prog_instruction *inst, GLuint cond)
+ const struct prog_instruction *inst, GLuint cond)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
@@ -940,9 +1423,9 @@ static void emit_sop(struct brw_wm_compile *c,
for (i = 0; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
- src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
+ dst = get_dst_reg(c, inst, i);
+ src0 = get_src_reg(c, inst, 0, i);
+ src1 = get_src_reg_imm(c, inst, 1, i);
brw_push_insn_state(p);
brw_CMP(p, brw_null_reg(), cond, src0, src1);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
@@ -955,113 +1438,58 @@ static void emit_sop(struct brw_wm_compile *c,
}
static void emit_slt(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_L);
}
static void emit_sle(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_LE);
}
static void emit_sgt(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_G);
}
static void emit_sge(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_GE);
}
static void emit_seq(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_EQ);
}
static void emit_sne(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
}
-static void emit_ddx(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst;
- struct brw_reg src0, w;
- GLuint nr, i;
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
- nr = src0.nr;
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- brw_MOV(p, dst, interp[i]);
- brw_MUL(p, dst, dst, w);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_ddy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst;
- struct brw_reg src0, w;
- GLuint nr, i;
-
- src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- nr = src0.nr;
- w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
- brw_MOV(p, dst, suboffset(interp[i], 1));
- brw_MUL(p, dst, dst, w);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static __inline struct brw_reg high_words( struct brw_reg reg )
+static INLINE struct brw_reg high_words( struct brw_reg reg )
{
return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
0, 8, 2 );
}
-static __inline struct brw_reg low_words( struct brw_reg reg )
+static INLINE struct brw_reg low_words( struct brw_reg reg )
{
return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 );
}
-static __inline struct brw_reg even_bytes( struct brw_reg reg )
+static INLINE struct brw_reg even_bytes( struct brw_reg reg )
{
return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 );
}
-static __inline struct brw_reg odd_bytes( struct brw_reg reg )
+static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
{
return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ),
0, 16, 2 );
@@ -1161,7 +1589,7 @@ static void noise1_sub( struct brw_wm_compile *c ) {
}
static void emit_noise1( struct brw_wm_compile *c,
- struct prog_instruction *inst )
+ const struct prog_instruction *inst )
{
struct brw_compile *p = &c->func;
struct brw_reg src, param, dst;
@@ -1171,7 +1599,7 @@ static void emit_noise1( struct brw_wm_compile *c,
assert( mark == 0 );
- src = get_src_reg( c, inst->SrcReg, 0, 1 );
+ src = get_src_reg( c, inst, 0, 0 );
param = alloc_tmp( c );
@@ -1183,7 +1611,7 @@ static void emit_noise1( struct brw_wm_compile *c,
brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV( p, dst, param );
}
}
@@ -1331,7 +1759,7 @@ static void noise2_sub( struct brw_wm_compile *c ) {
}
static void emit_noise2( struct brw_wm_compile *c,
- struct prog_instruction *inst )
+ const struct prog_instruction *inst )
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, param0, param1, dst;
@@ -1341,8 +1769,8 @@ static void emit_noise2( struct brw_wm_compile *c,
assert( mark == 0 );
- src0 = get_src_reg( c, inst->SrcReg, 0, 1 );
- src1 = get_src_reg( c, inst->SrcReg, 1, 1 );
+ src0 = get_src_reg( c, inst, 0, 0 );
+ src1 = get_src_reg( c, inst, 0, 1 );
param0 = alloc_tmp( c );
param1 = alloc_tmp( c );
@@ -1356,7 +1784,7 @@ static void emit_noise2( struct brw_wm_compile *c,
brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV( p, dst, param0 );
}
}
@@ -1366,9 +1794,11 @@ static void emit_noise2( struct brw_wm_compile *c,
release_tmps( c, mark );
}
-/* The three-dimensional case is much like the one- and two- versions above,
- but since the number of corners is rapidly growing we now pack 16 16-bit
- hashes into each register to extract more parallelism from the EUs. */
+/**
+ * The three-dimensional case is much like the one- and two- versions above,
+ * but since the number of corners is rapidly growing we now pack 16 16-bit
+ * hashes into each register to extract more parallelism from the EUs.
+ */
static void noise3_sub( struct brw_wm_compile *c ) {
struct brw_compile *p = &c->func;
@@ -1632,7 +2062,7 @@ static void noise3_sub( struct brw_wm_compile *c ) {
}
static void emit_noise3( struct brw_wm_compile *c,
- struct prog_instruction *inst )
+ const struct prog_instruction *inst )
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, src2, param0, param1, param2, dst;
@@ -1642,9 +2072,9 @@ static void emit_noise3( struct brw_wm_compile *c,
assert( mark == 0 );
- src0 = get_src_reg( c, inst->SrcReg, 0, 1 );
- src1 = get_src_reg( c, inst->SrcReg, 1, 1 );
- src2 = get_src_reg( c, inst->SrcReg, 2, 1 );
+ src0 = get_src_reg( c, inst, 0, 0 );
+ src1 = get_src_reg( c, inst, 0, 1 );
+ src2 = get_src_reg( c, inst, 0, 2 );
param0 = alloc_tmp( c );
param1 = alloc_tmp( c );
@@ -1660,7 +2090,7 @@ static void emit_noise3( struct brw_wm_compile *c,
brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV( p, dst, param0 );
}
}
@@ -1670,13 +2100,15 @@ static void emit_noise3( struct brw_wm_compile *c,
release_tmps( c, mark );
}
-/* For the four-dimensional case, the little micro-optimisation benefits
- we obtain by unrolling all the loops aren't worth the massive bloat it
- now causes. Instead, we loop twice around performing a similar operation
- to noise3, once for the w=0 cube and once for the w=1, with a bit more
- code to glue it all together. */
-static void noise4_sub( struct brw_wm_compile *c ) {
-
+/**
+ * For the four-dimensional case, the little micro-optimisation benefits
+ * we obtain by unrolling all the loops aren't worth the massive bloat it
+ * now causes. Instead, we loop twice around performing a similar operation
+ * to noise3, once for the w=0 cube and once for the w=1, with a bit more
+ * code to glue it all together.
+ */
+static void noise4_sub( struct brw_wm_compile *c )
+{
struct brw_compile *p = &c->func;
struct brw_reg param[ 4 ],
x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
@@ -2053,7 +2485,7 @@ static void noise4_sub( struct brw_wm_compile *c ) {
}
static void emit_noise4( struct brw_wm_compile *c,
- struct prog_instruction *inst )
+ const struct prog_instruction *inst )
{
struct brw_compile *p = &c->func;
struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst;
@@ -2063,10 +2495,10 @@ static void emit_noise4( struct brw_wm_compile *c,
assert( mark == 0 );
- src0 = get_src_reg( c, inst->SrcReg, 0, 1 );
- src1 = get_src_reg( c, inst->SrcReg, 1, 1 );
- src2 = get_src_reg( c, inst->SrcReg, 2, 1 );
- src3 = get_src_reg( c, inst->SrcReg, 3, 1 );
+ src0 = get_src_reg( c, inst, 0, 0 );
+ src1 = get_src_reg( c, inst, 0, 1 );
+ src2 = get_src_reg( c, inst, 0, 2 );
+ src3 = get_src_reg( c, inst, 0, 3 );
param0 = alloc_tmp( c );
param1 = alloc_tmp( c );
@@ -2084,7 +2516,7 @@ static void emit_noise4( struct brw_wm_compile *c,
brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
for (i = 0 ; i < 4; i++) {
if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i, 1);
+ dst = get_dst_reg(c, inst, i);
brw_MOV( p, dst, param0 );
}
}
@@ -2095,17 +2527,17 @@ static void emit_noise4( struct brw_wm_compile *c,
}
static void emit_wpos_xy(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
GLuint mask = inst->DstReg.WriteMask;
struct brw_reg src0[2], dst[2];
- dst[0] = get_dst_reg(c, inst, 0, 1);
- dst[1] = get_dst_reg(c, inst, 1, 1);
+ dst[0] = get_dst_reg(c, inst, 0);
+ dst[1] = get_dst_reg(c, inst, 1);
- src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1);
- src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1);
+ src0[0] = get_src_reg(c, inst, 0, 0);
+ src0[1] = get_src_reg(c, inst, 0, 1);
/* Calculate the pixel offset from window bottom left into destination
* X and Y channels.
@@ -2128,27 +2560,32 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
}
/* TODO
- BIAS on SIMD8 not workind yet...
+ BIAS on SIMD8 not working yet...
*/
static void emit_txb(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg dst[4], src[4], payload_reg;
- GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
-
+ /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
+ const GLuint unit = inst->TexSrcUnit;
GLuint i;
+ GLuint msg_type;
+
+ assert(unit < BRW_MAX_TEX_UNIT);
+
payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
for (i = 0; i < 4; i++)
- dst[i] = get_dst_reg(c, inst, i, 1);
+ dst[i] = get_dst_reg(c, inst, i);
for (i = 0; i < 4; i++)
- src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
+ src[i] = get_src_reg(c, inst, 0, i);
switch (inst->TexSrcTarget) {
case TEXTURE_1D_INDEX:
- brw_MOV(p, brw_message_reg(2), src[0]);
- brw_MOV(p, brw_message_reg(3), brw_imm_f(0));
- brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */
+ brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */
+ brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */
break;
case TEXTURE_2D_INDEX:
case TEXTURE_RECT_INDEX:
@@ -2156,46 +2593,63 @@ static void emit_txb(struct brw_wm_compile *c,
brw_MOV(p, brw_message_reg(3), src[1]);
brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
break;
- default:
+ case TEXTURE_3D_INDEX:
+ case TEXTURE_CUBE_INDEX:
brw_MOV(p, brw_message_reg(2), src[0]);
brw_MOV(p, brw_message_reg(3), src[1]);
brw_MOV(p, brw_message_reg(4), src[2]);
break;
+ default:
+ /* invalid target */
+ abort();
}
- brw_MOV(p, brw_message_reg(5), src[3]);
- brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+ brw_MOV(p, brw_message_reg(5), src[3]); /* bias */
+ brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
+
+ if (BRW_IS_IGDNG(p->brw)) {
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
+ } else {
+ /* Does it work well on SIMD8? */
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ }
+
brw_SAMPLE(p,
- retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
- 1,
- retype(payload_reg, BRW_REGISTER_TYPE_UW),
- unit + MAX_DRAW_BUFFERS, /* surface */
- unit, /* sampler */
- inst->DstReg.WriteMask,
- BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
- 4,
- 4,
- 0);
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
+ 1, /* msg_reg_nr */
+ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
+ SURF_INDEX_TEXTURE(unit),
+ unit, /* sampler */
+ inst->DstReg.WriteMask, /* writemask */
+ msg_type, /* msg_type */
+ 4, /* response_length */
+ 4, /* msg_length */
+ 0, /* eot */
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD8);
}
+
static void emit_tex(struct brw_wm_compile *c,
- struct prog_instruction *inst)
+ const struct prog_instruction *inst)
{
struct brw_compile *p = &c->func;
struct brw_reg dst[4], src[4], payload_reg;
- GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
-
+ /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
+ const GLuint unit = inst->TexSrcUnit;
GLuint msg_len;
GLuint i, nr;
GLuint emit;
GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;
+ GLuint msg_type;
+
+ assert(unit < BRW_MAX_TEX_UNIT);
payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
for (i = 0; i < 4; i++)
- dst[i] = get_dst_reg(c, inst, i, 1);
+ dst[i] = get_dst_reg(c, inst, i);
for (i = 0; i < 4; i++)
- src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
-
+ src[i] = get_src_reg(c, inst, 0, i);
switch (inst->TexSrcTarget) {
case TEXTURE_1D_INDEX:
@@ -2207,13 +2661,18 @@ static void emit_tex(struct brw_wm_compile *c,
emit = WRITEMASK_XY;
nr = 2;
break;
- default:
+ case TEXTURE_3D_INDEX:
+ case TEXTURE_CUBE_INDEX:
emit = WRITEMASK_XYZ;
nr = 3;
break;
+ default:
+ /* invalid target */
+ abort();
}
msg_len = 1;
+ /* move/load S, T, R coords */
for (i = 0; i < nr; i++) {
static const GLuint swz[4] = {0,1,2,2};
if (emit & (1<<i))
@@ -2224,77 +2683,103 @@ static void emit_tex(struct brw_wm_compile *c,
}
if (shadow) {
- brw_MOV(p, brw_message_reg(5), brw_imm_f(0));
- brw_MOV(p, brw_message_reg(6), src[2]);
+ brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
+ brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */
}
+ if (BRW_IS_IGDNG(p->brw)) {
+ if (shadow)
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
+ else
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG;
+ } else {
+ /* Does it work for shadow on SIMD8 ? */
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
+ }
+
brw_SAMPLE(p,
- retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
- 1,
- retype(payload_reg, BRW_REGISTER_TYPE_UW),
- unit + MAX_DRAW_BUFFERS, /* surface */
- unit, /* sampler */
- inst->DstReg.WriteMask,
- BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE,
- 4,
- shadow ? 6 : 4,
- 0);
+ retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
+ 1, /* msg_reg_nr */
+ retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */
+ SURF_INDEX_TEXTURE(unit),
+ unit, /* sampler */
+ inst->DstReg.WriteMask, /* writemask */
+ msg_type, /* msg_type */
+ 4, /* response_length */
+ shadow ? 6 : 4, /* msg_length */
+ 0, /* eot */
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD8);
if (shadow)
brw_MOV(p, dst[3], brw_imm_f(1.0));
}
+
+/**
+ * Resolve subroutine calls after code emit is done.
+ */
static void post_wm_emit( struct brw_wm_compile *c )
{
- GLuint nr_insns = c->fp->program.Base.NumInstructions;
- GLuint insn, target_insn;
- struct prog_instruction *inst1, *inst2;
- struct brw_instruction *brw_inst1, *brw_inst2;
- int offset;
- for (insn = 0; insn < nr_insns; insn++) {
- inst1 = &c->fp->program.Base.Instructions[insn];
- brw_inst1 = inst1->Data;
- switch (inst1->Opcode) {
- case OPCODE_CAL:
- target_insn = inst1->BranchTarget;
- inst2 = &c->fp->program.Base.Instructions[target_insn];
- brw_inst2 = inst2->Data;
- offset = brw_inst2 - brw_inst1;
- brw_set_src1(brw_inst1, brw_imm_d(offset*16));
- break;
- default:
- break;
- }
+ brw_resolve_cals(&c->func);
+}
+
+static void
+get_argument_regs(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ int index,
+ struct brw_reg *regs,
+ int mask)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1 << i))
+ regs[i] = get_src_reg(c, inst, index, i);
}
}
static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
{
-#define MAX_IFSN 32
+#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32
- struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH];
- struct brw_instruction *inst0, *inst1;
- int i, if_insn = 0, loop_insn = 0;
+ struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+ GLuint i, if_depth = 0, loop_depth = 0;
struct brw_compile *p = &c->func;
struct brw_indirect stack_index = brw_indirect(0, 0);
- c->reg_index = 0;
+ c->out_of_regs = GL_FALSE;
+
prealloc_reg(c);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
for (i = 0; i < c->nr_fp_insns; i++) {
- struct prog_instruction *inst = &c->prog_instructions[i];
- struct prog_instruction *orig_inst;
+ const struct prog_instruction *inst = &c->prog_instructions[i];
+ int dst_flags;
+ struct brw_reg args[3][4], dst[4];
+ int j;
- if ((orig_inst = inst->Data) != 0)
- orig_inst->Data = current_insn(p);
+ c->cur_inst = i;
+
+#if 0
+ _mesa_printf("Inst %d: ", i);
+ _mesa_print_instruction(inst);
+#endif
+
+ /* fetch any constants that this instruction needs */
+ if (c->fp->use_const_buffer)
+ fetch_constants(c, inst);
if (inst->CondUpdate)
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
else
brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+ dst_flags = inst->DstReg.WriteMask;
+ if (inst->SaturateMode == SATURATE_ZERO_ONE)
+ dst_flags |= SATURATE;
+
switch (inst->Opcode) {
case WM_PIXELXY:
emit_pixel_xy(c, inst);
@@ -2320,14 +2805,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case WM_FB_WRITE:
emit_fb_write(c, inst);
break;
- case OPCODE_ABS:
- emit_abs(c, inst);
+ case WM_FRONTFACING:
+ emit_frontfacing(c, inst);
break;
case OPCODE_ADD:
emit_add(c, inst);
break;
- case OPCODE_SUB:
- emit_sub(c, inst);
+ case OPCODE_ARL:
+ emit_arl(c, inst);
break;
case OPCODE_FRC:
emit_frc(c, inst);
@@ -2342,6 +2827,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
emit_trunc(c, inst);
break;
case OPCODE_MOV:
+ case OPCODE_SWZ:
emit_mov(c, inst);
break;
case OPCODE_DP3:
@@ -2374,17 +2860,21 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_LG2:
emit_lg2(c, inst);
break;
- case OPCODE_MAX:
- emit_max(c, inst);
- break;
case OPCODE_MIN:
- emit_min(c, inst);
+ case OPCODE_MAX:
+ emit_min_max(c, inst);
break;
case OPCODE_DDX:
- emit_ddx(c, inst);
- break;
case OPCODE_DDY:
- emit_ddy(c, inst);
+ for (j = 0; j < 4; j++) {
+ if (inst->DstReg.WriteMask & (1 << j))
+ dst[j] = get_dst_reg(c, inst, j);
+ else
+ dst[j] = brw_null_reg();
+ }
+ get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW);
+ emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
+ args[0]);
break;
case OPCODE_SLT:
emit_slt(c, inst);
@@ -2435,18 +2925,21 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
emit_kil(c);
break;
case OPCODE_IF:
- assert(if_insn < MAX_IFSN);
- if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+ assert(if_depth < MAX_IF_DEPTH);
+ if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
break;
case OPCODE_ELSE:
- if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
+ if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
break;
case OPCODE_ENDIF:
- assert(if_insn > 0);
- brw_ENDIF(p, if_inst[--if_insn]);
+ assert(if_depth > 0);
+ brw_ENDIF(p, if_inst[--if_depth]);
break;
case OPCODE_BGNSUB:
+ brw_save_label(p, inst->Comment, p->nr_insn);
+ break;
case OPCODE_ENDSUB:
+ /* no-op */
break;
case OPCODE_CAL:
brw_push_insn_state(p);
@@ -2456,8 +2949,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
brw_set_access_mode(p, BRW_ALIGN_16);
brw_ADD(p, get_addr_reg(stack_index),
get_addr_reg(stack_index), brw_imm_d(4));
- orig_inst = inst->Data;
- orig_inst->Data = &p->store[p->nr_insn];
+ brw_save_call(&c->func, inst->Comment, p->nr_insn);
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
brw_pop_insn_state(p);
break;
@@ -2474,7 +2966,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
break;
case OPCODE_BGNLOOP:
- loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
+ /* XXX may need to invalidate the current_constant regs */
+ loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
break;
case OPCODE_BRK:
brw_BREAK(p);
@@ -2485,39 +2978,69 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_ENDLOOP:
- loop_insn--;
- inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]);
- /* patch all the BREAK instructions from
- last BEGINLOOP */
- while (inst0 > loop_inst[loop_insn]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK) {
- inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
+ {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ if (BRW_IS_IGDNG(brw))
+ br = 2;
+
+ loop_depth--;
+ inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_inst[loop_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
inst0->bits3.if_else.pop_count = 0;
- } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
- inst0->bits3.if_else.jump_count = inst1 - inst0;
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
inst0->bits3.if_else.pop_count = 0;
- }
- }
- break;
+ }
+ }
+ }
+ break;
default:
_mesa_printf("unsupported IR in fragment shader %d\n",
inst->Opcode);
}
+
if (inst->CondUpdate)
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
else
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
post_wm_emit(c);
- for (i = 0; i < c->fp->program.Base.NumInstructions; i++)
- c->fp->program.Base.Instructions[i].Data = NULL;
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ _mesa_printf("wm-native:\n");
+ for (i = 0; i < p->nr_insn; i++)
+ brw_disasm(stderr, &p->store[i]);
+ _mesa_printf("\n");
+ }
}
+/**
+ * Do GPU code generation for shaders that use GLSL features such as
+ * flow control. Other shaders will be compiled with the
+ */
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
+ if (INTEL_DEBUG & DEBUG_WM) {
+ _mesa_printf("brw_wm_glsl_emit:\n");
+ }
+
+ /* initial instruction translation/simplification */
brw_wm_pass_fp(c);
+
+ /* actual code generation */
brw_wm_emit_glsl(brw, c);
- c->prog_data.total_grf = c->reg_index;
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ brw_wm_print_program(c, "brw_wm_glsl_emit done");
+ }
+
+ c->prog_data.total_grf = num_grf_used(c);
c->prog_data.total_scratch = 0;
}