diff options
| -rw-r--r-- | src/mesa/drivers/dri/i965/Makefile | 1 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 2 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_program.c | 1 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.c | 21 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.h | 2 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_glsl.c | 1035 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_state.c | 1 | ||||
| -rw-r--r-- | src/mesa/drivers/dri/i965/gen6_wm_state.c | 4 | 
8 files changed, 10 insertions, 1057 deletions
| diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index e3ca863fe5..7c3ac0c14e 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -81,7 +81,6 @@ DRIVER_SOURCES = \  	brw_wm_emit.c \  	brw_wm_fp.c \  	brw_wm_iz.c \ -	brw_wm_glsl.c \  	brw_wm_pass0.c \  	brw_wm_pass1.c \  	brw_wm_pass2.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 335339515a..7069724466 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -171,7 +171,6 @@ struct brw_vertex_program {  struct brw_fragment_program {     struct gl_fragment_program program;     GLuint id;  /**< serial no. to identify frag progs, never re-used */ -   GLboolean isGLSL;  /**< really, any IF/LOOP/CONT/BREAK instructions */     /** for debugging, which texture units are referenced */     GLbitfield tex_units_used; @@ -211,6 +210,7 @@ struct brw_wm_prog_data {     GLuint nr_params;       /**< number of float params/constants */     GLuint nr_pull_params;     GLboolean error; +   int dispatch_width;     /* Pointer to tracked values (only valid once      * _mesa_load_state_parameters has been called at runtime). diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 1367d81469..94efa79109 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -142,7 +142,6 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx,        if (newFP == curFP)  	 brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;        newFP->id = brw->program_id++;       -      newFP->isGLSL = brw_wm_is_glsl(fprog);        /* Don't reject fragment shaders for their Mesa IR state when we're         * using the new FS backend. 
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 659f3cbef5..e0aa3fd7f2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -134,7 +134,7 @@ brw_wm_payload_setup(struct brw_context *brw,        /* R3-4: perspective pixel location barycentric */        c->nr_payload_regs += 2;        /* R5-6: perspective pixel location bary for dispatch width != 8 */ -      if (!c->fp->isGLSL) { /* dispatch_width != 8 */ +      if (c->dispatch_width == 16) {  	 c->nr_payload_regs += 2;        }        /* R7-10: perspective centroid barycentric */ @@ -147,7 +147,7 @@ brw_wm_payload_setup(struct brw_context *brw,        if (uses_depth) {  	 c->source_depth_reg = c->nr_payload_regs;  	 c->nr_payload_regs++; -	 if (!c->fp->isGLSL) { /* dispatch_width != 8 */ +	 if (c->dispatch_width == 16) {  	    /* R28: interpolated depth if not 8-wide. */  	    c->nr_payload_regs++;  	 } @@ -157,7 +157,7 @@ brw_wm_payload_setup(struct brw_context *brw,        if (uses_depth) {  	 c->source_w_reg = c->nr_payload_regs;  	 c->nr_payload_regs++; -	 if (!c->fp->isGLSL) { /* dispatch_width != 8 */ +	 if (c->dispatch_width == 16) {  	    /* R30: interpolated W if not 8-wide. */  	    c->nr_payload_regs++;  	 } @@ -225,23 +225,16 @@ static void do_wm_prog( struct brw_context *brw,     brw_wm_payload_setup(brw, c); -   /* temporary sanity check assertion */ -   ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); -     if (!brw_wm_fs_emit(brw, c)) {        /*         * Shader which use GLSL features such as flow control are handled         * differently from "simple" shaders.         
*/ -      if (fp->isGLSL) { -	 c->dispatch_width = 8; -	 brw_wm_glsl_emit(brw, c); -      } -      else { -	 c->dispatch_width = 16; -	 brw_wm_non_glsl_emit(brw, c); -      } +      c->dispatch_width = 16; +      brw_wm_payload_setup(brw, c); +      brw_wm_non_glsl_emit(brw, c);     } +   c->prog_data.dispatch_width = c->dispatch_width;     /* Scratch space is used for register spilling */     if (c->last_scratch) { diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index d34566a4bd..937b0edd70 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -313,8 +313,6 @@ void brw_wm_print_program( struct brw_wm_compile *c,  void brw_wm_lookup_iz(struct intel_context *intel,  		      struct brw_wm_compile *c); -GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); -void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);  GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c);  /* brw_wm_emit.c */ diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c deleted file mode 100644 index 4150244518..0000000000 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ /dev/null @@ -1,1035 +0,0 @@ -#include "main/macros.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "program/prog_optimize.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" - -static struct brw_reg get_dst_reg(struct brw_wm_compile *c, -                                  const struct prog_instruction *inst, -                                  GLuint component); - -/** - * Determine if the given fragment program uses GLSL features such - * as flow conditionals, loops, subroutines. - * Some GLSL shaders may use these features, others might not. 
- */ -GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) -{ -    int i; - -    if (unlikely(INTEL_DEBUG & DEBUG_GLSL_FORCE)) -       return GL_TRUE; - -    for (i = 0; i < fp->Base.NumInstructions; i++) { -	const struct prog_instruction *inst = &fp->Base.Instructions[i]; -	switch (inst->Opcode) { -	    case OPCODE_ARL: -	    case OPCODE_IF: -	    case OPCODE_ENDIF: -	    case OPCODE_CAL: -	    case OPCODE_BRK: -	    case OPCODE_RET: -	    case OPCODE_BGNLOOP: -		return GL_TRUE;  -	    default: -		break; -	} -    } -    return GL_FALSE;  -} - - - -static void -reclaim_temps(struct brw_wm_compile *c); - - -/** Mark GRF register as used. */ -static void -prealloc_grf(struct brw_wm_compile *c, int r) -{ -   c->used_grf[r] = GL_TRUE; -} - - -/** Mark given GRF register as not in use. */ -static void -release_grf(struct brw_wm_compile *c, int r) -{ -   /*assert(c->used_grf[r]);*/ -   c->used_grf[r] = GL_FALSE; -   c->first_free_grf = MIN2(c->first_free_grf, r); -} - - -/** Return index of a free GRF, mark it as used. 
*/ -static int -alloc_grf(struct brw_wm_compile *c) -{ -   GLuint r; -   for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { -      if (!c->used_grf[r]) { -         c->used_grf[r] = GL_TRUE; -         c->first_free_grf = r + 1;  /* a guess */ -         return r; -      } -   } - -   /* no free temps, try to reclaim some */ -   reclaim_temps(c); -   c->first_free_grf = 0; - -   /* try alloc again */ -   for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { -      if (!c->used_grf[r]) { -         c->used_grf[r] = GL_TRUE; -         c->first_free_grf = r + 1;  /* a guess */ -         return r; -      } -   } - -   for (r = 0; r < BRW_WM_MAX_GRF; r++) { -      assert(c->used_grf[r]); -   } - -   /* really, no free GRF regs found */ -   if (!c->out_of_regs) { -      /* print warning once per compilation */ -      _mesa_warning(NULL, "i965: ran out of registers for fragment program"); -      c->out_of_regs = GL_TRUE; -   } - -   return -1; -} - - -/** Return number of GRF registers used */ -static int -num_grf_used(const struct brw_wm_compile *c) -{ -   int r; -   for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) -      if (c->used_grf[r]) -         return r + 1; -   return 0; -} - - - -/** - * Record the mapping of a Mesa register to a hardware register. - */ -static void set_reg(struct brw_wm_compile *c, int file, int index,  -	int component, struct brw_reg reg) -{ -    c->wm_regs[file][index][component].reg = reg; -    c->wm_regs[file][index][component].inited = GL_TRUE; -} - -static struct brw_reg alloc_tmp(struct brw_wm_compile *c) -{ -    struct brw_reg reg; - -    /* if we need to allocate another temp, grow the tmp_regs[] array */ -    if (c->tmp_index == c->tmp_max) { -       int r = alloc_grf(c); -       if (r < 0) { -          /*printf("Out of temps in %s\n", __FUNCTION__);*/ -          r = 50; /* XXX random register! 
*/ -       } -       c->tmp_regs[ c->tmp_max++ ] = r; -    } - -    /* form the GRF register */ -    reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); -    /*printf("alloc_temp %d\n", reg.nr);*/ -    assert(reg.nr < BRW_WM_MAX_GRF); -    return reg; - -} - -/** - * Save current temp register info. - * There must be a matching call to release_tmps(). - */ -static int mark_tmps(struct brw_wm_compile *c) -{ -    return c->tmp_index; -} - -static void release_tmps(struct brw_wm_compile *c, int mark) -{ -    c->tmp_index = mark; -} - -/** - * Convert Mesa src register to brw register. - * - * Since we're running in SOA mode each Mesa register corresponds to four - * hardware registers.  We allocate the hardware registers as needed here. - * - * \param file  register file, one of PROGRAM_x - * \param index  register number - * \param component  src component (X=0, Y=1, Z=2, W=3) - * \param nr  not used?!? - * \param neg  negate value? - * \param abs  take absolute value? - */ -static struct brw_reg  -get_reg(struct brw_wm_compile *c, int file, int index, int component, -        int nr, GLuint neg, GLuint abs) -{ -    struct brw_reg reg; -    switch (file) { -	case PROGRAM_STATE_VAR: -	case PROGRAM_CONSTANT: -	case PROGRAM_UNIFORM: -	    file = PROGRAM_STATE_VAR; -	    break; -	case PROGRAM_UNDEFINED: -	    return brw_null_reg();	 -	case PROGRAM_TEMPORARY: -	case PROGRAM_INPUT: -	case PROGRAM_OUTPUT: -	case PROGRAM_PAYLOAD: -	    break; -	default: -	    _mesa_problem(NULL, "Unexpected file in get_reg()"); -	    return brw_null_reg(); -    } - -    assert(index < 256); -    assert(component < 4); - -    /* see if we've already allocated a HW register for this Mesa register */ -    if (c->wm_regs[file][index][component].inited) { -       /* yes, re-use */ -       reg = c->wm_regs[file][index][component].reg; -    } -    else { -	/* no, allocate new register */ -       int grf = alloc_grf(c); -       /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, 
component);*/ -       if (grf < 0) { -          /* totally out of temps */ -          grf = 51; /* XXX random register! */ -       } - -       reg = brw_vec8_grf(grf, 0); -       /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ - -       set_reg(c, file, index, component, reg); -    } - -    if (neg & (1 << component)) { -	reg = negate(reg); -    } -    if (abs) -	reg = brw_abs(reg); -    return reg; -} - - - -/** - * This is called if we run out of GRF registers.  Examine the live intervals - * of temp regs in the program and free those which won't be used again. - */ -static void -reclaim_temps(struct brw_wm_compile *c) -{ -   GLint intBegin[MAX_PROGRAM_TEMPS]; -   GLint intEnd[MAX_PROGRAM_TEMPS]; -   int index; - -   /*printf("Reclaim temps:\n");*/ - -   _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, -                             intBegin, intEnd); - -   for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { -      if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { -         /* program temp[i] can be freed */ -         int component; -         /*printf("  temp[%d] is dead\n", index);*/ -         for (component = 0; component < 4; component++) { -            if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { -               int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; -               release_grf(c, r); -               /* -               printf("  Reclaim temp %d, reg %d at inst %d\n", -                      index, r, c->cur_inst); -               */ -               c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; -            } -         } -      } -   } -} - - - - -/** - * Preallocate registers.  This sets up the Mesa to hardware register - * mapping for certain registers, such as constants (uniforms/state vars) - * and shader inputs. 
- */ -static void prealloc_reg(struct brw_wm_compile *c) -{ -    struct intel_context *intel = &c->func.brw->intel; -    int i, j; -    struct brw_reg reg; -    int urb_read_length = 0; -    GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; -    GLuint reg_index = 0; - -    memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); -    c->first_free_grf = 0; - -    for (i = 0; i < 4; i++) { -	if (i < (c->nr_payload_regs + 1) / 2) -            reg = brw_vec8_grf(i * 2, 0); -        else -            reg = brw_vec8_grf(0, 0); -	set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); -    } -    set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_W, 0, -	    brw_vec8_grf(c->source_w_reg, 0)); -    reg_index += c->nr_payload_regs; - -    /* constants */ -    { -        const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; -        const GLuint nr_temps = c->fp->program.Base.NumTemporaries; - -        /* use a real constant buffer, or just use a section of the GRF? */ -        /* XXX this heuristic may need adjustment... 
*/ -        if ((nr_params + nr_temps) * 4 + reg_index > 80) { -	   for (i = 0; i < nr_params; i++) { -	      float *pv = c->fp->program.Base.Parameters->ParameterValues[i]; -	      for (j = 0; j < 4; j++) { -		 c->prog_data.pull_param[c->prog_data.nr_pull_params] = &pv[j]; -		 c->prog_data.nr_pull_params++; -	      } -	   } - -	   c->prog_data.nr_params = 0; -	} -        /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/ - -        if (!c->prog_data.nr_pull_params) { -           const struct gl_program_parameter_list *plist =  -              c->fp->program.Base.Parameters; -           int index = 0; - -           /* number of float constants in CURBE */ -           c->prog_data.nr_params = 4 * nr_params; - -           /* loop over program constants (float[4]) */ -           for (i = 0; i < nr_params; i++) { -              /* loop over XYZW channels */ -              for (j = 0; j < 4; j++, index++) { -                 reg = brw_vec1_grf(reg_index + index / 8, index % 8); -                 /* Save pointer to parameter/constant value. -                  * Constants will be copied in prepare_constant_buffer() -                  */ -                 c->prog_data.param[index] = &plist->ParameterValues[i][j]; -                 set_reg(c, PROGRAM_STATE_VAR, i, j, reg); -              } -           } -           /* number of constant regs used (each reg is float[8]) */ -	   c->nr_creg = ALIGN(nr_params, 2) / 2; -	   reg_index += c->nr_creg; -        } -    } - -    /* fragment shader inputs: One 2-reg pair of interpolation -     * coefficients for each vec4 to be set up. 
-     */ -    if (intel->gen >= 6) { -       for (i = 0; i < FRAG_ATTRIB_MAX; i++) { -	  if (!(c->fp->program.Base.InputsRead & BITFIELD64_BIT(i))) -	     continue; - -	  reg = brw_vec8_grf(reg_index, 0); -	  for (j = 0; j < 4; j++) { -	     set_reg(c, PROGRAM_PAYLOAD, i, j, reg); -	  } -	  reg_index += 2; -       } -       urb_read_length = reg_index; -    } else { -       for (i = 0; i < VERT_RESULT_MAX; i++) { -	  int fp_input; - -	  if (i >= VERT_RESULT_VAR0) -	     fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; -	  else if (i <= VERT_RESULT_TEX7) -	     fp_input = i; -	  else -	     fp_input = -1; - -	  if (fp_input >= 0 && inputs & (1 << fp_input)) { -	     urb_read_length = reg_index; -	     reg = brw_vec8_grf(reg_index, 0); -	     for (j = 0; j < 4; j++) -		set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); -	  } -	  if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { -	     reg_index += 2; -	  } -       } -    } - -    c->prog_data.first_curbe_grf = c->nr_payload_regs; -    c->prog_data.urb_read_length = urb_read_length; -    c->prog_data.curb_read_length = c->nr_creg; -    c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); -    reg_index++; -    c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); -    reg_index += 2; - -    /* mark GRF regs [0..reg_index-1] as in-use */ -    for (i = 0; i < reg_index; i++) -       prealloc_grf(c, i); - -    /* Don't use GRF 126, 127.  Using them seems to lead to GPU lock-ups */ -    prealloc_grf(c, 126); -    prealloc_grf(c, 127); - -    for (i = 0; i < c->nr_fp_insns; i++) { -	const struct prog_instruction *inst = &c->prog_instructions[i]; -	struct brw_reg dst[4]; - -	switch (inst->Opcode) { -	case OPCODE_TEX: -	case OPCODE_TXB: -	    /* Allocate the channels of texture results contiguously, -	     * since they are written out that way by the sampler unit. 
-	     */ -	    for (j = 0; j < 4; j++) { -		dst[j] = get_dst_reg(c, inst, j); -		if (j != 0) -		    assert(dst[j].nr == dst[j - 1].nr + 1); -	    } -	    break; -	default: -	    break; -	} -    } - -    for (i = 0; i < c->nr_fp_insns; i++) { -	const struct prog_instruction *inst = &c->prog_instructions[i]; - -	switch (inst->Opcode) { -	case WM_DELTAXY: -	    /* Allocate WM_DELTAXY destination on G45/GM45 to an -	     * even-numbered GRF if possible so that we can use the PLN -	     * instruction. -	     */ -	    if (inst->DstReg.WriteMask == WRITEMASK_XY && -		!c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited && -		!c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited && -		(IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) { -		int grf; - -		for (grf = c->first_free_grf & ~1; -		     grf < BRW_WM_MAX_GRF; -		     grf += 2) -		{ -		    if (!c->used_grf[grf] && !c->used_grf[grf + 1]) { -			c->used_grf[grf] = GL_TRUE; -			c->used_grf[grf + 1] = GL_TRUE; -			c->first_free_grf = grf + 2;  /* a guess */ - -			set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0, -				brw_vec8_grf(grf, 0)); -			set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1, -				brw_vec8_grf(grf + 1, 0)); -			break; -		    } -		} -	    } -	default: -	    break; -	} -    } - -    /* An instruction may reference up to three constants. -     * They'll be found in these registers. -     * XXX alloc these on demand! -     */ -    if (c->prog_data.nr_pull_params) { -       for (i = 0; i < 3; i++) { -          c->current_const[i].index = -1; -          c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); -       } -    } -#if 0 -    printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); -    printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index); -#endif -} - - -/** - * Check if any of the instruction's src registers are constants, uniforms, - * or statevars.  If so, fetch any constants that we don't already have in - * the three GRF slots. 
- */ -static void fetch_constants(struct brw_wm_compile *c, -                            const struct prog_instruction *inst) -{ -   struct brw_compile *p = &c->func; -   GLuint i; - -   /* loop over instruction src regs */ -   for (i = 0; i < 3; i++) { -      const struct prog_src_register *src = &inst->SrcReg[i]; -      if (src->File == PROGRAM_STATE_VAR || -          src->File == PROGRAM_CONSTANT || -          src->File == PROGRAM_UNIFORM) { -	 c->current_const[i].index = src->Index; - -#if 0 -	 printf("  fetch const[%d] for arg %d into reg %d\n", -		src->Index, i, c->current_const[i].reg.nr); -#endif - -	 /* need to fetch the constant now */ -	 brw_oword_block_read(p, -			      c->current_const[i].reg, -			      brw_message_reg(1), -			      16 * src->Index, -			      SURF_INDEX_FRAG_CONST_BUFFER); -      } -   } -} - - -/** - * Convert Mesa dst register to brw register. - */ -static struct brw_reg get_dst_reg(struct brw_wm_compile *c,  -                                  const struct prog_instruction *inst, -                                  GLuint component) -{ -    const int nr = 1; -    return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, -	    0, 0); -} - - -static struct brw_reg -get_src_reg_const(struct brw_wm_compile *c, -                  const struct prog_instruction *inst, -                  GLuint srcRegIndex, GLuint component) -{ -   /* We should have already fetched the constant from the constant -    * buffer in fetch_constants().  Now we just have to return a -    * register description that extracts the needed component and -    * smears it across all eight vector components. 
-    */ -   const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; -   struct brw_reg const_reg; - -   assert(component < 4); -   assert(srcRegIndex < 3); -   assert(c->current_const[srcRegIndex].index != -1); -   const_reg = c->current_const[srcRegIndex].reg; - -   /* extract desired float from the const_reg, and smear */ -   const_reg = stride(const_reg, 0, 1, 0); -   const_reg.subnr = component * 4; - -   if (src->Negate & (1 << component)) -      const_reg = negate(const_reg); -   if (src->Abs) -      const_reg = brw_abs(const_reg); - -#if 0 -   printf("  form const[%d].%d for arg %d, reg %d\n", -          c->current_const[srcRegIndex].index, -          component, -          srcRegIndex, -          const_reg.nr); -#endif - -   return const_reg; -} - - -/** - * Convert Mesa src register to brw register. - */ -static struct brw_reg get_src_reg(struct brw_wm_compile *c,  -                                  const struct prog_instruction *inst, -                                  GLuint srcRegIndex, GLuint channel) -{ -    const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; -    const GLuint nr = 1; -    const GLuint component = GET_SWZ(src->Swizzle, channel); - -    /* Only one immediate value can be used per native opcode, and it -     * has be in the src1 slot, so not all Mesa instructions will get -     * to take advantage of immediate constants. 
-     */ -    if (brw_wm_arg_can_be_immediate(inst->Opcode, srcRegIndex)) { -       const struct gl_program_parameter_list *params; - -       params = c->fp->program.Base.Parameters; - -       /* Extended swizzle terms */ -       if (component == SWIZZLE_ZERO) { -	  return brw_imm_f(0.0F); -       } else if (component == SWIZZLE_ONE) { -	  if (src->Negate) -	     return brw_imm_f(-1.0F); -	  else -	     return brw_imm_f(1.0F); -       } - -       if (src->File == PROGRAM_CONSTANT) { -	  float f = params->ParameterValues[src->Index][component]; - -	  if (src->Abs) -	     f = fabs(f); -	  if (src->Negate) -	     f = -f; - -	  return brw_imm_f(f); -       } -    } - -    if (c->prog_data.nr_pull_params && -        (src->File == PROGRAM_STATE_VAR || -         src->File == PROGRAM_CONSTANT || -         src->File == PROGRAM_UNIFORM)) { -       return get_src_reg_const(c, inst, srcRegIndex, component); -    } -    else { -       /* other type of source register */ -       return get_reg(c, src->File, src->Index, component, nr,  -                      src->Negate, src->Abs); -    } -} - -static void emit_arl(struct brw_wm_compile *c, -                     const struct prog_instruction *inst) -{ -    struct brw_compile *p = &c->func; -    struct brw_reg src0, addr_reg; -    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 
1 : 0); -    addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE,  -                           BRW_ARF_ADDRESS, 0); -    src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */ -    brw_MOV(p, addr_reg, src0); -    brw_set_saturate(p, 0); -} - -static INLINE struct brw_reg high_words( struct brw_reg reg ) -{ -    return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), -		   0, 8, 2 ); -} - -static INLINE struct brw_reg low_words( struct brw_reg reg ) -{ -    return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); -} - -static INLINE struct brw_reg even_bytes( struct brw_reg reg ) -{ -    return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 ); -} - -static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) -{ -    return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ), -		   0, 16, 2 ); -} - -/** - * Resolve subroutine calls after code emit is done. - */ -static void post_wm_emit( struct brw_wm_compile *c ) -{ -    brw_resolve_cals(&c->func); -} - -static void -get_argument_regs(struct brw_wm_compile *c, -		  const struct prog_instruction *inst, -		  int index, -		  struct brw_reg *dst, -		  struct brw_reg *regs, -		  int mask) -{ -    struct brw_compile *p = &c->func; -    int i, j; - -    for (i = 0; i < 4; i++) { -	if (mask & (1 << i)) { -	    regs[i] = get_src_reg(c, inst, index, i); - -	    /* Unalias destination registers from our sources. 
*/ -	    if (regs[i].file == BRW_GENERAL_REGISTER_FILE) { -	       for (j = 0; j < 4; j++) { -		   if (memcmp(&regs[i], &dst[j], sizeof(regs[0])) == 0) { -		       struct brw_reg tmp = alloc_tmp(c); -		       brw_MOV(p, tmp, regs[i]); -		       regs[i] = tmp; -		       break; -		   } -	       } -	    } -	} -    } -} - -static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) -{ -   struct intel_context *intel = &brw->intel; -#define MAX_IF_DEPTH 32 -#define MAX_LOOP_DEPTH 32 -    struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; -    int if_depth_in_loop[MAX_LOOP_DEPTH]; -    GLuint i, if_depth = 0, loop_depth = 0; -    struct brw_compile *p = &c->func; -    struct brw_indirect stack_index = brw_indirect(0, 0); - -    c->out_of_regs = GL_FALSE; - -    if_depth_in_loop[loop_depth] = 0; - -    prealloc_reg(c); -    brw_set_compression_control(p, BRW_COMPRESSION_NONE); -    brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); - -    if (intel->gen >= 6) -	brw_set_acc_write_control(p, 1); - -    for (i = 0; i < c->nr_fp_insns; i++) { -        const struct prog_instruction *inst = &c->prog_instructions[i]; -	int dst_flags; -	struct brw_reg args[3][4], dst[4]; -	int j; -	int mark = mark_tmps( c ); - -        c->cur_inst = i; - -#if 0 -        printf("Inst %d: ", i); -        _mesa_print_instruction(inst); -#endif - -        /* fetch any constants that this instruction needs */ -        if (c->prog_data.nr_pull_params) -           fetch_constants(c, inst); - -	if (inst->Opcode != OPCODE_ARL) { -	   for (j = 0; j < 4; j++) { -	      if (inst->DstReg.WriteMask & (1 << j)) -		 dst[j] = get_dst_reg(c, inst, j); -	      else -		 dst[j] = brw_null_reg(); -	   } -	} -	for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++) -	    get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW); - -	dst_flags = inst->DstReg.WriteMask; -	if (inst->SaturateMode == SATURATE_ZERO_ONE) -	    dst_flags |= SATURATE; - -	if (inst->CondUpdate) -	 
   brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); -	else -	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); - -	switch (inst->Opcode) { -	    case WM_PIXELXY: -		emit_pixel_xy(c, dst, dst_flags); -		break; -	    case WM_DELTAXY:  -		emit_delta_xy(p, dst, dst_flags, args[0]); -		break; -	    case WM_PIXELW: -		emit_pixel_w(c, dst, dst_flags, args[0], args[1]); -		break;	 -	    case WM_LINTERP: -		emit_linterp(p, dst, dst_flags, args[0], args[1]); -		break; -	    case WM_PINTERP: -		emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); -		break; -	    case WM_CINTERP: -		emit_cinterp(p, dst, dst_flags, args[0]); -		break; -	    case WM_WPOSXY: -		emit_wpos_xy(c, dst, dst_flags, args[0]); -		break; -	    case WM_FB_WRITE: -		emit_fb_write(c, args[0], args[1], args[2], -			      INST_AUX_GET_TARGET(inst->Aux), -			      inst->Aux & INST_AUX_EOT); -		break; -	    case WM_FRONTFACING: -		emit_frontfacing(p, dst, dst_flags); -		break; -	    case OPCODE_ADD: -		emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); -		break; -	    case OPCODE_ARL: -		emit_arl(c, inst); -		break; -	    case OPCODE_FRC: -		emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); -		break; -	    case OPCODE_FLR: -		emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); -		break; -	    case OPCODE_LRP: -		emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); -		break; -	    case OPCODE_TRUNC: -		emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); -		break; -	    case OPCODE_MOV: -	    case OPCODE_SWZ: -		emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); -		break; -	    case OPCODE_DP2: -		emit_dp2(p, dst, dst_flags, args[0], args[1]); -		break; -	    case OPCODE_DP3: -		emit_dp3(p, dst, dst_flags, args[0], args[1]); -		break; -	    case OPCODE_DP4: -		emit_dp4(p, dst, dst_flags, args[0], args[1]); -		break; -	    case OPCODE_XPD: -		emit_xpd(p, dst, dst_flags, args[0], args[1]); -		break; -	    case OPCODE_DPH: -		emit_dph(p, dst, dst_flags, args[0], args[1]); -		break; -	    case OPCODE_RCP: -		
emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); -		break; -	    case OPCODE_RSQ: -		emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); -		break; -	    case OPCODE_SIN: -		emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); -		break; -	    case OPCODE_COS: -		emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); -		break; -	    case OPCODE_EX2: -		emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); -		break; -	    case OPCODE_LG2: -		emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); -		break; -	    case OPCODE_CMP: -		emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); -		break; -	    case OPCODE_MIN:	 -		emit_min(p, dst, dst_flags, args[0], args[1]); -		break; -	    case OPCODE_MAX:	 -		emit_max(p, dst, dst_flags, args[0], args[1]); -		break; -	    case OPCODE_DDX: -	    case OPCODE_DDY: -		emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX), -			  args[0]); -                break; -	    case OPCODE_SLT: -		emit_sop(p, dst, dst_flags, -			 BRW_CONDITIONAL_L, args[0], args[1]); -		break; -	    case OPCODE_SLE: -		emit_sop(p, dst, dst_flags, -			 BRW_CONDITIONAL_LE, args[0], args[1]); -		break; -	    case OPCODE_SGT: -		emit_sop(p, dst, dst_flags, -			 BRW_CONDITIONAL_G, args[0], args[1]); -		break; -	    case OPCODE_SGE: -		emit_sop(p, dst, dst_flags, -			 BRW_CONDITIONAL_GE, args[0], args[1]); -		break; -	    case OPCODE_SEQ: -		emit_sop(p, dst, dst_flags, -			 BRW_CONDITIONAL_EQ, args[0], args[1]); -		break; -	    case OPCODE_SNE: -		emit_sop(p, dst, dst_flags, -			 BRW_CONDITIONAL_NEQ, args[0], args[1]); -		break; -	    case OPCODE_SSG: -		emit_sign(p, dst, dst_flags, args[0]); -		break; -	    case OPCODE_MUL: -		emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); -		break; -	    case OPCODE_POW: -		emit_math2(c, BRW_MATH_FUNCTION_POW, -			   dst, dst_flags, args[0], args[1]); -		break; -	    case OPCODE_MAD: -		emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); -		break; -	
    case OPCODE_TEX: -		emit_tex(c, dst, dst_flags, args[0], -			 get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, -				 0, 1, 0, 0), -			 inst->TexSrcTarget, -			 inst->TexSrcUnit, -			 (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0); -		break; -	    case OPCODE_TXB: -		emit_txb(c, dst, dst_flags, args[0], -			 get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, -				 0, 1, 0, 0), -			 inst->TexSrcTarget, -			 c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]); -		break; -	    case OPCODE_KIL_NV: -		emit_kil_nv(c); -		break; -	    case OPCODE_IF: -		assert(if_depth < MAX_IF_DEPTH); -		if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); -		if_depth_in_loop[loop_depth]++; -		break; -	    case OPCODE_ELSE: -		assert(if_depth > 0); -		if_inst[if_depth-1]  = brw_ELSE(p, if_inst[if_depth-1]); -		break; -	    case OPCODE_ENDIF: -		assert(if_depth > 0); -		brw_ENDIF(p, if_inst[--if_depth]); -		if_depth_in_loop[loop_depth]--; -		break; -	    case OPCODE_BGNSUB: -		brw_save_label(p, inst->Comment, p->nr_insn); -		break; -	    case OPCODE_ENDSUB: -		/* no-op */ -		break; -	    case OPCODE_CAL:  -		brw_push_insn_state(p); -		brw_set_mask_control(p, BRW_MASK_DISABLE); -                brw_set_access_mode(p, BRW_ALIGN_1); -                brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); -                brw_set_access_mode(p, BRW_ALIGN_16); -                brw_ADD(p, get_addr_reg(stack_index), -                         get_addr_reg(stack_index), brw_imm_d(4)); -		brw_save_call(&c->func, inst->Comment, p->nr_insn); -                brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); -                brw_pop_insn_state(p); -		break; - -	    case OPCODE_RET: -		brw_push_insn_state(p); -		brw_set_mask_control(p, BRW_MASK_DISABLE); -                brw_ADD(p, get_addr_reg(stack_index), -                        get_addr_reg(stack_index), brw_imm_d(-4)); -                brw_set_access_mode(p, BRW_ALIGN_1); -                brw_MOV(p, brw_ip_reg(), 
deref_1ud(stack_index, 0)); -                brw_set_access_mode(p, BRW_ALIGN_16); -		brw_pop_insn_state(p); - -		break; -	    case OPCODE_BGNLOOP: -                /* XXX may need to invalidate the current_constant regs */ -		loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); -		if_depth_in_loop[loop_depth] = 0; -		break; -	    case OPCODE_BRK: -		brw_BREAK(p, if_depth_in_loop[loop_depth]); -		brw_set_predicate_control(p, BRW_PREDICATE_NONE); -		break; -	    case OPCODE_CONT: -		brw_CONT(p, if_depth_in_loop[loop_depth]); -		brw_set_predicate_control(p, BRW_PREDICATE_NONE); -		break; -	    case OPCODE_ENDLOOP:  -               { -                  struct brw_instruction *inst0, *inst1; -                  GLuint br = 1; - -                  if (intel->gen == 5) -                     br = 2; - -		  assert(loop_depth > 0); -                  loop_depth--; -                  inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); -                  /* patch all the BREAK/CONT instructions from last BGNLOOP */ -                  while (inst0 > loop_inst[loop_depth]) { -                     inst0--; -                     if (inst0->header.opcode == BRW_OPCODE_BREAK && -			 inst0->bits3.if_else.jump_count == 0) { -			inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); -                     } -                     else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && -			      inst0->bits3.if_else.jump_count == 0) { -                        inst0->bits3.if_else.jump_count = br * (inst1 - inst0); -                     } -                  } -               } -               break; -	    default: -		printf("unsupported opcode %d (%s) in fragment shader\n", -		       inst->Opcode, inst->Opcode < MAX_OPCODE ? -		       _mesa_opcode_string(inst->Opcode) : "unknown"); -	} - -	/* Release temporaries containing any unaliased source regs. 
*/ -	release_tmps( c, mark ); - -	if (inst->CondUpdate) -	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); -	else -	    brw_set_predicate_control(p, BRW_PREDICATE_NONE); -    } -    post_wm_emit(c); - -    if (unlikely(INTEL_DEBUG & DEBUG_WM)) { -      printf("wm-native:\n"); -      for (i = 0; i < p->nr_insn; i++) -	 brw_disasm(stdout, &p->store[i], intel->gen); -      printf("\n"); -    } -} - -/** - * Do GPU code generation for shaders that use GLSL features such as - * flow control.  Other shaders will be compiled with the  - */ -void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) -{ -    if (unlikely(INTEL_DEBUG & DEBUG_WM)) { -        printf("brw_wm_glsl_emit:\n"); -    } - -    /* initial instruction translation/simplification */ -    brw_wm_pass_fp(c); - -    /* actual code generation */ -    brw_wm_emit_glsl(brw, c); - -    if (unlikely(INTEL_DEBUG & DEBUG_WM)) { -        brw_wm_print_program(c, "brw_wm_glsl_emit done"); -    } - -    c->prog_data.total_grf = num_grf_used(c); -    c->prog_data.total_scratch = 0; -} diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 76de7b7b6f..82835470a3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -132,7 +132,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)     /* _NEW_COLOR */     key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; -   key->is_glsl = bfp->isGLSL;     /* If using the fragment shader backend, the program is always      * 8-wide. 
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 8968caef51..a039d7df29 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -128,8 +128,8 @@ upload_wm_state(struct brw_context *brw)     dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT; -   /* BRW_NEW_FRAGMENT_PROGRAM */ -   if (fp->isGLSL) +   /* CACHE_NEW_WM_PROG */ +   if (brw->wm.prog_data->dispatch_width == 8)        dw5 |= GEN6_WM_8_DISPATCH_ENABLE;     else        dw5 |= GEN6_WM_16_DISPATCH_ENABLE; | 
