diff options
| author | Keith Whitwell <keithw@vmware.com> | 2009-10-23 17:01:32 +0100 | 
|---|---|---|
| committer | Keith Whitwell <keithw@vmware.com> | 2009-10-23 17:03:45 +0100 | 
| commit | 57a920cb1a0b6051068e730747b3fb475de88aca (patch) | |
| tree | 805821e52d27d0803ac0cd44c384a4d9a36aa5a8 | |
| parent | 2f5f7c07732577f60666e3cee69c75c9b035c145 (diff) | |
i965g: wip
40 files changed, 891 insertions, 2583 deletions
| diff --git a/src/gallium/drivers/i965/brw_bo.c b/src/gallium/drivers/i965/brw_bo.c new file mode 100644 index 0000000000..e7a4dac666 --- /dev/null +++ b/src/gallium/drivers/i965/brw_bo.c @@ -0,0 +1,12 @@ + + +void brw_buffer_subdata() +{ +      if (intel->intelScreen->kernel_exec_fencing) { +	 drm_intel_gem_bo_map_gtt(bo); +	 memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); +	 drm_intel_gem_bo_unmap_gtt(bo); +      } else { +	 dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); +      } +} diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index 1088a7a607..9ab5638137 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -62,84 +62,21 @@ const struct brw_tracked_state brw_cc_vp = {  };  struct brw_cc_unit_key { -   GLboolean stencil, stencil_two_side, color_blend, alpha_enabled; - -   GLenum stencil_func[2], stencil_fail_op[2]; -   GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2]; -   GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2]; -   GLenum logic_op; - -   GLenum blend_eq_rgb, blend_eq_a; -   GLenum blend_src_rgb, blend_src_a; -   GLenum blend_dst_rgb, blend_dst_a; - -   GLenum alpha_func; -   GLclampf alpha_ref; - -   GLboolean dither; - -   GLboolean depth_test, depth_write; -   GLenum depth_func; +   struct pipe_depth_stencil_alpha_state dsa; +   struct pipe_blend_state blend; /* no color mask */  };  static void  cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)  { -   GLcontext *ctx = &brw->intel.ctx; -   const unsigned back = ctx->Stencil._BackFace; -     memset(key, 0, sizeof(*key)); +    +   key->dsa = brw->curr.dsa.base; +   key->blend = brw->curr.blend.base; -   key->stencil = ctx->Stencil._Enabled; -   key->stencil_two_side = ctx->Stencil._TestTwoSide; - -   if (key->stencil) { -      key->stencil_func[0] = ctx->Stencil.Function[0]; -      key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0]; -      key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0]; -      key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0]; -      key->stencil_ref[0] = ctx->Stencil.Ref[0]; -      key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0]; -      key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0]; -   } -   if (key->stencil_two_side) { -      key->stencil_func[1] = ctx->Stencil.Function[back]; -      key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back]; -      key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back]; -      key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back]; -      key->stencil_ref[1] = ctx->Stencil.Ref[back]; -      key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back]; -      key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back]; -   } - -   if (ctx->Color._LogicOpEnabled) -      key->logic_op = ctx->Color.LogicOp; -   else -      key->logic_op = GL_COPY; - -   key->color_blend = ctx->Color.BlendEnabled; -   if (key->color_blend) { -      key->blend_eq_rgb = ctx->Color.BlendEquationRGB; -      key->blend_eq_a = ctx->Color.BlendEquationA; -      key->blend_src_rgb = ctx->Color.BlendSrcRGB; -      key->blend_dst_rgb = ctx->Color.BlendDstRGB; -      key->blend_src_a = ctx->Color.BlendSrcA; -      key->blend_dst_a = ctx->Color.BlendDstA; -   } - -   key->alpha_enabled = ctx->Color.AlphaEnabled; -   if (key->alpha_enabled) { -      key->alpha_func = ctx->Color.AlphaFunc; -      key->alpha_ref = ctx->Color.AlphaRef; -   } - -   key->dither = ctx->Color.DitherFlag; - -   key->depth_test = ctx->Depth.Test; -   if (key->depth_test) { -      key->depth_func = ctx->Depth.Func; -      key->depth_write = ctx->Depth.Mask; -   } +   /* Clear non-respected values: +    */ +   key->blend.colormask = 0xf;  }  /** @@ -153,103 +90,16 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)     memset(&cc, 0, sizeof(cc)); -   /* _NEW_STENCIL */ -   if (key->stencil) { -      cc.cc0.stencil_enable = 1; -      cc.cc0.stencil_func = -	 intel_translate_compare_func(key->stencil_func[0]); -      cc.cc0.stencil_fail_op = -	 intel_translate_stencil_op(key->stencil_fail_op[0]); -      cc.cc0.stencil_pass_depth_fail_op = -	 intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); -      cc.cc0.stencil_pass_depth_pass_op = -	 intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); -      cc.cc1.stencil_ref = key->stencil_ref[0]; -      cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; -      cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; - -      if (key->stencil_two_side) { -	 cc.cc0.bf_stencil_enable = 1; -	 cc.cc0.bf_stencil_func = -	    intel_translate_compare_func(key->stencil_func[1]); -	 cc.cc0.bf_stencil_fail_op = -	    intel_translate_stencil_op(key->stencil_fail_op[1]); -	 cc.cc0.bf_stencil_pass_depth_fail_op = -	    intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); -	 cc.cc0.bf_stencil_pass_depth_pass_op = -	    intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); -	 cc.cc1.bf_stencil_ref = key->stencil_ref[1]; -	 cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; -	 cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; -      } - -      /* Not really sure about this: -       */ -      if (key->stencil_write_mask[0] || -	  (key->stencil_two_side && key->stencil_write_mask[1])) -	 cc.cc0.stencil_write_enable = 1; -   } - -   /* _NEW_COLOR */ -   if (key->logic_op != GL_COPY) { -      cc.cc2.logicop_enable = 1; -      cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); -   } else if (key->color_blend) { -      GLenum eqRGB = key->blend_eq_rgb; -      GLenum eqA = key->blend_eq_a; -      GLenum srcRGB = key->blend_src_rgb; -      GLenum dstRGB = key->blend_dst_rgb; -      GLenum srcA = key->blend_src_a; -      GLenum dstA = key->blend_dst_a; - -      if (eqRGB == GL_MIN || eqRGB == GL_MAX) { -	 srcRGB = dstRGB = GL_ONE; -      } - -      if (eqA == GL_MIN || eqA == GL_MAX) { -	 srcA = dstA = GL_ONE; -      } - -      cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); -      cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); -      cc.cc6.blend_function = brw_translate_blend_equation(eqRGB); - -      cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); -      cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); -      cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA); - -      cc.cc3.blend_enable = 1; -      cc.cc3.ia_blend_enable = (srcA != srcRGB || -				dstA != dstRGB || -				eqA != eqRGB); -   } - -   if (key->alpha_enabled) { -      cc.cc3.alpha_test = 1; -      cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); -      cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; - -      UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); -   } - -   if (key->dither) { -      cc.cc5.dither_enable = 1; -      cc.cc6.y_dither_offset = 0; -      cc.cc6.x_dither_offset = 0; -   } - -   /* _NEW_DEPTH */ -   if (key->depth_test) { -      cc.cc2.depth_test = 1; -      cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); -      cc.cc2.depth_write_enable = key->depth_write; -   } +   cc.cc0 = brw->dsa.cc0; +   cc.cc1 = brw->dsa.cc1; +   cc.cc2 = brw->dsa.cc2; +   cc.cc3 = brw->dsa.cc3 | brw->blend.cc3;     /* CACHE_NEW_CC_VP */     cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */ -   if (INTEL_DEBUG & DEBUG_STATS) -      cc.cc5.statistics_enable = 1; +   cc.cc5 = brw->blend.cc5 | brw->debug.cc5; +     bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT,  			 key, sizeof(*key), @@ -286,7 +136,7 @@ static void prepare_cc_unit( struct brw_context *brw )  const struct brw_tracked_state brw_cc_unit = {     .dirty = { -      .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH, +      .mesa = PIPE_NEW_DEPTH_STENCIL_ALPHA | PIPE_NEW_BLEND,        .brw = 0,        .cache = CACHE_NEW_CC_VP     }, diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index 20a927cf38..df1b3718d0 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -29,9 +29,9 @@    *   Keith Whitwell <keith@tungstengraphics.com>    */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" +#include "pipe/p_state.h" + +#include "util/u_math.h"  #include "intel_batchbuffer.h" @@ -83,7 +83,7 @@ static void compile_clip_prog( struct brw_context *brw,  	 delta += ATTR_SIZE;        } -   c.nr_attrs = brw_count_bits(c.key.attrs); +   c.nr_attrs = util_count_bits(c.key.attrs);     if (BRW_IS_IGDNG(brw))         c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */ @@ -104,16 +104,16 @@ static void compile_clip_prog( struct brw_context *brw,      * do all three:      */     switch (key->primitive) { -   case GL_TRIANGLES:  +   case PIPE_PRIM_TRIANGLES:         if (key->do_unfilled)  	 brw_emit_unfilled_clip( &c );        else  	 brw_emit_tri_clip( &c );        break; -   case GL_LINES: +   case PIPE_PRIM_LINES:        brw_emit_line_clip( &c );        break; -   case GL_POINTS: +   case PIPE_PRIM_POINTS:        brw_emit_point_clip( &c );        break;     default: @@ -143,7 +143,6 @@ static void compile_clip_prog( struct brw_context *brw,   */  static void upload_clip_prog(struct brw_context *brw)  { -   GLcontext *ctx = &brw->intel.ctx;     struct brw_clip_prog_key key;     memset(&key, 0, sizeof(key)); @@ -151,101 +150,51 @@ static void upload_clip_prog(struct brw_context *brw)     /* Populate the key:      */     /* BRW_NEW_REDUCED_PRIMITIVE */ -   key.primitive = brw->intel.reduced_primitive; +   key.primitive = brw->reduced_primitive;     /* CACHE_NEW_VS_PROG */     key.attrs = brw->vs.prog_data->outputs_written; -   /* _NEW_LIGHT */ -   key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); -   /* _NEW_TRANSFORM */ -   key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); +   /* PIPE_NEW_RAST */ +   key.do_flat_shading = brw->rast.base.flatshade; +   /* PIPE_NEW_UCP */ +   key.nr_userclip = brw->nr_ucp;     if (BRW_IS_IGDNG(brw))         key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;     else         key.clip_mode = BRW_CLIPMODE_NORMAL; -   /* _NEW_POLYGON */ -   if (key.primitive == GL_TRIANGLES) { -      if (ctx->Polygon.CullFlag && -	  ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) +   /* PIPE_NEW_RAST */ +   if (key.primitive == PIPE_PRIM_TRIANGLES) { +      if (brw->rast->cull_mode = PIPE_WINDING_BOTH)  	 key.clip_mode = BRW_CLIPMODE_REJECT_ALL;        else { -	 GLuint fill_front = CLIP_CULL; -	 GLuint fill_back = CLIP_CULL; -	 GLuint offset_front = 0; -	 GLuint offset_back = 0; +	 key.fill_ccw = CLIP_CULL; +	 key.fill_cw = CLIP_CULL; -	 if (!ctx->Polygon.CullFlag || -	     ctx->Polygon.CullFaceMode != GL_FRONT) { -	    switch (ctx->Polygon.FrontMode) { -	    case GL_FILL:  -	       fill_front = CLIP_FILL;  -	       offset_front = 0; -	       break; -	    case GL_LINE: -	       fill_front = CLIP_LINE; -	       offset_front = ctx->Polygon.OffsetLine; -	       break; -	    case GL_POINT: -	       fill_front = CLIP_POINT; -	       offset_front = ctx->Polygon.OffsetPoint; -	       break; -	    } +	 if (!(brw->rast->cull_mode & PIPE_WINDING_CCW)) { +	    key.fill_ccw = translate_fill(brw->rast.fill_ccw);  	 } -	 if (!ctx->Polygon.CullFlag || -	     ctx->Polygon.CullFaceMode != GL_BACK) { -	    switch (ctx->Polygon.BackMode) { -	    case GL_FILL:  -	       fill_back = CLIP_FILL;  -	       offset_back = 0; -	       break; -	    case GL_LINE: -	       fill_back = CLIP_LINE; -	       offset_back = ctx->Polygon.OffsetLine; -	       break; -	    case GL_POINT: -	       fill_back = CLIP_POINT; -	       offset_back = ctx->Polygon.OffsetPoint; -	       break; -	    } +	 if (!(brw->rast->cull_mode & PIPE_WINDING_CW)) { +	    key.fill_cw = translate_fill(brw->rast.fill_cw);  	 } -	 if (ctx->Polygon.BackMode != GL_FILL || -	     ctx->Polygon.FrontMode != GL_FILL) { +	 if (key.fill_cw != CLIP_FILL || +	     key.fill_ccw != CLIP_FILL) {  	    key.do_unfilled = 1; - -	    /* Most cases the fixed function units will handle.  Cases where -	     * one or more polygon faces are unfilled will require help: -	     */  	    key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; +	 } + +	 key.offset_ccw = brw->rast.offset_ccw; +	 key.offset_cw = brw->rast.offset_cw; -	    if (offset_back || offset_front) { -	       /* _NEW_POLYGON, _NEW_BUFFERS */ -	       key.offset_units = ctx->Polygon.OffsetUnits * brw->intel.polygon_offset_scale; -	       key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD; -	    } +	 if (brw->rast.light_twoside && +	     key.fill_cw != CLIP_CULL)  +	    key.copy_bfc_cw = 1; -	    switch (ctx->Polygon.FrontFace) { -	    case GL_CCW: -	       key.fill_ccw = fill_front; -	       key.fill_cw = fill_back; -	       key.offset_ccw = offset_front; -	       key.offset_cw = offset_back; -	       if (ctx->Light.Model.TwoSide && -		   key.fill_cw != CLIP_CULL)  -		  key.copy_bfc_cw = 1; -	       break; -	    case GL_CW: -	       key.fill_cw = fill_front; -	       key.fill_ccw = fill_back; -	       key.offset_cw = offset_front; -	       key.offset_ccw = offset_back; -	       if (ctx->Light.Model.TwoSide && -		   key.fill_ccw != CLIP_CULL)  -		  key.copy_bfc_ccw = 1; -	       break; -	    } +	 if (brw->rast.light_twoside && +	     key.fill_ccw != CLIP_CULL)  +	    key.copy_bfc_ccw = 1;  	 }        }     } @@ -262,10 +211,8 @@ static void upload_clip_prog(struct brw_context *brw)  const struct brw_tracked_state brw_clip_prog = {     .dirty = { -      .mesa  = (_NEW_LIGHT |  -		_NEW_TRANSFORM | -		_NEW_POLYGON |  -		_NEW_BUFFERS), +      .mesa  = (PIPE_NEW_RAST |  +		PIPE_NEW_UCP),        .brw   = (BRW_NEW_REDUCED_PRIMITIVE),        .cache = CACHE_NEW_VS_PROG     }, diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index 957df441ab..d80ec819b9 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -43,6 +43,7 @@   */  struct brw_clip_prog_key {     GLuint attrs:32;		 +     GLuint primitive:4;     GLuint nr_userclip:3;     GLuint do_flat_shading:1; @@ -51,12 +52,10 @@ struct brw_clip_prog_key {     GLuint fill_ccw:2;		/* includes cull information */     GLuint offset_cw:1;     GLuint offset_ccw:1; -   GLuint pad0:17; -     GLuint copy_bfc_cw:1;     GLuint copy_bfc_ccw:1;     GLuint clip_mode:3; -   GLuint pad1:27; +   GLuint pad1:12;     GLfloat offset_factor;     GLfloat offset_units; diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c index 048ca620fa..6b4da25644 100644 --- a/src/gallium/drivers/i965/brw_clip_line.c +++ b/src/gallium/drivers/i965/brw_clip_line.c @@ -29,13 +29,6 @@    *   Keith Whitwell <keith@tungstengraphics.com>    */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" -  #include "brw_defines.h"  #include "brw_context.h"  #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c index 8458f61c5a..b2cf7b2011 100644 --- a/src/gallium/drivers/i965/brw_clip_point.c +++ b/src/gallium/drivers/i965/brw_clip_point.c @@ -29,13 +29,6 @@    *   Keith Whitwell <keith@tungstengraphics.com>    */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" -  #include "brw_defines.h"  #include "brw_context.h"  #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 234b3744bf..72e27205e2 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -32,7 +32,6 @@  #include "brw_context.h"  #include "brw_state.h"  #include "brw_defines.h" -#include "main/macros.h"  struct brw_clip_unit_key {     unsigned int total_grf; @@ -66,8 +65,8 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)     key->nr_urb_entries = brw->urb.nr_clip_entries;     key->urb_size = brw->urb.vsize; -   /* _NEW_TRANSOFORM */ -   key->depth_clamp = ctx->Transform.DepthClamp; +   /*  */ +   key->depth_clamp = 0; // XXX: add this to gallium: ctx->Transform.DepthClamp;  }  static dri_bo * @@ -175,7 +174,7 @@ static void upload_clip_unit( struct brw_context *brw )  const struct brw_tracked_state brw_clip_unit = {     .dirty = { -      .mesa  = _NEW_TRANSFORM, +      .mesa  = 0,        .brw   = (BRW_NEW_CURBE_OFFSETS |  		BRW_NEW_URB_FENCE),        .cache = CACHE_NEW_CLIP_PROG diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c index 0efd77225e..d8feca6a87 100644 --- a/src/gallium/drivers/i965/brw_clip_tri.c +++ b/src/gallium/drivers/i965/brw_clip_tri.c @@ -29,13 +29,6 @@    *   Keith Whitwell <keith@tungstengraphics.com>    */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" -  #include "brw_defines.h"  #include "brw_context.h"  #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c index ad1bfa435f..4baff55806 100644 --- a/src/gallium/drivers/i965/brw_clip_unfilled.c +++ b/src/gallium/drivers/i965/brw_clip_unfilled.c @@ -29,11 +29,6 @@    *   Keith Whitwell <keith@tungstengraphics.com>    */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" -  #include "intel_batchbuffer.h"  #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 5a73abdfee..7a6c46ce07 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -30,13 +30,6 @@    */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" -  #include "brw_defines.h"  #include "brw_context.h"  #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index c300c33adc..bf0ec89e13 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -52,122 +52,77 @@  #include "utils.h" -/*************************************** - * Mesa's Driver Functions - ***************************************/ - -static void brwUseProgram(GLcontext *ctx, GLuint program) -{ -   _mesa_use_program(ctx, program); -} - -static void brwInitProgFuncs( struct dd_function_table *functions ) -{ -   functions->UseProgram = brwUseProgram; -} -static void brwInitDriverFunctions( struct dd_function_table *functions ) -{ -   intelInitDriverFunctions( functions ); - -   brwInitFragProgFuncs( functions ); -   brwInitProgFuncs( functions ); -   brw_init_queryobj_functions(functions); - -   functions->Viewport = intel_viewport; -}  GLboolean brwCreateContext( const __GLcontextModes *mesaVis,  			    __DRIcontextPrivate *driContextPriv,  			    void *sharedContextPrivate)  { -   struct dd_function_table functions;     struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); -   struct intel_context *intel = &brw->intel; -   GLcontext *ctx = &intel->ctx;     if (!brw) { -      _mesa_printf("%s: failed to alloc context\n", __FUNCTION__); +      debug_printf("%s: failed to alloc context\n", __FUNCTION__);        return GL_FALSE;     } -   brwInitVtbl( brw ); -   brwInitDriverFunctions( &functions ); - -   if (!intelInitContext( intel, mesaVis, driContextPriv, -			  sharedContextPrivate, &functions )) { -      _mesa_printf("%s: failed to init intel context\n", __FUNCTION__); -      FREE(brw); -      return GL_FALSE; -   } - -   /* Initialize swrast, tnl driver tables: */ -   intelInitSpanFuncs(ctx); - -   TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; - -   ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; -   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ -   ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, -                                     ctx->Const.MaxTextureImageUnits); -   ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ - -   /* Mesa limits textures to 4kx4k; it would be nice to fix that someday -    */ -   ctx->Const.MaxTextureLevels = 13; -   ctx->Const.Max3DTextureLevels = 9; -   ctx->Const.MaxCubeTextureLevels = 12; -   ctx->Const.MaxTextureRectSize = (1<<12); -    -   ctx->Const.MaxTextureMaxAnisotropy = 16.0; - -   /* if conformance mode is set, swrast can handle any size AA point */ -   ctx->Const.MaxPointSizeAA = 255.0; -     /* We want the GLSL compiler to emit code that uses condition codes */     ctx->Shader.EmitCondCodes = GL_TRUE;     ctx->Shader.EmitNVTempInitialization = GL_TRUE; -   ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); -   ctx->Const.VertexProgram.MaxAluInstructions = 0; -   ctx->Const.VertexProgram.MaxTexInstructions = 0; -   ctx->Const.VertexProgram.MaxTexIndirections = 0; -   ctx->Const.VertexProgram.MaxNativeAluInstructions = 0; -   ctx->Const.VertexProgram.MaxNativeTexInstructions = 0; -   ctx->Const.VertexProgram.MaxNativeTexIndirections = 0; -   ctx->Const.VertexProgram.MaxNativeAttribs = 16; -   ctx->Const.VertexProgram.MaxNativeTemps = 256; -   ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; -   ctx->Const.VertexProgram.MaxNativeParameters = 1024; -   ctx->Const.VertexProgram.MaxEnvParams = -      MIN2(ctx->Const.VertexProgram.MaxNativeParameters, -	   ctx->Const.VertexProgram.MaxEnvParams); - -   ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024); -   ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024); -   ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024); -   ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024); -   ctx->Const.FragmentProgram.MaxNativeAttribs = 12; -   ctx->Const.FragmentProgram.MaxNativeTemps = 256; -   ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; -   ctx->Const.FragmentProgram.MaxNativeParameters = 1024; -   ctx->Const.FragmentProgram.MaxEnvParams = -      MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, -	   ctx->Const.FragmentProgram.MaxEnvParams); +   brw_init_query( brw );     brw_init_state( brw ); +   brw_draw_init( brw );     brw->state.dirty.mesa = ~0;     brw->state.dirty.brw = ~0;     brw->emit_state_always = 0; -   ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; -   ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; -     make_empty_list(&brw->query.active_head); -   brw_draw_init( brw );     return GL_TRUE;  } +/** + * called from intelDestroyContext() + */ +static void brw_destroy_context( struct intel_context *intel ) +{ +   struct brw_context *brw = brw_context(&intel->ctx); +   int i; + +   brw_destroy_state(brw); +   brw_draw_destroy( brw ); + +   _mesa_free(brw->wm.compile_data); + +   for (i = 0; i < brw->state.nr_color_regions; i++) +      intel_region_release(&brw->state.color_regions[i]); +   brw->state.nr_color_regions = 0; +   intel_region_release(&brw->state.depth_region); + +   dri_bo_unreference(brw->curbe.curbe_bo); +   dri_bo_unreference(brw->vs.prog_bo); +   dri_bo_unreference(brw->vs.state_bo); +   dri_bo_unreference(brw->vs.bind_bo); +   dri_bo_unreference(brw->gs.prog_bo); +   dri_bo_unreference(brw->gs.state_bo); +   dri_bo_unreference(brw->clip.prog_bo); +   dri_bo_unreference(brw->clip.state_bo); +   dri_bo_unreference(brw->clip.vp_bo); +   dri_bo_unreference(brw->sf.prog_bo); +   dri_bo_unreference(brw->sf.state_bo); +   dri_bo_unreference(brw->sf.vp_bo); +   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) +      dri_bo_unreference(brw->wm.sdc_bo[i]); +   dri_bo_unreference(brw->wm.bind_bo); +   for (i = 0; i < BRW_WM_MAX_SURF; i++) +      dri_bo_unreference(brw->wm.surf_bo[i]); +   dri_bo_unreference(brw->wm.sampler_bo); +   dri_bo_unreference(brw->wm.prog_bo); +   dri_bo_unreference(brw->wm.state_bo); +   dri_bo_unreference(brw->cc.prog_bo); +   dri_bo_unreference(brw->cc.state_bo); +   dri_bo_unreference(brw->cc.vp_bo); +} diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index fa3e32c7ff..009e28b227 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -115,7 +115,6 @@   * Handles blending and (presumably) depth and stencil testing.   */ -#define BRW_FALLBACK_TEXTURE		 0x1  #define BRW_MAX_CURBE                    (32*16)  struct brw_context; @@ -450,11 +449,9 @@ struct brw_query_object {   */  struct brw_context   { -   struct intel_context intel;  /**< base class, must be first field */     GLuint primitive;     GLboolean emit_state_always; -   GLboolean tmp_fallback;     GLboolean no_batch_wrap;     struct { @@ -692,7 +689,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,  /*======================================================================   * brw_queryobj.c   */ -void brw_init_queryobj_functions(struct dd_function_table *functions); +void brw_init_query(struct brw_context *brw);  void brw_prepare_query_begin(struct brw_context *brw);  void brw_emit_query_begin(struct brw_context *brw);  void brw_emit_query_end(struct brw_context *brw); @@ -730,7 +727,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst);   * macros used previously:   */  static INLINE struct brw_context * -brw_context( GLcontext *ctx ) +brw_context( struct pipe_context *ctx )  {     return (struct brw_context *)ctx;  } diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 4be6c77aa1..3e32c4983d 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -30,14 +30,6 @@    */ - -#include "main/glheader.h" -#include "main/context.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" -#include "shader/prog_statevars.h"  #include "intel_batchbuffer.h"  #include "intel_regions.h"  #include "brw_context.h" @@ -64,31 +56,17 @@ static void calculate_curbe_offsets( struct brw_context *brw )     GLuint nr_clip_regs = 0;     GLuint total_regs; -   /* _NEW_TRANSFORM */ -   if (ctx->Transform.ClipPlanesEnabled) { -      GLuint nr_planes = 6 + brw_count_bits(ctx->Transform.ClipPlanesEnabled); +   /* PIPE_NEW_UCP */ +   if (brw->nr_ucp) { +      GLuint nr_planes = 6 + brw->nr_ucp;        nr_clip_regs = (nr_planes * 4 + 15) / 16;     }     total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; -   /* This can happen - what to do?  Probably rather than falling -    * back, the best thing to do is emit programs which code the -    * constants as immediate values.  Could do this either as a static -    * cap on WM and VS, or adaptively. -    * -    * Unfortunately, this is currently dependent on the results of the -    * program generation process (in the case of wm), so this would -    * introduce the need to re-generate programs in the event of a -    * curbe allocation failure. -    */ -   /* Max size is 32 - just large enough to -    * hold the 128 parameters allowed by -    * the fragment and vertex program -    * api's.  It's not clear what happens -    * when both VP and FP want to use 128 -    * parameters, though.  +   /* When this is > 32, want to use a true constant buffer to hold +    * the extra constants.      */     assert(total_regs <= 32); @@ -113,8 +91,8 @@ static void calculate_curbe_offsets( struct brw_context *brw )        brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;        brw->curbe.total_size = reg; -      if (0) -	 _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", +      if (BRW_DEBUG & DEBUG_CURBE) +	 debug_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",  		      brw->curbe.wm_start,  		      brw->curbe.wm_size,  		      brw->curbe.clip_start, @@ -129,7 +107,7 @@ static void calculate_curbe_offsets( struct brw_context *brw )  const struct brw_tracked_state brw_curbe_offsets = {     .dirty = { -      .mesa = _NEW_TRANSFORM, +      .mesa = PIPE_NEW_UCP,        .brw  = BRW_NEW_VERTEX_PROGRAM,        .cache = CACHE_NEW_WM_PROG     }, @@ -204,11 +182,13 @@ static void prepare_constant_buffer(struct brw_context *brw)     if (brw->curbe.wm_size) {        GLuint offset = brw->curbe.wm_start * 16; -      _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);  +      /* map fs constant buffer */        /* copy float constants */        for (i = 0; i < brw->wm.prog_data->nr_params; i++)   	 buf[offset + i] = *brw->wm.prog_data->param[i]; + +      /* unmap fs constant buffer */     } @@ -228,18 +208,15 @@ static void prepare_constant_buffer(struct brw_context *brw)  	 buf[offset + i * 4 + 3] = fixed_plane[i][3];        } -      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to -       * clip-space: +      /* Clip planes:         */ -      assert(MAX_CLIP_PLANES == 6); -      for (j = 0; j < MAX_CLIP_PLANES; j++) { -	 if (ctx->Transform.ClipPlanesEnabled & (1<<j)) { -	    buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0]; -	    buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1]; -	    buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2]; -	    buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3]; -	    i++; -	 } +      assert(brw->nr_ucp <= 6); +      for (j = 0; j < brw->nr_ucp; j++) { +	 buf[offset + i * 4 + 0] = brw->ucp[j][0]; +	 buf[offset + i * 4 + 1] = brw->ucp[j][1]; +	 buf[offset + i * 4 + 2] = brw->ucp[j][2]; +	 buf[offset + i * 4 + 3] = brw->ucp[j][3]; +	 i++;        }     } @@ -248,13 +225,7 @@ static void prepare_constant_buffer(struct brw_context *brw)        GLuint offset = brw->curbe.vs_start * 16;        GLuint nr = brw->vs.prog_data->nr_params / 4; -      if (brw->vertex_program->IsNVProgram) -	 _mesa_load_tracked_matrices(ctx); - -      /* Updates the ParamaterValues[i] pointers for all parameters of the -       * basic type of PROGRAM_STATE_VAR. -       */ -      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);  +      /* map vs constant buffer */        /* XXX just use a memcpy here */        for (i = 0; i < nr; i++) { @@ -264,14 +235,16 @@ static void prepare_constant_buffer(struct brw_context *brw)  	 buf[offset + i * 4 + 2] = value[2];  	 buf[offset + i * 4 + 3] = value[3];        } + +      /* unmap vs constant buffer */     }     if (0) {        for (i = 0; i < sz*16; i+=4)  -	 _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, +	 debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,  		      buf[i+0], buf[i+1], buf[i+2], buf[i+3]); -      _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n", +      debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n",  		   brw->curbe.last_buf, buf,  		   bufsz, brw->curbe.last_bufsz,  		   brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); @@ -282,12 +255,12 @@ static void prepare_constant_buffer(struct brw_context *brw)         bufsz == brw->curbe.last_bufsz &&         memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {        /* constants have not changed */ -      _mesa_free(buf); +      FREE(buf);     }      else {        /* constants have changed */        if (brw->curbe.last_buf) -	 _mesa_free(brw->curbe.last_buf); +	 FREE(brw->curbe.last_buf);        brw->curbe.last_buf = buf;        brw->curbe.last_bufsz = bufsz; @@ -353,15 +326,11 @@ static void emit_constant_buffer(struct brw_context *brw)     ADVANCE_BATCH();  } -/* This tracked state is unique in that the state it monitors varies - * dynamically depending on the parameters tracked by the fragment and - * vertex programs.  This is the template used as a starting point, - * each context will maintain a copy of this internally and update as - * required. - */  const struct brw_tracked_state brw_constant_buffer = {     .dirty = { -      .mesa = _NEW_PROGRAM_CONSTANTS, +      .mesa = (PIPE_NEW_FS_CONSTANTS | +	       PIPE_NEW_VS_CONSTANTS | +	       PIPE_NEW_UCP),        .brw  = (BRW_NEW_FRAGMENT_PROGRAM |  	       BRW_NEW_VERTEX_PROGRAM |  	       BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h index 78d457ad2b..282c5b18f4 100644 --- a/src/gallium/drivers/i965/brw_defines.h +++ b/src/gallium/drivers/i965/brw_defines.h @@ -840,8 +840,8 @@  #include "intel_chipset.h" -#define BRW_IS_G4X(brw)         (IS_G4X((brw)->intel.intelScreen->deviceID)) -#define BRW_IS_IGDNG(brw)         (IS_IGDNG((brw)->intel.intelScreen->deviceID)) +#define BRW_IS_G4X(brw)         (IS_G4X((brw)->brw_screen->deviceID)) +#define BRW_IS_IGDNG(brw)         (IS_IGDNG((brw)->brw_screen->deviceID))  #define BRW_IS_965(brw)         (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)))  #define CMD_PIPELINE_SELECT(brw)        ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)  #define CMD_VF_STATISTICS(brw)          ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c index 9fef230507..a84c581c03 100644 --- a/src/gallium/drivers/i965/brw_disasm.c +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -27,8 +27,6 @@  #include <unistd.h>  #include <stdarg.h> -#include "main/mtypes.h" -  #include "brw_context.h"  #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 44bb7bd588..8cd117c24f 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -39,14 +39,13 @@  #include "brw_defines.h"  #include "brw_context.h"  #include "brw_state.h" -#include "brw_fallback.h"  #include "intel_batchbuffer.h"  #include "intel_buffer_objects.h"  #define FILE_DEBUG_FLAG DEBUG_BATCH -static GLuint prim_to_hw_prim[GL_POLYGON+1] = { +static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = {     _3DPRIM_POINTLIST,     _3DPRIM_LINELIST,     _3DPRIM_LINELOOP, @@ -60,19 +59,6 @@ static GLuint prim_to_hw_prim[GL_POLYGON+1] = {  }; -static const GLenum reduced_prim[GL_POLYGON+1] = {   -   GL_POINTS, -   GL_LINES, -   GL_LINES, -   GL_LINES, -   GL_TRIANGLES, -   GL_TRIANGLES, -   GL_TRIANGLES, -   GL_TRIANGLES, -   GL_TRIANGLES, -   GL_TRIANGLES -}; -  /* When the primitive changes, set a state bit and re-validate.  Not   * the nicest and would rather deal with this by having all the @@ -196,102 +182,6 @@ static void brw_merge_inputs( struct brw_context *brw,        brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;  } -/* XXX: could split the primitive list to fallback only on the - * non-conformant primitives. - */ -static GLboolean check_fallbacks( struct brw_context *brw, -				  const struct _mesa_prim *prim, -				  GLuint nr_prims ) -{ -   GLcontext *ctx = &brw->intel.ctx; -   GLuint i; - -   /* If we don't require strict OpenGL conformance, never  -    * use fallbacks.  If we're forcing fallbacks, always -    * use fallfacks. -    */ -   if (brw->intel.conformance_mode == 0) -      return GL_FALSE; - -   if (brw->intel.conformance_mode == 2) -      return GL_TRUE; - -   if (ctx->Polygon.SmoothFlag) { -      for (i = 0; i < nr_prims; i++) -	 if (reduced_prim[prim[i].mode] == GL_TRIANGLES)  -	    return GL_TRUE; -   } - -   /* BRW hardware will do AA lines, but they are non-conformant it -    * seems.  TBD whether we keep this fallback: -    */ -   if (ctx->Line.SmoothFlag) { -      for (i = 0; i < nr_prims; i++) -	 if (reduced_prim[prim[i].mode] == GL_LINES)  -	    return GL_TRUE; -   } - -   /* Stipple -- these fallbacks could be resolved with a little -    * bit of work? -    */ -   if (ctx->Line.StippleFlag) { -      for (i = 0; i < nr_prims; i++) { -	 /* GS doesn't get enough information to know when to reset -	  * the stipple counter?!? -	  */ -	 if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP)  -	    return GL_TRUE; -	     -	 if (prim[i].mode == GL_POLYGON && -	     (ctx->Polygon.FrontMode == GL_LINE || -	      ctx->Polygon.BackMode == GL_LINE)) -	    return GL_TRUE; -      } -   } - -   if (ctx->Point.SmoothFlag) { -      for (i = 0; i < nr_prims; i++) -	 if (prim[i].mode == GL_POINTS)  -	    return GL_TRUE; -   } - -   /* BRW hardware doesn't handle GL_CLAMP texturing correctly; -    * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP -    * as GL_CLAMP_TO_EDGE instead.  If we're using GL_CLAMP, and -    * we want strict conformance, force the fallback. -    * Right now, we only do this for 2D textures. -    */ -   { -      int u; -      for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { -         struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; -         if (texUnit->Enabled) { -            if (texUnit->Enabled & TEXTURE_1D_BIT) { -               if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) { -                   return GL_TRUE; -               } -            } -            if (texUnit->Enabled & TEXTURE_2D_BIT) { -               if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP || -                   texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) { -                   return GL_TRUE; -               } -            } -            if (texUnit->Enabled & TEXTURE_3D_BIT) { -               if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP || -                   texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP || -                   texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) { -                   return GL_TRUE; -               } -            } -         } -      } -   } -       -   /* Nothing stopping us from the fast path now */ -   return GL_FALSE; -} -  /* May fail if out of video memory for texture or vbo upload, or on   * fallback conditions.   */ @@ -308,23 +198,12 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,     GLboolean retval = GL_FALSE;     GLboolean warn = GL_FALSE;     GLboolean first_time = GL_TRUE; +   uint32_t hw_prim;     GLuint i;     if (ctx->NewState)        _mesa_update_state( ctx ); -   /* We have to validate the textures *before* checking for fallbacks; -    * otherwise, the software fallback won't be able to rely on the -    * texture state, the firstLevel and lastLevel fields won't be -    * set in the intel texture object (they'll both be 0), and the  -    * software fallback will segfault if it attempts to access any -    * texture level other than level 0. -    */ -   brw_validate_textures( brw ); - -   if (check_fallbacks(brw, prim, nr_prims)) -      return GL_FALSE; -     /* Bind all inputs, derive varying and size information:      */     brw_merge_inputs( brw, arrays ); @@ -336,90 +215,30 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,     brw->vb.max_index = max_index;     brw->state.dirty.brw |= BRW_NEW_VERTICES; -   /* Have to validate state quite late.  Will rebuild tnl_program, -    * which depends on varying information.   -    *  -    * Note this is where brw->vs->prog_data.inputs_read is calculated, -    * so can't access it earlier. -    */ - -   LOCK_HARDWARE(intel); - -   if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) { -      UNLOCK_HARDWARE(intel); -      return GL_TRUE; -   } - -   for (i = 0; i < nr_prims; i++) { -      uint32_t hw_prim; - -      /* Flush the batch if it's approaching full, so that we don't wrap while -       * we've got validated state that needs to be in the same batch as the -       * primitives.  This fraction is just a guess (minimal full state plus -       * a primitive is around 512 bytes), and would be better if we had -       * an upper bound of how much we might emit in a single -       * brw_try_draw_prims(). -       */ -      intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4, -				      LOOP_CLIPRECTS); - -      hw_prim = brw_set_prim(brw, prim[i].mode); - -      if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) { -	 first_time = GL_FALSE; - -	 brw_validate_state(brw); - -	 /* Various fallback checks:  */ -	 if (brw->intel.Fallback) -	    goto out; +   hw_prim = brw_set_prim(brw, prim[i].mode); -	 /* Check that we can fit our state in with our existing batchbuffer, or -	  * flush otherwise. -	  */ -	 if (dri_bufmgr_check_aperture_space(brw->state.validated_bos, -					     brw->state.validated_bo_count)) { -	    static GLboolean warned; -	    intel_batchbuffer_flush(intel->batch); +   brw_validate_state(brw); -	    /* Validate the state after we flushed the batch (which would have -	     * changed the set of dirty state).  If we still fail to -	     * check_aperture, warn of what's happening, but attempt to continue -	     * on since it may succeed anyway, and the user would probably rather -	     * see a failure and a warning than a fallback. -	     */ -	    brw_validate_state(brw); -	    if (!warned && -		dri_bufmgr_check_aperture_space(brw->state.validated_bos, -						brw->state.validated_bo_count)) { -	       warn = GL_TRUE; -	       warned = GL_TRUE; -	    } -	 } - -	 brw_upload_state(brw); -      } - -      brw_emit_prim(brw, &prim[i], hw_prim); +   /* Check that we can fit our state in with our existing batchbuffer, or +    * flush otherwise. +    */ +   ret = dri_bufmgr_check_aperture_space(brw->state.validated_bos, +					 brw->state.validated_bo_count); +   if (ret) +      return ret; -      retval = GL_TRUE; -   } +   ret = brw_upload_state(brw); +   if (ret) +      return ret; +    +   ret = brw_emit_prim(brw, &prim[i], hw_prim); +   if (ret) +      return ret;     if (intel->always_flush_batch)        intel_batchbuffer_flush(intel->batch); - out: -   UNLOCK_HARDWARE(intel); - -   brw_state_cache_check_size(brw); -   if (warn) -      fprintf(stderr, "i965: Single primitive emit potentially exceeded " -	      "available aperture space\n"); - -   if (!retval) -      DBG("%s failed\n", __FUNCTION__); - -   return retval; +   return 0;  }  void brw_draw_prims( GLcontext *ctx, @@ -431,37 +250,26 @@ void brw_draw_prims( GLcontext *ctx,  		     GLuint min_index,  		     GLuint max_index )  { -   GLboolean retval; +   enum pipe_error ret;     if (!vbo_all_varyings_in_vbos(arrays)) {        if (!index_bounds_valid)  	 vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); - -      /* Decide if we want to rebase.  If so we end up recursing once -       * only into this function. -       */ -      if (min_index != 0) { -	 vbo_rebase_prims(ctx, arrays, -			  prim, nr_prims, -			  ib, min_index, max_index, -			  brw_draw_prims ); -	 return; -      }     }     /* Make a first attempt at drawing:      */ -   retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); +   ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);     /* Otherwise, we really are out of memory.  Pass the drawing      * command to the software tnl module and which will in turn call      * swrast to do the drawing.      */ -   if (!retval) { -       _swsetup_Wakeup(ctx); -      _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); +   if (ret != 0) { +      intel_batchbuffer_flush(intel->batch); +      ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); +      assert(ret == 0);     } -  }  void brw_draw_init( struct brw_context *brw ) diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index a3ff6c58d8..ad3ef6b7dd 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -25,13 +25,9 @@   *    **************************************************************************/ +#include "pipe/p_context.h" -#include "main/glheader.h" -#include "main/bufferobj.h" -#include "main/context.h" -#include "main/state.h" -#include "main/api_validate.h" -#include "main/enums.h" +#include "util/u_upload_mgr.h"  #include "brw_draw.h"  #include "brw_defines.h" @@ -43,303 +39,157 @@  #include "intel_buffer_objects.h"  #include "intel_tex.h" -static GLuint double_types[5] = { -   0, -   BRW_SURFACEFORMAT_R64_FLOAT, -   BRW_SURFACEFORMAT_R64G64_FLOAT, -   BRW_SURFACEFORMAT_R64G64B64_FLOAT, -   BRW_SURFACEFORMAT_R64G64B64A64_FLOAT -}; -static GLuint float_types[5] = { -   0, -   BRW_SURFACEFORMAT_R32_FLOAT, -   BRW_SURFACEFORMAT_R32G32_FLOAT, -   BRW_SURFACEFORMAT_R32G32B32_FLOAT, -   BRW_SURFACEFORMAT_R32G32B32A32_FLOAT -}; -static GLuint uint_types_norm[5] = { -   0, -   BRW_SURFACEFORMAT_R32_UNORM, -   BRW_SURFACEFORMAT_R32G32_UNORM, -   BRW_SURFACEFORMAT_R32G32B32_UNORM, -   BRW_SURFACEFORMAT_R32G32B32A32_UNORM -}; -static GLuint uint_types_scale[5] = { -   0, -   BRW_SURFACEFORMAT_R32_USCALED, -   BRW_SURFACEFORMAT_R32G32_USCALED, -   BRW_SURFACEFORMAT_R32G32B32_USCALED, -   BRW_SURFACEFORMAT_R32G32B32A32_USCALED -}; +unsigned brw_translate_surface_format( unsigned id ) +{ +   switch (id) { +   case PIPE_FORMAT_R64_FLOAT: +      return BRW_SURFACEFORMAT_R64_FLOAT; +   case PIPE_FORMAT_R64G64_FLOAT: +      return BRW_SURFACEFORMAT_R64G64_FLOAT; +   case PIPE_FORMAT_R64G64B64_FLOAT: +      return BRW_SURFACEFORMAT_R64G64B64_FLOAT; +   case PIPE_FORMAT_R64G64B64A64_FLOAT: +      return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; -static GLuint int_types_norm[5] = { -   0, -   BRW_SURFACEFORMAT_R32_SNORM, -   BRW_SURFACEFORMAT_R32G32_SNORM, -   BRW_SURFACEFORMAT_R32G32B32_SNORM, -   BRW_SURFACEFORMAT_R32G32B32A32_SNORM -}; +   case PIPE_FORMAT_R32_FLOAT: +      return BRW_SURFACEFORMAT_R32_FLOAT; +   case PIPE_FORMAT_R32G32_FLOAT: +      return BRW_SURFACEFORMAT_R32G32_FLOAT; +   case PIPE_FORMAT_R32G32B32_FLOAT: +      return BRW_SURFACEFORMAT_R32G32B32_FLOAT; +   case PIPE_FORMAT_R32G32B32A32_FLOAT: +      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; -static GLuint int_types_scale[5] = { -   0, -   BRW_SURFACEFORMAT_R32_SSCALED, -   BRW_SURFACEFORMAT_R32G32_SSCALED, -   BRW_SURFACEFORMAT_R32G32B32_SSCALED, -   BRW_SURFACEFORMAT_R32G32B32A32_SSCALED -}; +   case PIPE_FORMAT_R32_UNORM: +      return BRW_SURFACEFORMAT_R32_UNORM; +   case PIPE_FORMAT_R32G32_UNORM: +      return BRW_SURFACEFORMAT_R32G32_UNORM; +   case PIPE_FORMAT_R32G32B32_UNORM: +      return BRW_SURFACEFORMAT_R32G32B32_UNORM; +   case PIPE_FORMAT_R32G32B32A32_UNORM: +      return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; -static GLuint ushort_types_norm[5] = { -   0, -   BRW_SURFACEFORMAT_R16_UNORM, -   BRW_SURFACEFORMAT_R16G16_UNORM, -   BRW_SURFACEFORMAT_R16G16B16_UNORM, -   BRW_SURFACEFORMAT_R16G16B16A16_UNORM -}; +   case PIPE_FORMAT_R32_USCALED: +      return BRW_SURFACEFORMAT_R32_USCALED; +   case PIPE_FORMAT_R32G32_USCALED: +      return BRW_SURFACEFORMAT_R32G32_USCALED; +   case PIPE_FORMAT_R32G32B32_USCALED: +      return BRW_SURFACEFORMAT_R32G32B32_USCALED; +   case PIPE_FORMAT_R32G32B32A32_USCALED: +      return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; -static GLuint ushort_types_scale[5] = { -   0, -   BRW_SURFACEFORMAT_R16_USCALED, -   BRW_SURFACEFORMAT_R16G16_USCALED, -   BRW_SURFACEFORMAT_R16G16B16_USCALED, -   BRW_SURFACEFORMAT_R16G16B16A16_USCALED -}; +   case PIPE_FORMAT_R32_SNORM: +      return BRW_SURFACEFORMAT_R32_SNORM; +   case PIPE_FORMAT_R32G32_SNORM: +      return BRW_SURFACEFORMAT_R32G32_SNORM; +   case PIPE_FORMAT_R32G32B32_SNORM: +      return BRW_SURFACEFORMAT_R32G32B32_SNORM; +   case PIPE_FORMAT_R32G32B32A32_SNORM: +      return BRW_SURFACEFORMAT_R32G32B32A32_SNORM; -static GLuint short_types_norm[5] = { -   0, -   BRW_SURFACEFORMAT_R16_SNORM, -   BRW_SURFACEFORMAT_R16G16_SNORM, -   BRW_SURFACEFORMAT_R16G16B16_SNORM, -   BRW_SURFACEFORMAT_R16G16B16A16_SNORM -}; +   case PIPE_FORMAT_R32_SSCALED: +      return BRW_SURFACEFORMAT_R32_SSCALED; +   case PIPE_FORMAT_R32G32_SSCALED: +      return BRW_SURFACEFORMAT_R32G32_SSCALED; +   case PIPE_FORMAT_R32G32B32_SSCALED: +      return BRW_SURFACEFORMAT_R32G32B32_SSCALED; +   case PIPE_FORMAT_R32G32B32A32_SSCALED: +      return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; -static GLuint short_types_scale[5] = { -   0, -   BRW_SURFACEFORMAT_R16_SSCALED, -   BRW_SURFACEFORMAT_R16G16_SSCALED, -   BRW_SURFACEFORMAT_R16G16B16_SSCALED, -   BRW_SURFACEFORMAT_R16G16B16A16_SSCALED -}; +   case PIPE_FORMAT_R16_UNORM: +      return BRW_SURFACEFORMAT_R16_UNORM; +   case PIPE_FORMAT_R16G16_UNORM: +      return BRW_SURFACEFORMAT_R16G16_UNORM; +   case PIPE_FORMAT_R16G16B16_UNORM: +      return BRW_SURFACEFORMAT_R16G16B16_UNORM; +   case PIPE_FORMAT_R16G16B16A16_UNORM: +      return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; -static GLuint ubyte_types_norm[5] = { -   0, -   BRW_SURFACEFORMAT_R8_UNORM, -   BRW_SURFACEFORMAT_R8G8_UNORM, -   BRW_SURFACEFORMAT_R8G8B8_UNORM, -   BRW_SURFACEFORMAT_R8G8B8A8_UNORM -}; +   case PIPE_FORMAT_R16_USCALED: +      return BRW_SURFACEFORMAT_R16_USCALED; +   case PIPE_FORMAT_R16G16_USCALED: +      return BRW_SURFACEFORMAT_R16G16_USCALED; +   case PIPE_FORMAT_R16G16B16_USCALED: +      return BRW_SURFACEFORMAT_R16G16B16_USCALED; +   case PIPE_FORMAT_R16G16B16A16_USCALED: +      return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; -static GLuint ubyte_types_scale[5] = { -   0, -   BRW_SURFACEFORMAT_R8_USCALED, -   BRW_SURFACEFORMAT_R8G8_USCALED, -   BRW_SURFACEFORMAT_R8G8B8_USCALED, -   BRW_SURFACEFORMAT_R8G8B8A8_USCALED -}; +   case PIPE_FORMAT_R16_SNORM: +      return BRW_SURFACEFORMAT_R16_SNORM; +   case PIPE_FORMAT_R16G16_SNORM: +      return BRW_SURFACEFORMAT_R16G16_SNORM; +   case PIPE_FORMAT_R16G16B16_SNORM: +      return BRW_SURFACEFORMAT_R16G16B16_SNORM; +   case PIPE_FORMAT_R16G16B16A16_SNORM: +      return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; -static GLuint byte_types_norm[5] = { -   0, -   BRW_SURFACEFORMAT_R8_SNORM, -   BRW_SURFACEFORMAT_R8G8_SNORM, -   BRW_SURFACEFORMAT_R8G8B8_SNORM, -   BRW_SURFACEFORMAT_R8G8B8A8_SNORM -}; +   case PIPE_FORMAT_R16_SSCALED: +      return BRW_SURFACEFORMAT_R16_SSCALED; +   case PIPE_FORMAT_R16G16_SSCALED: +      return BRW_SURFACEFORMAT_R16G16_SSCALED; +   case PIPE_FORMAT_R16G16B16_SSCALED: +      return BRW_SURFACEFORMAT_R16G16B16_SSCALED; +   case PIPE_FORMAT_R16G16B16A16_SSCALED: +      return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; -static GLuint byte_types_scale[5] = { -   0, -   BRW_SURFACEFORMAT_R8_SSCALED, -   BRW_SURFACEFORMAT_R8G8_SSCALED, -   BRW_SURFACEFORMAT_R8G8B8_SSCALED, -   BRW_SURFACEFORMAT_R8G8B8A8_SSCALED -}; +   case PIPE_FORMAT_R8_UNORM: +      return BRW_SURFACEFORMAT_R8_UNORM; +   case PIPE_FORMAT_R8G8_UNORM: +      return BRW_SURFACEFORMAT_R8G8_UNORM; +   case PIPE_FORMAT_R8G8B8_UNORM: +      return BRW_SURFACEFORMAT_R8G8B8_UNORM; +   case PIPE_FORMAT_R8G8B8A8_UNORM: +      return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; +   case PIPE_FORMAT_R8_USCALED: +      return BRW_SURFACEFORMAT_R8_USCALED; +   case PIPE_FORMAT_R8G8_USCALED: +      return BRW_SURFACEFORMAT_R8G8_USCALED; +   case PIPE_FORMAT_R8G8B8_USCALED: +      return BRW_SURFACEFORMAT_R8G8B8_USCALED; +   case PIPE_FORMAT_R8G8B8A8_USCALED: +      return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; -/** - * Given vertex array type/size/format/normalized info, return - * the appopriate hardware surface type. - * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays. - */ -static GLuint get_surface_type( GLenum type, GLuint size, -                                GLenum format, GLboolean normalized ) -{ -   if (INTEL_DEBUG & DEBUG_VERTS) -      _mesa_printf("type %s size %d normalized %d\n",  -		   _mesa_lookup_enum_by_nr(type), size, normalized); +   case PIPE_FORMAT_R8_SNORM: +      return BRW_SURFACEFORMAT_R8_SNORM; +   case PIPE_FORMAT_R8G8_SNORM: +      return BRW_SURFACEFORMAT_R8G8_SNORM; +   case PIPE_FORMAT_R8G8B8_SNORM: +      return BRW_SURFACEFORMAT_R8G8B8_SNORM; +   case PIPE_FORMAT_R8G8B8A8_SNORM: +      return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; -   if (normalized) { -      switch (type) { -      case GL_DOUBLE: return double_types[size]; -      case GL_FLOAT: return float_types[size]; -      case GL_INT: return int_types_norm[size]; -      case GL_SHORT: return short_types_norm[size]; -      case GL_BYTE: return byte_types_norm[size]; -      case GL_UNSIGNED_INT: return uint_types_norm[size]; -      case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; -      case GL_UNSIGNED_BYTE: -         if (format == GL_BGRA) { -            /* See GL_EXT_vertex_array_bgra */ -            assert(size == 4); -            return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; -         } -         else { -            return ubyte_types_norm[size]; -         } -      default: assert(0); return 0; -      }       -   } -   else { -      assert(format == GL_RGBA); /* sanity check */ -      switch (type) { -      case GL_DOUBLE: return double_types[size]; -      case GL_FLOAT: return float_types[size]; -      case GL_INT: return int_types_scale[size]; -      case GL_SHORT: return short_types_scale[size]; -      case GL_BYTE: return byte_types_scale[size]; -      case GL_UNSIGNED_INT: return uint_types_scale[size]; -      case GL_UNSIGNED_SHORT: return ushort_types_scale[size]; -      case GL_UNSIGNED_BYTE: return ubyte_types_scale[size]; -      default: assert(0); return 0; -      }       -   } -} +   case PIPE_FORMAT_R8_SSCALED: +      return BRW_SURFACEFORMAT_R8_SSCALED; +   case PIPE_FORMAT_R8G8_SSCALED: +      return BRW_SURFACEFORMAT_R8G8_SSCALED; +   case PIPE_FORMAT_R8G8B8_SSCALED: +      return BRW_SURFACEFORMAT_R8G8B8_SSCALED; +   case PIPE_FORMAT_R8G8B8A8_SSCALED: +      return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; - -static GLuint get_size( GLenum type ) -{ -   switch (type) { -   case GL_DOUBLE: return sizeof(GLdouble); -   case GL_FLOAT: return sizeof(GLfloat); -   case GL_INT: return sizeof(GLint); -   case GL_SHORT: return sizeof(GLshort); -   case GL_BYTE: return sizeof(GLbyte); -   case GL_UNSIGNED_INT: return sizeof(GLuint); -   case GL_UNSIGNED_SHORT: return sizeof(GLushort); -   case GL_UNSIGNED_BYTE: return sizeof(GLubyte); -   default: return 0; -   }       +   default: +      assert(0); +      return 0; +   }  } -static GLuint get_index_type(GLenum type)  +static unsigned get_index_type(int type)  {     switch (type) { -   case GL_UNSIGNED_BYTE:  return BRW_INDEX_BYTE; -   case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD; -   case GL_UNSIGNED_INT:   return BRW_INDEX_DWORD; +   case 1: return BRW_INDEX_BYTE; +   case 2: return BRW_INDEX_WORD; +   case 4: return BRW_INDEX_DWORD;     default: assert(0); return 0;     }  } -static void wrap_buffers( struct brw_context *brw, -			  GLuint size ) -{ -   if (size < BRW_UPLOAD_INIT_SIZE) -      size = BRW_UPLOAD_INIT_SIZE; - -   brw->vb.upload.offset = 0; - -   if (brw->vb.upload.bo != NULL) -      dri_bo_unreference(brw->vb.upload.bo); -   brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO", -				    size, 1); - -   /* Set the internal VBO\ to no-backing-store.  We only use them as a -    * temporary within a brw_try_draw_prims while the lock is held. -    */ -   /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH -      FAKE TO PUSH THIS STUFF */ -//   if (!brw->intel.ttm) -//      dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL); -} - -static void get_space( struct brw_context *brw, -		       GLuint size, -		       dri_bo **bo_return, -		       GLuint *offset_return ) -{ -   size = ALIGN(size, 64); -   if (brw->vb.upload.bo == NULL || -       brw->vb.upload.offset + size > brw->vb.upload.bo->size) { -      wrap_buffers(brw, size); -   } -   assert(*bo_return == NULL); -   dri_bo_reference(brw->vb.upload.bo); -   *bo_return = brw->vb.upload.bo; -   *offset_return = brw->vb.upload.offset; -   brw->vb.upload.offset += size; -} - -static void -copy_array_to_vbo_array( struct brw_context *brw, -			 struct brw_vertex_element *element, -			 GLuint dst_stride) -{ -   struct intel_context *intel = &brw->intel; -   GLuint size = element->count * dst_stride; - -   get_space(brw, size, &element->bo, &element->offset); - -   if (element->glarray->StrideB == 0) { -      assert(element->count == 1); -      element->stride = 0; -   } else { -      element->stride = dst_stride; -   } - -   if (dst_stride == element->glarray->StrideB) { -      if (intel->intelScreen->kernel_exec_fencing) { -	 drm_intel_gem_bo_map_gtt(element->bo); -	 memcpy((char *)element->bo->virtual + element->offset, -		element->glarray->Ptr, size); -	 drm_intel_gem_bo_unmap_gtt(element->bo); -      } else { -	 dri_bo_subdata(element->bo, -			element->offset, -			size, -			element->glarray->Ptr); -      } -   } else { -      char *dest; -      const unsigned char *src = element->glarray->Ptr; -      int i; - -      if (intel->intelScreen->kernel_exec_fencing) { -	 drm_intel_gem_bo_map_gtt(element->bo); -	 dest = element->bo->virtual; -	 dest += element->offset; - -	 for (i = 0; i < element->count; i++) { -	    memcpy(dest, src, dst_stride); -	    src += element->glarray->StrideB; -	    dest += dst_stride; -	 } - -	 drm_intel_gem_bo_unmap_gtt(element->bo); -      } else { -	 void *data; - -	 data = _mesa_malloc(dst_stride * element->count); -	 dest = data; -	 for (i = 0; i < element->count; i++) { -	    memcpy(dest, src, dst_stride); -	    src += element->glarray->StrideB; -	    dest += dst_stride; -	 } - -	 dri_bo_subdata(element->bo, -			element->offset, -			size, -			data); - -	 _mesa_free(data); -      } -   } -} - -static void brw_prepare_vertices(struct brw_context *brw) +static boolean brw_prepare_vertices(struct brw_context *brw)  {     GLcontext *ctx = &brw->intel.ctx;     struct intel_context *intel = intel_context(ctx); @@ -358,123 +208,38 @@ static void brw_prepare_vertices(struct brw_context *brw)     if (0)        _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); -   /* Accumulate the list of enabled arrays. */ -   brw->vb.nr_enabled = 0; -   while (vs_inputs) { -      GLuint i = _mesa_ffsll(vs_inputs) - 1; -      struct brw_vertex_element *input = &brw->vb.inputs[i]; - -      vs_inputs &= ~(1 << i); -      brw->vb.enabled[brw->vb.nr_enabled++] = input; -   } -   /* XXX: In the rare cases where this happens we fallback all -    * the way to software rasterization, although a tnl fallback -    * would be sufficient.  I don't know of *any* real world -    * cases with > 17 vertex attributes enabled, so it probably -    * isn't an issue at this point. -    */ -   if (brw->vb.nr_enabled >= BRW_VEP_MAX) { -      intel->Fallback = 1; -      return; -   }     for (i = 0; i < brw->vb.nr_enabled; i++) {        struct brw_vertex_element *input = brw->vb.enabled[i];        input->element_size = get_size(input->glarray->Type) * input->glarray->Size; -      if (_mesa_is_bufferobj(input->glarray->BufferObj)) { -	 struct intel_buffer_object *intel_buffer = -	    intel_buffer_object(input->glarray->BufferObj); - -	 /* Named buffer object: Just reference its contents directly. */ -	 dri_bo_unreference(input->bo); -	 input->bo = intel_bufferobj_buffer(intel, intel_buffer, -					    INTEL_READ); -	 dri_bo_reference(input->bo); -	 input->offset = (unsigned long)input->glarray->Ptr; -	 input->stride = input->glarray->StrideB; -	 input->count = input->glarray->_MaxElement; - -	 /* This is a common place to reach if the user mistakenly supplies -	  * a pointer in place of a VBO offset.  If we just let it go through, -	  * we may end up dereferencing a pointer beyond the bounds of the -	  * GTT.  We would hope that the VBO's max_index would save us, but -	  * Mesa appears to hand us min/max values not clipped to the -	  * array object's _MaxElement, and _MaxElement frequently appears -	  * to be wrong anyway. -	  * -	  * The VBO spec allows application termination in this case, and it's -	  * probably a service to the poor programmer to do so rather than -	  * trying to just not render. -	  */ -	 assert(input->offset < input->bo->size); -      } else { -	 input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; -	 if (input->bo != NULL) { -	    /* Already-uploaded vertex data is present from a previous -	     * prepare_vertices, but we had to re-validate state due to -	     * check_aperture failing and a new batch being produced. -	     */ -	    continue; -	 } - -	 /* Queue the buffer object up to be uploaded in the next pass, -	  * when we've decided if we're doing interleaved or not. -	  */ -	 if (input->attrib == VERT_ATTRIB_POS) { -	    /* Position array not properly enabled: -	     */ -            if (input->glarray->StrideB == 0) { -               intel->Fallback = 1; -               return; -            } - -	    interleave = input->glarray->StrideB; -	    ptr = input->glarray->Ptr; -	 } -	 else if (interleave != input->glarray->StrideB || -		  (const unsigned char *)input->glarray->Ptr - ptr < 0 || -		  (const unsigned char *)input->glarray->Ptr - ptr > interleave) -	 { -	    interleave = 0; -	 } - -	 upload[nr_uploads++] = input; -	  -	 /* We rebase drawing to start at element zero only when -	  * varyings are not in vbos, which means we can end up -	  * uploading non-varying arrays (stride != 0) when min_index -	  * is zero.  This doesn't matter as the amount to upload is -	  * the same for these arrays whether the draw call is rebased -	  * or not - we just have to upload the one element. -	  */ -	 assert(min_index == 0 || input->glarray->StrideB == 0); +      if (brw_is_user_buffer(vb)) { +	 u_upload_buffer( brw->upload,  +			  min_index * vb->stride, +			  (max_index + 1 - min_index) * vb->stride, +			  &offset, +			  &buffer ); +      } +      else +      { +	 offset = 0; +	 buffer = vb->buffer; +	 count = stride == 0 ? 1 : max_index + 1 - min_index;        } -   } -   /* Handle any arrays to be uploaded. */ -   if (nr_uploads > 1 && interleave && interleave <= 256) { -      /* All uploads are interleaved, so upload the arrays together as -       * interleaved.  First, upload the contents and set up upload[0]. -       */ -      copy_array_to_vbo_array(brw, upload[0], interleave); +      /* Named buffer object: Just reference its contents directly. */ +      dri_bo_unreference(input->bo); +      input->bo = intel_bufferobj_buffer(intel, intel_buffer, +					 INTEL_READ); +      dri_bo_reference(input->bo); -      for (i = 1; i < nr_uploads; i++) { -	 /* Then, just point upload[i] at upload[0]'s buffer. */ -	 upload[i]->stride = interleave; -	 upload[i]->offset = upload[0]->offset + -	    ((const unsigned char *)upload[i]->glarray->Ptr - ptr); -	 upload[i]->bo = upload[0]->bo; -	 dri_bo_reference(upload[i]->bo); -      } -   } -   else { -      /* Upload non-interleaved arrays */ -      for (i = 0; i < nr_uploads; i++) { -          copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size); -      } +      input->offset = (unsigned long)offset; +      input->stride = vb->stride; +      input->count = count; + +      assert(input->offset < input->bo->size);     }     brw_prepare_query_begin(brw); @@ -632,13 +397,8 @@ static void brw_prepare_indices(struct brw_context *brw)        /* Straight upload         */ -      if (intel->intelScreen->kernel_exec_fencing) { -	 drm_intel_gem_bo_map_gtt(bo); -	 memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); -	 drm_intel_gem_bo_unmap_gtt(bo); -      } else { -	 dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); -      } +      brw_bo_subdata(bo, offset, ib_size, index_buffer->ptr); +     } else {        offset = (GLuint) (unsigned long) index_buffer->ptr;        brw->ib.start_vertex_offset = 0; diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 48c2b9a41c..5ec0c585fe 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -58,7 +58,7 @@ static void compile_gs_prog( struct brw_context *brw,     /* Need to locate the two positions present in vertex + header.      * These are currently hardcoded:      */ -   c.nr_attrs = brw_count_bits(c.key.attrs); +   c.nr_attrs = util_count_bits(c.key.attrs);     if (BRW_IS_IGDNG(brw))         c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */ diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c new file mode 100644 index 0000000000..b351794dce --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -0,0 +1,41 @@ + +   /* _NEW_COLOR */ +   if (key->logic_op != GL_COPY) { +      cc.cc2.logicop_enable = 1; +      cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); +   } else if (key->color_blend) { +      GLenum eqRGB = key->blend_eq_rgb; +      GLenum eqA = key->blend_eq_a; +      GLenum srcRGB = key->blend_src_rgb; +      GLenum dstRGB = key->blend_dst_rgb; +      GLenum srcA = key->blend_src_a; +      GLenum dstA = key->blend_dst_a; + +      if (eqRGB == GL_MIN || eqRGB == GL_MAX) { +	 srcRGB = dstRGB = GL_ONE; +      } + +      if (eqA == GL_MIN || eqA == GL_MAX) { +	 srcA = dstA = GL_ONE; +      } + +      cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); +      cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); +      cc.cc6.blend_function = brw_translate_blend_equation(eqRGB); + +      cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); +      cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); +      cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA); + +      cc.cc3.blend_enable = 1; +      cc.cc3.ia_blend_enable = (srcA != srcRGB || +				dstA != dstRGB || +				eqA != eqRGB); +   } + +   if (key->dither) { +      cc.cc5.dither_enable = 1; +      cc.cc6.y_dither_offset = 0; +      cc.cc6.x_dither_offset = 0; +   } + diff --git a/src/gallium/drivers/i965/brw_pipe_debug.c b/src/gallium/drivers/i965/brw_pipe_debug.c new file mode 100644 index 0000000000..34d6d4028a --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_debug.c @@ -0,0 +1,2 @@ +   if (INTEL_DEBUG & DEBUG_STATS) +      cc.cc5.statistics_enable = 1; diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c new file mode 100644 index 0000000000..da29bc8bcb --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_depth.c @@ -0,0 +1,52 @@ +   /* _NEW_STENCIL */ +   if (key->dsa.stencil[0].enable) { +      cc.cc0.stencil_enable = 1; +      cc.cc0.stencil_func = +	 intel_translate_compare_func(key->stencil_func[0]); +      cc.cc0.stencil_fail_op = +	 intel_translate_stencil_op(key->stencil_fail_op[0]); +      cc.cc0.stencil_pass_depth_fail_op = +	 intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); +      cc.cc0.stencil_pass_depth_pass_op = +	 intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); +      cc.cc1.stencil_ref = key->stencil_ref[0]; +      cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; +      cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; + +      if (key->stencil_two_side) { +	 cc.cc0.bf_stencil_enable = 1; +	 cc.cc0.bf_stencil_func = +	    intel_translate_compare_func(key->stencil_func[1]); +	 cc.cc0.bf_stencil_fail_op = +	    intel_translate_stencil_op(key->stencil_fail_op[1]); +	 cc.cc0.bf_stencil_pass_depth_fail_op = +	    intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); +	 cc.cc0.bf_stencil_pass_depth_pass_op = +	    intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); +	 cc.cc1.bf_stencil_ref = key->stencil_ref[1]; +	 cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; +	 cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; +      } + +      /* Not really sure about this: +       */ +      if (key->stencil_write_mask[0] || +	  (key->stencil_two_side && key->stencil_write_mask[1])) +	 cc.cc0.stencil_write_enable = 1; +   } + + +   if (key->alpha_enabled) { +      cc.cc3.alpha_test = 1; +      cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); +      cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + +      UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); +   } + +   /* _NEW_DEPTH */ +   if (key->depth_test) { +      cc.cc2.depth_test = 1; +      cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); +      cc.cc2.depth_write_enable = key->depth_write; +   } diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c new file mode 100644 index 0000000000..d4ae332f46 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -0,0 +1,25 @@ + +/** + * called from intelDrawBuffer() + */ +static void brw_set_draw_region( struct intel_context *intel,  +                                 struct intel_region *color_regions[], +                                 struct intel_region *depth_region, +                                 GLuint num_color_regions) +{ +   struct brw_context *brw = brw_context(&intel->ctx); +   GLuint i; + +   /* release old color/depth regions */ +   if (brw->state.depth_region != depth_region) +      brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER; +   for (i = 0; i < brw->state.nr_color_regions; i++) +       intel_region_release(&brw->state.color_regions[i]); +   intel_region_release(&brw->state.depth_region); + +   /* reference new color/depth regions */ +   for (i = 0; i < num_color_regions; i++) +       intel_region_reference(&brw->state.color_regions[i], color_regions[i]); +   intel_region_reference(&brw->state.depth_region, depth_region); +   brw->state.nr_color_regions = num_color_regions; +} diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c new file mode 100644 index 0000000000..008f623151 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -0,0 +1,64 @@ + +/** + * called from intel_batchbuffer_flush and children before sending a + * batchbuffer off. + */ +static void brw_finish_batch(struct intel_context *intel) +{ +   struct brw_context *brw = brw_context(&intel->ctx); +   brw_emit_query_end(brw); +} + + +/** + * called from intelFlushBatchLocked + */ +static void brw_new_batch( struct intel_context *intel ) +{ +   struct brw_context *brw = brw_context(&intel->ctx); + +   /* Check that we didn't just wrap our batchbuffer at a bad time. */ +   assert(!brw->no_batch_wrap); + +   brw->curbe.need_new_bo = GL_TRUE; + +   /* Mark all context state as needing to be re-emitted. +    * This is probably not as severe as on 915, since almost all of our state +    * is just in referenced buffers. +    */ +   brw->state.dirty.brw |= BRW_NEW_CONTEXT; + +   brw->state.dirty.mesa |= ~0; +   brw->state.dirty.brw |= ~0; +   brw->state.dirty.cache |= ~0; + +   /* Move to the end of the current upload buffer so that we'll force choosing +    * a new buffer next time. +    */ +   if (brw->vb.upload.bo != NULL) { +      dri_bo_unreference(brw->vb.upload.bo); +      brw->vb.upload.bo = NULL; +      brw->vb.upload.offset = 0; +   } +} + + +static void brw_note_fence( struct intel_context *intel, GLuint fence ) +{ +   brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; +} + +/* called from intelWaitForIdle() and intelFlush() + * + * For now, just flush everything.  Could be smarter later. + */ +static GLuint brw_flush_cmd( void ) +{ +   struct brw_mi_flush flush; +   flush.opcode = CMD_MI_FLUSH; +   flush.pad = 0; +   flush.flags = BRW_FLUSH_STATE_CACHE; +   return *(GLuint *)&flush; +} + + diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c new file mode 100644 index 0000000000..d199d0b81a --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -0,0 +1,27 @@ +   /* _NEW_BUFFERS */ +   if (IS_965(intel->intelScreen->deviceID) && +       !IS_G4X(intel->intelScreen->deviceID)) { +      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { +	 struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; +	 struct intel_renderbuffer *irb = intel_renderbuffer(rb); + +	 /* The original gen4 hardware couldn't set up WM surfaces pointing +	  * at an offset within a tile, which can happen when rendering to +	  * anything but the base level of a texture or the +X face/0 depth. +	  * This was fixed with the 4 Series hardware. +	  * +	  * For these original chips, you would have to make the depth and +	  * color destination surfaces include information on the texture +	  * type, LOD, face, and various limits to use them as a destination. +	  * I would have done this, but there's also a nasty requirement that +	  * the depth and the color surfaces all be of the same LOD, which +	  * may be a worse requirement than this alignment.  (Also, we may +	  * want to just demote the texture to untiled, instead). +	  */ +	 if (irb->region &&  +	     irb->region->tiling != I915_TILING_NONE && +	     (irb->region->draw_offset & 4095)) { +	    DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n"); +	    return GL_TRUE; +	 } +      } diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index e1c2c7777b..90513245ee 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -59,9 +59,9 @@ static void compile_sf_prog( struct brw_context *brw,     brw_init_compile(brw, &c.func);     c.key = *key; -   c.nr_attrs = brw_count_bits(c.key.attrs); +   c.nr_attrs = util_count_bits(c.key.attrs);     c.nr_attr_regs = (c.nr_attrs+1)/2; -   c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS); +   c.nr_setup_attrs = util_count_bits(c.key.attrs & DO_SETUP_BITS);     c.nr_setup_regs = (c.nr_setup_attrs+1)/2;     c.prog_data.urb_read_length = c.nr_attr_regs; diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index ca8f97f9f9..4cc427a935 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -150,7 +150,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )  {     struct brw_compile *p = &c->func;     struct brw_reg ip = brw_ip_reg(); -   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); +   GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);     GLuint jmpi = 1;     if (!nr) @@ -188,7 +188,7 @@ static void do_flatshade_line( struct brw_sf_compile *c )  {     struct brw_compile *p = &c->func;     struct brw_reg ip = brw_ip_reg(); -   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); +   GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);     GLuint jmpi = 1;     if (!nr) diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index b817b741e7..6801084616 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -270,7 +270,7 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)  /***********************************************************************   * Emit all state:   */ -void brw_validate_state( struct brw_context *brw ) +enum pipe_error brw_validate_state( struct brw_context *brw )  {     GLcontext *ctx = &brw->intel.ctx;     struct intel_context *intel = &brw->intel; @@ -278,10 +278,6 @@ void brw_validate_state( struct brw_context *brw )     GLuint i;     brw_clear_validated_bos(brw); - -   state->mesa |= brw->intel.NewGLState; -   brw->intel.NewGLState = 0; -     brw_add_validated_bo(brw, intel->batch->buf);     if (brw->emit_state_always) { @@ -290,36 +286,23 @@ void brw_validate_state( struct brw_context *brw )        state->cache |= ~0;     } -   if (brw->fragment_program != ctx->FragmentProgram._Current) { -      brw->fragment_program = ctx->FragmentProgram._Current; -      brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; -   } - -   if (brw->vertex_program != ctx->VertexProgram._Current) { -      brw->vertex_program = ctx->VertexProgram._Current; -      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; -   } -     if (state->mesa == 0 &&         state->cache == 0 &&         state->brw == 0) -      return; +      return 0;     if (brw->state.dirty.brw & BRW_NEW_CONTEXT)        brw_clear_batch_cache(brw); -   brw->intel.Fallback = 0; -     /* do prepare stage for all atoms */     for (i = 0; i < Elements(atoms); i++) {        const struct brw_tracked_state *atom = atoms[i]; -      if (brw->intel.Fallback) -         break; -        if (check_state(state, &atom->dirty)) {           if (atom->prepare) { -            atom->prepare(brw); +            ret = atom->prepare(brw); +	    if (ret) +	       return ret;          }        }     } @@ -329,17 +312,18 @@ void brw_validate_state( struct brw_context *brw )      * If this fails, we can experience GPU lock-ups.      */     { -      const struct brw_fragment_program *fp; -      fp = brw_fragment_program_const(brw->fragment_program); +      const struct brw_fragment_program *fp = brw->fragment_program;        if (fp) { -         assert((fp->tex_units_used & ctx->Texture._EnabledUnits) -                == fp->tex_units_used); +         assert(fp->info.max_sampler <= brw->nr_samplers && +		fp->info.max_texture <= brw->nr_textures);        }     } + +   return 0;  } -void brw_upload_state(struct brw_context *brw) +enum pipe_error brw_upload_state(struct brw_context *brw)  {     struct brw_state_flags *state = &brw->state.dirty;     int i; @@ -356,7 +340,7 @@ void brw_upload_state(struct brw_context *brw)        _mesa_memset(&examined, 0, sizeof(examined));        prev = *state; -      for (i = 0; i < Elements(atoms); i++) {	  +      for (i = 0; i < Elements(atoms); i++) {  	 const struct brw_tracked_state *atom = atoms[i];  	 struct brw_state_flags generated; @@ -364,12 +348,11 @@ void brw_upload_state(struct brw_context *brw)  		atom->dirty.brw ||  		atom->dirty.cache); -	 if (brw->intel.Fallback) -	    break; -  	 if (check_state(state, &atom->dirty)) {  	    if (atom->emit) { -	       atom->emit( brw ); +	       ret = atom->emit( brw ); +	       if (ret) +		  return ret;  	    }  	 } @@ -388,12 +371,11 @@ void brw_upload_state(struct brw_context *brw)        for (i = 0; i < Elements(atoms); i++) {	   	 const struct brw_tracked_state *atom = atoms[i]; -	 if (brw->intel.Fallback) -	    break; -  	 if (check_state(state, &atom->dirty)) {  	    if (atom->emit) { -	       atom->emit( brw ); +	       ret = atom->emit( brw ); +	       if (ret) +		  return ret;  	    }  	 }        } @@ -407,10 +389,11 @@ void brw_upload_state(struct brw_context *brw)  	 brw_print_dirty_count(mesa_bits, state->mesa);  	 brw_print_dirty_count(brw_bits, state->brw);  	 brw_print_dirty_count(cache_bits, state->cache); -	 fprintf(stderr, "\n"); +	 debug_printf("\n");        }     } - -   if (!brw->intel.Fallback) -      memset(state, 0, sizeof(*state)); +    +   /* Clear dirty flags: +    */ +   memset(state, 0, sizeof(*state));  } diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c new file mode 100644 index 0000000000..6684f442d5 --- /dev/null +++ b/src/gallium/drivers/i965/brw_swtnl.c @@ -0,0 +1,114 @@ + +/* XXX: could split the primitive list to fallback only on the + * non-conformant primitives. + */ +static GLboolean check_fallbacks( struct brw_context *brw, +				  const struct _mesa_prim *prim, +				  GLuint nr_prims ) +{ +   GLcontext *ctx = &brw->intel.ctx; +   GLuint i; + +   /* If we don't require strict OpenGL conformance, never  +    * use fallbacks.  If we're forcing fallbacks, always +    * use fallfacks. +    */ +   if (brw->intel.conformance_mode == 0) +      return GL_FALSE; + +   if (brw->intel.conformance_mode == 2) +      return GL_TRUE; + +   if (ctx->Polygon.SmoothFlag) { +      for (i = 0; i < nr_prims; i++) +	 if (reduced_prim[prim[i].mode] == GL_TRIANGLES)  +	    return GL_TRUE; +   } + +   /* BRW hardware will do AA lines, but they are non-conformant it +    * seems.  TBD whether we keep this fallback: +    */ +   if (ctx->Line.SmoothFlag) { +      for (i = 0; i < nr_prims; i++) +	 if (reduced_prim[prim[i].mode] == GL_LINES)  +	    return GL_TRUE; +   } + +   /* Stipple -- these fallbacks could be resolved with a little +    * bit of work? +    */ +   if (ctx->Line.StippleFlag) { +      for (i = 0; i < nr_prims; i++) { +	 /* GS doesn't get enough information to know when to reset +	  * the stipple counter?!? +	  */ +	 if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP)  +	    return GL_TRUE; +	     +	 if (prim[i].mode == GL_POLYGON && +	     (ctx->Polygon.FrontMode == GL_LINE || +	      ctx->Polygon.BackMode == GL_LINE)) +	    return GL_TRUE; +      } +   } + +   if (ctx->Point.SmoothFlag) { +      for (i = 0; i < nr_prims; i++) +	 if (prim[i].mode == GL_POINTS)  +	    return GL_TRUE; +   } + +   /* BRW hardware doesn't handle GL_CLAMP texturing correctly; +    * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP +    * as GL_CLAMP_TO_EDGE instead.  If we're using GL_CLAMP, and +    * we want strict conformance, force the fallback. +    * Right now, we only do this for 2D textures. +    */ +   { +      int u; +      for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { +         struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; +         if (texUnit->Enabled) { +            if (texUnit->Enabled & TEXTURE_1D_BIT) { +               if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) { +                   return GL_TRUE; +               } +            } +            if (texUnit->Enabled & TEXTURE_2D_BIT) { +               if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP || +                   texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) { +                   return GL_TRUE; +               } +            } +            if (texUnit->Enabled & TEXTURE_3D_BIT) { +               if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP || +                   texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP || +                   texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) { +                   return GL_TRUE; +               } +            } +         } +      } +   } + +   /* Exceeding hw limits on number of VS inputs? +    */ +   if (brw->nr_ve == 0 || +       brw->nr_ve >= BRW_VEP_MAX) { +      return TRUE; +   } + +   /* Position array with zero stride? +    */ +   if (brw->vs[brw->ve[0]]->stride == 0) +      return TRUE; + + +       +   /* Nothing stopping us from the fast path now */ +   return GL_FALSE; +} + + + + diff --git a/src/gallium/drivers/i965/brw_types.h b/src/gallium/drivers/i965/brw_types.h new file mode 100644 index 0000000000..32b62848da --- /dev/null +++ b/src/gallium/drivers/i965/brw_types.h @@ -0,0 +1,11 @@ +#ifndef BRW_TYPES_H +#define BRW_TYPES_H + +typedef GLuint uint32_t; +typedef GLubyte uint8_t; +typedef GLushort uint16_t; +/* no GLenum, translate all away */ + +typedef GLboolean uint8_t; + +#endif diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c index ce21aa4869..17f671a8fa 100644 --- a/src/gallium/drivers/i965/brw_util.c +++ b/src/gallium/drivers/i965/brw_util.c @@ -35,14 +35,6 @@  #include "brw_util.h"  #include "brw_defines.h" -GLuint brw_count_bits( GLuint val ) -{ -   GLuint i; -   for (i = 0; val ; val >>= 1) -      if (val & 1) -	 i++; -   return i; -}  GLuint brw_translate_blend_equation( GLenum mode ) diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index f0c79efbd9..53a5560105 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -61,9 +61,7 @@ static void do_vs_prog( struct brw_context *brw,     }     if (0) -      _mesa_print_program(&c.vp->program.Base); - - +      tgsi_dump(&c.vp->tokens, 0);     /* Emit GEN4 code.      */ @@ -96,9 +94,9 @@ static void brw_upload_vs_prog(struct brw_context *brw)      * the inputs it asks for, whether they are varying or not.      */     key.program_string_id = vp->id; -   key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); -   key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || -			ctx->Polygon.BackMode != GL_FILL); +   key.nr_userclip = brw->nr_userclip; +   key.copy_edgeflag = (brw->rast->fill_ccw != PIPE_POLYGON_MODE_FILL || +			brw->rast->fill_cw != PIPE_POLYGON_MODE_FILL);     /* Make an early check for the key.      */ @@ -116,7 +114,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)   */  const struct brw_tracked_state brw_vs_prog = {     .dirty = { -      .mesa  = _NEW_TRANSFORM | _NEW_POLYGON, +      .mesa  = PIPE_NEW_UCP | PIPE_NEW_RAST,        .brw   = BRW_NEW_VERTEX_PROGRAM,        .cache = 0     }, diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 1638ef8111..7f20c4baca 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -33,7 +33,7 @@  #include "main/macros.h"  #include "shader/program.h"  #include "shader/prog_parameter.h" -#include "shader/prog_print.h" +#include "pipe/p_shader_tokens.h"  #include "brw_context.h"  #include "brw_vs.h" @@ -129,6 +129,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )  	 reg++;        }     } +     /* If there are no inputs, we'll still be reading one attribute's worth      * because it's required -- see urb_read_length setting.      */ @@ -226,6 +227,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )      * vertex urb, so is half the amount:      */     c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; +     /* Setting this field to 0 leads to undefined behavior according to the      * the VS_STATE docs.  Our VUEs will always have at least one attribute      * sitting in them, even if it's padding. @@ -960,9 +962,6 @@ static void emit_arl( struct brw_vs_compile *c,  /**   * Return the brw reg for the given instruction's src argument. - * Will return mangled results for SWZ op.  The emit_swz() function - * ignores this result and recalculates taking extended swizzles into - * account.   */  static struct brw_reg get_arg( struct brw_vs_compile *c,                                 const struct prog_instruction *inst, @@ -1024,74 +1023,6 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,  } -static void emit_swz( struct brw_vs_compile *c,  -		      struct brw_reg dst, -                      const struct prog_instruction *inst) -{ -   const GLuint argIndex = 0; -   const struct prog_src_register src = inst->SrcReg[argIndex]; -   struct brw_compile *p = &c->func; -   GLuint zeros_mask = 0; -   GLuint ones_mask = 0; -   GLuint src_mask = 0; -   GLubyte src_swz[4]; -   GLboolean need_tmp = (src.Negate && -			 dst.file != BRW_GENERAL_REGISTER_FILE); -   struct brw_reg tmp = dst; -   GLuint i; - -   if (need_tmp) -      tmp = get_tmp(c); - -   for (i = 0; i < 4; i++) { -      if (dst.dw1.bits.writemask & (1<<i)) { -	 GLubyte s = GET_SWZ(src.Swizzle, i); -	 switch (s) { -	 case SWIZZLE_X: -	 case SWIZZLE_Y: -	 case SWIZZLE_Z: -	 case SWIZZLE_W: -	    src_mask |= 1<<i; -	    src_swz[i] = s; -	    break; -	 case SWIZZLE_ZERO: -	    zeros_mask |= 1<<i; -	    break; -	 case SWIZZLE_ONE: -	    ones_mask |= 1<<i; -	    break; -	 } -      } -   } -    -   /* Do src first, in case dst aliases src: -    */ -   if (src_mask) { -      struct brw_reg arg0; - -      arg0 = get_src_reg(c, inst, argIndex); - -      arg0 = brw_swizzle(arg0,  -			 src_swz[0], src_swz[1],  -			 src_swz[2], src_swz[3]); - -      brw_MOV(p, brw_writemask(tmp, src_mask), arg0); -   }  -    -   if (zeros_mask)  -      brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0)); - -   if (ones_mask)  -      brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); - -   if (src.Negate) -      brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); -    -   if (need_tmp) { -      brw_MOV(p, dst, tmp); -      release_tmp(c, tmp); -   } -}  /** @@ -1332,20 +1263,6 @@ void brw_vs_emit(struct brw_vs_compile *c )     brw_set_compression_control(p, BRW_COMPRESSION_NONE);     brw_set_access_mode(p, BRW_ALIGN_16); -   /* Message registers can't be read, so copy the output into GRF register -      if they are used in source registers */ -   for (insn = 0; insn < nr_insns; insn++) { -       GLuint i; -       struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; -       for (i = 0; i < 3; i++) { -	   struct prog_src_register *src = &inst->SrcReg[i]; -	   GLuint index = src->Index; -	   GLuint file = src->File;	 -	   if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS) -	       c->output_regs[index].used_in_src = GL_TRUE; -       } -   } -     /* Static register allocation      */     brw_vs_alloc_regs(c); @@ -1362,18 +1279,14 @@ void brw_vs_emit(struct brw_vs_compile *c )        _mesa_print_instruction(inst);  #endif -      /* Get argument regs.  SWZ is special and does this itself. +      /* Get argument regs.         */ -      if (inst->Opcode != OPCODE_SWZ) -	  for (i = 0; i < 3; i++) { -	      const struct prog_src_register *src = &inst->SrcReg[i]; -	      index = src->Index; -	      file = src->File;	 -	      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) -		  args[i] = c->output_regs[index].reg; -	      else -                  args[i] = get_arg(c, inst, i); -	  } +      for (i = 0; i < 3; i++) { +	 const struct prog_src_register *src = &inst->SrcReg[i]; +	 index = src->Index; +	 file = src->File;	 +	 args[i] = get_arg(c, inst, i); +      }        /* Get dest regs.  Note that it is possible for a reg to be both         * dst and arg, given the static allocation of registers.  So @@ -1381,10 +1294,7 @@ void brw_vs_emit(struct brw_vs_compile *c )         */         index = inst->DstReg.Index;        file = inst->DstReg.File; -      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) -	  dst = c->output_regs[index].reg; -      else -	  dst = get_dst(c, inst->DstReg); +      dst = get_dst(c, inst->DstReg);        if (inst->SaturateMode != SATURATE_OFF) {  	 _mesa_problem(NULL, "Unsupported saturate %d in vertex shader", @@ -1392,151 +1302,144 @@ void brw_vs_emit(struct brw_vs_compile *c )        }        switch (inst->Opcode) { -      case OPCODE_ABS: +      case TGSI_OPCODE_ABS:  	 brw_MOV(p, dst, brw_abs(args[0]));  	 break; -      case OPCODE_ADD: +      case TGSI_OPCODE_ADD:  	 brw_ADD(p, dst, args[0], args[1]);  	 break; -      case OPCODE_COS: +      case TGSI_OPCODE_COS:  	 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);  	 break; -      case OPCODE_DP3: +      case TGSI_OPCODE_DP3:  	 brw_DP3(p, dst, args[0], args[1]);  	 break; -      case OPCODE_DP4: +      case TGSI_OPCODE_DP4:  	 brw_DP4(p, dst, args[0], args[1]);  	 break; -      case OPCODE_DPH: +      case TGSI_OPCODE_DPH:  	 brw_DPH(p, dst, args[0], args[1]);  	 break; -      case OPCODE_NRM3: +      case TGSI_OPCODE_NRM3:  	 emit_nrm(c, dst, args[0], 3);  	 break; -      case OPCODE_NRM4: +      case TGSI_OPCODE_NRM4:  	 emit_nrm(c, dst, args[0], 4);  	 break; -      case OPCODE_DST: +      case TGSI_OPCODE_DST:  	 unalias2(c, dst, args[0], args[1], emit_dst_noalias);   	 break; -      case OPCODE_EXP: +      case TGSI_OPCODE_EXP:  	 unalias1(c, dst, args[0], emit_exp_noalias);  	 break; -      case OPCODE_EX2: +      case TGSI_OPCODE_EX2:  	 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);  	 break; -      case OPCODE_ARL: +      case TGSI_OPCODE_ARL:  	 emit_arl(c, dst, args[0]);  	 break; -      case OPCODE_FLR: +      case TGSI_OPCODE_FLR:  	 brw_RNDD(p, dst, args[0]);  	 break; -      case OPCODE_FRC: +      case TGSI_OPCODE_FRC:  	 brw_FRC(p, dst, args[0]);  	 break; -      case OPCODE_LOG: +      case TGSI_OPCODE_LOG:  	 unalias1(c, dst, args[0], emit_log_noalias);  	 break; -      case OPCODE_LG2: +      case TGSI_OPCODE_LG2:  	 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);  	 break; -      case OPCODE_LIT: +      case TGSI_OPCODE_LIT:  	 unalias1(c, dst, args[0], emit_lit_noalias);  	 break; -      case OPCODE_LRP: +      case TGSI_OPCODE_LRP:  	 unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);  	 break; -      case OPCODE_MAD: +      case TGSI_OPCODE_MAD:  	 brw_MOV(p, brw_acc_reg(), args[2]);  	 brw_MAC(p, dst, args[0], args[1]);  	 break; -      case OPCODE_MAX: +      case TGSI_OPCODE_MAX:  	 emit_max(p, dst, args[0], args[1]);  	 break; -      case OPCODE_MIN: +      case TGSI_OPCODE_MIN:  	 emit_min(p, dst, args[0], args[1]);  	 break; -      case OPCODE_MOV: +      case TGSI_OPCODE_MOV:  	 brw_MOV(p, dst, args[0]);  	 break; -      case OPCODE_MUL: +      case TGSI_OPCODE_MUL:  	 brw_MUL(p, dst, args[0], args[1]);  	 break; -      case OPCODE_POW: +      case TGSI_OPCODE_POW:  	 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);   	 break; -      case OPCODE_RCP: +      case TGSI_OPCODE_RCP:  	 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);  	 break; -      case OPCODE_RSQ: +      case TGSI_OPCODE_RSQ:  	 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);  	 break; - -      case OPCODE_SEQ: +      case TGSI_OPCODE_SEQ:           emit_seq(p, dst, args[0], args[1]);           break; -      case OPCODE_SIN: +      case TGSI_OPCODE_SIN:  	 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);  	 break; -      case OPCODE_SNE: +      case TGSI_OPCODE_SNE:           emit_sne(p, dst, args[0], args[1]);           break; -      case OPCODE_SGE: +      case TGSI_OPCODE_SGE:  	 emit_sge(p, dst, args[0], args[1]);  	 break; -      case OPCODE_SGT: +      case TGSI_OPCODE_SGT:           emit_sgt(p, dst, args[0], args[1]);           break; -      case OPCODE_SLT: +      case TGSI_OPCODE_SLT:  	 emit_slt(p, dst, args[0], args[1]);  	 break; -      case OPCODE_SLE: +      case TGSI_OPCODE_SLE:           emit_sle(p, dst, args[0], args[1]);           break; -      case OPCODE_SUB: +      case TGSI_OPCODE_SUB:  	 brw_ADD(p, dst, args[0], negate(args[1]));  	 break; -      case OPCODE_SWZ: -	 /* The args[0] value can't be used here as it won't have -	  * correctly encoded the full swizzle: -	  */ -	 emit_swz(c, dst, inst); -	 break; -      case OPCODE_TRUNC: +      case TGSI_OPCODE_TRUNC:           /* round toward zero */  	 brw_RNDZ(p, dst, args[0]);  	 break; -      case OPCODE_XPD: +      case TGSI_OPCODE_XPD:  	 emit_xpd(p, dst, args[0], args[1]);  	 break; -      case OPCODE_IF: +      case TGSI_OPCODE_IF:  	 assert(if_depth < MAX_IF_DEPTH);  	 if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);  	 /* Note that brw_IF smashes the predicate_control field. */  	 if_inst[if_depth]->header.predicate_control = get_predicate(inst);  	 if_depth++;  	 break; -      case OPCODE_ELSE: +      case TGSI_OPCODE_ELSE:  	 if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);  	 break; -      case OPCODE_ENDIF: +      case TGSI_OPCODE_ENDIF:           assert(if_depth > 0);  	 brw_ENDIF(p, if_inst[--if_depth]);  	 break;			 -      case OPCODE_BGNLOOP: +      case TGSI_OPCODE_BGNLOOP:           loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);           break; -      case OPCODE_BRK: +      case TGSI_OPCODE_BRK:  	 brw_set_predicate_control(p, get_predicate(inst));           brw_BREAK(p);  	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);           break; -      case OPCODE_CONT: +      case TGSI_OPCODE_CONT:  	 brw_set_predicate_control(p, get_predicate(inst));           brw_CONT(p);           brw_set_predicate_control(p, BRW_PREDICATE_NONE);           break; -      case OPCODE_ENDLOOP:  +      case TGSI_OPCODE_ENDLOOP:            {              struct brw_instruction *inst0, *inst1;  	    GLuint br = 1; @@ -1550,23 +1453,23 @@ void brw_vs_emit(struct brw_vs_compile *c )              /* patch all the BREAK/CONT instructions from last BEGINLOOP */              while (inst0 > loop_inst[loop_depth]) {                 inst0--; -               if (inst0->header.opcode == BRW_OPCODE_BREAK) { +               if (inst0->header.opcode == BRW_TGSI_OPCODE_BREAK) {                    inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);                    inst0->bits3.if_else.pop_count = 0;                 } -               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { +               else if (inst0->header.opcode == BRW_TGSI_OPCODE_CONTINUE) {                    inst0->bits3.if_else.jump_count = br * (inst1 - inst0);                    inst0->bits3.if_else.pop_count = 0;                 }              }           }           break; -      case OPCODE_BRA: +      case TGSI_OPCODE_BRA:  	 brw_set_predicate_control(p, get_predicate(inst));           brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));  	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);           break; -      case OPCODE_CAL: +      case TGSI_OPCODE_CAL:  	 brw_set_access_mode(p, BRW_ALIGN_1);  	 brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));  	 brw_set_access_mode(p, BRW_ALIGN_16); @@ -1575,27 +1478,27 @@ void brw_vs_emit(struct brw_vs_compile *c )           brw_save_call(p, inst->Comment, p->nr_insn);  	 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));           break; -      case OPCODE_RET: +      case TGSI_OPCODE_RET:  	 brw_ADD(p, get_addr_reg(stack_index),  			 get_addr_reg(stack_index), brw_imm_d(-4));  	 brw_set_access_mode(p, BRW_ALIGN_1);           brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0));  	 brw_set_access_mode(p, BRW_ALIGN_16);  	 break; -      case OPCODE_END:	 +      case TGSI_OPCODE_END:	           end_offset = p->nr_insn;           /* this instruction will get patched later to jump past subroutine            * code, etc.            */           brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));           break; -      case OPCODE_PRINT: +      case TGSI_OPCODE_PRINT:           /* no-op */           break; -      case OPCODE_BGNSUB: +      case TGSI_OPCODE_BGNSUB:           brw_save_label(p, inst->Comment, p->nr_insn);           break; -      case OPCODE_ENDSUB: +      case TGSI_OPCODE_ENDSUB:           /* no-op */           break;        default: @@ -1618,33 +1521,6 @@ void brw_vs_emit(struct brw_vs_compile *c )  	 hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;        } -      if ((inst->DstReg.File == PROGRAM_OUTPUT) -          && (inst->DstReg.Index != VERT_RESULT_HPOS) -          && c->output_regs[inst->DstReg.Index].used_in_src) { -         brw_MOV(p, get_dst(c, inst->DstReg), dst); -      } - -      /* Result color clamping. -       * -       * When destination register is an output register and -       * it's primary/secondary front/back color, we have to clamp -       * the result to [0,1]. This is done by enabling the -       * saturation bit for the last instruction. -       * -       * We don't use brw_set_saturate() as it modifies -       * p->current->header.saturate, which affects all the subsequent -       * instructions. Instead, we directly modify the header -       * of the last (already stored) instruction. -       */ -      if (inst->DstReg.File == PROGRAM_OUTPUT) { -         if ((inst->DstReg.Index == VERT_RESULT_COL0) -             || (inst->DstReg.Index == VERT_RESULT_COL1) -             || (inst->DstReg.Index == VERT_RESULT_BFC0) -             || (inst->DstReg.Index == VERT_RESULT_BFC1)) { -            p->store[p->nr_insn-1].header.saturate = 1; -         } -      } -        release_tmps(c);     } diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 2292de94c4..20d31880b4 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -269,61 +269,46 @@ static void brw_wm_populate_key( struct brw_context *brw,  		    uses_depth,  		    key); +   /* Revisit this, figure out if it's really useful, and either push +    * it into the state tracker so that everyone benefits (use to +    * create fs varients with TEX rather than TXP), or discard. +    */ +   key->proj_attrib_mask = ~0; /*brw->wm.input_size_masks[4-1];*/ -   /* BRW_NEW_WM_INPUT_DIMENSIONS */ -   key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; - -   /* _NEW_LIGHT */ -   key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); +   /* PIPE_NEW_RAST */ +   key->flat_shade = brw->rast.flat_shade; -   /* _NEW_HINT */ -   key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); +   /* This can be determined by looking at the INTERP mode each input decl. +    */ +   key->linear_color = 0;     /* _NEW_TEXTURE */     for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { -      const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; - -      if (unit->_ReallyEnabled) { -         const struct gl_texture_object *t = unit->_Current; -         const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; +      if (i < brw->nr_textures) { +	 const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; +	 const struct gl_texture_object *t = unit->_Current; +	 const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; +	   	 if (img->InternalFormat == GL_YCBCR_MESA) {  	    key->yuvtex_mask |= 1 << i;  	    if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR) -		key->yuvtex_swap_mask |= 1 << i; +	       key->yuvtex_swap_mask |= 1 << i;  	 } -         key->tex_swizzles[i] = t->_Swizzle; +	 key->tex_swizzles[i] = t->_Swizzle; +	  +	 if (0) +	    key->shadowtex_mask |= 1<<i;        }        else {           key->tex_swizzles[i] = SWIZZLE_NOOP;        }     } -   /* Shadow */ -   key->shadowtex_mask = fp->program.Base.ShadowSamplers; -   /* _NEW_BUFFERS */ -   /* -    * Include the draw buffer origin and height so that we can calculate -    * fragment position values relative to the bottom left of the drawable, -    * from the incoming screen origin relative position we get as part of our -    * payload. -    * -    * We could avoid recompiling by including this as a constant referenced by -    * our program, but if we were to do that it would also be nice to handle -    * getting that constant updated at batchbuffer submit time (when we -    * hold the lock and know where the buffer really is) rather than at emit -    * time when we don't hold the lock and are just guessing.  We could also -    * just avoid using this as key data if the program doesn't use -    * fragment.position. -    * -    * This pretty much becomes moot with DRI2 and redirected buffers anyway, -    * as our origins will always be zero then. -    */ +   /* _NEW_FRAMEBUFFER */     if (brw->intel.driDrawable != NULL) { -      key->origin_x = brw->intel.driDrawable->x; -      key->origin_y = brw->intel.driDrawable->y; -      key->drawable_height = brw->intel.driDrawable->h; +      key->drawable_height = brw->fb.cbufs[0].height;     }     /* CACHE_NEW_VS_PROG */ diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 872b1f3ecf..756a680150 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -76,7 +76,6 @@ struct brw_wm_prog_key {     GLuint tex_swizzles[BRW_MAX_TEX_UNIT];     GLuint program_string_id:32; -   GLuint origin_x, origin_y;     GLuint drawable_height;     GLuint vp_outputs_written;  }; diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index bf80a2942a..9c47c46a3d 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -125,23 +125,21 @@ static void emit_wpos_xy(struct brw_wm_compile *c,  {     struct brw_compile *p = &c->func; -   /* Calculate the pixel offset from window bottom left into destination -    * X and Y channels. -    */     if (mask & WRITEMASK_X) { -      /* X' = X - origin */ -      brw_ADD(p, +      /* X' = X */ +      brw_MOV(p,  	      dst[0], -	      retype(arg0[0], BRW_REGISTER_TYPE_W), -	      brw_imm_d(0 - c->key.origin_x)); +	      retype(arg0[0], BRW_REGISTER_TYPE_W));     } +   /* XXX: is this needed any more, or is this a NOOP? +    */     if (mask & WRITEMASK_Y) { -      /* Y' = height - (Y - origin_y) = height + origin_y - Y */ +      /* Y' = height - 1 - Y */        brw_ADD(p,  	      dst[1],  	      negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), -	      brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); +	      brw_imm_d(c->key.drawable_height - 1));     }  } @@ -1376,7 +1374,6 @@ void brw_wm_emit( struct brw_wm_compile *c )  	 break;        case OPCODE_MOV: -      case OPCODE_SWZ:  	 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);  	 break; diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 4e3edfbbff..5f47d86f71 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -30,25 +30,12 @@    */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" +#include "pipe/p_shader_constants.h" +  #include "brw_context.h"  #include "brw_wm.h"  #include "brw_util.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" -#include "shader/prog_statevars.h" - - -/** An invalid texture target */ -#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS - -/** An invalid texture unit */ -#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT - -#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS  #define X    0  #define Y    1 @@ -68,11 +55,6 @@ static const char *wm_opcode_strings[] = {     "FRONTFACING",  }; -#if 0 -static const char *wm_file_strings[] = {    -   "PAYLOAD" -}; -#endif  /*********************************************************************** @@ -165,13 +147,13 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c )     }     c->fp_temp |= 1<<(bit-1); -   return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1)); +   return dst_reg(PROGRAM_TEMPORARY, c->first_internal_temp+(bit-1));  }  static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )  { -   c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP)); +   c->fp_temp &= ~(1 << (temp.Index - c->first_internal_temp));  } @@ -192,58 +174,29 @@ static struct prog_instruction *emit_insn(struct brw_wm_compile *c,     return inst;  } -static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, -				       GLuint op, -				       struct prog_dst_register dest, -				       GLuint saturate, -				       GLuint tex_src_unit, -				       GLuint tex_src_target, -				       GLuint tex_shadow, -				       struct prog_src_register src0, -				       struct prog_src_register src1, -				       struct prog_src_register src2 ) +static struct prog_instruction * emit_op(struct brw_wm_compile *c, +					 GLuint op, +					 struct prog_dst_register dest, +					 GLuint saturate, +					 struct prog_src_register src0, +					 struct prog_src_register src1, +					 struct prog_src_register src2 )  {     struct prog_instruction *inst = get_fp_inst(c); -   assert(tex_src_unit < BRW_MAX_TEX_UNIT || -          tex_src_unit == TEX_UNIT_NONE); -   assert(tex_src_target < NUM_TEXTURE_TARGETS || -          tex_src_target == TEX_TARGET_NONE); - -   /* update mask of which texture units are referenced by this program */ -   if (tex_src_unit != TEX_UNIT_NONE) -      c->fp->tex_units_used |= (1 << tex_src_unit); -     memset(inst, 0, sizeof(*inst));     inst->Opcode = op;     inst->DstReg = dest;     inst->SaturateMode = saturate;    -   inst->TexSrcUnit = tex_src_unit; -   inst->TexSrcTarget = tex_src_target; -   inst->TexShadow = tex_shadow;     inst->SrcReg[0] = src0;     inst->SrcReg[1] = src1;     inst->SrcReg[2] = src2;     return inst;  } -    - -static struct prog_instruction * emit_op(struct brw_wm_compile *c, -				       GLuint op, -				       struct prog_dst_register dest, -				       GLuint saturate, -				       struct prog_src_register src0, -				       struct prog_src_register src1, -				       struct prog_src_register src2 ) -{ -   return emit_tex_op(c, op, dest, saturate, -                      TEX_UNIT_NONE, TEX_TARGET_NONE, 0,  /* unit, tgt, shadow */ -                      src0, src1, src2); -} -/* Many Mesa opcodes produce the same value across all the result channels. +/* Many opcodes produce the same value across all the result channels.   * We'd rather not have to support that splatting in the opcode implementations,   * and brw_wm_pass*.c wants to optimize them out by shuffling references around   * anyway.  We can easily get both by emitting the opcode to one channel, and @@ -267,7 +220,7 @@ static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,     other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);     if (other_channel_mask != 0) {        inst = emit_op(c, -		     OPCODE_MOV, +		     TGSI_OPCODE_MOV,  		     dst_mask(inst0->DstReg, other_channel_mask),  		     0,  		     src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), @@ -356,7 +309,9 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )  }  static void emit_interp( struct brw_wm_compile *c, -			 GLuint idx ) +			 GLuint semantic, +			 GLuint semantic_index, +			 GLuint interp_mode )  {     struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);     struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); @@ -366,7 +321,7 @@ static void emit_interp( struct brw_wm_compile *c,      * multiplied by 1/W in the SF program, and LINTERP on those      * which have not:      */ -   switch (idx) { +   switch (semantic) {     case FRAG_ATTRIB_WPOS:        /* Have to treat wpos.xy specially:         */ @@ -390,8 +345,8 @@ static void emit_interp( struct brw_wm_compile *c,  	      deltas,  	      src_undef());        break; -   case FRAG_ATTRIB_COL0: -   case FRAG_ATTRIB_COL1: + +   case TGSI_SEMANTIC_COLOR:        if (c->key.flat_shade) {  	 emit_op(c,  		 WM_CINTERP, @@ -402,25 +357,13 @@ static void emit_interp( struct brw_wm_compile *c,  		 src_undef());        }        else { -         if (c->key.linear_color) { -            emit_op(c, -                    WM_LINTERP, -                    dst, -                    0, -                    interp, -                    deltas, -                    src_undef()); -         } -         else { -            /* perspective-corrected color interpolation */ -            emit_op(c, -                    WM_PINTERP, -                    dst, -                    0, -                    interp, -                    deltas, -                    get_pixel_w(c)); -         } +	 emit_op(c, +		 translate_interp_mode(interp_mode), +		 dst, +		 0, +		 interp, +		 deltas, +		 src_undef());        }        break;     case FRAG_ATTRIB_FOGC: @@ -434,7 +377,7 @@ static void emit_interp( struct brw_wm_compile *c,  	      get_pixel_w(c));        emit_op(c, -	      OPCODE_MOV, +	      TGSI_OPCODE_MOV,  	      dst_mask(dst, WRITEMASK_YZW),  	      0,  	      src_swizzle(interp, @@ -468,7 +411,7 @@ static void emit_interp( struct brw_wm_compile *c,  	      get_pixel_w(c));        emit_op(c, -	      OPCODE_MOV, +	      TGSI_OPCODE_MOV,  	      dst_mask(dst, WRITEMASK_ZW),  	      0,  	      src_swizzle(interp, @@ -482,7 +425,7 @@ static void emit_interp( struct brw_wm_compile *c,     default:        emit_op(c, -	      WM_PINTERP, +	      translate_interp_mode(interp_mode),  	      dst,  	      0,  	      interp, @@ -490,8 +433,6 @@ static void emit_interp( struct brw_wm_compile *c,  	      get_pixel_w(c));        break;     } - -   c->fp_interp_emitted |= 1<<idx;  }  /*********************************************************************** @@ -581,7 +522,7 @@ static void precalc_dst( struct brw_wm_compile *c,        /* dst.y = mul src0.y, src1.y         */        emit_op(c, -	      OPCODE_MUL, +	      TGSI_OPCODE_MUL,  	      dst_mask(dst, WRITEMASK_Y),  	      inst->SaturateMode,  	      src0, @@ -596,7 +537,7 @@ static void precalc_dst( struct brw_wm_compile *c,        /* dst.xz = swz src0.1zzz         */        swz = emit_op(c, -		    OPCODE_SWZ, +		    TGSI_OPCODE_MOV,  		    dst_mask(dst, WRITEMASK_XZ),  		    inst->SaturateMode,  		    src_swizzle(src0, SWIZZLE_ONE, z, z, z), @@ -609,7 +550,7 @@ static void precalc_dst( struct brw_wm_compile *c,        /* dst.w = mov src1.w         */        emit_op(c, -	      OPCODE_MOV, +	      TGSI_OPCODE_MOV,  	      dst_mask(dst, WRITEMASK_W),  	      inst->SaturateMode,  	      src1, @@ -631,7 +572,7 @@ static void precalc_lit( struct brw_wm_compile *c,        /* dst.xw = swz src0.1111         */        swz = emit_op(c, -		    OPCODE_SWZ, +		    TGSI_OPCODE_MOV,  		    dst_mask(dst, WRITEMASK_XW),  		    0,  		    src_swizzle1(src0, SWIZZLE_ONE), @@ -643,7 +584,7 @@ static void precalc_lit( struct brw_wm_compile *c,     if (dst.WriteMask & WRITEMASK_YZ) {        emit_op(c, -	      OPCODE_LIT, +	      TGSI_OPCODE_LIT,  	      dst_mask(dst, WRITEMASK_YZ),  	      inst->SaturateMode,  	      src0, @@ -681,7 +622,7 @@ static void precalc_tex( struct brw_wm_compile *c,         coord = src_reg_from_dst(tmpcoord);         /* tmpcoord = src0 (i.e.: coord = src0) */ -       out = emit_op(c, OPCODE_MOV, +       out = emit_op(c, TGSI_OPCODE_MOV,                       tmpcoord,                       0,                       src0, @@ -691,7 +632,7 @@ static void precalc_tex( struct brw_wm_compile *c,         out->SrcReg[0].Abs = 1;         /* tmp0 = MAX(coord.X, coord.Y) */ -       emit_op(c, OPCODE_MAX, +       emit_op(c, TGSI_OPCODE_MAX,                 tmp0,                 0,                 src_swizzle1(coord, X), @@ -699,7 +640,7 @@ static void precalc_tex( struct brw_wm_compile *c,                 src_undef());         /* tmp1 = MAX(tmp0, coord.Z) */ -       emit_op(c, OPCODE_MAX, +       emit_op(c, TGSI_OPCODE_MAX,                 tmp1,                 0,                 tmp0src, @@ -707,7 +648,7 @@ static void precalc_tex( struct brw_wm_compile *c,                 src_undef());         /* tmp0 = 1 / tmp1 */ -       emit_op(c, OPCODE_RCP, +       emit_op(c, TGSI_OPCODE_RCP,                 dst_mask(tmp0, WRITEMASK_X),                 0,                 tmp1src, @@ -715,7 +656,7 @@ static void precalc_tex( struct brw_wm_compile *c,                 src_undef());         /* tmpCoord = src0 * tmp0 */ -       emit_op(c, OPCODE_MUL, +       emit_op(c, TGSI_OPCODE_MUL,                 tmpcoord,                 0,                 src0, @@ -738,7 +679,7 @@ static void precalc_tex( struct brw_wm_compile *c,        /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }         */        emit_op(c, -	      OPCODE_MUL, +	      TGSI_OPCODE_MUL,  	      tmpcoord,  	      0,  	      inst->SrcReg[0], @@ -785,7 +726,7 @@ static void precalc_tex( struct brw_wm_compile *c,        /* tmp     = TEX ...         */        emit_tex_op(c,  -                  OPCODE_TEX, +                  TGSI_OPCODE_TEX,                    tmp,                    inst->SaturateMode,                    unit, @@ -798,7 +739,7 @@ static void precalc_tex( struct brw_wm_compile *c,        /* tmp.xyz =  ADD TMP, C0         */        emit_op(c, -	      OPCODE_ADD, +	      TGSI_OPCODE_ADD,  	      dst_mask(tmp, WRITEMASK_XYZ),  	      0,  	      tmpsrc, @@ -809,7 +750,7 @@ static void precalc_tex( struct brw_wm_compile *c,         */        emit_op(c, -	      OPCODE_MUL, +	      TGSI_OPCODE_MUL,  	      dst_mask(tmp, WRITEMASK_Y),  	      0,  	      tmpsrc, @@ -824,7 +765,7 @@ static void precalc_tex( struct brw_wm_compile *c,         */        emit_op(c, -	      OPCODE_MAD, +	      TGSI_OPCODE_MAD,  	      dst_mask(dst, WRITEMASK_XYZ),  	      0,  	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), @@ -834,7 +775,7 @@ static void precalc_tex( struct brw_wm_compile *c,        /*  RGB.y   = MAD YUV.z, C1.w, RGB.y         */        emit_op(c, -	      OPCODE_MAD, +	      TGSI_OPCODE_MAD,  	      dst_mask(dst, WRITEMASK_Y),  	      0,  	      src_swizzle1(tmpsrc, Z), @@ -846,7 +787,7 @@ static void precalc_tex( struct brw_wm_compile *c,     else {        /* ordinary RGBA tex instruction */        emit_tex_op(c,  -                  OPCODE_TEX, +                  TGSI_OPCODE_TEX,                    inst->DstReg,                    inst->SaturateMode,                    unit, @@ -861,7 +802,7 @@ static void precalc_tex( struct brw_wm_compile *c,     if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {        /* swizzle the result of the TEX instruction */        struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg); -      emit_op(c, OPCODE_SWZ, +      emit_op(c, TGSI_OPCODE_MOV,                inst->DstReg,                SATURATE_OFF, /* saturate already done above */                src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), @@ -884,7 +825,7 @@ static GLboolean projtex( struct brw_wm_compile *c,     const struct prog_src_register src = inst->SrcReg[0];     GLboolean retVal; -   assert(inst->Opcode == OPCODE_TXP); +   assert(inst->Opcode == TGSI_OPCODE_TXP);     /* Only try to detect the simplest cases.  Could detect (later)      * cases where we are trying to emit code like RCP {1.0}, MUL x, @@ -921,7 +862,7 @@ static void precalc_txp( struct brw_wm_compile *c,        /* tmp0.w = RCP inst.arg[0][3]         */        emit_op(c, -	      OPCODE_RCP, +	      TGSI_OPCODE_RCP,  	      dst_mask(tmp, WRITEMASK_W),  	      0,  	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), @@ -931,7 +872,7 @@ static void precalc_txp( struct brw_wm_compile *c,        /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww         */        emit_op(c, -	      OPCODE_MUL, +	      TGSI_OPCODE_MUL,  	      dst_mask(tmp, WRITEMASK_XYZ),  	      0,  	      src0, @@ -1015,6 +956,7 @@ static void validate_src_regs( struct brw_wm_compile *c,  	 GLuint idx = inst->SrcReg[i].Index;  	 if (!(c->fp_interp_emitted & (1<<idx))) {  	    emit_interp(c, idx); +	    c->fp_interp_emitted |= 1<<idx;  	 }        }     } @@ -1094,71 +1036,64 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )         */        switch (inst->Opcode) { -      case OPCODE_SWZ:  +      case TGSI_OPCODE_ABS:  	 out = emit_insn(c, inst); -	 out->Opcode = OPCODE_MOV; -	 break; -	  -      case OPCODE_ABS: -	 out = emit_insn(c, inst); -	 out->Opcode = OPCODE_MOV; +	 out->Opcode = TGSI_OPCODE_MOV;  	 out->SrcReg[0].Negate = NEGATE_NONE;  	 out->SrcReg[0].Abs = 1;  	 break; -      case OPCODE_SUB:  +      case TGSI_OPCODE_SUB:   	 out = emit_insn(c, inst); -	 out->Opcode = OPCODE_ADD; +	 out->Opcode = TGSI_OPCODE_ADD;  	 out->SrcReg[1].Negate ^= NEGATE_XYZW;  	 break; -      case OPCODE_SCS:  +      case TGSI_OPCODE_SCS:   	 out = emit_insn(c, inst);  	 /* This should probably be done in the parser.   	  */  	 out->DstReg.WriteMask &= WRITEMASK_XY;  	 break; -      case OPCODE_DST: +      case TGSI_OPCODE_DST:  	 precalc_dst(c, inst);  	 break; -      case OPCODE_LIT: +      case TGSI_OPCODE_LIT:  	 precalc_lit(c, inst);  	 break; -      case OPCODE_TEX: +      case TGSI_OPCODE_TEX:  	 precalc_tex(c, inst);  	 break; -      case OPCODE_TXP: +      case TGSI_OPCODE_TXP:  	 precalc_txp(c, inst);  	 break; -      case OPCODE_TXB: +      case TGSI_OPCODE_TXB:  	 out = emit_insn(c, inst);  	 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];           assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);  	 break; -      case OPCODE_XPD:  +      case TGSI_OPCODE_XPD:   	 out = emit_insn(c, inst);  	 /* This should probably be done in the parser.   	  */  	 out->DstReg.WriteMask &= WRITEMASK_XYZ;  	 break; -      case OPCODE_KIL:  +      case TGSI_OPCODE_KIL:   	 out = emit_insn(c, inst);  	 /* This should probably be done in the parser.   	  */  	 out->DstReg.WriteMask = 0;  	 break; -      case OPCODE_END: +      case TGSI_OPCODE_END:  	 emit_fb_write(c);  	 break; -      case OPCODE_PRINT: -	 break;        default:  	 if (brw_wm_is_scalar_result(inst->Opcode))  	    emit_scalar_insn(c, inst); diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index c9fe1dd8ad..d836e2fb34 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -6,9 +6,6 @@  #include "brw_eu.h"  #include "brw_wm.h" -enum _subroutine { -    SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 -};  static struct brw_reg get_dst_reg(struct brw_wm_compile *c,                                    const struct prog_instruction *inst, @@ -32,10 +29,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)  	    case OPCODE_CAL:  	    case OPCODE_BRK:  	    case OPCODE_RET: -	    case OPCODE_NOISE1: -	    case OPCODE_NOISE2: -	    case OPCODE_NOISE3: -	    case OPCODE_NOISE4:  	    case OPCODE_BGNLOOP:  		return GL_TRUE;   	    default: @@ -1495,1036 +1488,7 @@ static INLINE struct brw_reg odd_bytes( struct brw_reg reg )  		   0, 16, 2 );  } -/* One-, two- and three-dimensional Perlin noise, similar to the description -   in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */ -static void noise1_sub( struct brw_wm_compile *c ) { -    struct brw_compile *p = &c->func; -    struct brw_reg param, -	x0, x1, /* gradients at each end */        -	t, tmp[ 2 ], /* float temporaries */ -	itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */ -    int i; -    int mark = mark_tmps( c ); - -    x0 = alloc_tmp( c ); -    x1 = alloc_tmp( c ); -    t = alloc_tmp( c ); -    tmp[ 0 ] = alloc_tmp( c ); -    tmp[ 1 ] = alloc_tmp( c ); -    itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD ); -    itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD ); -    itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD ); -    itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD ); -    itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD ); -     -    param = lookup_tmp( c, mark - 2 ); - -    brw_set_access_mode( p, BRW_ALIGN_1 ); - -    brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ - -    /* Arrange the two end coordinates into scalars (itmp0/itmp1) to -       be hashed.  Also compute the remainder (offset within the unit -       length), interleaved to reduce register dependency penalties. */ -    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param ); -    brw_FRC( p, param, param ); -    brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) ); -    brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ -    brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ - -    /* We're now ready to perform the hashing.  The two hashes are -       interleaved for performance.  The hash function used is -       designed to rapidly achieve avalanche and require only 32x16 -       bit multiplication, and 16-bit swizzles (which we get for -       free).  We can't use immediate operands in the multiplies, -       because immediates are permitted only in src1 and the 16-bit -       factor is permitted only in src0. */ -    for( i = 0; i < 2; i++ ) -	brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] ); -    for( i = 0; i < 2; i++ ) -       brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), -		high_words( itmp[ i ] ) ); -    for( i = 0; i < 2; i++ ) -	brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] ); -    for( i = 0; i < 2; i++ ) -       brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), -		high_words( itmp[ i ] ) ); -    for( i = 0; i < 2; i++ ) -	brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); -    for( i = 0; i < 2; i++ ) -       brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), -		high_words( itmp[ i ] ) ); - -    /* Now we want to initialise the two gradients based on the -       hashes.  Format conversion from signed integer to float leaves -       everything scaled too high by a factor of pow( 2, 31 ), but -       we correct for that right at the end. */ -    brw_ADD( p, t, param, brw_imm_f( -1.0 ) ); -    brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) ); -    brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) ); - -    brw_MUL( p, x0, x0, param ); -    brw_MUL( p, x1, x1, t ); -     -    /* We interpolate between the gradients using the polynomial -       6t^5 - 15t^4 + 10t^3 (Perlin). */ -    brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) ); -    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); -    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); -    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); -    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); -    brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the -					   pipeline */ -    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); -    brw_MUL( p, param, tmp[ 0 ], param ); -    brw_MUL( p, x1, x1, param ); -    brw_ADD( p, x0, x0, x1 );     -    /* scale by pow( 2, -30 ), to compensate for the format conversion -       above and an extra factor of 2 so that a single gradient covers -       the [-1,1] range */ -    brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) ); - -    release_tmps( c, mark ); -} - -static void emit_noise1( struct brw_wm_compile *c, -			 const struct prog_instruction *inst ) -{ -    struct brw_compile *p = &c->func; -    struct brw_reg src, param, dst; -    GLuint mask = inst->DstReg.WriteMask; -    int i; -    int mark = mark_tmps( c ); - -    assert( mark == 0 ); -     -    src = get_src_reg( c, inst, 0, 0 ); - -    param = alloc_tmp( c ); - -    brw_MOV( p, param, src ); - -    invoke_subroutine( c, SUB_NOISE1, noise1_sub ); -     -    /* Fill in the result: */ -    brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); -    for (i = 0 ; i < 4; i++) { -	if (mask & (1<<i)) { -	    dst = get_dst_reg(c, inst, i); -	    brw_MOV( p, dst, param ); -	} -    } -    if( inst->SaturateMode == SATURATE_ZERO_ONE ) -	brw_set_saturate( p, 0 ); -     -    release_tmps( c, mark ); -} -     -static void noise2_sub( struct brw_wm_compile *c ) { - -    struct brw_compile *p = &c->func; -    struct brw_reg param0, param1, -	x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */        -	t, tmp[ 4 ], /* float temporaries */ -	itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */ -    int i; -    int mark = mark_tmps( c ); - -    x0y0 = alloc_tmp( c ); -    x0y1 = alloc_tmp( c ); -    x1y0 = alloc_tmp( c ); -    x1y1 = alloc_tmp( c ); -    t = alloc_tmp( c ); -    for( i = 0; i < 4; i++ ) { -	tmp[ i ] = alloc_tmp( c ); -	itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); -    } -    itmp[ 4 ] = retype( x0y0, BRW_REGISTER_TYPE_UD ); -    itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD ); -    itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD ); -     -    param0 = lookup_tmp( c, mark - 3 ); -    param1 = lookup_tmp( c, mark - 2 ); - -    brw_set_access_mode( p, BRW_ALIGN_1 ); -     -    /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to -       be hashed.  Also compute the remainders (offsets within the unit -       square), interleaved to reduce register dependency penalties. */ -    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); -    brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); -    brw_FRC( p, param0, param0 ); -    brw_FRC( p, param1, param1 ); -    brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ -    brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ), -	     low_words( itmp[ 1 ] ) ); -    brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ -    brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ -    brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) ); -    brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) ); -    brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) ); - -    /* We're now ready to perform the hashing.  The four hashes are -       interleaved for performance.  The hash function used is -       designed to rapidly achieve avalanche and require only 32x16 -       bit multiplication, and 16-bit swizzles (which we get for -       free).  We can't use immediate operands in the multiplies, -       because immediates are permitted only in src1 and the 16-bit -       factor is permitted only in src0. */ -    for( i = 0; i < 4; i++ ) -	brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); -    for( i = 0; i < 4; i++ ) -	brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), -		 high_words( itmp[ i ] ) ); -    for( i = 0; i < 4; i++ ) -	brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] ); -    for( i = 0; i < 4; i++ ) -	brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), -		 high_words( itmp[ i ] ) ); -    for( i = 0; i < 4; i++ ) -	brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] ); -    for( i = 0; i < 4; i++ ) -	brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), -		 high_words( itmp[ i ] ) ); - -    /* Now we want to initialise the four gradients based on the -       hashes.  Format conversion from signed integer to float leaves -       everything scaled too high by a factor of pow( 2, 15 ), but -       we correct for that right at the end. */ -    brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); -    brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); -    brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); -    brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) ); -    brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) ); -     -    brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) ); -    brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) ); -    brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) ); -    brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) ); -     -    brw_MUL( p, x1y0, x1y0, t ); -    brw_MUL( p, x1y1, x1y1, t ); -    brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); -    brw_MUL( p, x0y0, x0y0, param0 ); -    brw_MUL( p, x0y1, x0y1, param0 ); - -    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 ); -    brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 ); -    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t ); -    brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t ); - -    brw_ADD( p, x0y0, x0y0, tmp[ 0 ] ); -    brw_ADD( p, x1y0, x1y0, tmp[ 2 ] ); -    brw_ADD( p, x0y1, x0y1, tmp[ 1 ] ); -    brw_ADD( p, x1y1, x1y1, tmp[ 3 ] ); -     -    /* We interpolate between the gradients using the polynomial -       6t^5 - 15t^4 + 10t^3 (Perlin). */ -    brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) ); -    brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) ); -    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); -    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) ); -    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); -    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); -    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the -						 pipeline */ -    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); -    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) ); -    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); -    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); -    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the -						 pipeline */ -    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); -    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); -    brw_MUL( p, param0, tmp[ 0 ], param0 ); -    brw_MUL( p, param1, tmp[ 1 ], param1 ); -     -    /* Here we interpolate in the y dimension... */ -    brw_MUL( p, x0y1, x0y1, param1 ); -    brw_MUL( p, x1y1, x1y1, param1 ); -    brw_ADD( p, x0y0, x0y0, x0y1 ); -    brw_ADD( p, x1y0, x1y0, x1y1 ); - -    /* And now in x.  There are horrible register dependencies here, -       but we have nothing else to do. */ -    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); -    brw_MUL( p, x1y0, x1y0, param0 ); -    brw_ADD( p, x0y0, x0y0, x1y0 ); -     -    /* scale by pow( 2, -15 ), as described above */ -    brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) ); - -    release_tmps( c, mark ); -} - -static void emit_noise2( struct brw_wm_compile *c, -			 const struct prog_instruction *inst ) -{ -    struct brw_compile *p = &c->func; -    struct brw_reg src0, src1, param0, param1, dst; -    GLuint mask = inst->DstReg.WriteMask; -    int i; -    int mark = mark_tmps( c ); - -    assert( mark == 0 ); -     -    src0 = get_src_reg( c, inst, 0, 0 ); -    src1 = get_src_reg( c, inst, 0, 1 ); - -    param0 = alloc_tmp( c ); -    param1 = alloc_tmp( c ); - -    brw_MOV( p, param0, src0 ); -    brw_MOV( p, param1, src1 ); - -    invoke_subroutine( c, SUB_NOISE2, noise2_sub ); -     -    /* Fill in the result: */ -    brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); -    for (i = 0 ; i < 4; i++) { -	if (mask & (1<<i)) { -	    dst = get_dst_reg(c, inst, i); -	    brw_MOV( p, dst, param0 ); -	} -    } -    if( inst->SaturateMode == SATURATE_ZERO_ONE ) -	brw_set_saturate( p, 0 ); -     -    release_tmps( c, mark ); -} - -/** - * The three-dimensional case is much like the one- and two- versions above, - * but since the number of corners is rapidly growing we now pack 16 16-bit - * hashes into each register to extract more parallelism from the EUs. - */ -static void noise3_sub( struct brw_wm_compile *c ) { - -    struct brw_compile *p = &c->func; -    struct brw_reg param0, param1, param2, -	x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ -	xi, yi, zi, /* interpolation coefficients */ -	t, tmp[ 8 ], /* float temporaries */ -	itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ -	wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ -    int i; -    int mark = mark_tmps( c ); - -    x0y0 = alloc_tmp( c ); -    x0y1 = alloc_tmp( c ); -    x1y0 = alloc_tmp( c ); -    x1y1 = alloc_tmp( c ); -    xi = alloc_tmp( c ); -    yi = alloc_tmp( c ); -    zi = alloc_tmp( c ); -    t = alloc_tmp( c ); -    for( i = 0; i < 8; i++ ) { -	tmp[ i ] = alloc_tmp( c ); -	itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); -	wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); -    } -     -    param0 = lookup_tmp( c, mark - 4 ); -    param1 = lookup_tmp( c, mark - 3 ); -    param2 = lookup_tmp( c, mark - 2 ); - -    brw_set_access_mode( p, BRW_ALIGN_1 ); -     -    /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to -       be hashed.  Also compute the remainders (offsets within the unit -       cube), interleaved to reduce register dependency penalties. */ -    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); -    brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); -    brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 ); -    brw_FRC( p, param0, param0 ); -    brw_FRC( p, param1, param1 ); -    brw_FRC( p, param2, param2 ); -    /* Since we now have only 16 bits of precision in the hash, we must -       be more careful about thorough mixing to maintain entropy as we -       squash the input vector into a small scalar. */ -    brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) ); -    brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) ); -    brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ), -	     brw_imm_uw( 0x9B93 ) ); -    brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), -	     brw_imm_uw( 0xBC8F ) ); - -    /* Temporarily disable the execution mask while we work with ExecSize=16 -       channels (the mask is set for ExecSize=8 and is probably incorrect). -       Although this might cause execution of unwanted channels, the code -       writes only to temporary registers and has no side effects, so -       disabling the mask is harmless. */ -    brw_push_insn_state( p ); -    brw_set_mask_control( p, BRW_MASK_DISABLE ); -    brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); -    brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); -    brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); - -    /* We're now ready to perform the hashing.  The eight hashes are -       interleaved for performance.  The hash function used is -       designed to rapidly achieve avalanche and require only 16x16 -       bit multiplication, and 8-bit swizzles (which we get for -       free). */ -    for( i = 0; i < 4; i++ ) -	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); -    for( i = 0; i < 4; i++ ) -	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), -		 odd_bytes( wtmp[ i ] ) ); -    for( i = 0; i < 4; i++ ) -	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); -    for( i = 0; i < 4; i++ ) -	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), -		 odd_bytes( wtmp[ i ] ) ); -    brw_pop_insn_state( p ); - -    /* Now we want to initialise the four rear gradients based on the -       hashes.  Format conversion from signed integer to float leaves -       everything scaled too high by a factor of pow( 2, 15 ), but -       we correct for that right at the end. */ -    /* x component */ -    brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); -    brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); -    brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); -    brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); -    brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); - -    brw_push_insn_state( p ); -    brw_set_mask_control( p, BRW_MASK_DISABLE ); -    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); -    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) ); -    brw_pop_insn_state( p ); -     -    brw_MUL( p, x1y0, x1y0, t ); -    brw_MUL( p, x1y1, x1y1, t ); -    brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); -    brw_MUL( p, x0y0, x0y0, param0 ); -    brw_MUL( p, x0y1, x0y1, param0 ); - -    /* y component */ -    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); -    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); -    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); -    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); -     -    brw_push_insn_state( p ); -    brw_set_mask_control( p, BRW_MASK_DISABLE ); -    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); -    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) ); -    brw_pop_insn_state( p ); - -    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); -    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); -    brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); -    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 ); -    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 ); -     -    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); -    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); -    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); -    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); -     -    /* z component */ -    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); -    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); -    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); -    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - -    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param2 ); -    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param2 ); -    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param2 ); -    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param2 ); -     -    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); -    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); -    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); -    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); -     -    /* We interpolate between the gradients using the polynomial -       6t^5 - 15t^4 + 10t^3 (Perlin). */ -    brw_MUL( p, xi, param0, brw_imm_f( 6.0 ) ); -    brw_MUL( p, yi, param1, brw_imm_f( 6.0 ) ); -    brw_MUL( p, zi, param2, brw_imm_f( 6.0 ) ); -    brw_ADD( p, xi, xi, brw_imm_f( -15.0 ) ); -    brw_ADD( p, yi, yi, brw_imm_f( -15.0 ) ); -    brw_ADD( p, zi, zi, brw_imm_f( -15.0 ) ); -    brw_MUL( p, xi, xi, param0 ); -    brw_MUL( p, yi, yi, param1 ); -    brw_MUL( p, zi, zi, param2 ); -    brw_ADD( p, xi, xi, brw_imm_f( 10.0 ) ); -    brw_ADD( p, yi, yi, brw_imm_f( 10.0 ) ); -    brw_ADD( p, zi, zi, brw_imm_f( 10.0 ) ); -    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work */ -    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work */ -    brw_MUL( p, xi, xi, param0 ); -    brw_MUL( p, yi, yi, param1 ); -    brw_MUL( p, zi, zi, param2 ); -    brw_MUL( p, xi, xi, param0 ); -    brw_MUL( p, yi, yi, param1 ); -    brw_MUL( p, zi, zi, param2 ); -    brw_MUL( p, xi, xi, param0 ); -    brw_MUL( p, yi, yi, param1 ); -    brw_MUL( p, zi, zi, param2 ); -     -    /* Here we interpolate in the y dimension... */ -    brw_MUL( p, x0y1, x0y1, yi ); -    brw_MUL( p, x1y1, x1y1, yi ); -    brw_ADD( p, x0y0, x0y0, x0y1 ); -    brw_ADD( p, x1y0, x1y0, x1y1 ); - -    /* And now in x.  Leave the result in tmp[ 0 ] (see below)... */ -    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); -    brw_MUL( p, x1y0, x1y0, xi ); -    brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); - -    /* Now do the same thing for the front four gradients... */ -    /* x component */ -    brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); -    brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); -    brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); -    brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); - -    brw_push_insn_state( p ); -    brw_set_mask_control( p, BRW_MASK_DISABLE ); -    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); -    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) ); -    brw_pop_insn_state( p ); - -    brw_MUL( p, x1y0, x1y0, t ); -    brw_MUL( p, x1y1, x1y1, t ); -    brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); -    brw_MUL( p, x0y0, x0y0, param0 ); -    brw_MUL( p, x0y1, x0y1, param0 ); - -    /* y component */ -    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); -    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); -    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); -    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); -     -    brw_push_insn_state( p ); -    brw_set_mask_control( p, BRW_MASK_DISABLE ); -    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); -    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) ); -    brw_pop_insn_state( p ); - -    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); -    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); -    brw_ADD( p, t, param2, brw_imm_f( -1.0 ) ); -    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 ); -    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 ); -     -    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); -    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); -    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); -    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); -     -    /* z component */ -    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); -    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); -    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); -    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - -    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); -    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); -    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); -    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); -     -    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); -    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); -    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); -    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); -     -    /* The interpolation coefficients are still around from last time, so -       again interpolate in the y dimension... */ -    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); -    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); -    brw_MUL( p, x0y1, x0y1, yi ); -    brw_MUL( p, x1y1, x1y1, yi ); -    brw_ADD( p, x0y0, x0y0, x0y1 ); -    brw_ADD( p, x1y0, x1y0, x1y1 ); - -    /* And now in x.  The rear face is in tmp[ 0 ] (see above), so this -       time put the front face in tmp[ 1 ] and we're nearly there... */ -    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); -    brw_MUL( p, x1y0, x1y0, xi ); -    brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); - -    /* The final interpolation, in the z dimension: */ -    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );     -    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], zi ); -    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); -     -    /* scale by pow( 2, -15 ), as described above */ -    brw_MUL( p, param0, tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); - -    release_tmps( c, mark ); -} - -static void emit_noise3( struct brw_wm_compile *c, -			 const struct prog_instruction *inst ) -{ -    struct brw_compile *p = &c->func; -    struct brw_reg src0, src1, src2, param0, param1, param2, dst; -    GLuint mask = inst->DstReg.WriteMask; -    int i; -    int mark = mark_tmps( c ); - -    assert( mark == 0 ); -     -    src0 = get_src_reg( c, inst, 0, 0 ); -    src1 = get_src_reg( c, inst, 0, 1 ); -    src2 = get_src_reg( c, inst, 0, 2 ); - -    param0 = alloc_tmp( c ); -    param1 = alloc_tmp( c ); -    param2 = alloc_tmp( c ); - -    brw_MOV( p, param0, src0 ); -    brw_MOV( p, param1, src1 ); -    brw_MOV( p, param2, src2 ); - -    invoke_subroutine( c, SUB_NOISE3, noise3_sub ); -     -    /* Fill in the result: */ -    brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); -    for (i = 0 ; i < 4; i++) { -	if (mask & (1<<i)) { -	    dst = get_dst_reg(c, inst, i); -	    brw_MOV( p, dst, param0 ); -	} -    } -    if( inst->SaturateMode == SATURATE_ZERO_ONE ) -	brw_set_saturate( p, 0 ); -     -    release_tmps( c, mark ); -} -     -/** - * For the four-dimensional case, the little micro-optimisation benefits - * we obtain by unrolling all the loops aren't worth the massive bloat it - * now causes.  Instead, we loop twice around performing a similar operation - * to noise3, once for the w=0 cube and once for the w=1, with a bit more - * code to glue it all together. - */ -static void noise4_sub( struct brw_wm_compile *c ) -{ -    struct brw_compile *p = &c->func; -    struct brw_reg param[ 4 ], -	x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ -	w0, /* noise for the w=0 cube */ -	floors[ 2 ], /* integer coordinates of base corner of hypercube */ -	interp[ 4 ], /* interpolation coefficients */ -	t, tmp[ 8 ], /* float temporaries */ -	itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ -	wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ -    int i, j; -    int mark = mark_tmps( c ); -    GLuint loop, origin; -     -    x0y0 = alloc_tmp( c ); -    x0y1 = alloc_tmp( c ); -    x1y0 = alloc_tmp( c ); -    x1y1 = alloc_tmp( c ); -    t = alloc_tmp( c ); -    w0 = alloc_tmp( c );     -    floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); -    floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); - -    for( i = 0; i < 4; i++ ) { -	param[ i ] = lookup_tmp( c, mark - 5 + i ); -	interp[ i ] = alloc_tmp( c ); -    } -     -    for( i = 0; i < 8; i++ ) { -	tmp[ i ] = alloc_tmp( c ); -	itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); -	wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); -    } - -    brw_set_access_mode( p, BRW_ALIGN_1 ); - -    /* We only want 16 bits of precision from the integral part of each -       co-ordinate, but unfortunately the RNDD semantics would saturate -       at 16 bits if we performed the operation directly to a 16-bit -       destination.  Therefore, we round to 32-bit temporaries where -       appropriate, and then store only the lower 16 bits. */ -    brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] ); -    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] ); -    brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] ); -    brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] ); -    brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) ); -    brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) ); - -    /* Modify the flag register here, because the side effect is useful -       later (see below).  We know for certain that all flags will be -       cleared, since the FRC instruction cannot possibly generate -       negative results.  Even for exceptional inputs (infinities, denormals, -       NaNs), the architecture guarantees that the L conditional is false. */ -    brw_set_conditionalmod( p, BRW_CONDITIONAL_L ); -    brw_FRC( p, param[ 0 ], param[ 0 ] ); -    brw_set_predicate_control( p, BRW_PREDICATE_NONE ); -    for( i = 1; i < 4; i++ )	 -	brw_FRC( p, param[ i ], param[ i ] ); -     -    /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first -       of all. */ -    for( i = 0; i < 4; i++ ) -	brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) ); -    for( i = 0; i < 4; i++ ) -	brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) ); -    for( i = 0; i < 4; i++ ) -	brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); -    for( i = 0; i < 4; i++ ) -	brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) ); -    for( j = 0; j < 3; j++ ) -	for( i = 0; i < 4; i++ ) -	    brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); - -    /* Mark the current address, as it will be a jump destination.  The -       following code will be executed twice: first, with the flag -       register clear indicating the w=0 case, and second with flags -       set for w=1. */ -    loop = p->nr_insn; -     -    /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to -       be hashed.  Since we have only 16 bits of precision in the hash, we -       must be careful about thorough mixing to maintain entropy as we -       squash the input vector into a small scalar. */ -    brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ), -	     brw_imm_uw( 0xBC8F ) ); -    brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ), -	     brw_imm_uw( 0xD0BD ) ); -    brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ), -	     brw_imm_uw( 0x9B93 ) ); -    brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ), -	     brw_imm_uw( 0xA359 ) ); -    brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), -	     brw_imm_uw( 0xBC8F ) ); - -    /* Temporarily disable the execution mask while we work with ExecSize=16 -       channels (the mask is set for ExecSize=8 and is probably incorrect). -       Although this might cause execution of unwanted channels, the code -       writes only to temporary registers and has no side effects, so -       disabling the mask is harmless. */ -    brw_push_insn_state( p ); -    brw_set_mask_control( p, BRW_MASK_DISABLE ); -    brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); -    brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); -    brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); - -    /* We're now ready to perform the hashing.  The eight hashes are -       interleaved for performance.  The hash function used is -       designed to rapidly achieve avalanche and require only 16x16 -       bit multiplication, and 8-bit swizzles (which we get for -       free). */ -    for( i = 0; i < 4; i++ ) -	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); -    for( i = 0; i < 4; i++ ) -	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), -		 odd_bytes( wtmp[ i ] ) ); -    for( i = 0; i < 4; i++ ) -	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); -    for( i = 0; i < 4; i++ ) -	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), -		 odd_bytes( wtmp[ i ] ) ); -    brw_pop_insn_state( p ); - -    /* Now we want to initialise the four rear gradients based on the -       hashes.  Format conversion from signed integer to float leaves -       everything scaled too high by a factor of pow( 2, 15 ), but -       we correct for that right at the end. */ -    /* x component */ -    brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); -    brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); -    brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); -    brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); -    brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); - -    brw_push_insn_state( p ); -    brw_set_mask_control( p, BRW_MASK_DISABLE ); -    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); -    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); -    brw_pop_insn_state( p ); -     -    brw_MUL( p, x1y0, x1y0, t ); -    brw_MUL( p, x1y1, x1y1, t ); -    brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); -    brw_MUL( p, x0y0, x0y0, param[ 0 ] ); -    brw_MUL( p, x0y1, x0y1, param[ 0 ] ); - -    /* y component */ -    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); -    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); -    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); -    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); -     -    brw_push_insn_state( p ); -    brw_set_mask_control( p, BRW_MASK_DISABLE ); -    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); -    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); -    brw_pop_insn_state( p ); - -    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); -    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );     -    /* prepare t for the w component (used below): w the first time through -       the loop; w - 1 the second time) */ -    brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); -    brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); -    p->current->header.predicate_inverse = 1; -    brw_MOV( p, t, param[ 3 ] ); -    p->current->header.predicate_inverse = 0; -    brw_set_predicate_control( p, BRW_PREDICATE_NONE ); -    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); -    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); -     -    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); -    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); -    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); -    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); -     -    /* z component */ -    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); -    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); -    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); -    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - -    brw_push_insn_state( p ); -    brw_set_mask_control( p, BRW_MASK_DISABLE ); -    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); -    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); -    brw_pop_insn_state( p ); - -    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] ); -    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] ); -    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] ); -    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] ); -     -    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); -    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); -    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); -    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - -    /* w component */ -    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); -    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); -    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); -    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - -    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); -    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); -    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); -    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); -    brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); -     -    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); -    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); -    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); -    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - -    /* Here we interpolate in the y dimension... */ -    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); -    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); -    brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); -    brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); -    brw_ADD( p, x0y0, x0y0, x0y1 ); -    brw_ADD( p, x1y0, x1y0, x1y1 ); - -    /* And now in x.  Leave the result in tmp[ 0 ] (see below)... */ -    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); -    brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); -    brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); - -    /* Now do the same thing for the front four gradients... */ -    /* x component */ -    brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); -    brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); -    brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); -    brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); - -    brw_push_insn_state( p ); -    brw_set_mask_control( p, BRW_MASK_DISABLE ); -    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); -    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); -    brw_pop_insn_state( p ); - -    brw_MUL( p, x1y0, x1y0, t ); -    brw_MUL( p, x1y1, x1y1, t ); -    brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); -    brw_MUL( p, x0y0, x0y0, param[ 0 ] ); -    brw_MUL( p, x0y1, x0y1, param[ 0 ] ); - -    /* y component */ -    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); -    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); -    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); -    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); -     -    brw_push_insn_state( p ); -    brw_set_mask_control( p, BRW_MASK_DISABLE ); -    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); -    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); -    brw_pop_insn_state( p ); - -    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); -    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); -    brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) ); -    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); -    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); -     -    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); -    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); -    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); -    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); -     -    /* z component */ -    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); -    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); -    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); -    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - -    brw_push_insn_state( p ); -    brw_set_mask_control( p, BRW_MASK_DISABLE ); -    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); -    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); -    brw_pop_insn_state( p ); - -    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); -    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); -    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); -    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); -    /* prepare t for the w component (used below): w the first time through -       the loop; w - 1 the second time) */ -    brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); -    brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); -    p->current->header.predicate_inverse = 1; -    brw_MOV( p, t, param[ 3 ] ); -    p->current->header.predicate_inverse = 0; -    brw_set_predicate_control( p, BRW_PREDICATE_NONE ); -     -    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); -    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); -    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); -    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - -    /* w component */ -    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); -    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); -    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); -    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - -    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); -    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); -    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); -    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); -     -    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); -    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); -    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); -    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - -    /* Interpolate in the y dimension: */ -    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); -    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); -    brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); -    brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); -    brw_ADD( p, x0y0, x0y0, x0y1 ); -    brw_ADD( p, x1y0, x1y0, x1y1 ); - -    /* And now in x.  The rear face is in tmp[ 0 ] (see above), so this -       time put the front face in tmp[ 1 ] and we're nearly there... */ -    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); -    brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); -    brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); - -    /* Another interpolation, in the z dimension: */ -    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );     -    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] ); -    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); - -    /* Exit the loop if we've computed both cubes... */ -    origin = p->nr_insn; -    brw_push_insn_state( p ); -    brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); -    brw_set_mask_control( p, BRW_MASK_DISABLE ); -    brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) ); -    brw_pop_insn_state( p ); - -    /* Save the result for the w=0 case, and increment the w coordinate: */ -    brw_MOV( p, w0, tmp[ 0 ] ); -    brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ), -	     brw_imm_uw( 1 ) ); - -    /* Loop around for the other cube.  Explicitly set the flag register -       (unfortunately we must spend an extra instruction to do this: we -       can't rely on a side effect of the previous MOV or ADD because -       conditional modifiers which are normally true might be false in -       exceptional circumstances, e.g. given a NaN input; the add to -       brw_ip_reg() is not suitable because the IP is not an 8-vector). */ -    brw_push_insn_state( p ); -    brw_set_mask_control( p, BRW_MASK_DISABLE ); -    brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) ); -    brw_ADD( p, brw_ip_reg(), brw_ip_reg(), -	     brw_imm_d( ( loop - p->nr_insn ) << 4 ) ); -    brw_pop_insn_state( p ); - -    /* Patch the previous conditional branch now that we know the -       destination address. */ -    brw_set_src1( p->store + origin, -		  brw_imm_d( ( p->nr_insn - origin ) << 4 ) ); - -    /* The very last interpolation. */ -    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) );     -    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] ); -    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 ); - -    /* scale by pow( 2, -15 ), as described above */ -    brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); - -    release_tmps( c, mark ); -} - -static void emit_noise4( struct brw_wm_compile *c, -			 const struct prog_instruction *inst ) -{ -    struct brw_compile *p = &c->func; -    struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst; -    GLuint mask = inst->DstReg.WriteMask; -    int i; -    int mark = mark_tmps( c ); - -    assert( mark == 0 ); -     -    src0 = get_src_reg( c, inst, 0, 0 ); -    src1 = get_src_reg( c, inst, 0, 1 ); -    src2 = get_src_reg( c, inst, 0, 2 ); -    src3 = get_src_reg( c, inst, 0, 3 ); - -    param0 = alloc_tmp( c ); -    param1 = alloc_tmp( c ); -    param2 = alloc_tmp( c ); -    param3 = alloc_tmp( c ); - -    brw_MOV( p, param0, src0 ); -    brw_MOV( p, param1, src1 ); -    brw_MOV( p, param2, src2 ); -    brw_MOV( p, param3, src3 ); - -    invoke_subroutine( c, SUB_NOISE4, noise4_sub ); -     -    /* Fill in the result: */ -    brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); -    for (i = 0 ; i < 4; i++) { -	if (mask & (1<<i)) { -	    dst = get_dst_reg(c, inst, i); -	    brw_MOV( p, dst, param0 ); -	} -    } -    if( inst->SaturateMode == SATURATE_ZERO_ONE ) -	brw_set_saturate( p, 0 ); -     -    release_tmps( c, mark ); -}  static void emit_wpos_xy(struct brw_wm_compile *c,                           const struct prog_instruction *inst) @@ -2543,19 +1507,18 @@ static void emit_wpos_xy(struct brw_wm_compile *c,       * X and Y channels.       */      if (mask & WRITEMASK_X) { -	/* X' = X - origin_x */ -	brw_ADD(p, +	/* X' = X */ +	brw_MOV(p,  		dst[0], -		retype(src0[0], BRW_REGISTER_TYPE_W), -		brw_imm_d(0 - c->key.origin_x)); +		retype(src0[0], BRW_REGISTER_TYPE_W));      }      if (mask & WRITEMASK_Y) { -	/* Y' = height - (Y - origin_y) = height + origin_y - Y */ +	/* Y' = height - 1 - Y */  	brw_ADD(p,  		dst[1],  		negate(retype(src0[1], BRW_REGISTER_TYPE_W)), -		brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); +		brw_imm_d(c->key.drawable_height - 1));      }  } @@ -2827,7 +1790,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)  		emit_trunc(c, inst);  		break;  	    case OPCODE_MOV: -	    case OPCODE_SWZ:  		emit_mov(c, inst);  		break;  	    case OPCODE_DP3: @@ -2903,18 +1865,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)  	    case OPCODE_MAD:  		emit_mad(c, inst);  		break; -	    case OPCODE_NOISE1: -		emit_noise1(c, inst); -		break; -	    case OPCODE_NOISE2: -		emit_noise2(c, inst); -		break; -	    case OPCODE_NOISE3: -		emit_noise3(c, inst); -		break; -	    case OPCODE_NOISE4: -		emit_noise4(c, inst); -		break;  	    case OPCODE_TEX:  		emit_tex(c, inst);  		break; diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 6279258339..0c411b57f5 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -422,7 +422,6 @@ void brw_wm_pass0( struct brw_wm_compile *c )         */              switch (inst->Opcode) {        case OPCODE_MOV:  -      case OPCODE_SWZ:   	 if (!inst->SaturateMode) {  	    pass0_precalc_mov(c, inst);  	 } diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c index b449394029..d940ec09a9 100644 --- a/src/gallium/drivers/i965/brw_wm_pass1.c +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -120,7 +120,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )        GLuint writemask;        GLuint read0, read1, read2; -      if (inst->opcode == OPCODE_KIL) { +      if (inst->opcode == TGSI_OPCODE_KIL) {  	 track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */  	 continue;        } @@ -154,76 +154,75 @@ void brw_wm_pass1( struct brw_wm_compile *c )        /* Mark all inputs which contribute to the marked outputs:         */        switch (inst->opcode) { -      case OPCODE_ABS: -      case OPCODE_FLR: -      case OPCODE_FRC: -      case OPCODE_MOV: -      case OPCODE_SWZ: -      case OPCODE_TRUNC: +      case TGSI_OPCODE_ABS: +      case TGSI_OPCODE_FLR: +      case TGSI_OPCODE_FRC: +      case TGSI_OPCODE_MOV: +      case TGSI_OPCODE_TRUNC:  	 read0 = writemask;  	 break; -      case OPCODE_SUB: -      case OPCODE_SLT: -      case OPCODE_SLE: -      case OPCODE_SGE: -      case OPCODE_SGT: -      case OPCODE_SEQ: -      case OPCODE_SNE: -      case OPCODE_ADD: -      case OPCODE_MAX: -      case OPCODE_MIN: -      case OPCODE_MUL: +      case TGSI_OPCODE_SUB: +      case TGSI_OPCODE_SLT: +      case TGSI_OPCODE_SLE: +      case TGSI_OPCODE_SGE: +      case TGSI_OPCODE_SGT: +      case TGSI_OPCODE_SEQ: +      case TGSI_OPCODE_SNE: +      case TGSI_OPCODE_ADD: +      case TGSI_OPCODE_MAX: +      case TGSI_OPCODE_MIN: +      case TGSI_OPCODE_MUL:  	 read0 = writemask;  	 read1 = writemask;  	 break; -      case OPCODE_DDX: -      case OPCODE_DDY: +      case TGSI_OPCODE_DDX: +      case TGSI_OPCODE_DDY:  	 read0 = writemask;  	 break; -      case OPCODE_MAD:	 -      case OPCODE_CMP: -      case OPCODE_LRP: +      case TGSI_OPCODE_MAD:	 +      case TGSI_OPCODE_CMP: +      case TGSI_OPCODE_LRP:  	 read0 = writemask;  	 read1 = writemask;	  	 read2 = writemask;	  	 break; -      case OPCODE_XPD:  +      case TGSI_OPCODE_XPD:   	 if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ;	   	 if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ;	   	 if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY;  	 read1 = read0;  	 break; -      case OPCODE_COS: -      case OPCODE_EX2: -      case OPCODE_LG2: -      case OPCODE_RCP: -      case OPCODE_RSQ: -      case OPCODE_SIN: -      case OPCODE_SCS: +      case TGSI_OPCODE_COS: +      case TGSI_OPCODE_EX2: +      case TGSI_OPCODE_LG2: +      case TGSI_OPCODE_RCP: +      case TGSI_OPCODE_RSQ: +      case TGSI_OPCODE_SIN: +      case TGSI_OPCODE_SCS:        case WM_CINTERP:        case WM_PIXELXY:  	 read0 = WRITEMASK_X;  	 break; -      case OPCODE_POW: +      case TGSI_OPCODE_POW:  	 read0 = WRITEMASK_X;  	 read1 = WRITEMASK_X;  	 break; -      case OPCODE_TEX: -      case OPCODE_TXP: +      case TGSI_OPCODE_TEX: +      case TGSI_OPCODE_TXP:  	 read0 = get_texcoord_mask(inst->tex_idx);           if (inst->tex_shadow)  	    read0 |= WRITEMASK_Z;  	 break; -      case OPCODE_TXB: +      case TGSI_OPCODE_TXB:  	 /* Shadow ignored for txb.  	  */  	 read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W; @@ -254,28 +253,28 @@ void brw_wm_pass1( struct brw_wm_compile *c )  	 read2 = WRITEMASK_W; /* pixel w */  	 break; -      case OPCODE_DP3:	 +      case TGSI_OPCODE_DP3:	  	 read0 = WRITEMASK_XYZ;  	 read1 = WRITEMASK_XYZ;  	 break; -      case OPCODE_DPH: +      case TGSI_OPCODE_DPH:  	 read0 = WRITEMASK_XYZ;  	 read1 = WRITEMASK_XYZW;  	 break; -      case OPCODE_DP4: +      case TGSI_OPCODE_DP4:  	 read0 = WRITEMASK_XYZW;  	 read1 = WRITEMASK_XYZW;  	 break; -      case OPCODE_LIT:  +      case TGSI_OPCODE_LIT:   	 read0 = WRITEMASK_XYW;  	 break; -      case OPCODE_DST: +      case TGSI_OPCODE_DST:        case WM_FRONTFACING: -      case OPCODE_KIL_NV: +      case TGSI_OPCODE_KIL_NV:        default:  	 break;        } diff --git a/src/gallium/drivers/i965/intel_chipset.h b/src/gallium/drivers/i965/intel_chipset.h index 3dc8653a73..3c38f1676c 100644 --- a/src/gallium/drivers/i965/intel_chipset.h +++ b/src/gallium/drivers/i965/intel_chipset.h @@ -66,7 +66,6 @@  #define PCI_CHIP_Q45_G                  0x2E12  #define PCI_CHIP_G45_G                  0x2E22  #define PCI_CHIP_G41_G                  0x2E32 -#define PCI_CHIP_B43_G                  0x2E42  #define PCI_CHIP_ILD_G                  0x0042  #define PCI_CHIP_ILM_G                  0x0046 @@ -84,8 +83,7 @@  #define IS_G45(devid)           (devid == PCI_CHIP_IGD_E_G || \                                   devid == PCI_CHIP_Q45_G || \                                   devid == PCI_CHIP_G45_G || \ -                                 devid == PCI_CHIP_G41_G || \ -                                 devid == PCI_CHIP_B43_G) +                                 devid == PCI_CHIP_G41_G)  #define IS_GM45(devid)          (devid == PCI_CHIP_GM45_GM)  #define IS_G4X(devid)		(IS_G45(devid) || IS_GM45(devid)) | 
