diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
72 files changed, 6314 insertions, 2906 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 37a470f2e2..6e9a9a29a3 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -9,10 +9,12 @@ DRIVER_SOURCES = \ intel_blit.c \ intel_buffer_objects.c \ intel_buffers.c \ + intel_clear.c \ intel_context.c \ intel_decode.c \ - intel_depthstencil.c \ + intel_extensions.c \ intel_fbo.c \ + intel_generatemipmap.c \ intel_mipmap_tree.c \ intel_regions.c \ intel_screen.c \ @@ -21,7 +23,10 @@ DRIVER_SOURCES = \ intel_pixel_bitmap.c \ intel_pixel_copy.c \ intel_pixel_draw.c \ + intel_pixel_read.c \ intel_state.c \ + intel_swapbuffers.c \ + intel_syncobj.c \ intel_tex.c \ intel_tex_copy.c \ intel_tex_format.c \ @@ -39,6 +44,7 @@ DRIVER_SOURCES = \ brw_clip_util.c \ brw_context.c \ brw_curbe.c \ + brw_disasm.c \ brw_draw.c \ brw_draw_upload.c \ brw_eu.c \ @@ -49,7 +55,6 @@ DRIVER_SOURCES = \ brw_gs.c \ brw_gs_emit.c \ brw_gs_state.c \ - brw_metaops.c \ brw_misc_state.c \ brw_program.c \ brw_queryobj.c \ @@ -68,6 +73,7 @@ DRIVER_SOURCES = \ brw_vs_constval.c \ brw_vs_emit.c \ brw_vs_state.c \ + brw_vs_surface_state.c \ brw_vtbl.c \ brw_wm.c \ brw_wm_debug.c \ diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index fa8121e02d..1088a7a607 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -39,12 +39,14 @@ static void prepare_cc_vp( struct brw_context *brw ) { + GLcontext *ctx = &brw->intel.ctx; struct brw_cc_viewport ccv; memset(&ccv, 0, sizeof(ccv)); - ccv.min_depth = 0.0; - ccv.max_depth = 1.0; + /* _NEW_VIEWPORT */ + ccv.min_depth = ctx->Viewport.Near; + ccv.max_depth = ctx->Viewport.Far; dri_bo_unreference(brw->cc.vp_bo); brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); @@ -52,7 +54,7 @@ static void prepare_cc_vp( struct brw_context *brw ) const struct brw_tracked_state brw_cc_vp = { .dirty = { - .mesa = 0, + .mesa = _NEW_VIEWPORT, .brw = BRW_NEW_CONTEXT, .cache = 0 }, @@ -83,59 +85,60 @@ struct brw_cc_unit_key { static void cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) { - struct gl_stencil_attrib *stencil = brw->attribs.Stencil; + GLcontext *ctx = &brw->intel.ctx; + const unsigned back = ctx->Stencil._BackFace; memset(key, 0, sizeof(*key)); - key->stencil = stencil->Enabled; - key->stencil_two_side = stencil->_TestTwoSide; + key->stencil = ctx->Stencil._Enabled; + key->stencil_two_side = ctx->Stencil._TestTwoSide; if (key->stencil) { - key->stencil_func[0] = stencil->Function[0]; - key->stencil_fail_op[0] = stencil->FailFunc[0]; - key->stencil_pass_depth_fail_op[0] = stencil->ZFailFunc[0]; - key->stencil_pass_depth_pass_op[0] = stencil->ZPassFunc[0]; - key->stencil_ref[0] = stencil->Ref[0]; - key->stencil_write_mask[0] = stencil->WriteMask[0]; - key->stencil_test_mask[0] = stencil->ValueMask[0]; + key->stencil_func[0] = ctx->Stencil.Function[0]; + key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0]; + key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0]; + key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0]; + key->stencil_ref[0] = ctx->Stencil.Ref[0]; + key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0]; + key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0]; } if (key->stencil_two_side) { - key->stencil_func[1] = stencil->Function[1]; - key->stencil_fail_op[1] = stencil->FailFunc[1]; - key->stencil_pass_depth_fail_op[1] = stencil->ZFailFunc[1]; - key->stencil_pass_depth_pass_op[1] = stencil->ZPassFunc[1]; - key->stencil_ref[1] = stencil->Ref[1]; - key->stencil_write_mask[1] = stencil->WriteMask[1]; - key->stencil_test_mask[1] = stencil->ValueMask[1]; + key->stencil_func[1] = ctx->Stencil.Function[back]; + key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back]; + key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back]; + key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back]; + key->stencil_ref[1] = ctx->Stencil.Ref[back]; + key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back]; + key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back]; } - if (brw->attribs.Color->_LogicOpEnabled) - key->logic_op = brw->attribs.Color->LogicOp; + if (ctx->Color._LogicOpEnabled) + key->logic_op = ctx->Color.LogicOp; else key->logic_op = GL_COPY; - key->color_blend = brw->attribs.Color->BlendEnabled; + key->color_blend = ctx->Color.BlendEnabled; if (key->color_blend) { - key->blend_eq_rgb = brw->attribs.Color->BlendEquationRGB; - key->blend_eq_a = brw->attribs.Color->BlendEquationA; - key->blend_src_rgb = brw->attribs.Color->BlendSrcRGB; - key->blend_dst_rgb = brw->attribs.Color->BlendDstRGB; - key->blend_src_a = brw->attribs.Color->BlendSrcA; - key->blend_dst_a = brw->attribs.Color->BlendDstA; + key->blend_eq_rgb = ctx->Color.BlendEquationRGB; + key->blend_eq_a = ctx->Color.BlendEquationA; + key->blend_src_rgb = ctx->Color.BlendSrcRGB; + key->blend_dst_rgb = ctx->Color.BlendDstRGB; + key->blend_src_a = ctx->Color.BlendSrcA; + key->blend_dst_a = ctx->Color.BlendDstA; } - key->alpha_enabled = brw->attribs.Color->AlphaEnabled; + key->alpha_enabled = ctx->Color.AlphaEnabled; if (key->alpha_enabled) { - key->alpha_func = brw->attribs.Color->AlphaFunc; - key->alpha_ref = brw->attribs.Color->AlphaRef; + key->alpha_func = ctx->Color.AlphaFunc; + key->alpha_ref = ctx->Color.AlphaRef; } - key->dither = brw->attribs.Color->DitherFlag; + key->dither = ctx->Color.DitherFlag; - key->depth_test = brw->attribs.Depth->Test; + key->depth_test = ctx->Depth.Test; if (key->depth_test) { - key->depth_func = brw->attribs.Depth->Func; - key->depth_write = brw->attribs.Depth->Mask; + key->depth_func = ctx->Depth.Func; + key->depth_write = ctx->Depth.Mask; } } diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 38d8b704d7..20a927cf38 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -65,21 +65,31 @@ static void compile_clip_prog( struct brw_context *brw, c.func.single_program_flow = 1; c.key = *key; - + c.need_ff_sync = BRW_IS_IGDNG(brw); /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.header_position_offset = ATTR_SIZE; - for (i = 0, delta = REG_SIZE; i < VERT_RESULT_MAX; i++) + if (BRW_IS_IGDNG(brw)) + delta = 3 * REG_SIZE; + else + delta = REG_SIZE; + + for (i = 0; i < VERT_RESULT_MAX; i++) if (c.key.attrs & (1<<i)) { c.offset[i] = delta; delta += ATTR_SIZE; } c.nr_attrs = brw_count_bits(c.key.attrs); - c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + + if (BRW_IS_IGDNG(brw)) + c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ + else + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + c.nr_bytes = c.nr_regs * REG_SIZE; c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ @@ -145,14 +155,19 @@ static void upload_clip_prog(struct brw_context *brw) /* CACHE_NEW_VS_PROG */ key.attrs = brw->vs.prog_data->outputs_written; /* _NEW_LIGHT */ - key.do_flat_shading = (brw->attribs.Light->ShadeModel == GL_FLAT); + key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); /* _NEW_TRANSFORM */ - key.nr_userclip = brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); - key.clip_mode = BRW_CLIPMODE_NORMAL; + key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); + + if (BRW_IS_IGDNG(brw)) + key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; + else + key.clip_mode = BRW_CLIPMODE_NORMAL; /* _NEW_POLYGON */ if (key.primitive == GL_TRIANGLES) { - if (brw->attribs.Polygon->CullFaceMode == GL_FRONT_AND_BACK) + if (ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) key.clip_mode = BRW_CLIPMODE_REJECT_ALL; else { GLuint fill_front = CLIP_CULL; @@ -160,44 +175,44 @@ static void upload_clip_prog(struct brw_context *brw) GLuint offset_front = 0; GLuint offset_back = 0; - if (!brw->attribs.Polygon->CullFlag || - brw->attribs.Polygon->CullFaceMode != GL_FRONT) { - switch (brw->attribs.Polygon->FrontMode) { + if (!ctx->Polygon.CullFlag || + ctx->Polygon.CullFaceMode != GL_FRONT) { + switch (ctx->Polygon.FrontMode) { case GL_FILL: fill_front = CLIP_FILL; offset_front = 0; break; case GL_LINE: fill_front = CLIP_LINE; - offset_front = brw->attribs.Polygon->OffsetLine; + offset_front = ctx->Polygon.OffsetLine; break; case GL_POINT: fill_front = CLIP_POINT; - offset_front = brw->attribs.Polygon->OffsetPoint; + offset_front = ctx->Polygon.OffsetPoint; break; } } - if (!brw->attribs.Polygon->CullFlag || - brw->attribs.Polygon->CullFaceMode != GL_BACK) { - switch (brw->attribs.Polygon->BackMode) { + if (!ctx->Polygon.CullFlag || + ctx->Polygon.CullFaceMode != GL_BACK) { + switch (ctx->Polygon.BackMode) { case GL_FILL: fill_back = CLIP_FILL; offset_back = 0; break; case GL_LINE: fill_back = CLIP_LINE; - offset_back = brw->attribs.Polygon->OffsetLine; + offset_back = ctx->Polygon.OffsetLine; break; case GL_POINT: fill_back = CLIP_POINT; - offset_back = brw->attribs.Polygon->OffsetPoint; + offset_back = ctx->Polygon.OffsetPoint; break; } } - if (brw->attribs.Polygon->BackMode != GL_FILL || - brw->attribs.Polygon->FrontMode != GL_FILL) { + if (ctx->Polygon.BackMode != GL_FILL || + ctx->Polygon.FrontMode != GL_FILL) { key.do_unfilled = 1; /* Most cases the fixed function units will handle. Cases where @@ -207,17 +222,17 @@ static void upload_clip_prog(struct brw_context *brw) if (offset_back || offset_front) { /* _NEW_POLYGON, _NEW_BUFFERS */ - key.offset_units = brw->attribs.Polygon->OffsetUnits * brw->intel.polygon_offset_scale; - key.offset_factor = brw->attribs.Polygon->OffsetFactor * ctx->DrawBuffer->_MRD; + key.offset_units = ctx->Polygon.OffsetUnits * brw->intel.polygon_offset_scale; + key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD; } - switch (brw->attribs.Polygon->FrontFace) { + switch (ctx->Polygon.FrontFace) { case GL_CCW: key.fill_ccw = fill_front; key.fill_cw = fill_back; key.offset_ccw = offset_front; key.offset_cw = offset_back; - if (brw->attribs.Light->Model.TwoSide && + if (ctx->Light.Model.TwoSide && key.fill_cw != CLIP_CULL) key.copy_bfc_cw = 1; break; @@ -226,7 +241,7 @@ static void upload_clip_prog(struct brw_context *brw) key.fill_ccw = fill_back; key.offset_cw = offset_front; key.offset_ccw = offset_back; - if (brw->attribs.Light->Model.TwoSide && + if (ctx->Light.Model.TwoSide && key.fill_ccw != CLIP_CULL) key.copy_bfc_ccw = 1; break; diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h index e06747864b..957df441ab 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.h +++ b/src/mesa/drivers/dri/i965/brw_clip.h @@ -100,6 +100,8 @@ struct brw_clip_compile { struct brw_reg fixed_planes; struct brw_reg plane_equation; + + struct brw_reg ff_sync; } reg; /* 3 different ways of expressing vertex size: @@ -117,6 +119,7 @@ struct brw_clip_compile { GLuint header_position_offset; GLuint offset[VERT_ATTRIB_MAX]; + GLboolean need_ff_sync; }; #define ATTR_SIZE (4*4) @@ -171,5 +174,6 @@ struct brw_reg get_tmp( struct brw_clip_compile *c ); void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ); - +void brw_clip_ff_sync(struct brw_clip_compile *c); +void brw_clip_init_ff_sync(struct brw_clip_compile *c); #endif diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index c45d48dff8..048ca620fa 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -85,6 +85,10 @@ static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) i++; } + if (c->need_ff_sync) { + c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); + i++; + } c->first_tmp = i; c->last_tmp = i; @@ -130,7 +134,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) struct brw_instruction *plane_loop; struct brw_instruction *plane_active; struct brw_instruction *is_negative; - struct brw_instruction *is_neg2; + struct brw_instruction *is_neg2 = NULL; struct brw_instruction *not_culled; struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); @@ -148,7 +152,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_init_clipmask(c); /* -ve rhw workaround */ - if (!BRW_IS_G4X(p->brw)) { + if (BRW_IS_965(p->brw)) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); @@ -181,34 +185,54 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); is_negative = brw_IF(p, BRW_EXECUTE_1); { - brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); - - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); - brw_MOV(p, c->reg.t1, c->reg.t); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /* + * Both can be negative on GM965/G965 due to RHW workaround + * if so, this object should be rejected. + */ + if (BRW_IS_965(p->brw)) { + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); + is_neg2 = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, is_neg2); + } + + brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); + brw_MOV(p, c->reg.t1, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); } is_negative = brw_ELSE(p, is_negative); { - /* Coming back in. We know that both cannot be negative - * because the line would have been culled in that case. - */ + /* Coming back in. We know that both cannot be negative + * because the line would have been culled in that case. + */ + + /* If both are positive, do nothing */ + /* Only on GM965/G965 */ + if (BRW_IS_965(p->brw)) { + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); + is_neg2 = brw_IF(p, BRW_EXECUTE_1); + } - /* If both are positive, do nothing */ - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); - is_neg2 = brw_IF(p, BRW_EXECUTE_1); { - brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); - - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); - brw_MOV(p, c->reg.t0, c->reg.t); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - brw_ENDIF(p, is_neg2); - } + brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); + brw_MOV(p, c->reg.t0, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + if (BRW_IS_965(p->brw)) { + brw_ENDIF(p, is_neg2); + } + } brw_ENDIF(p, is_negative); } brw_ENDIF(p, plane_active); @@ -243,6 +267,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) void brw_emit_line_clip( struct brw_clip_compile *c ) { brw_clip_line_alloc_regs(c); + brw_clip_init_ff_sync(c); if (c->key.do_flat_shading) brw_clip_copy_colors(c, 0, 1); diff --git a/src/mesa/drivers/dri/i965/brw_clip_point.c b/src/mesa/drivers/dri/i965/brw_clip_point.c index d17b199b89..8458f61c5a 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_point.c +++ b/src/mesa/drivers/dri/i965/brw_clip_point.c @@ -50,5 +50,7 @@ void brw_emit_point_clip( struct brw_clip_compile *c ) /* Send an empty message to kill the thread: */ brw_clip_tri_alloc_regs(c, 0); + brw_clip_init_ff_sync(c); + brw_clip_kill_thread(c); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 9b0d7eab7b..234b3744bf 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -43,11 +43,14 @@ struct brw_clip_unit_key { unsigned int curbe_offset; unsigned int nr_urb_entries, urb_size; + + GLboolean depth_clamp; }; static void clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) { + GLcontext *ctx = &brw->intel.ctx; memset(key, 0, sizeof(*key)); /* CACHE_NEW_CLIP_PROG */ @@ -62,6 +65,9 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) /* BRW_NEW_URB_FENCE */ key->nr_urb_entries = brw->urb.nr_clip_entries; key->urb_size = brw->urb.vsize; + + /* _NEW_TRANSOFORM */ + key->depth_clamp = ctx->Transform.DepthClamp; } static dri_bo * @@ -95,7 +101,14 @@ clip_unit_create_from_key(struct brw_context *brw, * even number. */ assert(key->nr_urb_entries % 2 == 0); - clip.thread4.max_threads = 2 - 1; + + /* Although up to 16 concurrent Clip threads are allowed on IGDNG, + * only 2 threads can output VUEs at a time. + */ + if (BRW_IS_IGDNG(brw)) + clip.thread4.max_threads = 16 - 1; + else + clip.thread4.max_threads = 2 - 1; } else { assert(key->nr_urb_entries >= 5); clip.thread4.max_threads = 1 - 1; @@ -110,7 +123,8 @@ clip_unit_create_from_key(struct brw_context *brw, clip.clip5.userclip_enable_flags = 0x7f; clip.clip5.userclip_must_clip = 1; clip.clip5.guard_band_enable = 0; - clip.clip5.viewport_z_clip_enable = 1; + if (!key->depth_clamp) + clip.clip5.viewport_z_clip_enable = 1; clip.clip5.viewport_xy_clip_enable = 1; clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; clip.clip5.api_mode = BRW_CLIP_API_OGL; @@ -161,7 +175,7 @@ static void upload_clip_unit( struct brw_context *brw ) const struct brw_tracked_state brw_clip_unit = { .dirty = { - .mesa = 0, + .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index 1dbba37fe7..0efd77225e 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -77,6 +77,10 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, if (c->nr_attrs & 1) { for (j = 0; j < 3; j++) { GLuint delta = c->nr_attrs*16 + 32; + + if (BRW_IS_IGDNG(c->func.brw)) + delta = c->nr_attrs * 16 + 32 * 3; + brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); } } @@ -115,6 +119,11 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, i++; } + if (c->need_ff_sync) { + c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD); + i++; + } + c->first_tmp = i; c->last_tmp = i; @@ -455,6 +464,8 @@ static void brw_clip_test( struct brw_clip_compile *c ) struct brw_indirect vt2 = brw_indirect(2, 0); struct brw_compile *p = &c->func; + struct brw_instruction *is_outside; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); @@ -462,53 +473,87 @@ static void brw_clip_test( struct brw_clip_compile *c ) brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); + brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); /* test nearz, xmin, ymin plane */ - brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); + /* clip.xyz < -clip.w */ + brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); + brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); + + /* All vertices are outside of a plane, rejected */ + brw_AND(p, t, t1, t2); + brw_AND(p, t, t, t3); + brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); + brw_OR(p, tmp0, tmp0, get_element(t, 2)); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); + is_outside = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, is_outside); brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* some vertices are inside a plane, some are outside,need to clip */ brw_XOR(p, t, t1, t2); brw_XOR(p, t1, t2, t3); brw_OR(p, t, t, t1); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 0), brw_imm_ud(0)); + brw_AND(p, t, t, brw_imm_ud(0x1)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 1), brw_imm_ud(0)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 2), brw_imm_ud(0)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); /* test farz, xmax, ymax plane */ - brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); + /* clip.xyz > clip.w */ + brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); + brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); + brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* All vertices are outside of a plane, rejected */ + brw_AND(p, t, t1, t2); + brw_AND(p, t, t, t3); + brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); + brw_OR(p, tmp0, tmp0, get_element(t, 2)); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); + is_outside = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, is_outside); brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /* some vertices are inside a plane, some are outside,need to clip */ brw_XOR(p, t, t1, t2); brw_XOR(p, t1, t2, t3); brw_OR(p, t, t, t1); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 0), brw_imm_ud(0)); + brw_AND(p, t, t, brw_imm_ud(0x1)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 1), brw_imm_ud(0)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 2), brw_imm_ud(0)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); @@ -523,10 +568,11 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); brw_clip_tri_init_vertices(c); brw_clip_init_clipmask(c); + brw_clip_init_ff_sync(c); /* if -ve rhw workaround bit is set, do cliptest */ - if (!BRW_IS_G4X(p->brw)) { + if (BRW_IS_965(p->brw)) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); @@ -543,11 +589,12 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) if (c->key.do_flat_shading) brw_clip_tri_flat_shade(c); - if (c->key.clip_mode == BRW_CLIPMODE_NORMAL) + if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) || + (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP)) do_clip_tri(c); else maybe_do_clip_tri(c); - + brw_clip_tri_emit_polygon(c); /* Send an empty message to kill the thread: diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c index d7ca517927..ad1bfa435f 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c +++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c @@ -453,6 +453,7 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c ) brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); brw_clip_tri_init_vertices(c); + brw_clip_init_ff_sync(c); assert(c->offset[VERT_RESULT_EDGE]); diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c index 9d3b0be694..5a73abdfee 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_util.c +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c @@ -140,6 +140,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, /* Just copy the vertex header: */ + /* + * After CLIP stage, only first 256 bits of the VUE are read + * back on IGDNG, so needn't change it + */ brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); /* Iterate over each attribute (could be done in pairs?) @@ -147,6 +151,9 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, for (i = 0; i < c->nr_attrs; i++) { GLuint delta = i*16 + 32; + if (BRW_IS_IGDNG(p->brw)) + delta = i * 16 + 32 * 3; + if (delta == c->offset[VERT_RESULT_EDGE]) { if (force_edgeflag) brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); @@ -177,6 +184,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, if (i & 1) { GLuint delta = i*16 + 32; + + if (BRW_IS_IGDNG(p->brw)) + delta = i * 16 + 32 * 3; + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); } @@ -202,6 +213,8 @@ void brw_clip_emit_vue(struct brw_clip_compile *c, struct brw_compile *p = &c->func; GLuint start = c->last_mrf; + brw_clip_ff_sync(c); + assert(!(allocate && eot)); /* Cycle through mrf regs - probably futile as we have to wait for @@ -252,6 +265,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c) { struct brw_compile *p = &c->func; + brw_clip_ff_sync(c); /* Send an empty message to kill the thread and release any * allocated urb entry: */ @@ -343,3 +357,40 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) } } +void brw_clip_ff_sync(struct brw_clip_compile *c) +{ + if (c->need_ff_sync) { + struct brw_compile *p = &c->func; + struct brw_instruction *need_ff_sync; + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1)); + need_ff_sync = brw_IF(p, BRW_EXECUTE_1); + { + brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1)); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, + 1, /* used */ + 1, /* msg length */ + 1, /* response length */ + 0, /* eot */ + 1, /* write compelete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); + } + brw_ENDIF(p, need_ff_sync); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } +} + +void brw_clip_init_ff_sync(struct brw_clip_compile *c) +{ + if (c->need_ff_sync) { + struct brw_compile *p = &c->func; + + brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0)); + } +} diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index d7a2bd95ee..3c5b848319 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -76,30 +76,6 @@ static void brwInitDriverFunctions( struct dd_function_table *functions ) functions->Viewport = intel_viewport; } - -static void brw_init_attribs( struct brw_context *brw ) -{ - GLcontext *ctx = &brw->intel.ctx; - - brw->attribs.Color = &ctx->Color; - brw->attribs.Depth = &ctx->Depth; - brw->attribs.Fog = &ctx->Fog; - brw->attribs.Hint = &ctx->Hint; - brw->attribs.Light = &ctx->Light; - brw->attribs.Line = &ctx->Line; - brw->attribs.Point = &ctx->Point; - brw->attribs.Polygon = &ctx->Polygon; - brw->attribs.Scissor = &ctx->Scissor; - brw->attribs.Stencil = &ctx->Stencil; - brw->attribs.Texture = &ctx->Texture; - brw->attribs.Transform = &ctx->Transform; - brw->attribs.Viewport = &ctx->Viewport; - brw->attribs.VertexProgram = &ctx->VertexProgram; - brw->attribs.FragmentProgram = &ctx->FragmentProgram; - brw->attribs.PolygonStipple = &ctx->PolygonStipple[0]; -} - - GLboolean brwCreateContext( const __GLcontextModes *mesaVis, __DRIcontextPrivate *driContextPriv, void *sharedContextPrivate) @@ -135,21 +111,48 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, ctx->Const.MaxTextureImageUnits); ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ - /* Advertise the full hardware capabilities. The new memory - * manager should cope much better with overload situations: + /* Mesa limits textures to 4kx4k; it would be nice to fix that someday */ - ctx->Const.MaxTextureLevels = 12; + ctx->Const.MaxTextureLevels = 13; ctx->Const.Max3DTextureLevels = 9; ctx->Const.MaxCubeTextureLevels = 12; - ctx->Const.MaxTextureRectSize = (1<<11); + ctx->Const.MaxTextureRectSize = (1<<12); + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + /* if conformance mode is set, swrast can handle any size AA point */ ctx->Const.MaxPointSizeAA = 255.0; -/* ctx->Const.MaxNativeVertexProgramTemps = 32; */ + /* We want the GLSL compiler to emit code that uses condition codes */ + ctx->Shader.EmitCondCodes = GL_TRUE; + + ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); + ctx->Const.VertexProgram.MaxAluInstructions = 0; + ctx->Const.VertexProgram.MaxTexInstructions = 0; + ctx->Const.VertexProgram.MaxTexIndirections = 0; + ctx->Const.VertexProgram.MaxNativeAluInstructions = 0; + ctx->Const.VertexProgram.MaxNativeTexInstructions = 0; + ctx->Const.VertexProgram.MaxNativeTexIndirections = 0; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; + ctx->Const.VertexProgram.MaxNativeTemps = 256; + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + ctx->Const.VertexProgram.MaxNativeParameters = 1024; + ctx->Const.VertexProgram.MaxEnvParams = + MIN2(ctx->Const.VertexProgram.MaxNativeParameters, + ctx->Const.VertexProgram.MaxEnvParams); + + ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeAttribs = 12; + ctx->Const.FragmentProgram.MaxNativeTemps = 256; + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; + ctx->Const.FragmentProgram.MaxNativeParameters = 1024; + ctx->Const.FragmentProgram.MaxEnvParams = + MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, + ctx->Const.FragmentProgram.MaxEnvParams); - brw_init_attribs( brw ); - brw_init_metaops( brw ); brw_init_state( brw ); brw->state.dirty.mesa = ~0; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 5d3f99e025..a5209ac41b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -46,7 +46,7 @@ * * CURBE - constant URB entry. An urb region (entry) used to hold * constant values which the fixed function units can be instructed to - * preload into the GRF when spawining a thread. + * preload into the GRF when spawning a thread. * * VUE - vertex URB entry. An urb entry holding a vertex and usually * a vertex header. The header contains control information and @@ -63,7 +63,7 @@ * special and may be overwritten. * * MRF - message register file. Threads communicate (and terminate) - * by sending messages. Message parameters are placed in contigous + * by sending messages. Message parameters are placed in contiguous * MRF registers. All program output is via these messages. URB * entries are populated by sending a message to the shared URB * function containing the new data, together with a control word, @@ -129,9 +129,8 @@ struct brw_context; #define BRW_NEW_PRIMITIVE 0x40 #define BRW_NEW_CONTEXT 0x80 #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 -#define BRW_NEW_INPUT_VARYING 0x200 #define BRW_NEW_PSP 0x800 -#define BRW_NEW_METAOPS 0x1000 +#define BRW_NEW_WM_SURFACES 0x1000 #define BRW_NEW_FENCE 0x2000 #define BRW_NEW_INDICES 0x4000 #define BRW_NEW_VERTICES 0x8000 @@ -142,7 +141,9 @@ struct brw_context; #define BRW_NEW_BATCH 0x10000 /** brw->depth_region updated */ #define BRW_NEW_DEPTH_BUFFER 0x20000 -#define BRW_NEW_NR_SURFACES 0x40000 +#define BRW_NEW_NR_WM_SURFACES 0x40000 +#define BRW_NEW_NR_VS_SURFACES 0x80000 +#define BRW_NEW_INDEX_BUFFER 0x100000 struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -155,19 +156,28 @@ struct brw_state_flags { GLuint cache; }; + +/** Subclass of Mesa vertex program */ struct brw_vertex_program { struct gl_vertex_program program; GLuint id; + dri_bo *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; }; - +/** Subclass of Mesa fragment program */ struct brw_fragment_program { struct gl_fragment_program program; - GLuint id; -}; + GLuint id; /**< serial no. to identify frag progs, never re-used */ + GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ + dri_bo *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; + /** for debugging, which texture units are referenced */ + GLbitfield tex_units_used; +}; /* Data about a particular attempt to compile a program. Note that @@ -183,7 +193,7 @@ struct brw_wm_prog_data { GLuint total_grf; GLuint total_scratch; - GLuint nr_params; + GLuint nr_params; /**< number of float params/constants */ GLboolean error; /* Pointer to tracked values (only valid once @@ -222,6 +232,7 @@ struct brw_vs_prog_data { GLuint urb_read_length; GLuint total_grf; GLuint outputs_written; + GLuint nr_params; /**< number of float params/constants */ GLuint inputs_read; @@ -238,8 +249,35 @@ struct brw_vs_ouput_sizes { }; +/** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + MAX_DRAW_BUFFERS + +/** + * Size of our surface binding table for the WM. + * This contains pointers to the drawing surfaces and current texture + * objects and shader constant buffers (+2). + */ +#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) + +/** + * Helpers to convert drawing buffers, textures and constant buffers + * to surface binding table indexes, for WM. + */ +#define SURF_INDEX_DRAW(d) (d) +#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) +#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t)) + +/** + * Size of surface binding table for the VS. + * Only one constant buffer for now. + */ +#define BRW_VS_MAX_SURF 1 + +/** + * Only a VS constant buffer + */ +#define SURF_INDEX_VERT_CONST_BUFFER 0 + enum brw_cache_id { BRW_CC_VP, @@ -303,26 +341,6 @@ struct brw_cache { }; - -struct brw_state_pointers { - struct gl_colorbuffer_attrib *Color; - struct gl_depthbuffer_attrib *Depth; - struct gl_fog_attrib *Fog; - struct gl_hint_attrib *Hint; - struct gl_light_attrib *Light; - struct gl_line_attrib *Line; - struct gl_point_attrib *Point; - struct gl_polygon_attrib *Polygon; - GLuint *PolygonStipple; - struct gl_scissor_attrib *Scissor; - struct gl_stencil_attrib *Stencil; - struct gl_texture_attrib *Texture; - struct gl_transform_attrib *Transform; - struct gl_viewport_attrib *Viewport; - struct gl_vertex_program_state *VertexProgram; - struct gl_fragment_program_state *FragmentProgram; -}; - /* Considered adding a member to this struct to document which flags * an update might raise so that ordering of the state atoms can be * checked or derived at runtime. Dropped the idea in favor of having @@ -372,6 +390,8 @@ struct brw_cached_batch_item { struct brw_vertex_element { const struct gl_client_array *glarray; + /** The corresponding Mesa vertex attribute */ + gl_vert_attrib attrib; /** Size of a complete element */ GLuint element_size; /** Number of uploaded elements for this input. */ @@ -387,7 +407,6 @@ struct brw_vertex_element { struct brw_vertex_info { - GLuint varying; /* varying:1[VERT_ATTRIB_MAX] */ GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */ }; @@ -425,9 +444,13 @@ struct brw_query_object { unsigned int count; }; + +/** + * brw_context is derived from intel_context. + */ struct brw_context { - struct intel_context intel; + struct intel_context intel; /**< base class, must be first field */ GLuint primitive; GLboolean emit_state_always; @@ -436,11 +459,9 @@ struct brw_context struct { struct brw_state_flags dirty; - struct brw_tracked_state **atoms; - GLuint nr_atoms; - GLuint nr_draw_regions; - struct intel_region *draw_regions[MAX_DRAW_BUFFERS]; + GLuint nr_color_regions; + struct intel_region *color_regions[MAX_DRAW_BUFFERS]; struct intel_region *depth_region; /** @@ -457,13 +478,16 @@ struct brw_context int validated_bo_count; } state; - struct brw_state_pointers attribs; - struct brw_cache cache; + struct brw_cache cache; /** non-surface items */ + struct brw_cache surface_cache; /* surface items */ struct brw_cached_batch_item *cached_batch_items; struct { struct brw_vertex_element inputs[VERT_ATTRIB_MAX]; + struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; + GLuint nr_enabled; + #define BRW_NR_UPLOAD_BUFS 17 #define BRW_UPLOAD_INIT_SIZE (128*1024) @@ -487,31 +511,16 @@ struct brw_context */ const struct _mesa_index_buffer *ib; + /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */ dri_bo *bo; unsigned int offset; - } ib; - - struct { - /* Will be allocated on demand if needed. + unsigned int size; + /* Offset to index buffer index to use in CMD_3D_PRIM so that we can + * avoid re-uploading the IB packet over and over if we're actually + * referencing the same index buffer. */ - struct brw_state_pointers attribs; - struct gl_vertex_program *vp; - struct gl_fragment_program *fp, *fp_tex; - - struct gl_buffer_object *vbo; - - struct intel_region *saved_draw_region; - GLuint saved_nr_draw_regions; - struct intel_region *saved_depth_region; - - GLuint restore_draw_buffers[MAX_DRAW_BUFFERS]; - GLuint restore_num_draw_buffers; - - struct gl_fragment_program *restore_fp; - - GLboolean active; - } metaops; - + unsigned int start_vertex_offset; + } ib; /* Active vertex program: */ @@ -556,19 +565,14 @@ struct brw_context /* BRW_NEW_CURBE_OFFSETS: */ struct { - GLuint wm_start; - GLuint wm_size; + GLuint wm_start; /**< pos of first wm const in CURBE buffer */ + GLuint wm_size; /**< number of float[4] consts, multiple of 16 */ GLuint clip_start; GLuint clip_size; GLuint vs_start; GLuint vs_size; GLuint total_size; - /* Dynamic tracker which changes to reflect the state referenced - * by active fp and vp program parameters: - */ - struct brw_tracked_state tracked_state; - dri_bo *curbe_bo; /** Offset within curbe_bo of space for current curbe entry */ GLuint curbe_offset; @@ -589,6 +593,11 @@ struct brw_context dri_bo *prog_bo; dri_bo *state_bo; + + /** Binding table of pointers to surf_bo entries */ + dri_bo *bind_bo; + dri_bo *surf_bo[BRW_VS_MAX_SURF]; + GLuint nr_surfaces; } vs; struct { @@ -620,9 +629,10 @@ struct brw_context struct brw_wm_prog_data *prog_data; struct brw_wm_compile *compile_data; - /* Input sizes, calculated from active vertex program: + /** Input sizes, calculated from active vertex program. + * One bit per fragment program input attribute. */ - GLuint input_size_masks[4]; + GLbitfield input_size_masks[4]; /** Array of surface default colors (texture border color) */ dri_bo *sdc_bo[BRW_MAX_TEX_UNIT]; @@ -631,7 +641,7 @@ struct brw_context GLuint nr_surfaces; GLuint max_threads; - dri_bo *scratch_buffer; + dri_bo *scratch_bo; GLuint sampler_count; dri_bo *sampler_bo; @@ -671,8 +681,6 @@ struct brw_context * brw_vtbl.c */ void brwInitVtbl( struct brw_context *brw ); -void brw_do_flush( struct brw_context *brw, - GLuint flags ); /*====================================================================== * brw_context.c @@ -703,13 +711,6 @@ void brw_FrameBufferTexInit( struct brw_context *brw, void brw_FrameBufferTexDestroy( struct brw_context *brw ); void brw_validate_textures( struct brw_context *brw ); -/*====================================================================== - * brw_metaops.c - */ - -void brw_init_metaops( struct brw_context *brw ); -void brw_destroy_metaops( struct brw_context *brw ); - /*====================================================================== * brw_program.c @@ -721,8 +722,12 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ); */ void brw_upload_urb_fence(struct brw_context *brw); -void brw_upload_constant_buffer_state(struct brw_context *brw); +/* brw_curbe.c + */ +void brw_upload_cs_urb_state(struct brw_context *brw); +/* brw_disasm.c */ +int brw_disasm (FILE *file, struct brw_instruction *inst); /*====================================================================== * Inline conversion functions. These are better-typed than the @@ -734,6 +739,32 @@ brw_context( GLcontext *ctx ) return (struct brw_context *)ctx; } +static INLINE struct brw_vertex_program * +brw_vertex_program(struct gl_vertex_program *p) +{ + return (struct brw_vertex_program *) p; +} + +static INLINE const struct brw_vertex_program * +brw_vertex_program_const(const struct gl_vertex_program *p) +{ + return (const struct brw_vertex_program *) p; +} + +static INLINE struct brw_fragment_program * +brw_fragment_program(struct gl_fragment_program *p) +{ + return (struct brw_fragment_program *) p; +} + +static INLINE const struct brw_fragment_program * +brw_fragment_program_const(const struct gl_fragment_program *p) +{ + return (const struct brw_fragment_program *) p; +} + + + #define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1) #endif diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index fbf473abf6..0b0e6931a0 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -36,30 +36,37 @@ #include "main/macros.h" #include "main/enums.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "intel_batchbuffer.h" +#include "intel_regions.h" #include "brw_context.h" #include "brw_defines.h" #include "brw_state.h" #include "brw_util.h" -/* Partition the CURBE between the various users of constant values: +/** + * Partition the CURBE between the various users of constant values: + * Note that vertex and fragment shaders can now fetch constants out + * of constant buffers. We no longer allocatea block of the GRF for + * constants. That greatly reduces the demand for space in the CURBE. + * Some of the comments within are dated... */ static void calculate_curbe_offsets( struct brw_context *brw ) { + GLcontext *ctx = &brw->intel.ctx; /* CACHE_NEW_WM_PROG */ - GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; + const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - GLuint nr_vp_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16; + const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16; GLuint nr_clip_regs = 0; GLuint total_regs; /* _NEW_TRANSFORM */ - if (brw->attribs.Transform->ClipPlanesEnabled) { - GLuint nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); + if (ctx->Transform.ClipPlanesEnabled) { + GLuint nr_planes = 6 + brw_count_bits(ctx->Transform.ClipPlanesEnabled); nr_clip_regs = (nr_planes * 4 + 15) / 16; } @@ -137,24 +144,24 @@ const struct brw_tracked_state brw_curbe_offsets = { * fixed-function hardware in a double-buffering scheme to avoid a * pipeline stall each time the contents of the curbe is changed. */ -void brw_upload_constant_buffer_state(struct brw_context *brw) +void brw_upload_cs_urb_state(struct brw_context *brw) { - struct brw_constant_buffer_state cbs; - memset(&cbs, 0, sizeof(cbs)); + struct brw_cs_urb_state cs_urb; + memset(&cs_urb, 0, sizeof(cs_urb)); /* It appears that this is the state packet for the CS unit, ie. the * urb entries detailed here are housed in the CS range from the * URB_FENCE command. */ - cbs.header.opcode = CMD_CONST_BUFFER_STATE; - cbs.header.length = sizeof(cbs)/4 - 2; + cs_urb.header.opcode = CMD_CS_URB_STATE; + cs_urb.header.length = sizeof(cs_urb)/4 - 2; /* BRW_NEW_URB_FENCE */ - cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries; - cbs.bits0.urb_entry_size = brw->urb.csize - 1; + cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries; + cs_urb.bits0.urb_entry_size = brw->urb.csize - 1; assert(brw->urb.nr_cs_entries); - BRW_CACHED_BATCH_STRUCT(brw, &cbs); + BRW_CACHED_BATCH_STRUCT(brw, &cs_urb); } static GLfloat fixed_plane[6][4] = { @@ -173,42 +180,35 @@ static GLfloat fixed_plane[6][4] = { static void prepare_constant_buffer(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; - GLuint sz = brw->curbe.total_size; - GLuint bufsz = sz * 16 * sizeof(GLfloat); + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); + const struct brw_fragment_program *fp = + brw_fragment_program_const(brw->fragment_program); + const GLuint sz = brw->curbe.total_size; + const GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; GLuint i; - /* Update our own dependency flags. This works because this - * function will also be called whenever fp or vp changes. - */ - brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); - brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags; - brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags; - if (sz == 0) { - if (brw->curbe.last_buf) { free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } - return; } - buf = (GLfloat *)malloc(bufsz); - - memset(buf, 0, bufsz); + buf = (GLfloat *) _mesa_calloc(bufsz); + /* fragment shader constants */ if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) - buf[offset + i] = brw->wm.prog_data->param[i][0]; + buf[offset + i] = *brw->wm.prog_data->param[i]; } @@ -233,28 +233,33 @@ static void prepare_constant_buffer(struct brw_context *brw) */ assert(MAX_CLIP_PLANES == 6); for (j = 0; j < MAX_CLIP_PLANES; j++) { - if (brw->attribs.Transform->ClipPlanesEnabled & (1<<j)) { - buf[offset + i * 4 + 0] = brw->attribs.Transform->_ClipUserPlane[j][0]; - buf[offset + i * 4 + 1] = brw->attribs.Transform->_ClipUserPlane[j][1]; - buf[offset + i * 4 + 2] = brw->attribs.Transform->_ClipUserPlane[j][2]; - buf[offset + i * 4 + 3] = brw->attribs.Transform->_ClipUserPlane[j][3]; + if (ctx->Transform.ClipPlanesEnabled & (1<<j)) { + buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0]; + buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1]; + buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2]; + buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3]; i++; } } } - + /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; - GLuint nr = vp->program.Base.Parameters->NumParameters; + GLuint nr = brw->vs.prog_data->nr_params / 4; + /* Updates the ParamaterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + /* XXX just use a memcpy here */ for (i = 0; i < nr; i++) { - buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0]; - buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1]; - buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2]; - buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3]; + const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i]; + buf[offset + i * 4 + 0] = value[0]; + buf[offset + i * 4 + 1] = value[1]; + buf[offset + i * 4 + 2] = value[2]; + buf[offset + i * 4 + 3] = value[3]; } } @@ -273,11 +278,14 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { - free(buf); + /* constants have not changed */ + _mesa_free(buf); } else { + /* constants have changed */ if (brw->curbe.last_buf) - free(brw->curbe.last_buf); + _mesa_free(brw->curbe.last_buf); + brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; @@ -324,7 +332,6 @@ static void prepare_constant_buffer(struct brw_context *brw) */ } - static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; @@ -351,7 +358,7 @@ static void emit_constant_buffer(struct brw_context *brw) */ const struct brw_tracked_state brw_constant_buffer = { .dirty = { - .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */ + .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 39c32255f8..78d457ad2b 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -139,6 +139,7 @@ #define BRW_CLIPMODE_CLIP_NON_REJECTED 2 #define BRW_CLIPMODE_REJECT_ALL 3 #define BRW_CLIPMODE_ACCEPT_ALL 4 +#define BRW_CLIPMODE_KERNEL_CLIP 5 #define BRW_CLIP_NDCSPACE 0 #define BRW_CLIP_SCREENSPACE 1 @@ -225,6 +226,24 @@ #define BRW_RASTRULE_UPPER_LEFT 0 #define BRW_RASTRULE_UPPER_RIGHT 1 +/* These are listed as "Reserved, but not seen as useful" + * in Intel documentation (page 212, "Point Rasterization Rule", + * section 7.4 "SF Pipeline State Summary", of document + * "Intel® 965 Express Chipset Family and Intel® G35 Express + * Chipset Graphics Controller Programmer's Reference Manual, + * Volume 2: 3D/Media", Revision 1.0b as of January 2008, + * available at + * http://intellinuxgraphics.org/documentation.html + * at the time of this writing). + * + * These appear to be supported on at least some + * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT + * is useful when using OpenGL to render to a FBO + * (which has the pixel coordinate Y orientation inverted + * with respect to the normal OpenGL pixel coordinate system). + */ +#define BRW_RASTRULE_LOWER_LEFT 2 +#define BRW_RASTRULE_LOWER_RIGHT 3 #define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 #define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 @@ -349,9 +368,10 @@ #define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 #define BRW_SURFACEFORMAT_I16_FLOAT 0x115 #define BRW_SURFACEFORMAT_L16_FLOAT 0x116 -#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 -#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 -#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A #define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B #define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C #define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D @@ -368,6 +388,7 @@ #define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 #define BRW_SURFACEFORMAT_R8_SSCALED 0x149 #define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C #define BRW_SURFACEFORMAT_R1_UINT 0x181 #define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 #define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 @@ -450,8 +471,9 @@ #define BRW_CONDITIONAL_GE 4 #define BRW_CONDITIONAL_L 5 #define BRW_CONDITIONAL_LE 6 -#define BRW_CONDITIONAL_C 7 +#define BRW_CONDITIONAL_R 7 #define BRW_CONDITIONAL_O 8 +#define BRW_CONDITIONAL_U 9 #define BRW_DEBUG_NONE 0 #define BRW_DEBUG_BREAKPOINT 1 @@ -491,6 +513,7 @@ #define BRW_OPCODE_RSL 11 #define BRW_OPCODE_ASR 12 #define BRW_OPCODE_CMP 16 +#define BRW_OPCODE_CMPN 17 #define BRW_OPCODE_JMPI 32 #define BRW_OPCODE_IF 34 #define BRW_OPCODE_IFF 35 @@ -650,6 +673,25 @@ #define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 #define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3 + +/* for IGDNG only */ +#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 +#define BRW_SAMPLER_SIMD_MODE_SIMD8 1 +#define BRW_SAMPLER_SIMD_MODE_SIMD16 2 +#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 + #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 #define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 @@ -734,7 +776,7 @@ #define CMD_URB_FENCE 0x6000 -#define CMD_CONST_BUFFER_STATE 0x6001 +#define CMD_CS_URB_STATE 0x6001 #define CMD_CONST_BUFFER 0x6002 #define CMD_STATE_BASE_ADDRESS 0x6101 @@ -799,8 +841,11 @@ #include "intel_chipset.h" #define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID)) -#define CMD_PIPELINE_SELECT(brw) (BRW_IS_G4X(brw) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) -#define CMD_VF_STATISTICS(brw) (BRW_IS_G4X(brw) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) -#define URB_SIZES(brw) (BRW_IS_G4X(brw) ? 384 : 256) /* 512 bit units */ +#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->intel.intelScreen->deviceID)) +#define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) +#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) +#define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) +#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \ + (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */ #endif diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c new file mode 100644 index 0000000000..9fef230507 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -0,0 +1,903 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <stdarg.h> + +#include "main/mtypes.h" + +#include "brw_context.h" +#include "brw_defines.h" + +struct { + char *name; + int nsrc; + int ndst; +} opcode[128] = { + [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, + + [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 }, + + [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 }, + [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 }, + [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 }, + [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 }, +}; + +char *conditional_modifier[16] = { + [BRW_CONDITIONAL_NONE] = "", + [BRW_CONDITIONAL_Z] = ".e", + [BRW_CONDITIONAL_NZ] = ".ne", + [BRW_CONDITIONAL_G] = ".g", + [BRW_CONDITIONAL_GE] = ".ge", + [BRW_CONDITIONAL_L] = ".l", + [BRW_CONDITIONAL_LE] = ".le", + [BRW_CONDITIONAL_R] = ".r", + [BRW_CONDITIONAL_O] = ".o", + [BRW_CONDITIONAL_U] = ".u", +}; + +char *negate[2] = { + [0] = "", + [1] = "-", +}; + +char *_abs[2] = { + [0] = "", + [1] = "(abs)", +}; + +char *vert_stride[16] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4", + [4] = "8", + [5] = "16", + [6] = "32", + [15] = "VxH", +}; + +char *width[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", +}; + +char *horiz_stride[4] = { + [0] = "0", + [1] = "1", + [2] = "2", + [3] = "4" +}; + +char *chan_sel[4] = { + [0] = "x", + [1] = "y", + [2] = "z", + [3] = "w", +}; + +char *dest_condmod[16] = { +}; + +char *debug_ctrl[2] = { + [0] = "", + [1] = ".breakpoint" +}; + +char *saturate[2] = { + [0] = "", + [1] = ".sat" +}; + +char *exec_size[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", + [5] = "32" +}; + +char *pred_inv[2] = { + [0] = "+", + [1] = "-" +}; + +char *pred_ctrl_align16[16] = { + [1] = "", + [2] = ".x", + [3] = ".y", + [4] = ".z", + [5] = ".w", + [6] = ".any4h", + [7] = ".all4h", +}; + +char *pred_ctrl_align1[16] = { + [1] = "", + [2] = ".anyv", + [3] = ".allv", + [4] = ".any2h", + [5] = ".all2h", + [6] = ".any4h", + [7] = ".all4h", + [8] = ".any8h", + [9] = ".all8h", + [10] = ".any16h", + [11] = ".all16h", +}; + +char *thread_ctrl[4] = { + [0] = "", + [2] = "switch" +}; + +char *compr_ctrl[4] = { + [0] = "", + [1] = "sechalf", + [2] = "compr", +}; + +char *dep_ctrl[4] = { + [0] = "", + [1] = "NoDDClr", + [2] = "NoDDChk", + [3] = "NoDDClr,NoDDChk", +}; + +char *mask_ctrl[4] = { + [0] = "", + [1] = "nomask", +}; + +char *access_mode[2] = { + [0] = "align1", + [1] = "align16", +}; + +char *reg_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [4] = "UB", + [5] = "B", + [7] = "F" +}; + +char *imm_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [5] = "VF", + [5] = "V", + [7] = "F" +}; + +char *reg_file[4] = { + [0] = "A", + [1] = "g", + [2] = "m", + [3] = "imm", +}; + +char *writemask[16] = { + [0x0] = ".", + [0x1] = ".x", + [0x2] = ".y", + [0x3] = ".xy", + [0x4] = ".z", + [0x5] = ".xz", + [0x6] = ".yz", + [0x7] = ".xyz", + [0x8] = ".w", + [0x9] = ".xw", + [0xa] = ".yw", + [0xb] = ".xyw", + [0xc] = ".zw", + [0xd] = ".xzw", + [0xe] = ".yzw", + [0xf] = "", +}; + +char *end_of_thread[2] = { + [0] = "", + [1] = "EOT" +}; + +char *target_function[16] = { + [BRW_MESSAGE_TARGET_NULL] = "null", + [BRW_MESSAGE_TARGET_MATH] = "math", + [BRW_MESSAGE_TARGET_SAMPLER] = "sampler", + [BRW_MESSAGE_TARGET_GATEWAY] = "gateway", + [BRW_MESSAGE_TARGET_DATAPORT_READ] = "read", + [BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write", + [BRW_MESSAGE_TARGET_URB] = "urb", + [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" +}; + +char *math_function[16] = { + [BRW_MATH_FUNCTION_INV] = "inv", + [BRW_MATH_FUNCTION_LOG] = "log", + [BRW_MATH_FUNCTION_EXP] = "exp", + [BRW_MATH_FUNCTION_SQRT] = "sqrt", + [BRW_MATH_FUNCTION_RSQ] = "rsq", + [BRW_MATH_FUNCTION_SIN] = "sin", + [BRW_MATH_FUNCTION_COS] = "cos", + [BRW_MATH_FUNCTION_SINCOS] = "sincos", + [BRW_MATH_FUNCTION_TAN] = "tan", + [BRW_MATH_FUNCTION_POW] = "pow", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intmod", + [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intdiv", +}; + +char *math_saturate[2] = { + [0] = "", + [1] = "sat" +}; + +char *math_signed[2] = { + [0] = "", + [1] = "signed" +}; + +char *math_scalar[2] = { + [0] = "", + [1] = "scalar" +}; + +char *math_precision[2] = { + [0] = "", + [1] = "partial_precision" +}; + +char *urb_swizzle[4] = { + [BRW_URB_SWIZZLE_NONE] = "", + [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", + [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose", +}; + +char *urb_allocate[2] = { + [0] = "", + [1] = "allocate" +}; + +char *urb_used[2] = { + [0] = "", + [1] = "used" +}; + +char *urb_complete[2] = { + [0] = "", + [1] = "complete" +}; + +char *sampler_target_format[4] = { + [0] = "F", + [2] = "UD", + [3] = "D" +}; + + +static int column; + +static int string (FILE *file, char *string) +{ + fputs (string, file); + column += strlen (string); + return 0; +} + +static int format (FILE *f, char *format, ...) +{ + char buf[1024]; + va_list args; + va_start (args, format); + + vsnprintf (buf, sizeof (buf) - 1, format, args); + string (f, buf); + return 0; +} + +static int newline (FILE *f) +{ + putc ('\n', f); + column = 0; + return 0; +} + +static int pad (FILE *f, int c) +{ + do + string (f, " "); + while (column < c); + return 0; +} + +static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space) +{ + if (!ctrl[id]) { + fprintf (file, "*** invalid %s value %d ", + name, id); + return 1; + } + if (ctrl[id][0]) + { + if (space && *space) + string (file, " "); + string (file, ctrl[id]); + if (space) + *space = 1; + } + return 0; +} + +static int print_opcode (FILE *file, int id) +{ + if (!opcode[id].name) { + format (file, "*** invalid opcode value %d ", id); + return 1; + } + string (file, opcode[id].name); + return 0; +} + +static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) +{ + int err = 0; + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (_reg_nr & 0xf0) { + case BRW_ARF_NULL: + string (file, "null"); + return -1; + case BRW_ARF_ADDRESS: + format (file, "a%d", _reg_nr & 0x0f); + break; + case BRW_ARF_ACCUMULATOR: + format (file, "acc%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK: + format (file, "mask%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK_STACK: + format (file, "msd%d", _reg_nr & 0x0f); + break; + case BRW_ARF_STATE: + format (file, "sr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_CONTROL: + format (file, "cr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_NOTIFICATION_COUNT: + format (file, "n%d", _reg_nr & 0x0f); + break; + case BRW_ARF_IP: + string (file, "ip"); + return -1; + break; + default: + format (file, "ARF%d", _reg_nr); + break; + } + } else { + err |= control (file, "src reg file", reg_file, _reg_file, NULL); + format (file, "%d", _reg_nr); + } + return err; +} + +static int dest (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + + if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da1.dest_subreg_nr) + format (file, ".%d", inst->bits1.da1.dest_subreg_nr); + format (file, "<%d>", inst->bits1.da1.dest_horiz_stride); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); + } + else + { + string (file, "g[a0"); + if (inst->bits1.ia1.dest_subreg_nr) + format (file, ".%d", inst->bits1.ia1.dest_subreg_nr); + if (inst->bits1.ia1.dest_indirect_offset) + format (file, " %d", inst->bits1.ia1.dest_indirect_offset); + string (file, "]"); + format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL); + } + } + else + { + if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da16.dest_subreg_nr) + format (file, ".%d", inst->bits1.da16.dest_subreg_nr); + string (file, "<1>"); + err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); + } + else + { + err = 1; + string (file, "Indirect align16 address mode not supported"); + } + } + + return 0; +} + +static int src_align1_region (FILE *file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride) +{ + int err = 0; + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ","); + err |= control (file, "width", width, _width, NULL); + string (file, ","); + err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL); + string (file, ">"); + return err; +} + +static int src_da1 (FILE *file, GLuint type, GLuint _reg_file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride, + GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, reg_num); + if (err == -1) + return 0; + if (sub_reg_num) + format (file, ".%d", sub_reg_num); + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_ia1 (FILE *file, + GLuint type, + GLuint _reg_file, + GLint _addr_imm, + GLuint _addr_subreg_nr, + GLuint _negate, + GLuint __abs, + GLuint _addr_mode, + GLuint _horiz_stride, + GLuint _width, + GLuint _vert_stride) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + string (file, "g[a0"); + if (_addr_subreg_nr) + format (file, ".%d", _addr_subreg_nr); + if (_addr_imm) + format (file, " %d", _addr_imm); + string (file, "]"); + src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_da16 (FILE *file, + GLuint _reg_type, + GLuint _reg_file, + GLuint _vert_stride, + GLuint _reg_nr, + GLuint _subreg_nr, + GLuint __abs, + GLuint _negate, + GLuint swz_x, + GLuint swz_y, + GLuint swz_z, + GLuint swz_w) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, _reg_nr); + if (err == -1) + return 0; + if (_subreg_nr) + format (file, ".%d", _subreg_nr); + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ",1,1>"); + err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + + +static int imm (FILE *file, GLuint type, struct brw_instruction *inst) { + switch (type) { + case BRW_REGISTER_TYPE_UD: + format (file, "0x%08xUD", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_D: + format (file, "%dD", inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UW: + format (file, "0x%04xUW", (uint16_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_W: + format (file, "%dW", (int16_t) inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UB: + format (file, "0x%02xUB", (int8_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_VF: + format (file, "Vector Float"); + break; + case BRW_REGISTER_TYPE_V: + format (file, "0x%08xV", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_F: + format (file, "%-gF", inst->bits3.f); + } + return 0; +} + +static int src0 (FILE *file, struct brw_instruction *inst) +{ + if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src0_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src0_reg_type, + inst->bits1.da1.src0_reg_file, + inst->bits2.da1.src0_vert_stride, + inst->bits2.da1.src0_width, + inst->bits2.da1.src0_horiz_stride, + inst->bits2.da1.src0_reg_nr, + inst->bits2.da1.src0_subreg_nr, + inst->bits2.da1.src0_abs, + inst->bits2.da1.src0_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src0_reg_type, + inst->bits1.ia1.src0_reg_file, + inst->bits2.ia1.src0_indirect_offset, + inst->bits2.ia1.src0_subreg_nr, + inst->bits2.ia1.src0_negate, + inst->bits2.ia1.src0_abs, + inst->bits2.ia1.src0_address_mode, + inst->bits2.ia1.src0_horiz_stride, + inst->bits2.ia1.src0_width, + inst->bits2.ia1.src0_vert_stride); + } + } + else + { + if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src0_reg_type, + inst->bits1.da16.src0_reg_file, + inst->bits2.da16.src0_vert_stride, + inst->bits2.da16.src0_reg_nr, + inst->bits2.da16.src0_subreg_nr, + inst->bits2.da16.src0_abs, + inst->bits2.da16.src0_negate, + inst->bits2.da16.src0_swz_x, + inst->bits2.da16.src0_swz_y, + inst->bits2.da16.src0_swz_z, + inst->bits2.da16.src0_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +static int src1 (FILE *file, struct brw_instruction *inst) +{ + if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src1_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src1_reg_type, + inst->bits1.da1.src1_reg_file, + inst->bits3.da1.src1_vert_stride, + inst->bits3.da1.src1_width, + inst->bits3.da1.src1_horiz_stride, + inst->bits3.da1.src1_reg_nr, + inst->bits3.da1.src1_subreg_nr, + inst->bits3.da1.src1_abs, + inst->bits3.da1.src1_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src1_reg_type, + inst->bits1.ia1.src1_reg_file, + inst->bits3.ia1.src1_indirect_offset, + inst->bits3.ia1.src1_subreg_nr, + inst->bits3.ia1.src1_negate, + inst->bits3.ia1.src1_abs, + inst->bits3.ia1.src1_address_mode, + inst->bits3.ia1.src1_horiz_stride, + inst->bits3.ia1.src1_width, + inst->bits3.ia1.src1_vert_stride); + } + } + else + { + if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src1_reg_type, + inst->bits1.da16.src1_reg_file, + inst->bits3.da16.src1_vert_stride, + inst->bits3.da16.src1_reg_nr, + inst->bits3.da16.src1_subreg_nr, + inst->bits3.da16.src1_abs, + inst->bits3.da16.src1_negate, + inst->bits3.da16.src1_swz_x, + inst->bits3.da16.src1_swz_y, + inst->bits3.da16.src1_swz_z, + inst->bits3.da16.src1_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +int brw_disasm (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + int space = 0; + + if (inst->header.predicate_control) { + string (file, "("); + err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL); + string (file, "f0"); + if (inst->bits2.da1.flag_reg_nr) + format (file, ".%d", inst->bits2.da1.flag_reg_nr); + if (inst->header.access_mode == BRW_ALIGN_1) + err |= control (file, "predicate control align1", pred_ctrl_align1, + inst->header.predicate_control, NULL); + else + err |= control (file, "predicate control align16", pred_ctrl_align16, + inst->header.predicate_control, NULL); + string (file, ") "); + } + + err |= print_opcode (file, inst->header.opcode); + err |= control (file, "saturate", saturate, inst->header.saturate, NULL); + err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL); + + if (inst->header.opcode != BRW_OPCODE_SEND) + err |= control (file, "conditional modifier", conditional_modifier, + inst->header.destreg__conditionalmod, NULL); + + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "("); + err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL); + string (file, ")"); + } + + if (inst->header.opcode == BRW_OPCODE_SEND) + format (file, " %d", inst->header.destreg__conditionalmod); + + if (opcode[inst->header.opcode].ndst > 0) { + pad (file, 16); + err |= dest (file, inst); + } + if (opcode[inst->header.opcode].nsrc > 0) { + pad (file, 32); + err |= src0 (file, inst); + } + if (opcode[inst->header.opcode].nsrc > 1) { + pad (file, 48); + err |= src1 (file, inst); + } + + if (inst->header.opcode == BRW_OPCODE_SEND) { + newline (file); + pad (file, 16); + space = 0; + err |= control (file, "target function", target_function, + inst->bits3.generic.msg_target, &space); + switch (inst->bits3.generic.msg_target) { + case BRW_MESSAGE_TARGET_MATH: + err |= control (file, "math function", math_function, + inst->bits3.math.function, &space); + err |= control (file, "math saturate", math_saturate, + inst->bits3.math.saturate, &space); + err |= control (file, "math signed", math_signed, + inst->bits3.math.int_type, &space); + err |= control (file, "math scalar", math_scalar, + inst->bits3.math.data_type, &space); + err |= control (file, "math precision", math_precision, + inst->bits3.math.precision, &space); + break; + case BRW_MESSAGE_TARGET_SAMPLER: + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + err |= control (file, "sampler target format", sampler_target_format, + inst->bits3.sampler.return_format, NULL); + string (file, ")"); + break; + case BRW_MESSAGE_TARGET_DATAPORT_WRITE: + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.pixel_scoreboard_clear << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + break; + case BRW_MESSAGE_TARGET_URB: + format (file, " %d", inst->bits3.urb.offset); + space = 1; + err |= control (file, "urb swizzle", urb_swizzle, + inst->bits3.urb.swizzle_control, &space); + err |= control (file, "urb allocate", urb_allocate, + inst->bits3.urb.allocate, &space); + err |= control (file, "urb used", urb_used, + inst->bits3.urb.used, &space); + err |= control (file, "urb complete", urb_complete, + inst->bits3.urb.complete, &space); + break; + case BRW_MESSAGE_TARGET_THREAD_SPAWNER: + break; + default: + format (file, "unsupported target %d", inst->bits3.generic.msg_target); + break; + } + if (space) + string (file, " "); + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } + pad (file, 64); + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "{"); + space = 1; + err |= control(file, "access mode", access_mode, inst->header.access_mode, &space); + err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space); + err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); + err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space); + err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space); + if (inst->header.opcode == BRW_OPCODE_SEND) + err |= control (file, "end of thread", end_of_thread, + inst->bits3.generic.end_of_thread, &space); + if (space) + string (file, " "); + string (file, "}"); + } + string (file, ";"); + newline (file); + return err; +} diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 785fb784ca..44bb7bd588 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -25,13 +25,15 @@ * **************************************************************************/ -#include <stdlib.h> #include "main/glheader.h" #include "main/context.h" #include "main/state.h" -#include "main/api_validate.h" #include "main/enums.h" +#include "tnl/tnl.h" +#include "vbo/vbo_context.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" #include "brw_draw.h" #include "brw_defines.h" @@ -42,11 +44,6 @@ #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" -#include "tnl/tnl.h" -#include "vbo/vbo_context.h" -#include "swrast/swrast.h" -#include "swrast_setup/swrast_setup.h" - #define FILE_DEBUG_FLAG DEBUG_BATCH static GLuint prim_to_hw_prim[GL_POLYGON+1] = { @@ -84,15 +81,17 @@ static const GLenum reduced_prim[GL_POLYGON+1] = { */ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) { + GLcontext *ctx = &brw->intel.ctx; + if (INTEL_DEBUG & DEBUG_PRIMS) _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); /* Slight optimization to avoid the GS program when not needed: */ if (prim == GL_QUAD_STRIP && - brw->attribs.Light->ShadeModel != GL_FLAT && - brw->attribs.Polygon->FrontMode == GL_FILL && - brw->attribs.Polygon->BackMode == GL_FILL) + ctx->Light.ShadeModel != GL_FLAT && + ctx->Polygon.FrontMode == GL_FILL && + ctx->Polygon.BackMode == GL_FILL) prim = GL_TRIANGLE_STRIP; if (prim != brw->primitive) { @@ -125,6 +124,7 @@ static void brw_emit_prim(struct brw_context *brw, uint32_t hw_prim) { struct brw_3d_primitive prim_packet; + struct intel_context *intel = &brw->intel; if (INTEL_DEBUG & DEBUG_PRIMS) _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), @@ -138,16 +138,35 @@ static void brw_emit_prim(struct brw_context *brw, prim_packet.verts_per_instance = trim(prim->mode, prim->count); prim_packet.start_vert_location = prim->start; + if (prim->indexed) + prim_packet.start_vert_location += brw->ib.start_vertex_offset; prim_packet.instance_count = 1; prim_packet.start_instance_location = 0; - prim_packet.base_vert_location = 0; + prim_packet.base_vert_location = prim->basevertex; /* Can't wrap here, since we rely on the validated state. */ brw->no_batch_wrap = GL_TRUE; + + /* If we're set to always flush, do it before and after the primitive emit. + * We want to catch both missed flushes that hurt instruction/state cache + * and missed flushes of the render cache as it heads to other parts of + * the besides the draw code. + */ + if (intel->always_flush_cache) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(intel->vtbl.flush_cmd()); + ADVANCE_BATCH(); + } if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), LOOP_CLIPRECTS); } + if (intel->always_flush_cache) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(intel->vtbl.flush_cmd()); + ADVANCE_BATCH(); + } + brw->no_batch_wrap = GL_FALSE; } @@ -165,24 +184,16 @@ static void brw_merge_inputs( struct brw_context *brw, for (i = 0; i < VERT_ATTRIB_MAX; i++) { brw->vb.inputs[i].glarray = arrays[i]; + brw->vb.inputs[i].attrib = (gl_vert_attrib) i; - /* XXX: metaops passes null arrays */ - if (arrays[i]) { - if (arrays[i]->StrideB != 0) - brw->vb.info.varying |= 1 << i; - + if (arrays[i]->StrideB != 0) brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) << ((i%16) * 2); - } } - /* Raise statechanges if input sizes and varying have changed: - */ + /* Raise statechanges if input sizes have changed. */ if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0) brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; - - if (brw->vb.info.varying != old.varying) - brw->state.dirty.brw |= BRW_NEW_INPUT_VARYING; } /* XXX: could split the primitive list to fallback only on the @@ -192,12 +203,20 @@ static GLboolean check_fallbacks( struct brw_context *brw, const struct _mesa_prim *prim, GLuint nr_prims ) { + GLcontext *ctx = &brw->intel.ctx; GLuint i; - if (!brw->intel.strict_conformance) + /* If we don't require strict OpenGL conformance, never + * use fallbacks. If we're forcing fallbacks, always + * use fallfacks. + */ + if (brw->intel.conformance_mode == 0) return GL_FALSE; - if (brw->attribs.Polygon->SmoothFlag) { + if (brw->intel.conformance_mode == 2) + return GL_TRUE; + + if (ctx->Polygon.SmoothFlag) { for (i = 0; i < nr_prims; i++) if (reduced_prim[prim[i].mode] == GL_TRIANGLES) return GL_TRUE; @@ -206,7 +225,7 @@ static GLboolean check_fallbacks( struct brw_context *brw, /* BRW hardware will do AA lines, but they are non-conformant it * seems. TBD whether we keep this fallback: */ - if (brw->attribs.Line->SmoothFlag) { + if (ctx->Line.SmoothFlag) { for (i = 0; i < nr_prims; i++) if (reduced_prim[prim[i].mode] == GL_LINES) return GL_TRUE; @@ -215,28 +234,61 @@ static GLboolean check_fallbacks( struct brw_context *brw, /* Stipple -- these fallbacks could be resolved with a little * bit of work? */ - if (brw->attribs.Line->StippleFlag) { + if (ctx->Line.StippleFlag) { for (i = 0; i < nr_prims; i++) { /* GS doesn't get enough information to know when to reset * the stipple counter?!? */ - if (prim[i].mode == GL_LINE_LOOP) + if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) return GL_TRUE; if (prim[i].mode == GL_POLYGON && - (brw->attribs.Polygon->FrontMode == GL_LINE || - brw->attribs.Polygon->BackMode == GL_LINE)) + (ctx->Polygon.FrontMode == GL_LINE || + ctx->Polygon.BackMode == GL_LINE)) return GL_TRUE; } } - - if (brw->attribs.Point->SmoothFlag) { + if (ctx->Point.SmoothFlag) { for (i = 0; i < nr_prims; i++) if (prim[i].mode == GL_POINTS) return GL_TRUE; } + + /* BRW hardware doesn't handle GL_CLAMP texturing correctly; + * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP + * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and + * we want strict conformance, force the fallback. + * Right now, we only do this for 2D textures. + */ + { + int u; + for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; + if (texUnit->Enabled) { + if (texUnit->Enabled & TEXTURE_1D_BIT) { + if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) { + return GL_TRUE; + } + } + if (texUnit->Enabled & TEXTURE_2D_BIT) { + if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) { + return GL_TRUE; + } + } + if (texUnit->Enabled & TEXTURE_3D_BIT) { + if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) { + return GL_TRUE; + } + } + } + } + } + /* Nothing stopping us from the fast path now */ return GL_FALSE; } @@ -261,11 +313,18 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, if (ctx->NewState) _mesa_update_state( ctx ); + /* We have to validate the textures *before* checking for fallbacks; + * otherwise, the software fallback won't be able to rely on the + * texture state, the firstLevel and lastLevel fields won't be + * set in the intel texture object (they'll both be 0), and the + * software fallback will segfault if it attempts to access any + * texture level other than level 0. + */ + brw_validate_textures( brw ); + if (check_fallbacks(brw, prim, nr_prims)) return GL_FALSE; - brw_validate_textures( brw ); - /* Bind all inputs, derive varying and size information: */ brw_merge_inputs( brw, arrays ); @@ -346,9 +405,13 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, retval = GL_TRUE; } + if (intel->always_flush_batch) + intel_batchbuffer_flush(intel->batch); out: UNLOCK_HARDWARE(intel); + brw_state_cache_check_size(brw); + if (warn) fprintf(stderr, "i965: Single primitive emit potentially exceeded " "available aperture space\n"); @@ -359,54 +422,31 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, return retval; } -static GLboolean brw_need_rebase( GLcontext *ctx, - const struct gl_client_array *arrays[], - const struct _mesa_index_buffer *ib, - GLuint min_index ) -{ - if (min_index == 0) - return GL_FALSE; - - if (ib) { - if (!vbo_all_varyings_in_vbos(arrays)) - return GL_TRUE; - else - return GL_FALSE; - } - else { - /* Hmm. This isn't quite what I wanted. BRW can actually - * handle the mixed case well enough that we shouldn't need to - * rebase. However, it's probably not very common, nor hugely - * expensive to do it this way: - */ - if (!vbo_all_varyings_in_vbos(arrays)) - return GL_TRUE; - else - return GL_FALSE; - } -} - - void brw_draw_prims( GLcontext *ctx, const struct gl_client_array *arrays[], const struct _mesa_prim *prim, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index ) { GLboolean retval; - /* Decide if we want to rebase. If so we end up recursing once - * only into this function. - */ - if (brw_need_rebase( ctx, arrays, ib, min_index )) { - vbo_rebase_prims( ctx, arrays, - prim, nr_prims, - ib, min_index, max_index, - brw_draw_prims ); - - return; + if (!vbo_all_varyings_in_vbos(arrays)) { + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + + /* Decide if we want to rebase. If so we end up recursing once + * only into this function. + */ + if (min_index != 0) { + vbo_rebase_prims(ctx, arrays, + prim, nr_prims, + ib, min_index, max_index, + brw_draw_prims ); + return; + } } /* Make a first attempt at drawing: diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h index 9aebbdb1b8..2a14db217f 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.h +++ b/src/mesa/drivers/dri/i965/brw_draw.h @@ -39,6 +39,7 @@ void brw_draw_prims( GLcontext *ctx, const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, GLuint min_index, GLuint max_index ); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 73d6dea01e..765ae5a2fe 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -25,9 +25,9 @@ * **************************************************************************/ -#include <stdlib.h> #include "main/glheader.h" +#include "main/bufferobj.h" #include "main/context.h" #include "main/state.h" #include "main/api_validate.h" @@ -156,7 +156,13 @@ static GLuint byte_types_scale[5] = { }; -static GLuint get_surface_type( GLenum type, GLuint size, GLboolean normalized ) +/** + * Given vertex array type/size/format/normalized info, return + * the appopriate hardware surface type. + * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays. + */ +static GLuint get_surface_type( GLenum type, GLuint size, + GLenum format, GLboolean normalized ) { if (INTEL_DEBUG & DEBUG_VERTS) _mesa_printf("type %s size %d normalized %d\n", @@ -171,11 +177,20 @@ static GLuint get_surface_type( GLenum type, GLuint size, GLboolean normalized ) case GL_BYTE: return byte_types_norm[size]; case GL_UNSIGNED_INT: return uint_types_norm[size]; case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; - case GL_UNSIGNED_BYTE: return ubyte_types_norm[size]; + case GL_UNSIGNED_BYTE: + if (format == GL_BGRA) { + /* See GL_EXT_vertex_array_bgra */ + assert(size == 4); + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + } + else { + return ubyte_types_norm[size]; + } default: assert(0); return 0; } } else { + assert(format == GL_RGBA); /* sanity check */ switch (type) { case GL_DOUBLE: return double_types[size]; case GL_FLOAT: return float_types[size]; @@ -262,6 +277,7 @@ copy_array_to_vbo_array( struct brw_context *brw, struct brw_vertex_element *element, GLuint dst_stride) { + struct intel_context *intel = &brw->intel; GLuint size = element->count * dst_stride; get_space(brw, size, &element->bo, &element->offset); @@ -274,29 +290,52 @@ copy_array_to_vbo_array( struct brw_context *brw, } if (dst_stride == element->glarray->StrideB) { - dri_bo_subdata(element->bo, - element->offset, - size, - element->glarray->Ptr); + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(element->bo); + memcpy((char *)element->bo->virtual + element->offset, + element->glarray->Ptr, size); + drm_intel_gem_bo_unmap_gtt(element->bo); + } else { + dri_bo_subdata(element->bo, + element->offset, + size, + element->glarray->Ptr); + } } else { - void *data; char *dest; - const char *src = element->glarray->Ptr; + const unsigned char *src = element->glarray->Ptr; int i; - data = _mesa_malloc(dst_stride * element->count); - dest = data; - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(element->bo); + dest = element->bo->virtual; + dest += element->offset; + + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; + } - dri_bo_subdata(element->bo, - element->offset, - size, - data); - _mesa_free(data); + drm_intel_gem_bo_unmap_gtt(element->bo); + } else { + void *data; + + data = _mesa_malloc(dst_stride * element->count); + dest = data; + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; + } + + dri_bo_subdata(element->bo, + element->offset, + size, + data); + + _mesa_free(data); + } } } @@ -304,16 +343,13 @@ static void brw_prepare_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; GLuint i; const unsigned char *ptr = NULL; GLuint interleave = 0; unsigned int min_index = brw->vb.min_index; unsigned int max_index = brw->vb.max_index; - struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; - GLuint nr_enabled = 0; - struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; GLuint nr_uploads = 0; @@ -323,12 +359,13 @@ static void brw_prepare_vertices(struct brw_context *brw) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ - while (tmp) { - GLuint i = _mesa_ffsll(tmp)-1; + brw->vb.nr_enabled = 0; + while (vs_inputs) { + GLuint i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; - tmp &= ~(1<<i); - enabled[nr_enabled++] = input; + vs_inputs &= ~(1 << i); + brw->vb.enabled[brw->vb.nr_enabled++] = input; } /* XXX: In the rare cases where this happens we fallback all @@ -337,18 +374,17 @@ static void brw_prepare_vertices(struct brw_context *brw) * cases with > 17 vertex attributes enabled, so it probably * isn't an issue at this point. */ - if (nr_enabled >= BRW_VEP_MAX) { + if (brw->vb.nr_enabled >= BRW_VEP_MAX) { intel->Fallback = 1; return; } - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; input->element_size = get_size(input->glarray->Type) * input->glarray->Size; - input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; - if (input->glarray->BufferObj->Name != 0) { + if (_mesa_is_bufferobj(input->glarray->BufferObj)) { struct intel_buffer_object *intel_buffer = intel_buffer_object(input->glarray->BufferObj); @@ -359,7 +395,23 @@ static void brw_prepare_vertices(struct brw_context *brw) dri_bo_reference(input->bo); input->offset = (unsigned long)input->glarray->Ptr; input->stride = input->glarray->StrideB; + input->count = input->glarray->_MaxElement; + + /* This is a common place to reach if the user mistakenly supplies + * a pointer in place of a VBO offset. If we just let it go through, + * we may end up dereferencing a pointer beyond the bounds of the + * GTT. We would hope that the VBO's max_index would save us, but + * Mesa appears to hand us min/max values not clipped to the + * array object's _MaxElement, and _MaxElement frequently appears + * to be wrong anyway. + * + * The VBO spec allows application termination in this case, and it's + * probably a service to the poor programmer to do so rather than + * trying to just not render. + */ + assert(input->offset < input->bo->size); } else { + input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; if (input->bo != NULL) { /* Already-uploaded vertex data is present from a previous * prepare_vertices, but we had to re-validate state due to @@ -371,7 +423,7 @@ static void brw_prepare_vertices(struct brw_context *brw) /* Queue the buffer object up to be uploaded in the next pass, * when we've decided if we're doing interleaved or not. */ - if (i == 0) { + if (input->attrib == VERT_ATTRIB_POS) { /* Position array not properly enabled: */ if (input->glarray->StrideB == 0) { @@ -427,8 +479,8 @@ static void brw_prepare_vertices(struct brw_context *brw) brw_prepare_query_begin(brw); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; brw_add_validated_bo(brw, input->bo); } @@ -438,34 +490,44 @@ static void brw_emit_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; - struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint i; - GLuint nr_enabled = 0; - /* Accumulate the list of enabled arrays. */ - while (tmp) { - i = _mesa_ffsll(tmp)-1; - struct brw_vertex_element *input = &brw->vb.inputs[i]; + brw_emit_query_begin(brw); - tmp &= ~(1<<i); - enabled[nr_enabled++] = input; + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), emit a single pad + * VERTEX_ELEMENT struct and bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. + */ + if (brw->vb.nr_enabled == 0) { + BEGIN_BATCH(3, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); + OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | + (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); + ADVANCE_BATCH(); + return; } - brw_emit_query_begin(brw); - /* Now emit VB and VEP state packets. * * This still defines a hardware VB for each input, even if they * are interleaved or from the same VBO. TBD if this makes a * performance difference. */ - BEGIN_BATCH(1 + nr_enabled * 4, IGNORE_CLIPRECTS); + BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS); OUT_BATCH((CMD_VERTEX_BUFFER << 16) | - ((1 + nr_enabled * 4) - 2)); + ((1 + brw->vb.nr_enabled * 4) - 2)); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | BRW_VB0_ACCESS_VERTEXDATA | @@ -473,17 +535,30 @@ static void brw_emit_vertices(struct brw_context *brw) OUT_RELOC(input->bo, I915_GEM_DOMAIN_VERTEX, 0, input->offset); - OUT_BATCH(brw->vb.max_index); + if (BRW_IS_IGDNG(brw)) { + if (input->stride) { + OUT_RELOC(input->bo, + I915_GEM_DOMAIN_VERTEX, 0, + input->offset + input->stride * input->count); + } else { + assert(input->count == 1); + OUT_RELOC(input->bo, + I915_GEM_DOMAIN_VERTEX, 0, + input->offset + input->element_size); + } + } else + OUT_BATCH(input->stride ? input->count : 0); OUT_BATCH(0); /* Instance data step rate */ } ADVANCE_BATCH(); - BEGIN_BATCH(1 + nr_enabled * 2, IGNORE_CLIPRECTS); - OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr_enabled * 2) - 2)); - for (i = 0; i < nr_enabled; i++) { - struct brw_vertex_element *input = enabled[i]; + BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2)); + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = get_surface_type(input->glarray->Type, input->glarray->Size, + input->glarray->Format, input->glarray->Normalized); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; @@ -502,11 +577,18 @@ static void brw_emit_vertices(struct brw_context *brw) BRW_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | - (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | - ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); + + if (BRW_IS_IGDNG(brw)) + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); + else + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | + ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } ADVANCE_BATCH(); } @@ -530,26 +612,36 @@ static void brw_prepare_indices(struct brw_context *brw) dri_bo *bo = NULL; struct gl_buffer_object *bufferobj; GLuint offset; + GLuint ib_type_size; if (index_buffer == NULL) return; - ib_size = get_size(index_buffer->type) * index_buffer->count; + ib_type_size = get_size(index_buffer->type); + ib_size = ib_type_size * index_buffer->count; bufferobj = index_buffer->obj;; /* Turn into a proper VBO: */ - if (!bufferobj->Name) { - + if (!_mesa_is_bufferobj(bufferobj)) { + brw->ib.start_vertex_offset = 0; + /* Get new bufferobj, offset: */ get_space(brw, ib_size, &bo, &offset); /* Straight upload */ - dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(bo); + memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); + drm_intel_gem_bo_unmap_gtt(bo); + } else { + dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + } } else { - offset = (GLuint)index_buffer->ptr; + offset = (GLuint) (unsigned long) index_buffer->ptr; + brw->ib.start_vertex_offset = 0; /* If the index buffer isn't aligned to its element size, we have to * rebase it into a temporary. @@ -570,39 +662,62 @@ static void brw_prepare_indices(struct brw_context *brw) bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj), INTEL_READ); dri_bo_reference(bo); + + /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading + * the index buffer state when we're just moving the start index + * of our drawing. + */ + brw->ib.start_vertex_offset = offset / ib_type_size; + offset = 0; + ib_size = bo->size; } } - dri_bo_unreference(brw->ib.bo); - brw->ib.bo = bo; - brw->ib.offset = offset; + if (brw->ib.bo != bo || + brw->ib.offset != offset || + brw->ib.size != ib_size) + { + drm_intel_bo_unreference(brw->ib.bo); + brw->ib.bo = bo; + brw->ib.offset = offset; + brw->ib.size = ib_size; + + brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER; + } else { + drm_intel_bo_unreference(bo); + } brw_add_validated_bo(brw, brw->ib.bo); } -static void brw_emit_indices(struct brw_context *brw) +const struct brw_tracked_state brw_indices = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_INDICES, + .cache = 0, + }, + .prepare = brw_prepare_indices, +}; + +static void brw_emit_index_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; const struct _mesa_index_buffer *index_buffer = brw->ib.ib; - GLuint ib_size; if (index_buffer == NULL) return; - ib_size = get_size(index_buffer->type) * index_buffer->count; - /* Emit the indexbuffer packet: */ { struct brw_indexbuffer ib; memset(&ib, 0, sizeof(ib)); - + ib.header.bits.opcode = CMD_INDEX_BUFFER; ib.header.bits.length = sizeof(ib)/4 - 2; ib.header.bits.index_format = get_index_type(index_buffer->type); ib.header.bits.cut_index_enable = 0; - BEGIN_BATCH(4, IGNORE_CLIPRECTS); OUT_BATCH( ib.header.dword ); @@ -611,18 +726,17 @@ static void brw_emit_indices(struct brw_context *brw) brw->ib.offset); OUT_RELOC(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, - brw->ib.offset + ib_size); + brw->ib.offset + brw->ib.size); OUT_BATCH( 0 ); ADVANCE_BATCH(); } } -const struct brw_tracked_state brw_indices = { +const struct brw_tracked_state brw_index_buffer = { .dirty = { .mesa = 0, - .brw = BRW_NEW_BATCH | BRW_NEW_INDICES, + .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER, .cache = 0, }, - .prepare = brw_prepare_indices, - .emit = brw_emit_indices, + .emit = brw_emit_index_buffer, }; diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index b3ae4eef33..1df561386e 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -62,7 +62,7 @@ void brw_set_predicate_control( struct brw_compile *p, GLuint pc ) void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ) { - p->current->header.destreg__conditonalmod = conditional; + p->current->header.destreg__conditionalmod = conditional; } void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ) @@ -129,3 +129,126 @@ const GLuint *brw_get_program( struct brw_compile *p, return (const GLuint *)p->store; } + + +/** + * Subroutine calls require special attention. + * Mesa instructions may be expanded into multiple hardware instructions + * so the prog_instruction::BranchTarget field can't be used as an index + * into the hardware instructions. + * + * The BranchTarget field isn't needed, however. Mesa's GLSL compiler + * emits CAL and BGNSUB instructions with labels that can be used to map + * subroutine calls to actual subroutine code blocks. + * + * The structures and function here implement patching of CAL instructions + * so they jump to the right subroutine code... + */ + + +/** + * For each OPCODE_BGNSUB we create one of these. + */ +struct brw_glsl_label +{ + const char *name; /**< the label string */ + GLuint position; /**< the position of the brw instruction for this label */ + struct brw_glsl_label *next; /**< next in linked list */ +}; + + +/** + * For each OPCODE_CAL we create one of these. + */ +struct brw_glsl_call +{ + GLuint call_inst_pos; /**< location of the CAL instruction */ + const char *sub_name; /**< name of subroutine to call */ + struct brw_glsl_call *next; /**< next in linked list */ +}; + + +/** + * Called for each OPCODE_BGNSUB. + */ +void +brw_save_label(struct brw_compile *c, const char *name, GLuint position) +{ + struct brw_glsl_label *label = CALLOC_STRUCT(brw_glsl_label); + label->name = name; + label->position = position; + label->next = c->first_label; + c->first_label = label; +} + + +/** + * Called for each OPCODE_CAL. + */ +void +brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos) +{ + struct brw_glsl_call *call = CALLOC_STRUCT(brw_glsl_call); + call->call_inst_pos = call_pos; + call->sub_name = name; + call->next = c->first_call; + c->first_call = call; +} + + +/** + * Lookup a label, return label's position/offset. + */ +static GLuint +brw_lookup_label(struct brw_compile *c, const char *name) +{ + const struct brw_glsl_label *label; + for (label = c->first_label; label; label = label->next) { + if (strcmp(name, label->name) == 0) { + return label->position; + } + } + abort(); /* should never happen */ + return ~0; +} + + +/** + * When we're done generating code, this function is called to resolve + * subroutine calls. + */ +void +brw_resolve_cals(struct brw_compile *c) +{ + const struct brw_glsl_call *call; + + for (call = c->first_call; call; call = call->next) { + const GLuint sub_loc = brw_lookup_label(c, call->sub_name); + struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos]; + struct brw_instruction *brw_sub_inst = &c->store[sub_loc]; + GLint offset = brw_sub_inst - brw_call_inst; + + /* patch brw_inst1 to point to brw_inst2 */ + brw_set_src1(brw_call_inst, brw_imm_d(offset * 16)); + } + + /* free linked list of calls */ + { + struct brw_glsl_call *call, *next; + for (call = c->first_call; call; call = next) { + next = call->next; + _mesa_free(call); + } + c->first_call = NULL; + } + + /* free linked list of labels */ + { + struct brw_glsl_label *label, *next; + for (label = c->first_label; label; label = next) { + next = label->next; + _mesa_free(label); + } + c->first_label = NULL; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 9e2b39af9b..30603bdd0e 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -91,8 +91,13 @@ struct brw_indirect { }; +struct brw_glsl_label; +struct brw_glsl_call; + + + #define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 1200 +#define BRW_EU_MAX_INSN 10000 struct brw_compile { struct brw_instruction store[BRW_EU_MAX_INSN]; @@ -106,9 +111,22 @@ struct brw_compile { GLuint flag_value; GLboolean single_program_flow; struct brw_context *brw; + + struct brw_glsl_label *first_label; /**< linked list of labels */ + struct brw_glsl_call *first_call; /**< linked list of CALs */ }; +void +brw_save_label(struct brw_compile *c, const char *name, GLuint position); + +void +brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos); + +void +brw_resolve_cals(struct brw_compile *c); + + static INLINE int type_sz( GLuint type ) { @@ -152,6 +170,13 @@ static INLINE struct brw_reg brw_reg( GLuint file, GLuint writemask ) { struct brw_reg reg; + if (type == BRW_GENERAL_REGISTER_FILE) + assert(nr < BRW_MAX_GRF); + else if (type == BRW_MESSAGE_REGISTER_FILE) + assert(nr < BRW_MAX_MRF); + else if (type == BRW_ARCHITECTURE_REGISTER_FILE) + assert(nr <= BRW_ARF_IP); + reg.type = type; reg.file = file; reg.nr = nr; @@ -513,6 +538,7 @@ static INLINE struct brw_reg brw_mask_reg( GLuint subnr ) static INLINE struct brw_reg brw_message_reg( GLuint nr ) { + assert(nr < BRW_MAX_MRF); return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); @@ -705,6 +731,13 @@ static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset return ptr; } +/** Do two brw_regs refer to the same register? */ +static INLINE GLboolean +brw_same_reg(struct brw_reg r1, struct brw_reg r2) +{ + return r1.file == r2.file && r1.nr == r2.nr; +} + static INLINE struct brw_instruction *current_insn( struct brw_compile *p) { return &p->store[p->nr_insn]; @@ -783,6 +816,19 @@ void brw_urb_WRITE(struct brw_compile *p, GLuint offset, GLuint swizzle); +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle); + void brw_fb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, @@ -802,7 +848,9 @@ void brw_SAMPLE(struct brw_compile *p, GLuint msg_type, GLuint response_length, GLuint msg_length, - GLboolean eot); + GLboolean eot, + GLuint header_present, + GLuint simd_mode); void brw_math_16( struct brw_compile *p, struct brw_reg dest, @@ -823,12 +871,24 @@ void brw_math( struct brw_compile *p, void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLuint scratch_offset ); +void brw_dp_READ_4( struct brw_compile *p, + struct brw_reg dest, + GLboolean relAddr, + GLuint location, + GLuint bind_table_index ); + +void brw_dp_READ_4_vs( struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index ); + void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, - GLuint msg_reg_nr, GLuint scratch_offset ); /* If/else/endif. Works by manipulating the execution flags on each diff --git a/src/mesa/drivers/dri/i965/brw_eu_debug.c b/src/mesa/drivers/dri/i965/brw_eu_debug.c index 91dbbd5af6..29f3f6d02f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_debug.c +++ b/src/mesa/drivers/dri/i965/brw_eu_debug.c @@ -65,6 +65,7 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.width == BRW_WIDTH_8 && hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && hwreg.type == BRW_REGISTER_TYPE_F) { + /* vector register */ _mesa_printf("vec%d", hwreg.nr); } else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && @@ -72,8 +73,12 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.width == BRW_WIDTH_1 && hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && hwreg.type == BRW_REGISTER_TYPE_F) { + /* "scalar" register */ _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); } + else if (hwreg.file == BRW_IMMEDIATE_VALUE) { + _mesa_printf("imm %f", hwreg.dw1.f); + } else { _mesa_printf("%s%d.%d<%d;%d,%d>:%s", file[hwreg.file], diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 4e099b5945..241cdc33f8 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -55,6 +55,9 @@ static void guess_execution_size( struct brw_instruction *insn, static void brw_set_dest( struct brw_instruction *insn, struct brw_reg dest ) { + if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(dest.nr < 128); + insn->bits1.da1.dest_reg_file = dest.file; insn->bits1.da1.dest_reg_type = dest.type; insn->bits1.da1.dest_address_mode = dest.address_mode; @@ -96,10 +99,13 @@ static void brw_set_dest( struct brw_instruction *insn, } static void brw_set_src0( struct brw_instruction *insn, - struct brw_reg reg ) + struct brw_reg reg ) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(reg.nr < 128); + insn->bits1.da1.src0_reg_file = reg.file; insn->bits1.da1.src0_reg_type = reg.type; insn->bits2.da1.src0_abs = reg.abs; @@ -169,10 +175,12 @@ static void brw_set_src0( struct brw_instruction *insn, void brw_set_src1( struct brw_instruction *insn, - struct brw_reg reg ) + struct brw_reg reg ) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + assert(reg.nr < 128); + insn->bits1.da1.src1_reg_file = reg.file; insn->bits1.da1.src1_reg_type = reg.type; insn->bits3.da1.src1_abs = reg.abs; @@ -233,7 +241,8 @@ void brw_set_src1( struct brw_instruction *insn, -static void brw_set_math_message( struct brw_instruction *insn, +static void brw_set_math_message( struct brw_context *brw, + struct brw_instruction *insn, GLuint msg_length, GLuint response_length, GLuint function, @@ -244,18 +253,35 @@ static void brw_set_math_message( struct brw_instruction *insn, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.math.function = function; - insn->bits3.math.int_type = integer_type; - insn->bits3.math.precision = low_precision; - insn->bits3.math.saturate = saturate; - insn->bits3.math.data_type = dataType; - insn->bits3.math.response_length = response_length; - insn->bits3.math.msg_length = msg_length; - insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; - insn->bits3.math.end_of_thread = 0; + if (BRW_IS_IGDNG(brw)) { + insn->bits3.math_igdng.function = function; + insn->bits3.math_igdng.int_type = integer_type; + insn->bits3.math_igdng.precision = low_precision; + insn->bits3.math_igdng.saturate = saturate; + insn->bits3.math_igdng.data_type = dataType; + insn->bits3.math_igdng.snapshot = 0; + insn->bits3.math_igdng.header_present = 0; + insn->bits3.math_igdng.response_length = response_length; + insn->bits3.math_igdng.msg_length = msg_length; + insn->bits3.math_igdng.end_of_thread = 0; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH; + insn->bits2.send_igdng.end_of_thread = 0; + } else { + insn->bits3.math.function = function; + insn->bits3.math.int_type = integer_type; + insn->bits3.math.precision = low_precision; + insn->bits3.math.saturate = saturate; + insn->bits3.math.data_type = dataType; + insn->bits3.math.response_length = response_length; + insn->bits3.math.msg_length = msg_length; + insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; + insn->bits3.math.end_of_thread = 0; + } } -static void brw_set_urb_message( struct brw_instruction *insn, + +static void brw_set_ff_sync_message( struct brw_context *brw, + struct brw_instruction *insn, GLboolean allocate, GLboolean used, GLuint msg_length, @@ -265,21 +291,64 @@ static void brw_set_urb_message( struct brw_instruction *insn, GLuint offset, GLuint swizzle_control ) { - brw_set_src1(insn, brw_imm_d(0)); + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.urb_igdng.opcode = 1; + insn->bits3.urb_igdng.offset = offset; + insn->bits3.urb_igdng.swizzle_control = swizzle_control; + insn->bits3.urb_igdng.allocate = allocate; + insn->bits3.urb_igdng.used = used; + insn->bits3.urb_igdng.complete = complete; + insn->bits3.urb_igdng.header_present = 1; + insn->bits3.urb_igdng.response_length = response_length; + insn->bits3.urb_igdng.msg_length = msg_length; + insn->bits3.urb_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; +} - insn->bits3.urb.opcode = 0; /* ? */ - insn->bits3.urb.offset = offset; - insn->bits3.urb.swizzle_control = swizzle_control; - insn->bits3.urb.allocate = allocate; - insn->bits3.urb.used = used; /* ? */ - insn->bits3.urb.complete = complete; - insn->bits3.urb.response_length = response_length; - insn->bits3.urb.msg_length = msg_length; - insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; - insn->bits3.urb.end_of_thread = end_of_thread; +static void brw_set_urb_message( struct brw_context *brw, + struct brw_instruction *insn, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean end_of_thread, + GLboolean complete, + GLuint offset, + GLuint swizzle_control ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.urb_igdng.opcode = 0; /* ? */ + insn->bits3.urb_igdng.offset = offset; + insn->bits3.urb_igdng.swizzle_control = swizzle_control; + insn->bits3.urb_igdng.allocate = allocate; + insn->bits3.urb_igdng.used = used; /* ? */ + insn->bits3.urb_igdng.complete = complete; + insn->bits3.urb_igdng.header_present = 1; + insn->bits3.urb_igdng.response_length = response_length; + insn->bits3.urb_igdng.msg_length = msg_length; + insn->bits3.urb_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.urb.opcode = 0; /* ? */ + insn->bits3.urb.offset = offset; + insn->bits3.urb.swizzle_control = swizzle_control; + insn->bits3.urb.allocate = allocate; + insn->bits3.urb.used = used; /* ? */ + insn->bits3.urb.complete = complete; + insn->bits3.urb.response_length = response_length; + insn->bits3.urb.msg_length = msg_length; + insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; + insn->bits3.urb.end_of_thread = end_of_thread; + } } -static void brw_set_dp_write_message( struct brw_instruction *insn, +static void brw_set_dp_write_message( struct brw_context *brw, + struct brw_instruction *insn, GLuint binding_table_index, GLuint msg_control, GLuint msg_type, @@ -290,18 +359,33 @@ static void brw_set_dp_write_message( struct brw_instruction *insn, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.dp_write.binding_table_index = binding_table_index; - insn->bits3.dp_write.msg_control = msg_control; - insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; - insn->bits3.dp_write.msg_type = msg_type; - insn->bits3.dp_write.send_commit_msg = 0; - insn->bits3.dp_write.response_length = response_length; - insn->bits3.dp_write.msg_length = msg_length; - insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; - insn->bits3.urb.end_of_thread = end_of_thread; + if (BRW_IS_IGDNG(brw)) { + insn->bits3.dp_write_igdng.binding_table_index = binding_table_index; + insn->bits3.dp_write_igdng.msg_control = msg_control; + insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write_igdng.msg_type = msg_type; + insn->bits3.dp_write_igdng.send_commit_msg = 0; + insn->bits3.dp_write_igdng.header_present = 1; + insn->bits3.dp_write_igdng.response_length = response_length; + insn->bits3.dp_write_igdng.msg_length = msg_length; + insn->bits3.dp_write_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.dp_write.binding_table_index = binding_table_index; + insn->bits3.dp_write.msg_control = msg_control; + insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write.msg_type = msg_type; + insn->bits3.dp_write.send_commit_msg = 0; + insn->bits3.dp_write.response_length = response_length; + insn->bits3.dp_write.msg_length = msg_length; + insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits3.dp_write.end_of_thread = end_of_thread; + } } -static void brw_set_dp_read_message( struct brw_instruction *insn, +static void brw_set_dp_read_message( struct brw_context *brw, + struct brw_instruction *insn, GLuint binding_table_index, GLuint msg_control, GLuint msg_type, @@ -312,28 +396,57 @@ static void brw_set_dp_read_message( struct brw_instruction *insn, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.dp_read.binding_table_index = binding_table_index; - insn->bits3.dp_read.msg_control = msg_control; - insn->bits3.dp_read.msg_type = msg_type; - insn->bits3.dp_read.target_cache = target_cache; - insn->bits3.dp_read.response_length = response_length; - insn->bits3.dp_read.msg_length = msg_length; - insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; - insn->bits3.dp_read.end_of_thread = end_of_thread; + if (BRW_IS_IGDNG(brw)) { + insn->bits3.dp_read_igdng.binding_table_index = binding_table_index; + insn->bits3.dp_read_igdng.msg_control = msg_control; + insn->bits3.dp_read_igdng.msg_type = msg_type; + insn->bits3.dp_read_igdng.target_cache = target_cache; + insn->bits3.dp_read_igdng.header_present = 1; + insn->bits3.dp_read_igdng.response_length = response_length; + insn->bits3.dp_read_igdng.msg_length = msg_length; + insn->bits3.dp_read_igdng.pad1 = 0; + insn->bits3.dp_read_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ + insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ + insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ + insn->bits3.dp_read.response_length = response_length; /*16:19*/ + insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ + insn->bits3.dp_read.pad1 = 0; /*28:30*/ + insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ + } } static void brw_set_sampler_message(struct brw_context *brw, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint sampler, - GLuint msg_type, - GLuint response_length, - GLuint msg_length, - GLboolean eot) + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint sampler, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot, + GLuint header_present, + GLuint simd_mode) { + assert(eot == 0); brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_G4X(brw)) { + if (BRW_IS_IGDNG(brw)) { + insn->bits3.sampler_igdng.binding_table_index = binding_table_index; + insn->bits3.sampler_igdng.sampler = sampler; + insn->bits3.sampler_igdng.msg_type = msg_type; + insn->bits3.sampler_igdng.simd_mode = simd_mode; + insn->bits3.sampler_igdng.header_present = header_present; + insn->bits3.sampler_igdng.response_length = response_length; + insn->bits3.sampler_igdng.msg_length = msg_length; + insn->bits3.sampler_igdng.end_of_thread = eot; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER; + insn->bits2.send_igdng.end_of_thread = eot; + } else if (BRW_IS_G4X(brw)) { insn->bits3.sampler_g4x.binding_table_index = binding_table_index; insn->bits3.sampler_g4x.sampler = sampler; insn->bits3.sampler_g4x.msg_type = msg_type; @@ -368,8 +481,8 @@ static struct brw_instruction *next_insn( struct brw_compile *p, /* Reset this one-shot flag: */ - if (p->current->header.destreg__conditonalmod) { - p->current->header.destreg__conditonalmod = 0; + if (p->current->header.destreg__conditionalmod) { + p->current->header.destreg__conditionalmod = 0; p->current->header.predicate_control = BRW_PREDICATE_NORMAL; } @@ -407,7 +520,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p, * Convenience routines. */ #define ALU1(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ struct brw_reg dest, \ struct brw_reg src0) \ { \ @@ -415,7 +528,7 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \ } #define ALU2(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ struct brw_reg dest, \ struct brw_reg src0, \ struct brw_reg src1) \ @@ -469,12 +582,16 @@ void brw_NOP(struct brw_compile *p) */ struct brw_instruction *brw_JMPI(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1) + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) { struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + insn->header.execution_size = 1; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_DISABLE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; return insn; @@ -531,6 +648,10 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, struct brw_instruction *if_insn) { struct brw_instruction *insn; + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; if (p->single_program_flow) { insn = next_insn(p, BRW_OPCODE_ADD); @@ -557,8 +678,8 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, } else { assert(if_insn->header.opcode == BRW_OPCODE_IF); - if_insn->bits3.if_else.jump_count = insn - if_insn; - if_insn->bits3.if_else.pop_count = 1; + if_insn->bits3.if_else.jump_count = br * (insn - if_insn); + if_insn->bits3.if_else.pop_count = 0; if_insn->bits3.if_else.pad0 = 0; } @@ -568,6 +689,11 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, void brw_ENDIF(struct brw_compile *p, struct brw_instruction *patch_insn) { + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; + if (p->single_program_flow) { /* In single program flow mode, there's no need to execute an ENDIF, * since we don't need to do any stack operations, and if we're executing @@ -599,11 +725,11 @@ void brw_ENDIF(struct brw_compile *p, /* Automagically turn it into an IFF: */ patch_insn->header.opcode = BRW_OPCODE_IFF; - patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); patch_insn->bits3.if_else.pop_count = 0; patch_insn->bits3.if_else.pad0 = 0; } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { - patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); patch_insn->bits3.if_else.pop_count = 1; patch_insn->bits3.if_else.pad0 = 0; } else { @@ -674,9 +800,13 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) struct brw_instruction *brw_WHILE(struct brw_compile *p, - struct brw_instruction *do_insn) + struct brw_instruction *do_insn) { struct brw_instruction *insn; + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; if (p->single_program_flow) insn = next_insn(p, BRW_OPCODE_ADD); @@ -697,7 +827,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, insn->header.execution_size = do_insn->header.execution_size; assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = do_insn - insn + 1; + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); insn->bits3.if_else.pop_count = 0; insn->bits3.if_else.pad0 = 0; } @@ -716,11 +846,15 @@ void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *jmp_insn) { struct brw_instruction *landing = &p->store[p->nr_insn]; + GLuint jmpi = 1; + + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); - jmp_insn->bits3.ud = (landing - jmp_insn) - 1; + jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); } @@ -737,7 +871,7 @@ void brw_CMP(struct brw_compile *p, { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); - insn->header.destreg__conditonalmod = conditional; + insn->header.destreg__conditionalmod = conditional; brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); @@ -762,7 +896,7 @@ void brw_CMP(struct brw_compile *p, * Helpers for the various SEND message types: */ -/* Invert 8 values +/** Extended math function, float[8]. */ void brw_math( struct brw_compile *p, struct brw_reg dest, @@ -781,11 +915,12 @@ void brw_math( struct brw_compile *p, * instructions. */ insn->header.predicate_control = 0; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); - brw_set_math_message(insn, + brw_set_math_message(p->brw, + insn, msg_length, response_length, function, BRW_MATH_INTEGER_UNSIGNED, @@ -794,7 +929,9 @@ void brw_math( struct brw_compile *p, data_type); } -/* Use 2 send instructions to invert 16 elements +/** + * Extended math function, float[16]. + * Use 2 send instructions. */ void brw_math_16( struct brw_compile *p, struct brw_reg dest, @@ -815,11 +952,12 @@ void brw_math_16( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); insn = next_insn(p, BRW_OPCODE_SEND); - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); - brw_set_math_message(insn, + brw_set_math_message(p->brw, + insn, msg_length, response_length, function, BRW_MATH_INTEGER_UNSIGNED, @@ -831,11 +969,12 @@ void brw_math_16( struct brw_compile *p, */ insn = next_insn(p, BRW_OPCODE_SEND); insn->header.compression_control = BRW_COMPRESSION_2NDHALF; - insn->header.destreg__conditonalmod = msg_reg_nr+1; + insn->header.destreg__conditionalmod = msg_reg_nr+1; brw_set_dest(insn, offset(dest,1)); brw_set_src0(insn, src); - brw_set_math_message(insn, + brw_set_math_message(p->brw, + insn, msg_length, response_length, function, BRW_MATH_INTEGER_UNSIGNED, @@ -847,22 +986,26 @@ void brw_math_16( struct brw_compile *p, } - - +/** + * Write block of 16 dwords/floats to the data port Render Cache scratch buffer. + * Scratch offset should be a multiple of 64. + * Used for register spilling. + */ void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, - GLuint msg_reg_nr, GLuint scratch_offset ) { + GLuint msg_reg_nr = 1; { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); + /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); - + brw_pop_insn_state(p); } @@ -873,13 +1016,14 @@ void brw_dp_WRITE_16( struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src); - brw_set_dp_write_message(insn, - 255, /* bti */ + brw_set_dp_write_message(p->brw, + insn, + 255, /* binding table index (255=stateless) */ BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ msg_length, @@ -887,24 +1031,29 @@ void brw_dp_WRITE_16( struct brw_compile *p, 0, /* response_length */ 0); /* eot */ } - } +/** + * Read block of 16 dwords/floats from the data port Render Cache scratch buffer. + * Scratch offset should be a multiple of 64. + * Used for register spilling. + */ void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLuint scratch_offset ) { + GLuint msg_reg_nr = 1; { brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); + /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); - + brw_pop_insn_state(p); } @@ -913,16 +1062,17 @@ void brw_dp_READ_16( struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); /* UW? */ brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); - brw_set_dp_read_message(insn, - 255, /* bti */ - 3, /* msg_control */ + brw_set_dp_read_message(p->brw, + insn, + 255, /* binding table index (255=stateless) */ + 3, /* msg_control (3 means 4 Owords) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - 1, /* target cache */ + 1, /* target cache (render/scratch) */ 1, /* msg_length */ 2, /* response_length */ 0); /* eot */ @@ -930,24 +1080,156 @@ void brw_dp_READ_16( struct brw_compile *p, } +/** + * Read a float[4] vector from the data port Data Cache (const buffer). + * Location (in buffer) should be a multiple of 16. + * Used for fetching shader constants. + * If relAddr is true, we'll do an indirect fetch using the address register. + */ +void brw_dp_READ_4( struct brw_compile *p, + struct brw_reg dest, + GLboolean relAddr, + GLuint location, + GLuint bind_table_index ) +{ + /* XXX: relAddr not implemented */ + GLuint msg_reg_nr = 1; + { + struct brw_reg b; + brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + /* Setup MRF[1] with location/offset into const buffer */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + brw_MOV(p, b, brw_imm_ud(location)); + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + + /* cast dest to a uword[8] vector */ + dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(p->brw, + insn, + bind_table_index, + 0, /* msg_control (0 means 1 Oword) */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + +/** + * Read float[4] constant(s) from VS constant buffer. + * For relative addressing, two float[4] constants will be read into 'dest'. + * Otherwise, one float[4] constant will be read into the lower half of 'dest'. + */ +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index) +{ + GLuint msg_reg_nr = 1; + + assert(oword < 2); + /* + printf("vs const read msg, location %u, msg_reg_nr %d\n", + location, msg_reg_nr); + */ + + /* Setup MRF[1] with location/offset into const buffer */ + { + struct brw_reg b; + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /*brw_set_access_mode(p, BRW_ALIGN_16);*/ + + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /*b = get_element_ud(b, 2);*/ + if (relAddr) { + brw_ADD(p, b, addrReg, brw_imm_ud(location)); + } + else { + brw_MOV(p, b, brw_imm_ud(location)); + } + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + /*insn->header.access_mode = BRW_ALIGN_16;*/ + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(p->brw, + insn, + bind_table_index, + oword, /* 0 = lower Oword, 1 = upper Oword */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + + void brw_fb_WRITE(struct brw_compile *p, - struct brw_reg dest, - GLuint msg_reg_nr, - struct brw_reg src0, - GLuint binding_table_index, - GLuint msg_length, - GLuint response_length, - GLboolean eot) + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint msg_length, + GLuint response_length, + GLboolean eot) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); - brw_set_dp_write_message(insn, + brw_set_dp_write_message(p->brw, + insn, binding_table_index, BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ @@ -958,7 +1240,11 @@ void brw_fb_WRITE(struct brw_compile *p, } - +/** + * Texture sample instruction. + * Note: the msg_type plus msg_length values determine exactly what kind + * of sampling operation is performed. See volume 4, page 161 of docs. + */ void brw_SAMPLE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, @@ -969,12 +1255,14 @@ void brw_SAMPLE(struct brw_compile *p, GLuint msg_type, GLuint response_length, GLuint msg_length, - GLboolean eot) + GLboolean eot, + GLuint header_present, + GLuint simd_mode) { GLboolean need_stall = 0; - if(writemask == 0) { -/* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ + if (writemask == 0) { + /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ return; } @@ -1006,7 +1294,7 @@ void brw_SAMPLE(struct brw_compile *p, if (newmask != writemask) { need_stall = 1; -/* _mesa_printf("need stall %x %x\n", newmask , writemask); */ + /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); @@ -1034,7 +1322,7 @@ void brw_SAMPLE(struct brw_compile *p, insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); @@ -1044,17 +1332,18 @@ void brw_SAMPLE(struct brw_compile *p, msg_type, response_length, msg_length, - eot); + eot, + header_present, + simd_mode); } - if (need_stall) - { + if (need_stall) { struct brw_reg reg = vec8(offset(dest, response_length-1)); /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } */ brw_push_insn_state(p); - brw_set_compression_control(p, GL_FALSE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, reg, reg); brw_pop_insn_state(p); } @@ -1080,15 +1369,16 @@ void brw_urb_WRITE(struct brw_compile *p, { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - assert(msg_length < 16); + assert(msg_length < BRW_MAX_MRF); brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_urb_message(insn, + brw_set_urb_message(p->brw, + insn, allocate, used, msg_length, @@ -1099,3 +1389,37 @@ void brw_urb_WRITE(struct brw_compile *p, swizzle); } +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < 16); + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, brw_imm_d(0)); + + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_ff_sync_message(p->brw, + insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c index 4ea660a51a..d27c6c24ca 100644 --- a/src/mesa/drivers/dri/i965/brw_fallback.c +++ b/src/mesa/drivers/dri/i965/brw_fallback.c @@ -37,6 +37,9 @@ #include "tnl/tnl.h" #include "brw_context.h" #include "brw_fallback.h" +#include "intel_chipset.h" +#include "intel_fbo.h" +#include "intel_regions.h" #include "glapi/glapi.h" @@ -44,23 +47,16 @@ static GLboolean do_check_fallback(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; GLcontext *ctx = &brw->intel.ctx; GLuint i; - /* BRW_NEW_METAOPS - */ - if (brw->metaops.active) - return GL_FALSE; - if (brw->intel.no_rast) { DBG("FALLBACK: rasterization disabled\n"); return GL_TRUE; } /* _NEW_RENDERMODE - * - * XXX: need to save/restore RenderMode in metaops state, or - * somehow move to a new attribs pointer: */ if (ctx->RenderMode != GL_RENDER) { DBG("FALLBACK: render mode\n"); @@ -70,7 +66,7 @@ static GLboolean do_check_fallback(struct brw_context *brw) /* _NEW_TEXTURE: */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i]; + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; if (texUnit->_ReallyEnabled) { struct intel_texture_object *intelObj = intel_texture_object(texUnit->_Current); struct gl_texture_image *texImage = intelObj->base.Image[0][intelObj->firstLevel]; @@ -83,12 +79,39 @@ static GLboolean do_check_fallback(struct brw_context *brw) /* _NEW_STENCIL */ - if (brw->attribs.Stencil->Enabled && - !brw->intel.hw_stencil) { + if (ctx->Stencil._Enabled && + (ctx->DrawBuffer->Name == 0 && !brw->intel.hw_stencil)) { DBG("FALLBACK: stencil\n"); return GL_TRUE; } + /* _NEW_BUFFERS */ + if (IS_965(intel->intelScreen->deviceID) && + !IS_G4X(intel->intelScreen->deviceID)) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + /* The original gen4 hardware couldn't set up WM surfaces pointing + * at an offset within a tile, which can happen when rendering to + * anything but the base level of a texture or the +X face/0 depth. + * This was fixed with the 4 Series hardware. + * + * For these original chips, you would have to make the depth and + * color destination surfaces include information on the texture + * type, LOD, face, and various limits to use them as a destination. + * I would have done this, but there's also a nasty requirement that + * the depth and the color surfaces all be of the same LOD, which + * may be a worse requirement than this alignment. (Also, we may + * want to just demote the texture to untiled, instead). + */ + if (irb->region && irb->region->tiling != I915_TILING_NONE && + (irb->region->draw_offset & 4095)) { + DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n"); + return GL_TRUE; + } + } + } return GL_FALSE; } @@ -101,7 +124,7 @@ static void check_fallback(struct brw_context *brw) const struct brw_tracked_state brw_check_fallback = { .dirty = { .mesa = _NEW_BUFFERS | _NEW_RENDERMODE | _NEW_TEXTURE | _NEW_STENCIL, - .brw = BRW_NEW_METAOPS, + .brw = 0, .cache = 0 }, .prepare = check_fallback diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index a8b74a0afe..48c2b9a41c 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -54,12 +54,17 @@ static void compile_gs_prog( struct brw_context *brw, memset(&c, 0, sizeof(c)); c.key = *key; - + c.need_ff_sync = BRW_IS_IGDNG(brw); /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.nr_attrs = brw_count_bits(c.key.attrs); - c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + + if (BRW_IS_IGDNG(brw)) + c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ + else + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + c.nr_bytes = c.nr_regs * REG_SIZE; diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index 18a4537c32..bbb991ea2e 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -62,6 +62,7 @@ struct brw_gs_compile { GLuint nr_attrs; GLuint nr_regs; GLuint nr_bytes; + GLboolean need_ff_sync; }; #define ATTR_SIZE (4*4) diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index 22e0d25c2e..a9b2aa2eac 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -101,6 +101,23 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c, BRW_URB_SWIZZLE_NONE); } +static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) +{ + struct brw_compile *p = &c->func; + brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim)); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, + 1, /* used */ + 1, /* msg length */ + 1, /* response length */ + 0, /* eot */ + 1, /* write compelete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} void brw_gs_quads( struct brw_gs_compile *c ) @@ -110,6 +127,8 @@ void brw_gs_quads( struct brw_gs_compile *c ) /* Use polygons for correct edgeflag behaviour. Note that vertex 3 * is the PV for quads, but vertex 0 for polygons: */ + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); @@ -120,6 +139,8 @@ void brw_gs_quad_strip( struct brw_gs_compile *c ) { brw_gs_alloc_regs(c, 4); + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); @@ -129,6 +150,9 @@ void brw_gs_quad_strip( struct brw_gs_compile *c ) void brw_gs_tris( struct brw_gs_compile *c ) { brw_gs_alloc_regs(c, 3); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END)); @@ -137,6 +161,9 @@ void brw_gs_tris( struct brw_gs_compile *c ) void brw_gs_lines( struct brw_gs_compile *c ) { brw_gs_alloc_regs(c, 2); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); } @@ -144,6 +171,9 @@ void brw_gs_lines( struct brw_gs_compile *c ) void brw_gs_points( struct brw_gs_compile *c ) { brw_gs_alloc_regs(c, 1); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); } diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 27023cf034..ed9d2ffe60 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -93,7 +93,13 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) gs.thread4.nr_urb_entries = key->nr_urb_entries; gs.thread4.urb_entry_allocation_size = key->urb_size - 1; - gs.thread4.max_threads = 0; /* Hardware requirement */ + if (key->nr_urb_entries >= 8) + gs.thread4.max_threads = 1; + else + gs.thread4.max_threads = 0; + + if (BRW_IS_IGDNG(brw)) + gs.thread4.rendering_enable = 1; if (INTEL_DEBUG & DEBUG_STATS) gs.thread4.stats_enable = 1; diff --git a/src/mesa/drivers/dri/i965/brw_metaops.c b/src/mesa/drivers/dri/i965/brw_metaops.c deleted file mode 100644 index 41bfa2e256..0000000000 --- a/src/mesa/drivers/dri/i965/brw_metaops.c +++ /dev/null @@ -1,583 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - * frame buffer texture by Gary Wong <gtw@gnu.org> - */ - - - -#include "main/glheader.h" -#include "main/context.h" -#include "main/macros.h" - -#include "shader/arbprogparse.h" - -#include "intel_screen.h" -#include "intel_batchbuffer.h" -#include "intel_regions.h" -#include "intel_buffers.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_draw.h" -#include "brw_fallback.h" - -#define INIT(brw, STRUCT, ATTRIB) \ -do { \ - brw->attribs.ATTRIB = &ctx->ATTRIB; \ -} while (0) - -#define DUP(brw, STRUCT, ATTRIB) \ -do { \ - brw->metaops.attribs.ATTRIB = MALLOC_STRUCT(STRUCT); \ - memcpy(brw->metaops.attribs.ATTRIB, \ - brw->attribs.ATTRIB, \ - sizeof(struct STRUCT)); \ -} while (0) - - -#define INSTALL(brw, ATTRIB, STATE) \ -do { \ - brw->attribs.ATTRIB = brw->metaops.attribs.ATTRIB; \ - brw->state.dirty.mesa |= STATE; \ -} while (0) - -#define RESTORE(brw, ATTRIB, STATE) \ -do { \ - brw->attribs.ATTRIB = &brw->intel.ctx.ATTRIB; \ - brw->state.dirty.mesa |= STATE; \ -} while (0) - -static void init_attribs( struct brw_context *brw ) -{ - DUP(brw, gl_colorbuffer_attrib, Color); - DUP(brw, gl_depthbuffer_attrib, Depth); - DUP(brw, gl_fog_attrib, Fog); - DUP(brw, gl_hint_attrib, Hint); - DUP(brw, gl_light_attrib, Light); - DUP(brw, gl_line_attrib, Line); - DUP(brw, gl_point_attrib, Point); - DUP(brw, gl_polygon_attrib, Polygon); - DUP(brw, gl_scissor_attrib, Scissor); - DUP(brw, gl_stencil_attrib, Stencil); - DUP(brw, gl_texture_attrib, Texture); - DUP(brw, gl_transform_attrib, Transform); - DUP(brw, gl_viewport_attrib, Viewport); - DUP(brw, gl_vertex_program_state, VertexProgram); - DUP(brw, gl_fragment_program_state, FragmentProgram); -} - -static void install_attribs( struct brw_context *brw ) -{ - INSTALL(brw, Color, _NEW_COLOR); - INSTALL(brw, Depth, _NEW_DEPTH); - INSTALL(brw, Fog, _NEW_FOG); - INSTALL(brw, Hint, _NEW_HINT); - INSTALL(brw, Light, _NEW_LIGHT); - INSTALL(brw, Line, _NEW_LINE); - INSTALL(brw, Point, _NEW_POINT); - INSTALL(brw, Polygon, _NEW_POLYGON); - INSTALL(brw, Scissor, _NEW_SCISSOR); - INSTALL(brw, Stencil, _NEW_STENCIL); - INSTALL(brw, Texture, _NEW_TEXTURE); - INSTALL(brw, Transform, _NEW_TRANSFORM); - INSTALL(brw, Viewport, _NEW_VIEWPORT); - INSTALL(brw, VertexProgram, _NEW_PROGRAM); - INSTALL(brw, FragmentProgram, _NEW_PROGRAM); -} - -static void restore_attribs( struct brw_context *brw ) -{ - RESTORE(brw, Color, _NEW_COLOR); - RESTORE(brw, Depth, _NEW_DEPTH); - RESTORE(brw, Fog, _NEW_FOG); - RESTORE(brw, Hint, _NEW_HINT); - RESTORE(brw, Light, _NEW_LIGHT); - RESTORE(brw, Line, _NEW_LINE); - RESTORE(brw, Point, _NEW_POINT); - RESTORE(brw, Polygon, _NEW_POLYGON); - RESTORE(brw, Scissor, _NEW_SCISSOR); - RESTORE(brw, Stencil, _NEW_STENCIL); - RESTORE(brw, Texture, _NEW_TEXTURE); - RESTORE(brw, Transform, _NEW_TRANSFORM); - RESTORE(brw, Viewport, _NEW_VIEWPORT); - RESTORE(brw, VertexProgram, _NEW_PROGRAM); - RESTORE(brw, FragmentProgram, _NEW_PROGRAM); -} - - -static const char *vp_prog = - "!!ARBvp1.0\n" - "MOV result.color, vertex.color;\n" - "MOV result.position, vertex.position;\n" - "END\n"; - -static const char *fp_prog = - "!!ARBfp1.0\n" - "MOV result.color, fragment.color;\n" - "END\n"; - -static const char *fp_tex_prog = - "!!ARBfp1.0\n" - "TEMP a;\n" - "ADD a, fragment.position, program.local[0];\n" - "MUL a, a, program.local[1];\n" - "TEX result.color, a, texture[0], 2D;\n" - "MOV result.depth.z, fragment.position;\n" - "END\n"; - -/* Derived values of importance: - * - * FragmentProgram->_Current - * VertexProgram->_Enabled - * brw->vertex_program - * DrawBuffer->_ColorDrawBufferIndexes[0] - * - * - * More if drawpixels-through-texture is added. - */ -static void init_metaops_state( struct brw_context *brw ) -{ - GLcontext *ctx = &brw->intel.ctx; - - brw->metaops.vbo = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB); - - ctx->Driver.BufferData(ctx, - GL_ARRAY_BUFFER_ARB, - 4096, - NULL, - GL_DYNAMIC_DRAW_ARB, - brw->metaops.vbo); - - brw->metaops.fp = (struct gl_fragment_program *) - ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 1 ); - - brw->metaops.fp_tex = (struct gl_fragment_program *) - ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 1 ); - - brw->metaops.vp = (struct gl_vertex_program *) - ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 1 ); - - _mesa_parse_arb_fragment_program(ctx, GL_FRAGMENT_PROGRAM_ARB, - fp_prog, strlen(fp_prog), - brw->metaops.fp); - - _mesa_parse_arb_fragment_program(ctx, GL_FRAGMENT_PROGRAM_ARB, - fp_tex_prog, strlen(fp_tex_prog), - brw->metaops.fp_tex); - - _mesa_parse_arb_vertex_program(ctx, GL_VERTEX_PROGRAM_ARB, - vp_prog, strlen(vp_prog), - brw->metaops.vp); - - brw->metaops.attribs.VertexProgram->_Current = brw->metaops.vp; - brw->metaops.attribs.VertexProgram->_Enabled = GL_TRUE; - - brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp; -} - -static void meta_flat_shade( struct intel_context *intel ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - brw->metaops.attribs.Light->ShadeModel = GL_FLAT; - brw->state.dirty.mesa |= _NEW_LIGHT; -} - - -static void meta_no_stencil_write( struct intel_context *intel ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - brw->metaops.attribs.Stencil->Enabled = GL_FALSE; - brw->metaops.attribs.Stencil->WriteMask[0] = GL_FALSE; - brw->state.dirty.mesa |= _NEW_STENCIL; -} - -static void meta_no_depth_write( struct intel_context *intel ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - brw->metaops.attribs.Depth->Test = GL_FALSE; - brw->metaops.attribs.Depth->Mask = GL_FALSE; - brw->state.dirty.mesa |= _NEW_DEPTH; -} - - -static void meta_depth_replace( struct intel_context *intel ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - /* ctx->Driver.Enable( ctx, GL_DEPTH_TEST, GL_TRUE ) - * ctx->Driver.DepthMask( ctx, GL_TRUE ) - */ - brw->metaops.attribs.Depth->Test = GL_TRUE; - brw->metaops.attribs.Depth->Mask = GL_TRUE; - brw->state.dirty.mesa |= _NEW_DEPTH; - - /* ctx->Driver.DepthFunc( ctx, GL_ALWAYS ) - */ - brw->metaops.attribs.Depth->Func = GL_ALWAYS; - - brw->state.dirty.mesa |= _NEW_DEPTH; -} - - -static void meta_stencil_replace( struct intel_context *intel, - GLuint s_mask, - GLuint s_clear) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - brw->metaops.attribs.Stencil->Enabled = GL_TRUE; - brw->metaops.attribs.Stencil->WriteMask[0] = s_mask; - brw->metaops.attribs.Stencil->ValueMask[0] = 0xff; - brw->metaops.attribs.Stencil->Ref[0] = s_clear; - brw->metaops.attribs.Stencil->Function[0] = GL_ALWAYS; - brw->metaops.attribs.Stencil->FailFunc[0] = GL_REPLACE; - brw->metaops.attribs.Stencil->ZPassFunc[0] = GL_REPLACE; - brw->metaops.attribs.Stencil->ZFailFunc[0] = GL_REPLACE; - brw->state.dirty.mesa |= _NEW_STENCIL; -} - - -static void meta_color_mask( struct intel_context *intel, GLboolean state ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - if (state) - COPY_4V(brw->metaops.attribs.Color->ColorMask, - brw->intel.ctx.Color.ColorMask); - else - ASSIGN_4V(brw->metaops.attribs.Color->ColorMask, 0, 0, 0, 0); - - brw->state.dirty.mesa |= _NEW_COLOR; -} - -static void meta_no_texture( struct intel_context *intel ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp; - - brw->metaops.attribs.Texture->CurrentUnit = 0; - brw->metaops.attribs.Texture->_EnabledUnits = 0; - brw->metaops.attribs.Texture->_EnabledCoordUnits = 0; - brw->metaops.attribs.Texture->Unit[ 0 ].Enabled = 0; - brw->metaops.attribs.Texture->Unit[ 0 ]._ReallyEnabled = 0; - - brw->state.dirty.mesa |= _NEW_TEXTURE | _NEW_PROGRAM; -} - -static void meta_texture_blend_replace(struct intel_context *intel) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - brw->metaops.attribs.Texture->CurrentUnit = 0; - brw->metaops.attribs.Texture->_EnabledUnits = 1; - brw->metaops.attribs.Texture->_EnabledCoordUnits = 1; - brw->metaops.attribs.Texture->Unit[ 0 ].Enabled = TEXTURE_2D_BIT; - brw->metaops.attribs.Texture->Unit[ 0 ]._ReallyEnabled = TEXTURE_2D_BIT; - brw->metaops.attribs.Texture->Unit[ 0 ].Current2D = - intel->frame_buffer_texobj; - brw->metaops.attribs.Texture->Unit[ 0 ]._Current = - intel->frame_buffer_texobj; - - brw->state.dirty.mesa |= _NEW_TEXTURE | _NEW_PROGRAM; -} - -static void meta_import_pixel_state(struct intel_context *intel) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - RESTORE(brw, Color, _NEW_COLOR); - RESTORE(brw, Depth, _NEW_DEPTH); - RESTORE(brw, Fog, _NEW_FOG); - RESTORE(brw, Scissor, _NEW_SCISSOR); - RESTORE(brw, Stencil, _NEW_STENCIL); - RESTORE(brw, Texture, _NEW_TEXTURE); - RESTORE(brw, FragmentProgram, _NEW_PROGRAM); -} - -static void meta_frame_buffer_texture( struct intel_context *intel, - GLint xoff, GLint yoff ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - struct intel_region *region = intel_drawbuf_region( intel ); - - INSTALL(brw, FragmentProgram, _NEW_PROGRAM); - - brw->metaops.attribs.FragmentProgram->_Current = brw->metaops.fp_tex; - /* This is unfortunate, but seems to be necessary, since later on we - will end up calling _mesa_load_state_parameters to lookup the - local params (below), and that will want to look in ctx.FragmentProgram - instead of brw->attribs.FragmentProgram. */ - intel->ctx.FragmentProgram.Current = brw->metaops.fp_tex; - - brw->metaops.fp_tex->Base.LocalParams[ 0 ][ 0 ] = xoff; - brw->metaops.fp_tex->Base.LocalParams[ 0 ][ 1 ] = yoff; - brw->metaops.fp_tex->Base.LocalParams[ 0 ][ 2 ] = 0.0; - brw->metaops.fp_tex->Base.LocalParams[ 0 ][ 3 ] = 0.0; - brw->metaops.fp_tex->Base.LocalParams[ 1 ][ 0 ] = - 1.0 / region->pitch; - brw->metaops.fp_tex->Base.LocalParams[ 1 ][ 1 ] = - -1.0 / region->height; - brw->metaops.fp_tex->Base.LocalParams[ 1 ][ 2 ] = 0.0; - brw->metaops.fp_tex->Base.LocalParams[ 1 ][ 3 ] = 1.0; - - brw->state.dirty.mesa |= _NEW_PROGRAM; -} - - -static void meta_draw_region( struct intel_context *intel, - struct intel_region *draw_region, - struct intel_region *depth_region ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - if (!brw->metaops.saved_draw_region) { - brw->metaops.saved_draw_region = brw->state.draw_regions[0]; - brw->metaops.saved_nr_draw_regions = brw->state.nr_draw_regions; - brw->metaops.saved_depth_region = brw->state.depth_region; - } - - brw->state.draw_regions[0] = draw_region; - brw->state.nr_draw_regions = 1; - brw->state.depth_region = depth_region; - - if (intel->frame_buffer_texobj != NULL) - brw_FrameBufferTexDestroy(brw); - - if (draw_region) - brw_FrameBufferTexInit(brw, draw_region); - - brw->state.dirty.mesa |= _NEW_BUFFERS; -} - - -static void meta_draw_quad(struct intel_context *intel, - GLfloat x0, GLfloat x1, - GLfloat y0, GLfloat y1, - GLfloat z, - GLuint color, - GLfloat s0, GLfloat s1, - GLfloat t0, GLfloat t1) -{ - GLcontext *ctx = &intel->ctx; - struct brw_context *brw = brw_context(&intel->ctx); - struct gl_client_array pos_array; - struct gl_client_array color_array; - struct gl_client_array *attribs[VERT_ATTRIB_MAX]; - struct _mesa_prim prim[1]; - GLfloat pos[4][3]; - - ctx->Driver.BufferData(ctx, - GL_ARRAY_BUFFER_ARB, - sizeof(pos) + sizeof(color), - NULL, - GL_DYNAMIC_DRAW_ARB, - brw->metaops.vbo); - - pos[0][0] = x0; - pos[0][1] = y0; - pos[0][2] = z; - - pos[1][0] = x1; - pos[1][1] = y0; - pos[1][2] = z; - - pos[2][0] = x1; - pos[2][1] = y1; - pos[2][2] = z; - - pos[3][0] = x0; - pos[3][1] = y1; - pos[3][2] = z; - - ctx->Driver.BufferSubData(ctx, - GL_ARRAY_BUFFER_ARB, - 0, - sizeof(pos), - pos, - brw->metaops.vbo); - - /* Convert incoming ARGB to required RGBA */ - /* Note this color is stored as GL_UNSIGNED_BYTE */ - color = (color & 0xff00ff00) | (((color >> 16) | (color << 16)) & 0xff00ff); - - ctx->Driver.BufferSubData(ctx, - GL_ARRAY_BUFFER_ARB, - sizeof(pos), - sizeof(color), - &color, - brw->metaops.vbo); - - /* Ignoring texture coords. - */ - - memset(attribs, 0, VERT_ATTRIB_MAX * sizeof(*attribs)); - - attribs[VERT_ATTRIB_POS] = &pos_array; - attribs[VERT_ATTRIB_POS]->Ptr = 0; - attribs[VERT_ATTRIB_POS]->Type = GL_FLOAT; - attribs[VERT_ATTRIB_POS]->Enabled = 1; - attribs[VERT_ATTRIB_POS]->Size = 3; - attribs[VERT_ATTRIB_POS]->StrideB = 3 * sizeof(GLfloat); - attribs[VERT_ATTRIB_POS]->Stride = 3 * sizeof(GLfloat); - attribs[VERT_ATTRIB_POS]->_MaxElement = 4; - attribs[VERT_ATTRIB_POS]->Normalized = 0; - attribs[VERT_ATTRIB_POS]->BufferObj = brw->metaops.vbo; - - attribs[VERT_ATTRIB_COLOR0] = &color_array; - attribs[VERT_ATTRIB_COLOR0]->Ptr = (const GLubyte *)sizeof(pos); - attribs[VERT_ATTRIB_COLOR0]->Type = GL_UNSIGNED_BYTE; - attribs[VERT_ATTRIB_COLOR0]->Enabled = 1; - attribs[VERT_ATTRIB_COLOR0]->Size = 4; - attribs[VERT_ATTRIB_COLOR0]->StrideB = 0; - attribs[VERT_ATTRIB_COLOR0]->Stride = 0; - attribs[VERT_ATTRIB_COLOR0]->_MaxElement = 1; - attribs[VERT_ATTRIB_COLOR0]->Normalized = 1; - attribs[VERT_ATTRIB_COLOR0]->BufferObj = brw->metaops.vbo; - - /* Just ignoring texture coordinates for now. - */ - - memset(prim, 0, sizeof(*prim)); - - prim[0].mode = GL_TRIANGLE_FAN; - prim[0].begin = 1; - prim[0].end = 1; - prim[0].weak = 0; - prim[0].pad = 0; - prim[0].start = 0; - prim[0].count = 4; - - brw_draw_prims(&brw->intel.ctx, - (const struct gl_client_array **)attribs, - prim, 1, - NULL, - 0, - 3 ); -} - - -static void install_meta_state( struct intel_context *intel ) -{ - GLcontext *ctx = &intel->ctx; - struct brw_context *brw = brw_context(ctx); - GLuint i; - - if (!brw->metaops.vbo) { - init_metaops_state(brw); - } - - install_attribs(brw); - - meta_no_texture(&brw->intel); - meta_flat_shade(&brw->intel); - for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { - brw->metaops.restore_draw_buffers[i] - = ctx->DrawBuffer->_ColorDrawBufferIndexes[i]; - } - brw->metaops.restore_num_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers; - - brw->metaops.restore_fp = ctx->FragmentProgram.Current; - - /* This works without adjusting refcounts. Fix later? - */ - brw->metaops.saved_draw_region = brw->state.draw_regions[0]; - brw->metaops.saved_nr_draw_regions = brw->state.nr_draw_regions; - brw->metaops.saved_depth_region = brw->state.depth_region; - brw->metaops.active = 1; - - brw->state.dirty.brw |= BRW_NEW_METAOPS; -} - -static void leave_meta_state( struct intel_context *intel ) -{ - GLcontext *ctx = &intel->ctx; - struct brw_context *brw = brw_context(ctx); - GLuint i; - - restore_attribs(brw); - - for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { - ctx->DrawBuffer->_ColorDrawBufferIndexes[i] - = brw->metaops.restore_draw_buffers[i]; - } - ctx->DrawBuffer->_NumColorDrawBuffers = brw->metaops.restore_num_draw_buffers; - - ctx->FragmentProgram.Current = brw->metaops.restore_fp; - - brw->state.draw_regions[0] = brw->metaops.saved_draw_region; - brw->state.nr_draw_regions = brw->metaops.saved_nr_draw_regions; - brw->state.depth_region = brw->metaops.saved_depth_region; - brw->metaops.saved_draw_region = NULL; - brw->metaops.saved_depth_region = NULL; - brw->metaops.active = 0; - - brw->state.dirty.mesa |= _NEW_BUFFERS; - brw->state.dirty.brw |= BRW_NEW_METAOPS; -} - - - -void brw_init_metaops( struct brw_context *brw ) -{ - init_attribs(brw); - - - brw->intel.vtbl.install_meta_state = install_meta_state; - brw->intel.vtbl.leave_meta_state = leave_meta_state; - brw->intel.vtbl.meta_no_depth_write = meta_no_depth_write; - brw->intel.vtbl.meta_no_stencil_write = meta_no_stencil_write; - brw->intel.vtbl.meta_stencil_replace = meta_stencil_replace; - brw->intel.vtbl.meta_depth_replace = meta_depth_replace; - brw->intel.vtbl.meta_color_mask = meta_color_mask; - brw->intel.vtbl.meta_no_texture = meta_no_texture; - brw->intel.vtbl.meta_import_pixel_state = meta_import_pixel_state; - brw->intel.vtbl.meta_frame_buffer_texture = meta_frame_buffer_texture; - brw->intel.vtbl.meta_draw_region = meta_draw_region; - brw->intel.vtbl.meta_draw_quad = meta_draw_quad; - brw->intel.vtbl.meta_texture_blend_replace = meta_texture_blend_replace; -/* brw->intel.vtbl.meta_tex_rect_source = meta_tex_rect_source; */ -/* brw->intel.vtbl.meta_draw_format = set_draw_format; */ -} - -void brw_destroy_metaops( struct brw_context *brw ) -{ - GLcontext *ctx = &brw->intel.ctx; - - if (brw->metaops.vbo) - ctx->Driver.DeleteBuffer( ctx, brw->metaops.vbo ); - -/* ctx->Driver.DeleteProgram( ctx, brw->metaops.fp ); */ -/* ctx->Driver.DeleteProgram( ctx, brw->metaops.fp_tex ); */ -/* ctx->Driver.DeleteProgram( ctx, brw->metaops.vp ); */ -} diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 627705fa9b..ea71857548 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -48,15 +48,16 @@ static void upload_blend_constant_color(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; struct brw_blend_constant_color bcc; memset(&bcc, 0, sizeof(bcc)); bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; bcc.header.length = sizeof(bcc)/4-2; - bcc.blend_constant_color[0] = brw->attribs.Color->BlendColor[0]; - bcc.blend_constant_color[1] = brw->attribs.Color->BlendColor[1]; - bcc.blend_constant_color[2] = brw->attribs.Color->BlendColor[2]; - bcc.blend_constant_color[3] = brw->attribs.Color->BlendColor[3]; + bcc.blend_constant_color[0] = ctx->Color.BlendColor[0]; + bcc.blend_constant_color[1] = ctx->Color.BlendColor[1]; + bcc.blend_constant_color[2] = ctx->Color.BlendColor[2]; + bcc.blend_constant_color[3] = ctx->Color.BlendColor[3]; BRW_CACHED_BATCH_STRUCT(brw, &bcc); } @@ -100,6 +101,7 @@ const struct brw_tracked_state brw_drawing_rect = { static void prepare_binding_table_pointers(struct brw_context *brw) { + brw_add_validated_bo(brw, brw->vs.bind_bo); brw_add_validated_bo(brw, brw->wm.bind_bo); } @@ -116,13 +118,14 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); - OUT_BATCH(0); /* vs */ + if (brw->vs.bind_bo != NULL) + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + else + OUT_BATCH(0); OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ - OUT_RELOC(brw->wm.bind_bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0); + OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ ADVANCE_BATCH(); } @@ -154,10 +157,7 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); else OUT_BATCH(0); - if (!brw->metaops.active) - OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); - else - OUT_BATCH(0); + OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); @@ -172,6 +172,7 @@ static void prepare_psp_urb_cbs(struct brw_context *brw) brw_add_validated_bo(brw, brw->vs.state_bo); brw_add_validated_bo(brw, brw->gs.state_bo); brw_add_validated_bo(brw, brw->clip.state_bo); + brw_add_validated_bo(brw, brw->sf.state_bo); brw_add_validated_bo(brw, brw->wm.state_bo); brw_add_validated_bo(brw, brw->cc.state_bo); } @@ -180,13 +181,13 @@ static void upload_psp_urb_cbs(struct brw_context *brw ) { upload_pipelined_state_pointers(brw); brw_upload_urb_fence(brw); - brw_upload_constant_buffer_state(brw); + brw_upload_cs_urb_state(brw); } const struct brw_tracked_state brw_psp_urb_cbs = { .dirty = { .mesa = 0, - .brw = BRW_NEW_URB_FENCE | BRW_NEW_METAOPS | BRW_NEW_BATCH, + .brw = BRW_NEW_URB_FENCE | BRW_NEW_BATCH, .cache = (CACHE_NEW_VS_UNIT | CACHE_NEW_GS_UNIT | CACHE_NEW_GS_PROG | @@ -211,7 +212,7 @@ static void emit_depthbuffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct intel_region *region = brw->state.depth_region; - unsigned int len = BRW_IS_G4X(brw) ? 6 : 5; + unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5; if (region == NULL) { BEGIN_BATCH(len, IGNORE_CLIPRECTS); @@ -222,7 +223,7 @@ static void emit_depthbuffer(struct brw_context *brw) OUT_BATCH(0); OUT_BATCH(0); - if (BRW_IS_G4X(brw)) + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) OUT_BATCH(0); ADVANCE_BATCH(); @@ -244,6 +245,8 @@ static void emit_depthbuffer(struct brw_context *brw) return; } + assert(region->tiling != I915_TILING_X); + BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | @@ -259,7 +262,7 @@ static void emit_depthbuffer(struct brw_context *brw) ((region->height - 1) << 19)); OUT_BATCH(0); - if (BRW_IS_G4X(brw)) + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) OUT_BATCH(0); ADVANCE_BATCH(); @@ -284,6 +287,7 @@ const struct brw_tracked_state brw_depthbuffer = { static void upload_polygon_stipple(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; struct brw_polygon_stipple bps; GLuint i; @@ -291,8 +295,21 @@ static void upload_polygon_stipple(struct brw_context *brw) bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; bps.header.length = sizeof(bps)/4-2; - for (i = 0; i < 32; i++) - bps.stipple[i] = brw->attribs.PolygonStipple[31 - i]; /* invert */ + /* Polygon stipple is provided in OpenGL order, i.e. bottom + * row first. If we're rendering to a window (i.e. the + * default frame buffer object, 0), then we need to invert + * it to match our pixel layout. But if we're rendering + * to a FBO (i.e. any named frame buffer object), we *don't* + * need to invert - we already match the layout. + */ + if (ctx->DrawBuffer->Name == 0) { + for (i = 0; i < 32; i++) + bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */ + } + else { + for (i = 0; i < 32; i++) + bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */ + } BRW_CACHED_BATCH_STRUCT(brw, &bps); } @@ -320,8 +337,22 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; bpso.header.length = sizeof(bpso)/4-2; - bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31; - bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31; + /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0), + * we have to invert the Y axis in order to match the OpenGL + * pixel coordinate system, and our offset must be matched + * to the window position. If we're drawing to a FBO + * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate + * system works just fine, and there's no window system to + * worry about. + */ + if (brw->intel.ctx.DrawBuffer->Name == 0) { + bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31; + bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31; + } + else { + bpso.bits0.y_offset = 0; + bpso.bits0.x_offset = 0; + } BRW_CACHED_BATCH_STRUCT(brw, &bpso); } @@ -344,7 +375,7 @@ static void upload_aa_line_parameters(struct brw_context *brw) { struct brw_aa_line_parameters balp; - if (!BRW_IS_G4X(brw)) + if (BRW_IS_965(brw)) return; /* use legacy aa line coverage computation */ @@ -370,6 +401,7 @@ const struct brw_tracked_state brw_aa_line_parameters = { static void upload_line_stipple(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; struct brw_line_stipple bls; GLfloat tmp; GLint tmpi; @@ -378,10 +410,10 @@ static void upload_line_stipple(struct brw_context *brw) bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; bls.header.length = sizeof(bls)/4 - 2; - bls.bits0.pattern = brw->attribs.Line->StipplePattern; - bls.bits1.repeat_count = brw->attribs.Line->StippleFactor; + bls.bits0.pattern = ctx->Line.StipplePattern; + bls.bits1.repeat_count = ctx->Line.StippleFactor; - tmp = 1.0 / (GLfloat) brw->attribs.Line->StippleFactor; + tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor; tmpi = tmp * (1<<13); @@ -480,14 +512,27 @@ static void upload_state_base_address( struct brw_context *brw ) /* Output the structure (brw_state_base_address) directly to the * batchbuffer, so we can emit relocations inline. */ - BEGIN_BATCH(6, IGNORE_CLIPRECTS); - OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); - OUT_BATCH(1); /* General state base address */ - OUT_BATCH(1); /* Surface state base address */ - OUT_BATCH(1); /* Indirect object base address */ - OUT_BATCH(1); /* General state upper bound */ - OUT_BATCH(1); /* Indirect object upper bound */ - ADVANCE_BATCH(); + if (BRW_IS_IGDNG(brw)) { + BEGIN_BATCH(8, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* Instruction base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + OUT_BATCH(1); /* Instruction access upper bound */ + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(6, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + ADVANCE_BATCH(); + } } const struct brw_tracked_state brw_state_base_address = { diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 0c86911044..bac69187c1 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -38,6 +38,7 @@ #include "brw_context.h" #include "brw_util.h" +#include "brw_wm.h" static void brwBindProgram( GLcontext *ctx, GLenum target, @@ -94,7 +95,12 @@ static struct gl_program *brwNewProgram( GLcontext *ctx, static void brwDeleteProgram( GLcontext *ctx, struct gl_program *prog ) { - + if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; + struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog); + dri_bo_unreference(brw_fprog->const_buffer); + } + _mesa_delete_program( ctx, prog ); } @@ -110,30 +116,36 @@ static void brwProgramStringNotify( GLcontext *ctx, GLenum target, struct gl_program *prog ) { + struct brw_context *brw = brw_context(ctx); + if (target == GL_FRAGMENT_PROGRAM_ARB) { struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; - struct brw_context *brw = brw_context(ctx); - struct brw_fragment_program *p = (struct brw_fragment_program *)prog; - struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; + struct brw_fragment_program *newFP = brw_fragment_program(fprog); + const struct brw_fragment_program *curFP = + brw_fragment_program_const(brw->fragment_program); + if (fprog->FogOption) { _mesa_append_fog_code(ctx, fprog); fprog->FogOption = GL_NONE; } - if (p == fp) + if (newFP == curFP) brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; - p->id = brw->program_id++; + newFP->id = brw->program_id++; + newFP->isGLSL = brw_wm_is_glsl(fprog); } else if (target == GL_VERTEX_PROGRAM_ARB) { - struct brw_context *brw = brw_context(ctx); - struct brw_vertex_program *p = (struct brw_vertex_program *)prog; - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - if (p == vp) + struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; + struct brw_vertex_program *newVP = brw_vertex_program(vprog); + const struct brw_vertex_program *curVP = + brw_vertex_program_const(brw->vertex_program); + + if (newVP == curVP) brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; - if (p->program.IsPositionInvariant) { - _mesa_insert_mvp_code(ctx, &p->program); + if (newVP->program.IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, &newVP->program); } - p->id = brw->program_id++; + newVP->id = brw->program_id++; /* Also tell tnl about it: */ diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index cb9169e2ee..a195bc32b0 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -146,17 +146,12 @@ static void brw_wait_query(GLcontext *ctx, struct gl_query_object *q) static void brw_check_query(GLcontext *ctx, struct gl_query_object *q) { - /* XXX: Need to expose dri_bo_is_idle from bufmgr. */ -#if 0 struct brw_query_object *query = (struct brw_query_object *)q; - if (dri_bo_is_idle(query->bo)) { + if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) { brw_queryobj_get_results(query); query->Base.Ready = GL_TRUE; } -#else - brw_wait_query(ctx, q); -#endif } /** Called to set up the query BO and account for its aperture space */ diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 1a11d54621..e1c2c7777b 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -46,6 +46,7 @@ static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { + GLcontext *ctx = &brw->intel.ctx; struct brw_sf_compile c; const GLuint *program; GLuint program_size; @@ -74,7 +75,7 @@ static void compile_sf_prog( struct brw_context *brw, c.idx_to_attr[idx] = i; if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { c.point_attrs[i].CoordReplace = - brw->attribs.Point->CoordReplace[i - VERT_RESULT_TEX0]; + ctx->Point.CoordReplace[i - VERT_RESULT_TEX0]; } else { c.point_attrs[i].CoordReplace = GL_FALSE; @@ -128,6 +129,7 @@ static void compile_sf_prog( struct brw_context *brw, */ static void upload_sf_prog(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; struct brw_sf_prog_key key; memset(&key, 0, sizeof(key)); @@ -158,15 +160,24 @@ static void upload_sf_prog(struct brw_context *brw) break; } - key.do_point_sprite = brw->attribs.Point->PointSprite; - key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; + key.do_point_sprite = ctx->Point.PointSprite; + key.SpriteOrigin = ctx->Point.SpriteOrigin; /* _NEW_LIGHT */ - key.do_flat_shading = (brw->attribs.Light->ShadeModel == GL_FLAT); - key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); + key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); + key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); + + /* _NEW_HINT */ + key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); /* _NEW_POLYGON */ - if (key.do_twoside_color) - key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); + if (key.do_twoside_color) { + /* If we're rendering to a FBO, we have to invert the polygon + * face orientation, just as we invert the viewport in + * sf_unit_create_from_key(). ctx->DrawBuffer->Name will be + * nonzero if we're rendering to such an FBO. + */ + key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0); + } dri_bo_unreference(brw->sf.prog_bo); brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG, @@ -180,7 +191,7 @@ static void upload_sf_prog(struct brw_context *brw) const struct brw_tracked_state brw_sf_prog = { .dirty = { - .mesa = (_NEW_LIGHT|_NEW_POLYGON|_NEW_POINT), + .mesa = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT), .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index 1c0fb70fe0..6426b6df9f 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -51,7 +51,8 @@ struct brw_sf_prog_key { GLuint do_flat_shading:1; GLuint frontface_ccw:1; GLuint do_point_sprite:1; - GLuint pad:10; + GLuint linear_color:1; /**< linear interp vs. perspective interp */ + GLuint pad:25; GLenum SpriteOrigin; }; diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index ffdb0ae6df..ca8f97f9f9 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -59,37 +59,6 @@ static GLboolean have_attr(struct brw_sf_compile *c, return (c->key.attrs & (1<<attr)) ? 1 : 0; } -/** - * Sets VERT_RESULT_FOGC.Y for gl_FrontFacing - * - * This is currently executed if the fragment program uses VERT_RESULT_FOGC - * at all, but this could be eliminated with a scan of the FP contents. - */ -static void -do_front_facing( struct brw_sf_compile *c ) -{ - struct brw_compile *p = &c->func; - int i; - - if (!have_attr(c, VERT_RESULT_FOGC)) - return; - - brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), - c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L, - c->det, brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - for (i = 0; i < 3; i++) { - struct brw_reg fogc = get_vert_attr(c, c->vert[i],FRAG_ATTRIB_FOGC); - brw_MOV(p, get_element(fogc, 1), brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, get_element(fogc, 1), brw_imm_f(1)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - brw_pop_insn_state(p); -} - - /*********************************************************************** * Twoside lighting */ @@ -182,6 +151,8 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); + GLuint jmpi = 1; + if (!nr) return; @@ -190,18 +161,21 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; + brw_push_insn_state(p); - brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1)); + brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1))); brw_JMPI(p, ip, ip, c->pv); copy_colors(c, c->vert[1], c->vert[0]); copy_colors(c, c->vert[2], c->vert[0]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1)); + brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1))); copy_colors(c, c->vert[0], c->vert[1]); copy_colors(c, c->vert[2], c->vert[1]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr*2)); + brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2)); copy_colors(c, c->vert[0], c->vert[2]); copy_colors(c, c->vert[1], c->vert[2]); @@ -215,7 +189,8 @@ static void do_flatshade_line( struct brw_sf_compile *c ) struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); - + GLuint jmpi = 1; + if (!nr) return; @@ -224,13 +199,16 @@ static void do_flatshade_line( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; + brw_push_insn_state(p); - brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1)); + brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); brw_JMPI(p, ip, ip, c->pv); copy_colors(c, c->vert[1], c->vert[0]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr)); + brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr)); copy_colors(c, c->vert[0], c->vert[1]); brw_pop_insn_state(p); @@ -249,7 +227,7 @@ static void alloc_regs( struct brw_sf_compile *c ) /* Values computed by fixed function unit: */ - c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD); + c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D); c->det = brw_vec1_grf(1, 2); c->dx0 = brw_vec1_grf(1, 3); c->dx2 = brw_vec1_grf(1, 4); @@ -326,9 +304,6 @@ static void invert_det( struct brw_sf_compile *c) } -#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \ - FRAG_BIT_COL0 | \ - FRAG_BIT_COL1) static GLboolean calculate_masks( struct brw_sf_compile *c, GLuint reg, @@ -337,9 +312,16 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, GLushort *pc_linear) { GLboolean is_last_attr = (reg == c->nr_setup_regs - 1); - GLuint persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS; + GLuint persp_mask; GLuint linear_mask; + if (c->key.do_flat_shading || c->key.linear_color) + persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS | + FRAG_BIT_COL0 | + FRAG_BIT_COL1); + else + persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS); + if (c->key.do_flat_shading) linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1); else @@ -384,7 +366,6 @@ void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate) invert_det(c); copy_z_inv_w(c); - do_front_facing(c); if (c->key.do_twoside_color) do_twoside_color(c); @@ -706,7 +687,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) (1<<_3DPRIM_POLYGON) | (1<<_3DPRIM_RECTLIST) | (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p->flag_value; brw_push_insn_state(p); @@ -727,7 +708,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) (1<<_3DPRIM_LINESTRIP_CONT) | (1<<_3DPRIM_LINESTRIP_BF) | (1<<_3DPRIM_LINESTRIP_CONT_BF))); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p->flag_value; brw_push_insn_state(p); @@ -740,7 +721,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p->flag_value; brw_push_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 242b7047a1..bc0f076073 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -43,10 +43,12 @@ static void upload_sf_vp(struct brw_context *brw) const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; struct brw_sf_viewport sfv; GLfloat y_scale, y_bias; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + const GLfloat *v = ctx->Viewport._WindowMap.m; memset(&sfv, 0, sizeof(sfv)); - if (intel_rendering_to_texture(ctx)) { + if (render_to_fbo) { y_scale = 1.0; y_bias = 0; } @@ -55,27 +57,18 @@ static void upload_sf_vp(struct brw_context *brw) y_bias = ctx->DrawBuffer->Height; } - /* _NEW_VIEWPORT, BRW_NEW_METAOPS */ + /* _NEW_VIEWPORT */ - if (!brw->metaops.active) { - const GLfloat *v = ctx->Viewport._WindowMap.m; + sfv.viewport.m00 = v[MAT_SX]; + sfv.viewport.m11 = v[MAT_SY] * y_scale; + sfv.viewport.m22 = v[MAT_SZ] * depth_scale; + sfv.viewport.m30 = v[MAT_TX]; + sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; + sfv.viewport.m32 = v[MAT_TZ] * depth_scale; - sfv.viewport.m00 = v[MAT_SX]; - sfv.viewport.m11 = v[MAT_SY] * y_scale; - sfv.viewport.m22 = v[MAT_SZ] * depth_scale; - sfv.viewport.m30 = v[MAT_TX]; - sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; - sfv.viewport.m32 = v[MAT_TZ] * depth_scale; - } else { - sfv.viewport.m00 = 1; - sfv.viewport.m11 = - 1; - sfv.viewport.m22 = 1; - sfv.viewport.m30 = 0; - sfv.viewport.m31 = ctx->DrawBuffer->Height; - sfv.viewport.m32 = 0; - } - - /* _NEW_SCISSOR */ + /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT + * for DrawBuffer->_[XY]{min,max} + */ /* The scissor only needs to handle the intersection of drawable and * scissor rect. Clipping to the boundaries of static shared buffers @@ -84,10 +77,20 @@ static void upload_sf_vp(struct brw_context *brw) * Note that the hardware's coordinates are inclusive, while Mesa's min is * inclusive but max is exclusive. */ - sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; - sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; - sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax; - sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; + if (render_to_fbo) { + /* texmemory: Y=0=bottom */ + sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; + sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + sfv.scissor.ymin = ctx->DrawBuffer->_Ymin; + sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1; + } + else { + /* memory: Y=0=top */ + sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; + sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax; + sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; + } dri_bo_unreference(brw->sf.vp_bo); brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); @@ -96,8 +99,9 @@ static void upload_sf_vp(struct brw_context *brw) const struct brw_tracked_state brw_sf_vp = { .dirty = { .mesa = (_NEW_VIEWPORT | - _NEW_SCISSOR), - .brw = BRW_NEW_METAOPS, + _NEW_SCISSOR | + _NEW_BUFFERS), + .brw = 0, .cache = 0 }, .prepare = upload_sf_vp @@ -109,16 +113,20 @@ struct brw_sf_unit_key { unsigned int nr_urb_entries, urb_size, sfsize; - GLenum front_face, cull_face; - GLboolean scissor, line_smooth, point_sprite, point_attenuated; + GLenum front_face, cull_face, provoking_vertex; + unsigned scissor:1; + unsigned line_smooth:1; + unsigned point_sprite:1; + unsigned point_attenuated:1; + unsigned render_to_fbo:1; float line_width; float point_size; - GLboolean render_to_texture; }; static void sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) { + GLcontext *ctx = &brw->intel.ctx; memset(key, 0, sizeof(*key)); /* CACHE_NEW_SF_PROG */ @@ -130,22 +138,25 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) key->urb_size = brw->urb.vsize; key->sfsize = brw->urb.sfsize; - key->scissor = brw->attribs.Scissor->Enabled; - key->front_face = brw->attribs.Polygon->FrontFace; + key->scissor = ctx->Scissor.Enabled; + key->front_face = ctx->Polygon.FrontFace; - if (brw->attribs.Polygon->CullFlag) - key->cull_face = brw->attribs.Polygon->CullFaceMode; + if (ctx->Polygon.CullFlag) + key->cull_face = ctx->Polygon.CullFaceMode; else key->cull_face = GL_NONE; - key->line_width = brw->attribs.Line->Width; - key->line_smooth = brw->attribs.Line->SmoothFlag; + key->line_width = ctx->Line.Width; + key->line_smooth = ctx->Line.SmoothFlag; + + key->point_sprite = ctx->Point.PointSprite; + key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); + key->point_attenuated = ctx->Point._Attenuated; - key->point_sprite = brw->attribs.Point->PointSprite; - key->point_size = brw->attribs.Point->Size; - key->point_attenuated = brw->attribs.Point->_Attenuated; + /* _NEW_LIGHT */ + key->provoking_vertex = ctx->Light.ProvokingVertex; - key->render_to_texture = intel_rendering_to_texture(&brw->intel.ctx); + key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; } static dri_bo * @@ -154,7 +165,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, { struct brw_sf_unit_state sf; dri_bo *bo; - + int chipset_max_threads; memset(&sf, 0, sizeof(sf)); sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; @@ -163,13 +174,26 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; sf.thread3.dispatch_grf_start_reg = 3; - sf.thread3.urb_entry_read_offset = 1; + + if (BRW_IS_IGDNG(brw)) + sf.thread3.urb_entry_read_offset = 3; + else + sf.thread3.urb_entry_read_offset = 1; + sf.thread3.urb_entry_read_length = key->urb_entry_read_length; sf.thread4.nr_urb_entries = key->nr_urb_entries; sf.thread4.urb_entry_allocation_size = key->sfsize - 1; - /* Each SF thread produces 1 PUE, and there can be up to 24 threads */ - sf.thread4.max_threads = MIN2(24, key->nr_urb_entries) - 1; + + /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or + * 48(IGDNG) threads + */ + if (BRW_IS_IGDNG(brw)) + chipset_max_threads = 48; + else + chipset_max_threads = 24; + + sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1; if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) sf.thread4.max_threads = 0; @@ -192,10 +216,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, else sf.sf5.front_winding = BRW_FRONTWINDING_CW; - /* The viewport is inverted for rendering to texture, and that inverts + /* The viewport is inverted for rendering to a FBO, and that inverts * polygon front/back orientation. */ - sf.sf5.front_winding ^= key->render_to_texture; + sf.sf5.front_winding ^= key->render_to_fbo; switch (key->cull_face) { case GL_FRONT: @@ -225,10 +249,37 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, else if (sf.sf6.line_width <= 0x2) sf.sf6.line_width = 0; - /* _NEW_POINT */ - sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; /* opengl conventions */ + /* _NEW_BUFFERS */ + key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; + if (!key->render_to_fbo) { + /* Rendering to an OpenGL window */ + sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; + } + else { + /* If rendering to an FBO, the pixel coordinate system is + * inverted with respect to the normal OpenGL coordinate + * system, so BRW_RASTRULE_LOWER_RIGHT is correct. + * But this value is listed as "Reserved, but not seen as useful" + * in Intel documentation (page 212, "Point Rasterization Rule", + * section 7.4 "SF Pipeline State Summary", of document + * "Intel® 965 Express Chipset Family and Intel® G35 Express + * Chipset Graphics Controller Programmer's Reference Manual, + * Volume 2: 3D/Media", Revision 1.0b as of January 2008, + * available at + * http://intellinuxgraphics.org/documentation.html + * at the time of this writing). + * + * It does work on at least some devices, if not all; + * if devices that don't support it can be identified, + * the likely failure case is that points are rasterized + * incorrectly, which is no worse than occurs without + * the value, so we're using it here. + */ + sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT; + } /* XXX clamp max depends on AA vs. non-AA */ + /* _NEW_POINT */ sf.sf7.sprite_point = key->point_sprite; sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3); sf.sf7.use_point_size_state = !key->point_attenuated; @@ -236,9 +287,15 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: */ - sf.sf7.trifan_pv = 2; - sf.sf7.linestrip_pv = 1; - sf.sf7.tristrip_pv = 2; + if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) { + sf.sf7.trifan_pv = 2; + sf.sf7.linestrip_pv = 1; + sf.sf7.tristrip_pv = 2; + } else { + sf.sf7.trifan_pv = 1; + sf.sf7.linestrip_pv = 0; + sf.sf7.tristrip_pv = 0; + } sf.sf7.line_last_pixel_enable = 0; /* Set bias for OpenGL rasterization rules: @@ -252,6 +309,9 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, &sf, sizeof(sf), NULL, NULL); + /* STATE_PREFETCH command description describes this state as being + * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. + */ /* Emit SF program relocation */ dri_bo_emit_reloc(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, @@ -292,11 +352,12 @@ static void upload_sf_unit( struct brw_context *brw ) const struct brw_tracked_state brw_sf_unit = { .dirty = { .mesa = (_NEW_POLYGON | + _NEW_LIGHT | _NEW_LINE | _NEW_POINT | - _NEW_SCISSOR), - .brw = (BRW_NEW_URB_FENCE | - BRW_NEW_METAOPS), + _NEW_SCISSOR | + _NEW_BUFFERS), + .brw = BRW_NEW_URB_FENCE, .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) }, diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index df839c5b30..78572356a3 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -52,7 +52,6 @@ const struct brw_tracked_state brw_cc_vp; const struct brw_tracked_state brw_check_fallback; const struct brw_tracked_state brw_clip_prog; const struct brw_tracked_state brw_clip_unit; -const struct brw_tracked_state brw_constant_buffer_state; const struct brw_tracked_state brw_constant_buffer; const struct brw_tracked_state brw_curbe_offsets; const struct brw_tracked_state brw_invarient_state; @@ -73,11 +72,13 @@ const struct brw_tracked_state brw_sf_vp; const struct brw_tracked_state brw_state_base_address; const struct brw_tracked_state brw_urb_fence; const struct brw_tracked_state brw_vertex_state; +const struct brw_tracked_state brw_vs_surfaces; const struct brw_tracked_state brw_vs_prog; const struct brw_tracked_state brw_vs_unit; const struct brw_tracked_state brw_wm_input_sizes; const struct brw_tracked_state brw_wm_prog; const struct brw_tracked_state brw_wm_samplers; +const struct brw_tracked_state brw_wm_constant_surface; const struct brw_tracked_state brw_wm_surfaces; const struct brw_tracked_state brw_wm_unit; @@ -91,6 +92,21 @@ const struct brw_tracked_state brw_clear_batch_cache; const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; +const struct brw_tracked_state brw_index_buffer; + +/** + * Use same key for WM and VS surfaces. + */ +struct brw_surface_key { + GLenum target, depthmode; + dri_bo *bo; + GLint format, internal_format; + GLint first_level, last_level; + GLint width, height, depth; + GLint pitch, cpp; + uint32_t tiling; + GLuint offset; +}; /*********************************************************************** * brw_state.c @@ -136,8 +152,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache, void *aux_return); void brw_state_cache_check_size( struct brw_context *brw ); -void brw_init_cache( struct brw_context *brw ); -void brw_destroy_cache( struct brw_context *brw ); +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw ); /*********************************************************************** * brw_state_batch.c @@ -151,4 +167,9 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, void brw_destroy_batch_cache( struct brw_context *brw ); void brw_clear_batch_cache_flush( struct brw_context *brw ); +/* brw_wm_surface_state.c */ +dri_bo * +brw_create_constant_surface( struct brw_context *brw, + struct brw_surface_key *key ); + #endif diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index dc87859f3f..811940edc0 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -97,8 +97,6 @@ void brw_clear_batch_cache_flush( struct brw_context *brw ) { clear_batch_cache(brw); -/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ - brw->state.dirty.mesa |= ~0; brw->state.dirty.brw |= ~0; brw->state.dirty.cache |= ~0; diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index d5b5166406..e40d7a0416 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -56,9 +56,9 @@ * incorrect program is run for the other instance. */ +#include "main/imports.h" #include "brw_state.h" #include "intel_batchbuffer.h" -#include "main/imports.h" /* XXX: Fixme - have to include these to get the sizes of the prog_key * structs: @@ -69,8 +69,10 @@ #include "brw_sf.h" #include "brw_gs.h" -static GLuint hash_key( const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) + +static GLuint +hash_key(const void *key, GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs) { GLuint *ikey = (GLuint *)key; GLuint hash = 0, i; @@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size, return hash; } + /** * Marks a new buffer as being chosen for the given cache id. */ @@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, cache->brw->state.dirty.cache |= 1 << cache_id; } + static struct brw_cache_item * search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, GLuint hash, const void *key, GLuint key_size, @@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, } -static void rehash( struct brw_cache *cache ) +static void +rehash(struct brw_cache *cache) { struct brw_cache_item **items; struct brw_cache_item *c, *next; @@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache ) cache->size = size; } + /** * Returns the buffer object matching cache_id and key, or NULL. */ -dri_bo *brw_search_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs, - void *aux_return ) +dri_bo * +brw_search_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs, + void *aux_return) { struct brw_cache_item *item; GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); @@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache, return item->bo; } + dri_bo * brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, @@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache, return bo; } -/* This doesn't really work with aux data. Use search/upload instead + +/** + * This doesn't really work with aux data. Use search/upload instead */ dri_bo * brw_cache_data_sz(struct brw_cache *cache, @@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache, return bo; } + /** * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. * @@ -319,21 +330,22 @@ enum pool_type { DW_GENERAL_STATE }; + static void -brw_init_cache_id( struct brw_context *brw, - const char *name, - enum brw_cache_id id, - GLuint key_size, - GLuint aux_size) +brw_init_cache_id(struct brw_cache *cache, + const char *name, + enum brw_cache_id id, + GLuint key_size, + GLuint aux_size) { - struct brw_cache *cache = &brw->cache; - cache->name[id] = strdup(name); cache->key_size[id] = key_size; cache->aux_size[id] = aux_size; } -void brw_init_cache( struct brw_context *brw ) + +static void +brw_init_non_surface_cache(struct brw_context *brw) { struct brw_cache *cache = &brw->cache; @@ -342,114 +354,136 @@ void brw_init_cache( struct brw_context *brw ) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * - sizeof(struct brw_cache_item)); + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_VP", BRW_CC_VP, sizeof(struct brw_cc_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT, sizeof(struct brw_cc_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG, sizeof(struct brw_wm_prog_key), sizeof(struct brw_wm_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR, sizeof(struct brw_sampler_default_color), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER, 0, /* variable key/data size */ 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT, sizeof(struct brw_wm_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG, sizeof(struct brw_sf_prog_key), sizeof(struct brw_sf_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_VP", BRW_SF_VP, sizeof(struct brw_sf_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT, sizeof(struct brw_sf_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT, sizeof(struct brw_vs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG, sizeof(struct brw_vs_prog_key), sizeof(struct brw_vs_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT, sizeof(struct brw_clip_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG, sizeof(struct brw_clip_prog_key), sizeof(struct brw_clip_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT, sizeof(struct brw_gs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG, sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); +} + + +static void +brw_init_surface_cache(struct brw_context *brw) +{ + struct brw_cache *cache = &brw->surface_cache; + + cache->brw = brw; - brw_init_cache_id(brw, + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + + brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE, sizeof(struct brw_surface_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND, 0, 0); } + +void +brw_init_caches(struct brw_context *brw) +{ + brw_init_non_surface_cache(brw); + brw_init_surface_cache(brw); +} + + static void -brw_clear_cache( struct brw_context *brw ) +brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) { struct brw_cache_item *c, *next; GLuint i; @@ -457,8 +491,8 @@ brw_clear_cache( struct brw_context *brw ) if (INTEL_DEBUG & DEBUG_STATE) _mesa_printf("%s\n", __FUNCTION__); - for (i = 0; i < brw->cache.size; i++) { - for (c = brw->cache.items[i]; c; c = next) { + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { int j; next = c->next; @@ -468,10 +502,10 @@ brw_clear_cache( struct brw_context *brw ) free((void *)c->key); free(c); } - brw->cache.items[i] = NULL; + cache->items[i] = NULL; } - brw->cache.n_items = 0; + cache->n_items = 0; if (brw->curbe.last_buf) { _mesa_free(brw->curbe.last_buf); @@ -483,25 +517,46 @@ brw_clear_cache( struct brw_context *brw ) brw->state.dirty.cache |= ~0; } -void brw_state_cache_check_size( struct brw_context *brw ) + +void +brw_state_cache_check_size(struct brw_context *brw) { + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); + /* un-tuned guess. We've got around 20 state objects for a total of around * 32k, so 1000 of them is around 1.5MB. */ if (brw->cache.n_items > 1000) - brw_clear_cache(brw); + brw_clear_cache(brw, &brw->cache); + + if (brw->surface_cache.n_items > 1000) + brw_clear_cache(brw, &brw->surface_cache); } -void brw_destroy_cache( struct brw_context *brw ) + +static void +brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { GLuint i; - brw_clear_cache(brw); + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s\n", __FUNCTION__); + + brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { - dri_bo_unreference(brw->cache.last_bo[i]); - free(brw->cache.name[i]); + dri_bo_unreference(cache->last_bo[i]); + free(cache->name[i]); } - free(brw->cache.items); - brw->cache.items = NULL; - brw->cache.size = 0; + free(cache->items); + cache->items = NULL; + cache->size = 0; +} + + +void +brw_destroy_caches(struct brw_context *brw) +{ + brw_destroy_cache(brw, &brw->cache); + brw_destroy_cache(brw, &brw->surface_cache); } diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index b28c57c2bc..e94fa7d2b4 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -84,6 +84,19 @@ get_965_surfacetype(unsigned int surfacetype) } } +static const char * +get_965_surface_format(unsigned int surface_format) +{ + switch (surface_format) { + case 0x000: return "r32g32b32a32_float"; + case 0x0c1: return "b8g8r8a8_unorm"; + case 0x100: return "b5g6r5_unorm"; + case 0x102: return "b5g5r5a1_unorm"; + case 0x104: return "b4g4r4a4_unorm"; + default: return "unknown"; + } +} + static void dump_wm_surface_state(struct brw_context *brw) { int i; @@ -95,7 +108,7 @@ static void dump_wm_surface_state(struct brw_context *brw) char name[20]; if (surf_bo == NULL) { - fprintf(stderr, "WM SS%d: NULL\n", i); + fprintf(stderr, " WM SS%d: NULL\n", i); continue; } dri_bo_map(surf_bo, GL_FALSE); @@ -103,8 +116,9 @@ static void dump_wm_surface_state(struct brw_context *brw) surf = (struct brw_surface_state *)(surf_bo->virtual); sprintf(name, "WM SS%d", i); - state_out(name, surf, surfoff, 0, "%s\n", - get_965_surfacetype(surf->ss0.surface_type)); + state_out(name, surf, surfoff, 0, "%s %s\n", + get_965_surfacetype(surf->ss0.surface_type), + get_965_surface_format(surf->ss0.surface_format)); state_out(name, surf, surfoff, 1, "offset\n"); state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count); @@ -112,6 +126,8 @@ static void dump_wm_surface_state(struct brw_context *brw) surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not "); state_out(name, surf, surfoff, 4, "mip base %d\n", surf->ss4.min_lod); + state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", + surf->ss5.x_offset, surf->ss5.y_offset); dri_bo_unmap(surf_bo); } @@ -162,6 +178,14 @@ static void brw_debug_prog(const char *name, dri_bo *prog) fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", name, (unsigned int)prog->offset + i * 4 * 4, data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); + /* Stop at the end of the program. It'd be nice to keep track of the actual + * intended program size instead of guessing like this. + */ + if (data[i * 4 + 0] == 0 && + data[i * 4 + 1] == 0 && + data[i * 4 + 2] == 0 && + data[i * 4 + 3] == 0) + break; } dri_bo_unmap(prog); diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 4845859b3e..414620d0b3 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -59,11 +59,12 @@ const struct brw_tracked_state *atoms[] = &brw_curbe_offsets, &brw_recalculate_urb_fence, - &brw_cc_vp, &brw_cc_unit, - &brw_wm_surfaces, /* must do before samplers */ + &brw_vs_surfaces, /* must do before unit */ + &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ + &brw_wm_surfaces, /* must do before samplers and unit */ &brw_wm_samplers, &brw_wm_unit, @@ -88,54 +89,27 @@ const struct brw_tracked_state *atoms[] = &brw_line_stipple, &brw_aa_line_parameters, - /* Ordering of the commands below is documented as fixed. - */ -#if 0 - &brw_pipelined_state_pointers, - &brw_urb_fence, - &brw_constant_buffer_state, -#else + &brw_psp_urb_cbs, -#endif &brw_drawing_rect, &brw_indices, + &brw_index_buffer, &brw_vertices, - NULL, /* brw_constant_buffer */ + &brw_constant_buffer }; void brw_init_state( struct brw_context *brw ) { - GLuint i; - - brw_init_cache(brw); - - brw->state.atoms = _mesa_malloc(sizeof(atoms)); - brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms); - _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms)); - - /* Patch in a pointer to the dynamic state atom: - */ - for (i = 0; i < brw->state.nr_atoms; i++) - if (brw->state.atoms[i] == NULL) - brw->state.atoms[i] = &brw->curbe.tracked_state; - - _mesa_memcpy(&brw->curbe.tracked_state, - &brw_constant_buffer, - sizeof(brw_constant_buffer)); + brw_init_caches(brw); } void brw_destroy_state( struct brw_context *brw ) { - if (brw->state.atoms) { - _mesa_free(brw->state.atoms); - brw->state.atoms = NULL; - } - - brw_destroy_cache(brw); + brw_destroy_caches(brw); brw_destroy_batch_cache(brw); } @@ -218,6 +192,7 @@ static struct dirty_bit_map mesa_bits[] = { DEFINE_BIT(_NEW_MULTISAMPLE), DEFINE_BIT(_NEW_TRACK_MATRIX), DEFINE_BIT(_NEW_PROGRAM), + DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), {0, 0, 0} }; @@ -231,11 +206,10 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_PRIMITIVE), DEFINE_BIT(BRW_NEW_CONTEXT), DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), - DEFINE_BIT(BRW_NEW_INPUT_VARYING), DEFINE_BIT(BRW_NEW_PSP), - DEFINE_BIT(BRW_NEW_METAOPS), DEFINE_BIT(BRW_NEW_FENCE), DEFINE_BIT(BRW_NEW_INDICES), + DEFINE_BIT(BRW_NEW_INDEX_BUFFER), DEFINE_BIT(BRW_NEW_VERTICES), DEFINE_BIT(BRW_NEW_BATCH), DEFINE_BIT(BRW_NEW_DEPTH_BUFFER), @@ -298,6 +272,7 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) */ void brw_validate_state( struct brw_context *brw ) { + GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; GLuint i; @@ -314,13 +289,13 @@ void brw_validate_state( struct brw_context *brw ) state->brw |= ~0; } - if (brw->fragment_program != brw->attribs.FragmentProgram->_Current) { - brw->fragment_program = brw->attribs.FragmentProgram->_Current; + if (brw->fragment_program != ctx->FragmentProgram._Current) { + brw->fragment_program = ctx->FragmentProgram._Current; brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; } - if (brw->vertex_program != brw->attribs.VertexProgram->_Current) { - brw->vertex_program = brw->attribs.VertexProgram->_Current; + if (brw->vertex_program != ctx->VertexProgram._Current) { + brw->vertex_program = ctx->VertexProgram._Current; brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; } @@ -336,7 +311,7 @@ void brw_validate_state( struct brw_context *brw ) /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; @@ -347,6 +322,19 @@ void brw_validate_state( struct brw_context *brw ) } } } + + /* Make sure that the textures which are referenced by the current + * brw fragment program are actually present/valid. + * If this fails, we can experience GPU lock-ups. + */ + { + const struct brw_fragment_program *fp; + fp = brw_fragment_program_const(brw->fragment_program); + if (fp) { + assert((fp->tex_units_used & ctx->Texture._EnabledUnits) + == fp->tex_units_used); + } + } } @@ -367,8 +355,8 @@ void brw_upload_state(struct brw_context *brw) _mesa_memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < brw->state.nr_atoms; i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; assert(atom->dirty.mesa || @@ -397,7 +385,7 @@ void brw_upload_state(struct brw_context *brw) } else { for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 4e577d0f6a..66d4127271 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -33,6 +33,14 @@ #ifndef BRW_STRUCTS_H #define BRW_STRUCTS_H + +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + +/** Number of message register file registers */ +#define BRW_MAX_MRF 16 + + /* Command packets: */ struct header @@ -434,12 +442,12 @@ struct brw_urb_fence { GLuint sf_fence:10; GLuint vf_fence:10; - GLuint cs_fence:10; - GLuint pad:2; + GLuint cs_fence:11; + GLuint pad:1; } bits1; }; -struct brw_constant_buffer_state /* previously brw_command_streamer */ +struct brw_cs_urb_state { struct header header; @@ -815,7 +823,9 @@ struct brw_gs_unit_state struct { - GLuint pad0:10; + GLuint pad0:8; + GLuint rendering_enable:1; /* for IGDNG */ + GLuint pad4:1; GLuint stats_enable:1; GLuint nr_urb_entries:7; GLuint pad1:1; @@ -923,6 +933,28 @@ struct brw_wm_unit_state GLfloat global_depth_offset_constant; GLfloat global_depth_offset_scale; + + /* for IGDNG only */ + struct { + GLuint pad0:1; + GLuint grf_reg_count_1:3; + GLuint pad1:2; + GLuint kernel_start_pointer_1:26; + } wm8; + + struct { + GLuint pad0:1; + GLuint grf_reg_count_2:3; + GLuint pad1:2; + GLuint kernel_start_pointer_2:26; + } wm9; + + struct { + GLuint pad0:1; + GLuint grf_reg_count_3:3; + GLuint pad1:2; + GLuint kernel_start_pointer_3:26; + } wm10; }; struct brw_sampler_default_color { @@ -1031,10 +1063,10 @@ struct brw_surface_state GLuint writedisable_green:1; GLuint writedisable_red:1; GLuint writedisable_alpha:1; - GLuint surface_format:9; + GLuint surface_format:9; /**< BRW_SURFACEFORMAT_x */ GLuint data_return_format:1; GLuint pad0:1; - GLuint surface_type:3; + GLuint surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */ } ss0; struct { @@ -1075,7 +1107,7 @@ struct brw_surface_state GLuint y_offset:4; GLuint pad0:1; GLuint x_offset:7; - } ss5; /* NEW in Integrated Graphics Device */ + } ss5; /* New in G4X */ }; @@ -1168,7 +1200,7 @@ struct brw_instruction GLuint predicate_control:4; GLuint predicate_inverse:1; GLuint execution_size:3; - GLuint destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */ GLuint pad0:2; GLuint debug_control:1; GLuint saturate:1; @@ -1196,7 +1228,9 @@ struct brw_instruction GLuint dest_reg_type:3; GLuint src0_reg_file:2; GLuint src0_reg_type:3; - GLuint pad:6; + GLuint src1_reg_file:2; /* 0x00000c00 */ + GLuint src1_reg_type:3; /* 0x00007000 */ + GLuint pad:1; GLint dest_indirect_offset:10; /* offset against the deref'd address reg */ GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */ GLuint dest_horiz_stride:2; @@ -1211,7 +1245,7 @@ struct brw_instruction GLuint src0_reg_type:3; GLuint src1_reg_file:2; GLuint src1_reg_type:3; - GLuint pad0:1; + GLuint pad:1; GLuint dest_writemask:4; GLuint dest_subreg_nr:1; GLuint dest_reg_nr:8; @@ -1298,6 +1332,14 @@ struct brw_instruction GLuint pad1:6; } ia16; + struct + { + GLuint pad:26; + GLuint end_of_thread:1; + GLuint pad1:1; + GLuint sfid:4; + } send_igdng; /* for IGDNG only */ + } bits2; union @@ -1308,7 +1350,7 @@ struct brw_instruction GLuint src1_reg_nr:8; GLuint src1_abs:1; GLuint src1_negate:1; - GLuint pad:1; + GLuint src1_address_mode:1; GLuint src1_horiz_stride:2; GLuint src1_width:3; GLuint src1_vert_stride:4; @@ -1323,7 +1365,7 @@ struct brw_instruction GLuint src1_reg_nr:8; GLuint src1_abs:1; GLuint src1_negate:1; - GLuint pad0:1; + GLuint src1_address_mode:1; GLuint src1_swz_z:2; GLuint src1_swz_w:2; GLuint pad1:1; @@ -1337,7 +1379,7 @@ struct brw_instruction GLuint src1_subreg_nr:3; GLuint src1_abs:1; GLuint src1_negate:1; - GLuint pad0:1; + GLuint src1_address_mode:1; GLuint src1_horiz_stride:2; GLuint src1_width:3; GLuint src1_vert_stride:4; @@ -1385,6 +1427,21 @@ struct brw_instruction } math; struct { + GLuint function:4; + GLuint int_type:1; + GLuint precision:1; + GLuint saturate:1; + GLuint data_type:1; + GLuint snapshot:1; + GLuint pad0:10; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } math_igdng; + + struct { GLuint binding_table_index:8; GLuint sampler:4; GLuint return_format:2; @@ -1407,9 +1464,38 @@ struct brw_instruction GLuint end_of_thread:1; } sampler_g4x; + struct { + GLuint binding_table_index:8; + GLuint sampler:4; + GLuint msg_type:4; + GLuint simd_mode:2; + GLuint pad0:1; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } sampler_igdng; + struct brw_urb_immediate urb; struct { + GLuint opcode:4; + GLuint offset:6; + GLuint swizzle_control:2; + GLuint pad:1; + GLuint allocate:1; + GLuint used:1; + GLuint complete:1; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } urb_igdng; + + struct { GLuint binding_table_index:8; GLuint msg_control:4; GLuint msg_type:2; @@ -1423,6 +1509,19 @@ struct brw_instruction struct { GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint msg_type:3; + GLuint target_cache:2; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_read_igdng; + + struct { + GLuint binding_table_index:8; GLuint msg_control:3; GLuint pixel_scoreboard_clear:1; GLuint msg_type:3; @@ -1435,6 +1534,20 @@ struct brw_instruction } dp_write; struct { + GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint pixel_scoreboard_clear:1; + GLuint msg_type:3; + GLuint send_commit_msg:1; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_write_igdng; + + struct { GLuint pad:16; GLuint response_length:4; GLuint msg_length:4; @@ -1443,8 +1556,18 @@ struct brw_instruction GLuint end_of_thread:1; } generic; + struct { + GLuint pad:19; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } generic_igdng; + GLint d; GLuint ud; + float f; } bits3; }; diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c index 0bb6f176a0..71bff166dd 100644 --- a/src/mesa/drivers/dri/i965/brw_tex.c +++ b/src/mesa/drivers/dri/i965/brw_tex.c @@ -32,21 +32,12 @@ #include "main/glheader.h" #include "main/mtypes.h" -#include "main/imports.h" -#include "main/simple_list.h" -#include "main/enums.h" -#include "main/image.h" #include "main/teximage.h" -#include "main/texstore.h" -#include "main/texformat.h" - -#include "texmem.h" #include "intel_context.h" #include "intel_regions.h" #include "intel_tex.h" #include "brw_context.h" -#include "brw_defines.h" void brw_FrameBufferTexInit( struct brw_context *brw, @@ -86,11 +77,12 @@ void brw_FrameBufferTexDestroy( struct brw_context *brw ) */ void brw_validate_textures( struct brw_context *brw ) { + GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; int i; for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i]; + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; if (texUnit->_ReallyEnabled) { intel_finalize_mipmap_tree(intel, i); diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 51a617fcb4..5986cbffad 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -28,7 +28,6 @@ * Authors: * Keith Whitwell <keith@tungstengraphics.com> */ - /* Code to layout images in a mipmap tree for i965. */ @@ -37,17 +36,93 @@ #include "intel_tex_layout.h" #include "intel_context.h" #include "main/macros.h" +#include "intel_chipset.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE -GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt ) +GLboolean brw_miptree_layout(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t tiling) { - /* XXX: these vary depending on image format: - */ -/* GLint align_w = 4; */ + /* XXX: these vary depending on image format: */ + /* GLint align_w = 4; */ switch (mt->target) { - case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP: + if (IS_IGDNG(intel->intelScreen->deviceID)) { + GLuint align_h = 2, align_w = 4; + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = mt->width0; + GLuint height = mt->height0; + GLuint qpitch = 0; + GLuint y_pitch = 0; + + mt->pitch = mt->width0; + intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); + y_pitch = ALIGN(height, align_h); + + if (mt->compressed) { + mt->pitch = ALIGN(mt->width0, align_w); + } + + if (mt->first_level != mt->last_level) { + GLuint mip1_width; + + if (mt->compressed) { + mip1_width = ALIGN(minify(mt->width0), align_w) + + ALIGN(minify(minify(mt->width0)), align_w); + } else { + mip1_width = ALIGN(minify(mt->width0), align_w) + + minify(minify(mt->width0)); + } + + if (mip1_width > mt->pitch) { + mt->pitch = mip1_width; + } + } + + mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch); + + if (mt->compressed) { + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp; + mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; + } else { + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp; + mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; + } + + for (level = mt->first_level; level <= mt->last_level; level++) { + GLuint img_height; + GLuint nr_images = 6; + GLuint q = 0; + + intel_miptree_set_level_info(mt, level, nr_images, x, y, width, + height, 1); + + for (q = 0; q < nr_images; q++) + intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch); + + if (mt->compressed) + img_height = MAX2(1, height/4); + else + img_height = ALIGN(height, align_h); + + if (level == mt->first_level + 1) { + x += ALIGN(width, align_w); + } + else { + y += img_height; + } + + width = minify(width); + height = minify(height); + } + + break; + } + case GL_TEXTURE_3D: { GLuint width = mt->width0; GLuint height = mt->height0; @@ -59,25 +134,25 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t GLuint align_w = 4; mt->total_height = 0; - + intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); + if (mt->compressed) { - align_w = intel_compressed_alignment(mt->internal_format); mt->pitch = ALIGN(width, align_w); pack_y_pitch = (height + 3) / 4; } else { - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); - pack_y_pitch = ALIGN(mt->height0, align_h); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); + pack_y_pitch = ALIGN(mt->height0, align_h); } - pack_x_pitch = mt->pitch; + pack_x_pitch = width; pack_x_nr = 1; - for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { + for (level = mt->first_level ; level <= mt->last_level ; level++) { GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6; GLint x = 0; GLint y = 0; GLint q, j; - + intel_miptree_set_level_info(mt, level, nr_images, 0, mt->total_height, width, height, depth); @@ -89,7 +164,7 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t } x = 0; - y += pack_y_pitch; + y += pack_y_pitch; } @@ -98,40 +173,50 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t height = minify(height); depth = minify(depth); - if (mt->compressed) { - pack_y_pitch = (height + 3) / 4; - - if (pack_x_pitch > ALIGN(width, align_w)) { - pack_x_pitch = ALIGN(width, align_w); - pack_x_nr <<= 1; - } - } else { - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->pitch); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - pack_y_pitch = ALIGN(pack_y_pitch, align_h); - } - } + if (mt->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > ALIGN(width, align_w)) { + pack_x_pitch = ALIGN(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= mt->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = ALIGN(pack_y_pitch, align_h); + } + } } + /* The 965's sampler lays cachelines out according to how accesses + * in the texture surfaces run, so they may be "vertical" through + * memory. As a result, the docs say in Surface Padding Requirements: + * Sampling Engine Surfaces that two extra rows of padding are required. + * We don't know of similar requirements for pre-965, but given that + * those docs are silent on padding requirements in general, let's play + * it safe. + */ + if (mt->target == GL_TEXTURE_CUBE_MAP) + mt->total_height += 2; break; } default: - i945_miptree_layout_2d(intel, mt); + i945_miptree_layout_2d(intel, mt, tiling); break; } - DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, - mt->pitch, + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + mt->pitch, mt->total_height, mt->cpp, mt->pitch * mt->total_height * mt->cpp ); - + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index 7673dd36eb..8c6f4355a6 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -143,7 +143,29 @@ static void recalculate_urb_fence( struct brw_context *brw ) brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries; brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries; - + + brw->urb.constrained = 0; + + if (BRW_IS_IGDNG(brw)) { + brw->urb.nr_vs_entries = 128; + brw->urb.nr_sf_entries = 48; + if (check_urb_layout(brw)) { + goto done; + } else { + brw->urb.constrained = 1; + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; + } + } else if (BRW_IS_G4X(brw)) { + brw->urb.nr_vs_entries = 64; + if (check_urb_layout(brw)) { + goto done; + } else { + brw->urb.constrained = 1; + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + } + } + if (!check_urb_layout(brw)) { brw->urb.nr_vs_entries = limits[VS].min_nr_entries; brw->urb.nr_gs_entries = limits[GS].min_nr_entries; @@ -169,9 +191,8 @@ static void recalculate_urb_fence( struct brw_context *brw ) if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) _mesa_printf("URB CONSTRAINED\n"); } - else - brw->urb.constrained = 0; +done: if (INTEL_DEBUG & DEBUG_URB) _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", brw->urb.vs_start, diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 1db7ceebcf..e3111c6680 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -85,6 +85,7 @@ static void do_vs_prog( struct brw_context *brw, static void brw_upload_vs_prog(struct brw_context *brw) { + GLcontext *ctx = &brw->intel.ctx; struct brw_vs_prog_key key; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; @@ -97,14 +98,9 @@ static void brw_upload_vs_prog(struct brw_context *brw) * the inputs it asks for, whether they are varying or not. */ key.program_string_id = vp->id; - key.nr_userclip = brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); - key.copy_edgeflag = (brw->attribs.Polygon->FrontMode != GL_FILL || - brw->attribs.Polygon->BackMode != GL_FILL); - - /* BRW_NEW_METAOPS - */ - if (brw->metaops.active) - key.know_w_is_one = 1; + key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); + key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL); /* Make an early check for the key. */ @@ -123,7 +119,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) const struct brw_tracked_state brw_vs_prog = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_POLYGON, - .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_METAOPS, + .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, .prepare = brw_upload_vs_prog diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 22388ec99d..4a591365c9 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -43,7 +43,6 @@ struct brw_vs_prog_key { GLuint program_string_id; GLuint nr_userclip:4; GLuint copy_edgeflag:1; - GLuint know_w_is_one:1; GLuint pad:26; }; @@ -59,6 +58,7 @@ struct brw_vs_compile { GLuint first_output; GLuint nr_outputs; + GLuint first_overflow_output; /**< VERT_ATTRIB_x */ GLuint first_tmp; GLuint last_tmp; @@ -76,6 +76,11 @@ struct brw_vs_compile { struct brw_reg userplane[6]; + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; }; void brw_vs_emit( struct brw_vs_compile *c ); diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index 6fbac02de6..249a800bf4 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -39,8 +39,8 @@ */ struct tracker { GLboolean twoside; - GLubyte active[PROGRAM_OUTPUT+1][128]; - GLuint size_masks[4]; + GLubyte active[PROGRAM_OUTPUT+1][MAX_PROGRAM_TEMPS]; + GLbitfield size_masks[4]; /**< one bit per fragment program input attrib */ }; @@ -53,8 +53,10 @@ static void set_active_component( struct tracker *t, case PROGRAM_TEMPORARY: case PROGRAM_INPUT: case PROGRAM_OUTPUT: + assert(file < PROGRAM_OUTPUT + 1); + assert(index < Elements(t->active[0])); t->active[file][index] |= active; - + break; default: break; } @@ -96,7 +98,7 @@ static GLubyte get_active( struct tracker *t, struct prog_src_register src ) { GLuint i; - GLubyte active = src.NegateBase; /* NOTE! */ + GLubyte active = src.Negate; /* NOTE! */ if (src.RelAddr) return 0xf; @@ -108,10 +110,15 @@ static GLubyte get_active( struct tracker *t, return active; } +/** + * Return the size (1,2,3 or 4) of the output/result for VERT_RESULT_idx. + */ static GLubyte get_output_size( struct tracker *t, GLuint idx ) { - GLubyte active = t->active[PROGRAM_OUTPUT][idx]; + GLubyte active; + assert(idx < VERT_RESULT_MAX); + active = t->active[PROGRAM_OUTPUT][idx]; if (active & (1<<3)) return 4; if (active & (1<<2)) return 3; if (active & (1<<1)) return 2; @@ -123,7 +130,7 @@ static GLubyte get_output_size( struct tracker *t, */ static void calc_sizes( struct tracker *t ) { - GLuint i; + GLint vertRes; if (t->twoside) { t->active[PROGRAM_OUTPUT][VERT_RESULT_COL0] |= @@ -133,12 +140,27 @@ static void calc_sizes( struct tracker *t ) t->active[PROGRAM_OUTPUT][VERT_RESULT_BFC1]; } - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - switch (get_output_size(t, i)) { - case 4: t->size_masks[4-1] |= 1<<i; - case 3: t->size_masks[3-1] |= 1<<i; - case 2: t->size_masks[2-1] |= 1<<i; - case 1: t->size_masks[1-1] |= 1<<i; + /* Examine vertex program output sizes to set the size_masks[] info + * which describes the fragment program input sizes. + */ + for (vertRes = VERT_RESULT_TEX0; vertRes < VERT_RESULT_MAX; vertRes++) { + GLint fragAttrib; + + /* map vertex program output index to fragment program input index */ + if (vertRes <= VERT_RESULT_TEX7) + fragAttrib = FRAG_ATTRIB_TEX0 + vertRes - VERT_RESULT_TEX0; + else if (vertRes >= VERT_RESULT_VAR0) + fragAttrib = FRAG_ATTRIB_VAR0 + vertRes - VERT_RESULT_VAR0; + else + continue; + assert(fragAttrib >= FRAG_ATTRIB_TEX0); + assert(fragAttrib <= FRAG_ATTRIB_MAX); + + switch (get_output_size(t, vertRes)) { + case 4: t->size_masks[4-1] |= 1 << fragAttrib; + case 3: t->size_masks[3-1] |= 1 << fragAttrib; + case 2: t->size_masks[2-1] |= 1 << fragAttrib; + case 1: t->size_masks[1-1] |= 1 << fragAttrib; break; } } @@ -168,9 +190,10 @@ static GLuint get_input_size(struct brw_context *brw, */ static void calc_wm_input_sizes( struct brw_context *brw ) { + GLcontext *ctx = &brw->intel.ctx; /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_vertex_program *vp = - (struct brw_vertex_program *)brw->vertex_program; + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); /* BRW_NEW_INPUT_DIMENSIONS */ struct tracker t; GLuint insn; @@ -179,7 +202,7 @@ static void calc_wm_input_sizes( struct brw_context *brw ) memset(&t, 0, sizeof(t)); /* _NEW_LIGHT */ - if (brw->attribs.Light->Model.TwoSide) + if (ctx->Light.Model.TwoSide) t.twoside = 1; for (i = 0; i < VERT_ATTRIB_MAX; i++) diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 174331a765..1638ef8111 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -38,18 +38,55 @@ #include "brw_vs.h" +static struct brw_reg get_tmp( struct brw_vs_compile *c ) +{ + struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + +static void release_tmps( struct brw_vs_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + -/* Do things as simply as possible. Allocate and populate all regs +/** + * Preallocate GRF register before code emit. + * Do things as simply as possible. Allocate and populate all regs * ahead of time. */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; - GLuint nr_params; + int attributes_in_vue; + + /* Determine whether to use a real constant buffer or use a block + * of GRF registers for constants. The later is faster but only + * works if everything fits in the GRF. + * XXX this heuristic/check may need some fine tuning... + */ + if (c->vp->program.Base.Parameters->NumParameters + + c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF) + c->vp->use_const_buffer = GL_TRUE; + else + c->vp->use_const_buffer = GL_FALSE; + + /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/ /* r0 -- reserved as usual */ - c->r0 = brw_vec8_grf(reg, 0); reg++; + c->r0 = brw_vec8_grf(reg, 0); + reg++; /* User clip planes from curbe: */ @@ -60,39 +97,59 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Deal with curbe alignment: */ - reg += ((6+c->key.nr_userclip+3)/4)*2; + reg += ((6 + c->key.nr_userclip + 3) / 4) * 2; } /* Vertex program parameters from curbe: */ - nr_params = c->vp->program.Base.Parameters->NumParameters; - for (i = 0; i < nr_params; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); - } - reg += (nr_params+1)/2; + if (c->vp->use_const_buffer) { + /* get constants from a real constant buffer */ + c->prog_data.curb_read_length = 0; + c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */ + } + else { + /* use a section of the GRF for constants */ + GLuint nr_params = c->vp->program.Base.Parameters->NumParameters; + for (i = 0; i < nr_params; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + } + reg += (nr_params + 1) / 2; + c->prog_data.curb_read_length = reg - 1; - c->prog_data.curb_read_length = reg - 1; + c->prog_data.nr_params = nr_params * 4; + } /* Allocate input regs: */ c->nr_inputs = 0; for (i = 0; i < VERT_ATTRIB_MAX; i++) { - if (c->prog_data.inputs_read & (1<<i)) { + if (c->prog_data.inputs_read & (1 << i)) { c->nr_inputs++; c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0); reg++; } } + /* If there are no inputs, we'll still be reading one attribute's worth + * because it's required -- see urb_read_length setting. + */ + if (c->nr_inputs == 0) + reg++; - /* Allocate outputs: TODO: could organize the non-position outputs - * to go straight into message regs. + /* Allocate outputs. The non-position outputs go straight into message regs. */ c->nr_outputs = 0; c->first_output = reg; - mrf = 4; + c->first_overflow_output = 0; + + if (BRW_IS_IGDNG(c->func.brw)) + mrf = 8; + else + mrf = 4; + for (i = 0; i < VERT_RESULT_MAX; i++) { - if (c->prog_data.outputs_written & (1<<i)) { + if (c->prog_data.outputs_written & (1 << i)) { c->nr_outputs++; + assert(i < Elements(c->regs[PROGRAM_OUTPUT])); if (i == VERT_RESULT_HPOS) { c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); reg++; @@ -103,8 +160,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) mrf++; /* just a placeholder? XXX fix later stages & remove this */ } else { - c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); - mrf++; + if (mrf < 16) { + c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } + else { + /* too many vertex results to fit in MRF, use GRF for overflow */ + if (!c->first_overflow_output) + c->first_overflow_output = i; + c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } } } } @@ -132,17 +198,24 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } + if (c->vp->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } + for (i = 0; i < 128; i++) { - if (c->output_regs[i].used_in_src) { - c->output_regs[i].reg = brw_vec8_grf(reg, 0); - reg++; - } + if (c->output_regs[i].used_in_src) { + c->output_regs[i].reg = brw_vec8_grf(reg, 0); + reg++; + } } c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); reg += 2; - - + /* Some opcodes need an internal temporary: */ c->first_tmp = reg; @@ -152,35 +225,38 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * urb_read_length is the number of registers read from *each* * vertex urb, so is half the amount: */ - c->prog_data.urb_read_length = (c->nr_inputs+1)/2; - - c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4; - c->prog_data.total_grf = reg; -} - + c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; + /* Setting this field to 0 leads to undefined behavior according to the + * the VS_STATE docs. Our VUEs will always have at least one attribute + * sitting in them, even if it's padding. + */ + if (c->prog_data.urb_read_length == 0) + c->prog_data.urb_read_length = 1; -static struct brw_reg get_tmp( struct brw_vs_compile *c ) -{ - struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size + * them to fit the biggest thing they need to. + */ + attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); - if (++c->last_tmp > c->prog_data.total_grf) - c->prog_data.total_grf = c->last_tmp; + if (BRW_IS_IGDNG(c->func.brw)) + c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; + else + c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; - return tmp; -} + c->prog_data.total_grf = reg; -static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) -{ - if (tmp.nr == c->last_tmp-1) - c->last_tmp--; -} - -static void release_tmps( struct brw_vs_compile *c ) -{ - c->last_tmp = c->first_tmp; + if (INTEL_DEBUG & DEBUG_VS) { + _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); + _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); + _mesa_printf("%s reg = %d\n", __FUNCTION__, reg); + } } +/** + * If an instruction uses a temp reg both as a src and the dest, we + * sometimes need to allocate an intermediate temporary. + */ static void unalias1( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, @@ -200,6 +276,10 @@ static void unalias1( struct brw_vs_compile *c, } } +/** + * \sa unalias2 + * Checkes if 2-operand instruction needs an intermediate temporary. + */ static void unalias2( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, @@ -222,6 +302,10 @@ static void unalias2( struct brw_vs_compile *c, } } +/** + * \sa unalias2 + * Checkes if 3-operand instruction needs an intermediate temporary. + */ static void unalias3( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, @@ -615,6 +699,8 @@ static void emit_lit_noalias( struct brw_vs_compile *c, } brw_ENDIF(p, if_insn); + + release_tmp(c, tmp); } static void emit_lrp_noalias(struct brw_vs_compile *c, @@ -655,13 +741,84 @@ static void emit_nrm( struct brw_vs_compile *c, } +static struct brw_reg +get_constant(struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex) +{ + const struct prog_src_register *src = &inst->SrcReg[argIndex]; + struct brw_compile *p = &c->func; + struct brw_reg const_reg; + struct brw_reg const2_reg; + const GLboolean relAddr = src->RelAddr; + + assert(argIndex < 3); + + if (c->current_const[argIndex].index != src->Index || relAddr) { + struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + + c->current_const[argIndex].index = src->Index; + +#if 0 + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, argIndex, c->current_const[argIndex].reg.nr); +#endif + /* need to fetch the constant now */ + brw_dp_READ_4_vs(p, + c->current_const[argIndex].reg,/* writeback dest */ + 0, /* oword */ + relAddr, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ + ); + + if (relAddr) { + /* second read */ + const2_reg = get_tmp(c); + + /* use upper half of address reg for second read */ + addrReg = stride(addrReg, 0, 4, 0); + addrReg.subnr = 16; + + brw_dp_READ_4_vs(p, + const2_reg, /* writeback dest */ + 1, /* oword */ + relAddr, /* relative indexing? */ + addrReg, /* address register */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER + ); + } + } + + const_reg = c->current_const[argIndex].reg; + + if (relAddr) { + /* merge the two Owords into the constant register */ + /* const_reg[7..4] = const2_reg[7..4] */ + brw_MOV(p, + suboffset(stride(const_reg, 0, 4, 1), 4), + suboffset(stride(const2_reg, 0, 4, 1), 4)); + release_tmp(c, const2_reg); + } + else { + /* replicate lower four floats into upper half (to get XYZWXYZW) */ + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; + } + + return const_reg; +} + + + /* TODO: relative addressing! */ static struct brw_reg get_reg( struct brw_vs_compile *c, - GLuint file, + gl_register_file file, GLuint index ) { - switch (file) { case PROGRAM_TEMPORARY: case PROGRAM_INPUT: @@ -690,13 +847,17 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, } +/** + * Indirect addressing: get reg[[arg] + offset]. + */ static struct brw_reg deref( struct brw_vs_compile *c, struct brw_reg arg, GLint offset) { struct brw_compile *p = &c->func; struct brw_reg tmp = vec4(get_tmp(c)); - struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); + struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW); GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; struct brw_reg indirect = brw_vec4_indirect(0,0); @@ -717,10 +878,67 @@ static struct brw_reg deref( struct brw_vs_compile *c, brw_pop_insn_state(p); } + /* NOTE: tmp not released */ return vec8(tmp); } +/** + * Get brw reg corresponding to the instruction's [argIndex] src reg. + * TODO: relative addressing! + */ +static struct brw_reg +get_src_reg( struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex ) +{ + const GLuint file = inst->SrcReg[argIndex].File; + const GLint index = inst->SrcReg[argIndex].Index; + const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr; + + switch (file) { + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + if (relAddr) { + return deref(c, c->regs[file][0], index); + } + else { + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + } + + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + case PROGRAM_ENV_PARAM: + if (c->vp->use_const_buffer) { + return get_constant(c, inst, argIndex); + } + else if (relAddr) { + return deref(c, c->regs[PROGRAM_STATE_VAR][0], index); + } + else { + assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); + return c->regs[PROGRAM_STATE_VAR][index]; + } + case PROGRAM_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case PROGRAM_UNDEFINED: + /* this is a normal case since we loop over all three src args */ + return brw_null_reg(); + + case PROGRAM_LOCAL_PARAM: + case PROGRAM_WRITE_ONLY: + default: + assert(0); + return brw_null_reg(); + } +} + + static void emit_arl( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0 ) @@ -732,30 +950,31 @@ static void emit_arl( struct brw_vs_compile *c, if (need_tmp) tmp = get_tmp(c); - brw_RNDD(p, tmp, arg0); - brw_MUL(p, dst, tmp, brw_imm_d(16)); + brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */ + brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */ if (need_tmp) release_tmp(c, tmp); } -/* Will return mangled results for SWZ op. The emit_swz() function +/** + * Return the brw reg for the given instruction's src argument. + * Will return mangled results for SWZ op. The emit_swz() function * ignores this result and recalculates taking extended swizzles into * account. */ static struct brw_reg get_arg( struct brw_vs_compile *c, - struct prog_src_register *src ) + const struct prog_instruction *inst, + GLuint argIndex ) { + const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_reg reg; if (src->File == PROGRAM_UNDEFINED) return brw_null_reg(); - if (src->RelAddr) - reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); - else - reg = get_reg(c, src->File, src->Index); + reg = get_src_reg(c, inst, argIndex); /* Convert 3-bit swizzle to 2-bit. */ @@ -766,16 +985,38 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, /* Note this is ok for non-swizzle instructions: */ - reg.negate = src->NegateBase ? 1 : 0; + reg.negate = src->Negate ? 1 : 0; return reg; } +/** + * Get brw register for the given program dest register. + */ static struct brw_reg get_dst( struct brw_vs_compile *c, struct prog_dst_register dst ) { - struct brw_reg reg = get_reg(c, dst.File, dst.Index); + struct brw_reg reg; + + switch (dst.File) { + case PROGRAM_TEMPORARY: + case PROGRAM_OUTPUT: + assert(c->regs[dst.File][dst.Index].nr != 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_ADDRESS: + assert(dst.Index == 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_UNDEFINED: + /* we may hit this for OPCODE_END, OPCODE_KIL, etc */ + reg = brw_null_reg(); + break; + default: + assert(0); + reg = brw_null_reg(); + } reg.dw1.bits.writemask = dst.WriteMask; @@ -785,14 +1026,16 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, static void emit_swz( struct brw_vs_compile *c, struct brw_reg dst, - struct prog_src_register src ) + const struct prog_instruction *inst) { + const GLuint argIndex = 0; + const struct prog_src_register src = inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; GLuint zeros_mask = 0; GLuint ones_mask = 0; GLuint src_mask = 0; GLubyte src_swz[4]; - GLboolean need_tmp = (src.NegateBase && + GLboolean need_tmp = (src.Negate && dst.file != BRW_GENERAL_REGISTER_FILE); struct brw_reg tmp = dst; GLuint i; @@ -826,10 +1069,7 @@ static void emit_swz( struct brw_vs_compile *c, if (src_mask) { struct brw_reg arg0; - if (src.RelAddr) - arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); - else - arg0 = get_reg(c, src.File, src.Index); + arg0 = get_src_reg(c, inst, argIndex); arg0 = brw_swizzle(arg0, src_swz[0], src_swz[1], @@ -844,8 +1084,8 @@ static void emit_swz( struct brw_vs_compile *c, if (ones_mask) brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); - if (src.NegateBase) - brw_MOV(p, brw_writemask(tmp, src.NegateBase), negate(tmp)); + if (src.Negate) + brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); if (need_tmp) { brw_MOV(p, dst, tmp); @@ -863,6 +1103,8 @@ static void emit_vertex_write( struct brw_vs_compile *c) struct brw_reg m0 = brw_message_reg(0); struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; + int eot; + GLuint len_vertext_header = 2; if (c->key.copy_edgeflag) { brw_MOV(p, @@ -871,21 +1113,17 @@ static void emit_vertex_write( struct brw_vs_compile *c) } /* Build ndc coords */ - if (!c->key.know_w_is_one) { - ndc = get_tmp(c); - emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); - brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); - } - else { - ndc = pos; - } + ndc = get_tmp(c); + /* ndc = 1.0 / pos.w */ + emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + /* ndc.xyz = pos * ndc */ + brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); /* Update the header for point size, user clipping flags, and -ve rhw * workaround. */ if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) || - c->key.nr_userclip || - (!BRW_IS_G4X(p->brw) && !c->key.know_w_is_one)) + c->key.nr_userclip || BRW_IS_965(p->brw)) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -916,7 +1154,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. */ - if (!BRW_IS_G4X(p->brw) && !c->key.know_w_is_one) { + if (BRW_IS_965(p->brw)) { brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_L, @@ -943,7 +1181,23 @@ static void emit_vertex_write( struct brw_vs_compile *c) */ brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, offset(m0, 2), ndc); - brw_MOV(p, offset(m0, 3), pos); + + if (BRW_IS_IGDNG(p->brw)) { + /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */ + brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */ + /* m4, m5 contain the distances from vertex to the user clip planeXXX. + * Seems it is useless for us. + * m6 is used for aligning, so that the remainder of vertex element is + * reg-aligned. + */ + brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */ + len_vertext_header = 6; + } else { + brw_MOV(p, offset(m0, 3), pos); + len_vertext_header = 2; + } + + eot = (c->first_overflow_output == 0); brw_urb_WRITE(p, brw_null_reg(), /* dest */ @@ -951,62 +1205,126 @@ static void emit_vertex_write( struct brw_vs_compile *c) c->r0, /* src */ 0, /* allocate */ 1, /* used */ - c->nr_outputs + 3, /* msg len */ + MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */ 0, /* response len */ - 1, /* eot */ - 1, /* writes complete */ + eot, /* eot */ + eot, /* writes complete */ 0, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); + + if (c->first_overflow_output > 0) { + /* Not all of the vertex outputs/results fit into the MRF. + * Move the overflowed attributes from the GRF to the MRF and + * issue another brw_urb_WRITE(). + */ + /* XXX I'm not 100% sure about which MRF regs to use here. Starting + * at mrf[4] atm... + */ + GLuint i, mrf = 0; + for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) { + if (c->prog_data.outputs_written & (1 << i)) { + /* move from GRF to MRF */ + brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]); + mrf++; + } + } + + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + 4, /* starting mrf reg nr */ + c->r0, /* src */ + 0, /* allocate */ + 1, /* used */ + mrf+1, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + BRW_MAX_MRF-1, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); + } } +/** + * Called after code generation to resolve subroutine calls and the + * END instruction. + * \param end_inst points to brw code for END instruction + * \param last_inst points to last instruction emitted before vertex write + */ static void -post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) +post_vs_emit( struct brw_vs_compile *c, + struct brw_instruction *end_inst, + struct brw_instruction *last_inst ) { - GLuint nr_insns = c->vp->program.Base.NumInstructions; - GLuint insn, target_insn; - struct prog_instruction *inst1, *inst2; - struct brw_instruction *brw_inst1, *brw_inst2; - int offset; - for (insn = 0; insn < nr_insns; insn++) { - inst1 = &c->vp->program.Base.Instructions[insn]; - brw_inst1 = inst1->Data; - switch (inst1->Opcode) { - case OPCODE_CAL: - case OPCODE_BRA: - target_insn = inst1->BranchTarget; - inst2 = &c->vp->program.Base.Instructions[target_insn]; - brw_inst2 = inst2->Data; - offset = brw_inst2 - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - case OPCODE_END: - offset = end_inst - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - default: - break; - } + GLint offset; + + brw_resolve_cals(&c->func); + + /* patch up the END code to jump past subroutines, etc */ + offset = last_inst - end_inst; + if (offset > 1) { + brw_set_src1(end_inst, brw_imm_d(offset * 16)); + } else { + end_inst->header.opcode = BRW_OPCODE_NOP; + } +} + +static uint32_t +get_predicate(const struct prog_instruction *inst) +{ + if (inst->DstReg.CondMask == COND_TR) + return BRW_PREDICATE_NONE; + + /* All of GLSL only produces predicates for COND_NE and one channel per + * vector. Fail badly if someone starts doing something else, as it might + * mean infinite looping or something. + * + * We'd like to support all the condition codes, but our hardware doesn't + * quite match the Mesa IR, which is modeled after the NV extensions. For + * those, the instruction may update the condition codes or not, then any + * later instruction may use one of those condition codes. For gen4, the + * instruction may update the flags register based on one of the condition + * codes output by the instruction, and then further instructions may + * predicate on that. We can probably support this, but it won't + * necessarily be easy. + */ + assert(inst->DstReg.CondMask == COND_NE); + + switch (inst->DstReg.CondSwizzle) { + case SWIZZLE_XXXX: + return BRW_PREDICATE_ALIGN16_REPLICATE_X; + case SWIZZLE_YYYY: + return BRW_PREDICATE_ALIGN16_REPLICATE_Y; + case SWIZZLE_ZZZZ: + return BRW_PREDICATE_ALIGN16_REPLICATE_Z; + case SWIZZLE_WWWW: + return BRW_PREDICATE_ALIGN16_REPLICATE_W; + default: + _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", + inst->DstReg.CondMask); + return BRW_PREDICATE_NORMAL; } } -/* Emit the fragment program instructions here. +/* Emit the vertex program instructions here. */ void brw_vs_emit(struct brw_vs_compile *c ) { -#define MAX_IFSN 32 +#define MAX_IF_DEPTH 32 +#define MAX_LOOP_DEPTH 32 struct brw_compile *p = &c->func; - GLuint nr_insns = c->vp->program.Base.NumInstructions; - GLuint insn, if_insn = 0; - struct brw_instruction *end_inst; - struct brw_instruction *if_inst[MAX_IFSN]; - struct brw_indirect stack_index = brw_indirect(0, 0); - + struct brw_context *brw = p->brw; + const GLuint nr_insns = c->vp->program.Base.NumInstructions; + GLuint insn, if_depth = 0, loop_depth = 0; + GLuint end_offset = 0; + struct brw_instruction *end_inst, *last_inst; + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + const struct brw_indirect stack_index = brw_indirect(0, 0); GLuint index; GLuint file; if (INTEL_DEBUG & DEBUG_VS) { - _mesa_printf("vs-emit:\n"); + _mesa_printf("vs-mesa:\n"); _mesa_print_program(&c->vp->program.Base); _mesa_printf("\n"); } @@ -1035,22 +1353,26 @@ void brw_vs_emit(struct brw_vs_compile *c ) for (insn = 0; insn < nr_insns; insn++) { - struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; struct brw_reg args[3], dst; GLuint i; +#if 0 + printf("%d: ", insn); + _mesa_print_instruction(inst); +#endif + /* Get argument regs. SWZ is special and does this itself. */ - inst->Data = &p->store[p->nr_insn]; if (inst->Opcode != OPCODE_SWZ) for (i = 0; i < 3; i++) { - struct prog_src_register *src = &inst->SrcReg[i]; + const struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; - if (file == PROGRAM_OUTPUT&&c->output_regs[index].used_in_src) + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) args[i] = c->output_regs[index].reg; else - args[i] = get_arg(c, src); + args[i] = get_arg(c, inst, i); } /* Get dest regs. Note that it is possible for a reg to be both @@ -1178,7 +1500,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* The args[0] value can't be used here as it won't have * correctly encoded the full swizzle: */ - emit_swz(c, dst, inst->SrcReg[0] ); + emit_swz(c, dst, inst); break; case OPCODE_TRUNC: /* round toward zero */ @@ -1188,20 +1510,61 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_xpd(p, dst, args[0], args[1]); break; case OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); + /* Note that brw_IF smashes the predicate_control field. */ + if_inst[if_depth]->header.predicate_control = get_predicate(inst); + if_depth++; break; case OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); break; + case OPCODE_BGNLOOP: + loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + break; + case OPCODE_BRK: + brw_set_predicate_control(p, get_predicate(inst)); + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_CONT: + brw_set_predicate_control(p, get_predicate(inst)); + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_ENDLOOP: + { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + loop_depth--; + + if (BRW_IS_IGDNG(brw)) + br = 2; + + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + inst0->bits3.if_else.pop_count = 0; + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + inst0->bits3.if_else.pop_count = 0; + } + } + } + break; case OPCODE_BRA: - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_set_predicate_control(p, get_predicate(inst)); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_set_predicate_control_flag_value(p, 0xff); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_CAL: brw_set_access_mode(p, BRW_ALIGN_1); @@ -1209,7 +1572,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_set_access_mode(p, BRW_ALIGN_16); brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(4)); - inst->Data = &p->store[p->nr_insn]; + brw_save_call(p, inst->Comment, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; case OPCODE_RET: @@ -1218,14 +1581,23 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); brw_set_access_mode(p, BRW_ALIGN_16); + break; case OPCODE_END: + end_offset = p->nr_insn; + /* this instruction will get patched later to jump past subroutine + * code, etc. + */ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; case OPCODE_PRINT: + /* no-op */ + break; case OPCODE_BGNSUB: + brw_save_label(p, inst->Comment, p->nr_insn); + break; case OPCODE_ENDSUB: - /* no-op instructions */ - break; + /* no-op */ + break; default: _mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader", inst->Opcode, inst->Opcode < MAX_OPCODE ? @@ -1233,6 +1605,19 @@ void brw_vs_emit(struct brw_vs_compile *c ) "unknown"); } + /* Set the predication update on the last instruction of the native + * instruction sequence. + * + * This would be problematic if it was set on a math instruction, + * but that shouldn't be the case with the current GLSL compiler. + */ + if (inst->CondUpdate) { + struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1]; + + assert(hw_insn->header.destreg__conditionalmod == 0); + hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; + } + if ((inst->DstReg.File == PROGRAM_OUTPUT) && (inst->DstReg.Index != VERT_RESULT_HPOS) && c->output_regs[inst->DstReg.Index].used_in_src) { @@ -1263,9 +1648,20 @@ void brw_vs_emit(struct brw_vs_compile *c ) release_tmps(c); } - end_inst = &p->store[p->nr_insn]; + end_inst = &p->store[end_offset]; + last_inst = &p->store[p->nr_insn]; + + /* The END instruction will be patched to jump to this code */ emit_vertex_write(c); - post_vs_emit(c, end_inst); - for (insn = 0; insn < nr_insns; insn++) - c->vp->program.Base.Instructions[insn].Data = NULL; + + post_vs_emit(c, end_inst, last_inst); + + if (INTEL_DEBUG & DEBUG_VS) { + int i; + + _mesa_printf("vs-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); + } } diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 942581696d..d790ab6555 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -44,11 +44,15 @@ struct brw_vs_unit_key { unsigned int curbe_offset; unsigned int nr_urb_entries, urb_size; + + unsigned int nr_surfaces; }; static void vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) { + GLcontext *ctx = &brw->intel.ctx; + memset(key, 0, sizeof(*key)); /* CACHE_NEW_VS_PROG */ @@ -60,8 +64,11 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) key->nr_urb_entries = brw->urb.nr_vs_entries; key->urb_size = brw->urb.vsize; + /* BRW_NEW_NR_VS_SURFACES */ + key->nr_surfaces = brw->vs.nr_surfaces; + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ - if (brw->attribs.Transform->ClipPlanesEnabled) { + if (ctx->Transform.ClipPlanesEnabled) { /* Note that we read in the userclip planes as well, hence * clip_start: */ @@ -90,16 +97,28 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) * brw_urb_WRITE() results. */ vs.thread1.single_program_flow = 0; + + if (BRW_IS_IGDNG(brw)) + vs.thread1.binding_table_entry_count = 0; /* hardware requirement */ + else + vs.thread1.binding_table_entry_count = key->nr_surfaces; + vs.thread3.urb_entry_read_length = key->urb_entry_read_length; vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; vs.thread3.dispatch_grf_start_reg = 1; vs.thread3.urb_entry_read_offset = 0; vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - vs.thread4.nr_urb_entries = key->nr_urb_entries; + if (BRW_IS_IGDNG(brw)) + vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; + else + vs.thread4.nr_urb_entries = key->nr_urb_entries; + vs.thread4.urb_entry_allocation_size = key->urb_size - 1; - if (BRW_IS_G4X(brw)) + if (BRW_IS_IGDNG(brw)) + chipset_max_threads = 72; + else if (BRW_IS_G4X(brw)) chipset_max_threads = 32; else chipset_max_threads = 16; @@ -111,6 +130,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) /* No samplers for ARB_vp programs: */ + /* It has to be set to 0 for IGDNG + */ vs.vs5.sampler_count = 0; if (INTEL_DEBUG & DEBUG_STATS) @@ -156,6 +177,7 @@ const struct brw_tracked_state brw_vs_unit = { .dirty = { .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c new file mode 100644 index 0000000000..89f47522a1 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -0,0 +1,226 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "main/mtypes.h" +#include "main/texformat.h" +#include "main/texstore.h" +#include "shader/prog_parameter.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +/* Creates a new VS constant buffer reflecting the current VS program's + * constants, if needed by the VS program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom. + */ +static drm_intel_bo * +brw_vs_update_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + drm_intel_bo *const_buffer; + + /* BRW_NEW_VERTEX_PROGRAM */ + if (!vp->use_const_buffer) + return NULL; + + const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", + size, 64); + + /* _NEW_PROGRAM_CONSTANTS */ + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + return const_buffer; +} + +/** + * Update the surface state for a VS constant buffer. + * + * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer. + */ +static void +brw_update_vs_constant_surface( GLcontext *ctx, + GLuint surf) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + + assert(surf == 0); + + /* If we're in this state update atom, we need to update VS constants, so + * free the old buffer and create a new one for the new contents. + */ + dri_bo_unreference(vp->const_buffer); + vp->const_buffer = brw_vs_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (vp->const_buffer == 0) { + drm_intel_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = NULL; + return; + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = vp->const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + drm_intel_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->vs.surf_bo[surf] == NULL) { + brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } +} + + +/** + * Constructs the binding table for the VS surface state. + */ +static dri_bo * +brw_vs_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint); + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < BRW_VS_MAX_SURF; i++) + if (brw->vs.surf_bo[i]) + data[i] = brw->vs.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + data, data_size, + NULL, NULL); + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + /* The presumed offsets were set in the data values for + * brw_upload_cache. + */ + drm_intel_bo_emit_reloc(bind_bo, i * 4, + brw->vs.surf_bo[i], 0, + I915_GEM_DOMAIN_INSTRUCTION, 0); + } + } + + free(data); + } + + return bind_bo; +} + +/** + * Vertex shader surfaces (constant buffer). + * + * This consumes the state updates for the constant buffer needing + * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and + * CACHE_NEW_SURF_BIND for the binding table upload. + */ +static void prepare_vs_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + int i; + int nr_surfaces = 0; + + brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER); + + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + nr_surfaces = i + 1; + } + } + + if (brw->vs.nr_surfaces != nr_surfaces) { + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; + brw->vs.nr_surfaces = nr_surfaces; + } + + /* Note that we don't end up updating the bind_bo if we don't have a + * surface to be pointing at. This should be relatively harmless, as it + * just slightly increases our working set size. + */ + if (brw->vs.nr_surfaces != 0) { + dri_bo_unreference(brw->vs.bind_bo); + brw->vs.bind_bo = brw_vs_get_binding_table(brw); + } +} + +const struct brw_tracked_state brw_vs_surfaces = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_VERTEX_PROGRAM), + .cache = 0 + }, + .prepare = prepare_vs_surfaces, +}; + + + diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index f7293ef423..ac11790151 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -23,14 +23,12 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - **********************************************************************/ - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - - +**********************************************************************/ +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ #include "main/glheader.h" #include "main/mtypes.h" @@ -44,12 +42,11 @@ #include "brw_context.h" #include "brw_defines.h" #include "brw_state.h" - #include "brw_draw.h" #include "brw_state.h" #include "brw_fallback.h" #include "brw_vs.h" -#include <stdarg.h> + static void dri_bo_release(dri_bo **bo) @@ -58,27 +55,31 @@ dri_bo_release(dri_bo **bo) *bo = NULL; } -/* called from intelDestroyContext() + +/** + * called from intelDestroyContext() */ static void brw_destroy_context( struct intel_context *intel ) { struct brw_context *brw = brw_context(&intel->ctx); int i; - brw_destroy_metaops(brw); brw_destroy_state(brw); brw_draw_destroy( brw ); + _mesa_free(brw->wm.compile_data); + brw_FrameBufferTexDestroy( brw ); - for (i = 0; i < brw->state.nr_draw_regions; i++) - intel_region_release(&brw->state.draw_regions[i]); - brw->state.nr_draw_regions = 0; + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); + brw->state.nr_color_regions = 0; intel_region_release(&brw->state.depth_region); dri_bo_release(&brw->curbe.curbe_bo); dri_bo_release(&brw->vs.prog_bo); dri_bo_release(&brw->vs.state_bo); + dri_bo_release(&brw->vs.bind_bo); dri_bo_release(&brw->gs.prog_bo); dri_bo_release(&brw->gs.state_bo); dri_bo_release(&brw->clip.prog_bo); @@ -92,6 +93,7 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->wm.bind_bo); for (i = 0; i < BRW_WM_MAX_SURF; i++) dri_bo_release(&brw->wm.surf_bo[i]); + dri_bo_release(&brw->wm.sampler_bo); dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.state_bo); dri_bo_release(&brw->cc.prog_bo); @@ -99,37 +101,46 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->cc.vp_bo); } -/* called from intelDrawBuffer() + +/** + * called from intelDrawBuffer() */ static void brw_set_draw_region( struct intel_context *intel, - struct intel_region *draw_regions[], - struct intel_region *depth_region, - GLuint num_regions) + struct intel_region *color_regions[], + struct intel_region *depth_region, + GLuint num_color_regions) { struct brw_context *brw = brw_context(&intel->ctx); - int i; + GLuint i; + + /* release old color/depth regions */ if (brw->state.depth_region != depth_region) brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER; - for (i = 0; i < brw->state.nr_draw_regions; i++) - intel_region_release(&brw->state.draw_regions[i]); + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); intel_region_release(&brw->state.depth_region); - for (i = 0; i < num_regions; i++) - intel_region_reference(&brw->state.draw_regions[i], draw_regions[i]); + + /* reference new color/depth regions */ + for (i = 0; i < num_color_regions; i++) + intel_region_reference(&brw->state.color_regions[i], color_regions[i]); intel_region_reference(&brw->state.depth_region, depth_region); - brw->state.nr_draw_regions = num_regions; + brw->state.nr_color_regions = num_color_regions; } -/* called from intel_batchbuffer_flush and children before sending a + +/** + * called from intel_batchbuffer_flush and children before sending a * batchbuffer off. */ static void brw_finish_batch(struct intel_context *intel) { struct brw_context *brw = brw_context(&intel->ctx); - brw_emit_query_end(brw); } -/* called from intelFlushBatchLocked + +/** + * called from intelFlushBatchLocked */ static void brw_new_batch( struct intel_context *intel ) { @@ -160,39 +171,12 @@ static void brw_new_batch( struct intel_context *intel ) } } -static void brw_note_fence( struct intel_context *intel, - GLuint fence ) -{ - brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; -} - -static void brw_note_unlock( struct intel_context *intel ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - - brw_state_cache_check_size(brw); -} - - -void brw_do_flush( struct brw_context *brw, - GLuint flags ) -{ - struct brw_mi_flush flush; - memset(&flush, 0, sizeof(flush)); - flush.opcode = CMD_MI_FLUSH; - flush.flags = flags; - BRW_BATCH_STRUCT(brw, &flush); -} - -static void brw_emit_flush( struct intel_context *intel, - GLuint unused ) +static void brw_note_fence( struct intel_context *intel, GLuint fence ) { - brw_do_flush(brw_context(&intel->ctx), - BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); + brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; } - /* called from intelWaitForIdle() and intelFlush() * * For now, just flush everything. Could be smarter later. @@ -202,10 +186,11 @@ static GLuint brw_flush_cmd( void ) struct brw_mi_flush flush; flush.opcode = CMD_MI_FLUSH; flush.pad = 0; - flush.flags = BRW_FLUSH_READ_CACHE | BRW_FLUSH_STATE_CACHE; + flush.flags = BRW_FLUSH_STATE_CACHE; return *(GLuint *)&flush; } + static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) { /* nothing */ @@ -215,20 +200,17 @@ static void brw_invalidate_state( struct intel_context *intel, GLuint new_state void brwInitVtbl( struct brw_context *brw ) { brw->intel.vtbl.check_vertex_size = 0; - brw->intel.vtbl.emit_state = 0; - brw->intel.vtbl.reduced_primitive_state = 0; + brw->intel.vtbl.emit_state = 0; + brw->intel.vtbl.reduced_primitive_state = 0; brw->intel.vtbl.render_start = 0; - brw->intel.vtbl.update_texture_state = 0; + brw->intel.vtbl.update_texture_state = 0; - brw->intel.vtbl.invalidate_state = brw_invalidate_state; - brw->intel.vtbl.note_fence = brw_note_fence; - brw->intel.vtbl.note_unlock = brw_note_unlock; + brw->intel.vtbl.invalidate_state = brw_invalidate_state; + brw->intel.vtbl.note_fence = brw_note_fence; brw->intel.vtbl.new_batch = brw_new_batch; brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; brw->intel.vtbl.set_draw_region = brw_set_draw_region; brw->intel.vtbl.flush_cmd = brw_flush_cmd; - brw->intel.vtbl.emit_flush = brw_emit_flush; brw->intel.vtbl.debug_batch = brw_debug_batch; } - diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index c50b0d2dd9..2292de94c4 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -40,12 +40,14 @@ GLuint brw_wm_nr_args( GLuint opcode ) { switch (opcode) { + case WM_FRONTFACING: case WM_PIXELXY: + return 0; case WM_CINTERP: case WM_WPOSXY: + case WM_DELTAXY: return 1; case WM_LINTERP: - case WM_DELTAXY: case WM_PIXELW: return 2; case WM_FB_WRITE: @@ -80,6 +82,58 @@ GLuint brw_wm_is_scalar_result( GLuint opcode ) } +/** + * Do GPU code generation for non-GLSL shader. non-GLSL shaders have + * no flow control instructions so we can more readily do SSA-style + * optimizations. + */ +static void +brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + /* Augment fragment program. Add instructions for pre- and + * post-fragment-program tasks such as interpolation and fogging. + */ + brw_wm_pass_fp(c); + + /* Translate to intermediate representation. Build register usage + * chains. + */ + brw_wm_pass0(c); + + /* Dead code removal. + */ + brw_wm_pass1(c); + + /* Register allocation. + * Divide by two because we operate on 16 pixels at a time and require + * two GRF entries for each logical shader register. + */ + c->grf_limit = BRW_WM_MAX_GRF / 2; + + brw_wm_pass2(c); + + /* how many general-purpose registers are used */ + c->prog_data.total_grf = c->max_wm_grf; + + /* Scratch space is used for register spilling */ + if (c->last_scratch) { + c->prog_data.total_scratch = c->last_scratch + 0x40; + } + else { + c->prog_data.total_scratch = 0; + } + + /* Emit GEN4 code. + */ + brw_wm_emit(c); +} + + +/** + * All Mesa program -> GPU code generation goes through this function. + * Depending on the instructions used (i.e. flow control instructions) + * we'll use one of two code generators. + */ static void do_wm_prog( struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key) @@ -90,52 +144,41 @@ static void do_wm_prog( struct brw_context *brw, c = brw->wm.compile_data; if (c == NULL) { - brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); - c = brw->wm.compile_data; + brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); + c = brw->wm.compile_data; + if (c == NULL) { + /* Ouch - big out of memory problem. Can't continue + * without triggering a segfault, no way to signal, + * so just return. + */ + return; + } } else { - memset(c, 0, sizeof(*brw->wm.compile_data)); + memset(c, 0, sizeof(*brw->wm.compile_data)); } memcpy(&c->key, key, sizeof(*key)); c->fp = fp; c->env_param = brw->intel.ctx.FragmentProgram.Parameters; - brw_init_compile(brw, &c->func); - if (brw_wm_is_glsl(&c->fp->program)) { - brw_wm_glsl_emit(brw, c); - } else { - /* Augment fragment program. Add instructions for pre- and - * post-fragment-program tasks such as interpolation and fogging. - */ - brw_wm_pass_fp(c); - - /* Translate to intermediate representation. Build register usage - * chains. - */ - brw_wm_pass0(c); - - /* Dead code removal. - */ - brw_wm_pass1(c); - - /* Register allocation. - */ - c->grf_limit = BRW_WM_MAX_GRF/2; - - brw_wm_pass2(c); - - c->prog_data.total_grf = c->max_wm_grf; - if (c->last_scratch) { - c->prog_data.total_scratch = - c->last_scratch + 0x40; - } else { - c->prog_data.total_scratch = 0; - } - - /* Emit GEN4 code. - */ - brw_wm_emit(c); + brw_init_compile(brw, &c->func); + + /* temporary sanity check assertion */ + ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); + + /* + * Shader which use GLSL features such as flow control are handled + * differently from "simple" shaders. + */ + if (fp->isGLSL) { + c->dispatch_width = 8; + brw_wm_glsl_emit(brw, c); } + else { + c->dispatch_width = 16; + brw_wm_non_glsl_emit(brw, c); + } + if (INTEL_DEBUG & DEBUG_WM) fprintf(stderr, "\n"); @@ -157,9 +200,11 @@ static void do_wm_prog( struct brw_context *brw, static void brw_wm_populate_key( struct brw_context *brw, struct brw_wm_prog_key *key ) { + GLcontext *ctx = &brw->intel.ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ - struct brw_fragment_program *fp = + const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; + GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; GLuint lookup = 0; GLuint line_aa; GLuint i; @@ -170,51 +215,50 @@ static void brw_wm_populate_key( struct brw_context *brw, */ /* _NEW_COLOR */ if (fp->program.UsesKill || - brw->attribs.Color->AlphaEnabled) + ctx->Color.AlphaEnabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; - if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPR)) + if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH)) lookup |= IZ_PS_COMPUTES_DEPTH_BIT; /* _NEW_DEPTH */ - if (brw->attribs.Depth->Test) + if (ctx->Depth.Test) lookup |= IZ_DEPTH_TEST_ENABLE_BIT; - if (brw->attribs.Depth->Test && - brw->attribs.Depth->Mask) /* ?? */ + if (ctx->Depth.Test && + ctx->Depth.Mask) /* ?? */ lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; /* _NEW_STENCIL */ - if (brw->attribs.Stencil->Enabled) { + if (ctx->Stencil._Enabled) { lookup |= IZ_STENCIL_TEST_ENABLE_BIT; - if (brw->attribs.Stencil->WriteMask[0] || - (brw->attribs.Stencil->_TestTwoSide && - brw->attribs.Stencil->WriteMask[1])) + if (ctx->Stencil.WriteMask[0] || + ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; } line_aa = AA_NEVER; /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ - if (brw->attribs.Line->SmoothFlag) { + if (ctx->Line.SmoothFlag) { if (brw->intel.reduced_primitive == GL_LINES) { line_aa = AA_ALWAYS; } else if (brw->intel.reduced_primitive == GL_TRIANGLES) { - if (brw->attribs.Polygon->FrontMode == GL_LINE) { + if (ctx->Polygon.FrontMode == GL_LINE) { line_aa = AA_SOMETIMES; - if (brw->attribs.Polygon->BackMode == GL_LINE || - (brw->attribs.Polygon->CullFlag && - brw->attribs.Polygon->CullFaceMode == GL_BACK)) + if (ctx->Polygon.BackMode == GL_LINE || + (ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_BACK)) line_aa = AA_ALWAYS; } - else if (brw->attribs.Polygon->BackMode == GL_LINE) { + else if (ctx->Polygon.BackMode == GL_LINE) { line_aa = AA_SOMETIMES; - if ((brw->attribs.Polygon->CullFlag && - brw->attribs.Polygon->CullFaceMode == GL_FRONT)) + if ((ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_FRONT)) line_aa = AA_ALWAYS; } } @@ -222,27 +266,36 @@ static void brw_wm_populate_key( struct brw_context *brw, brw_wm_lookup_iz(line_aa, lookup, + uses_depth, key); /* BRW_NEW_WM_INPUT_DIMENSIONS */ - key->projtex_mask = brw->wm.input_size_masks[4-1] >> (FRAG_ATTRIB_TEX0 - FRAG_ATTRIB_WPOS); + key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; /* _NEW_LIGHT */ - key->flat_shade = (brw->attribs.Light->ShadeModel == GL_FLAT); + key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + + /* _NEW_HINT */ + key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); /* _NEW_TEXTURE */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - const struct gl_texture_unit *unit = &brw->attribs.Texture->Unit[i]; - const struct gl_texture_object *t = unit->_Current; + const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; if (unit->_ReallyEnabled) { - if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) { - key->yuvtex_mask |= 1<<i; - if (t->Image[0][t->BaseLevel]->TexFormat->MesaFormat == - MESA_FORMAT_YCBCR) - key->yuvtex_swap_mask |= 1<< i; + const struct gl_texture_object *t = unit->_Current; + const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + if (img->InternalFormat == GL_YCBCR_MESA) { + key->yuvtex_mask |= 1 << i; + if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR) + key->yuvtex_swap_mask |= 1 << i; } + + key->tex_swizzles[i] = t->_Swizzle; + } + else { + key->tex_swizzles[i] = SWIZZLE_NOOP; } } @@ -273,10 +326,11 @@ static void brw_wm_populate_key( struct brw_context *brw, key->drawable_height = brw->intel.driDrawable->h; } - /* Extra info: - */ - key->program_string_id = fp->id; + /* CACHE_NEW_VS_PROG */ + key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS; + /* The unique fragment program ID */ + key->program_string_id = fp->id; } @@ -300,12 +354,11 @@ static void brw_prepare_wm_prog(struct brw_context *brw) } -/* See brw_wm.c: - */ const struct brw_tracked_state brw_wm_prog = { .dirty = { .mesa = (_NEW_COLOR | _NEW_DEPTH | + _NEW_HINT | _NEW_STENCIL | _NEW_POLYGON | _NEW_LINE | @@ -315,7 +368,7 @@ const struct brw_tracked_state brw_wm_prog = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_WM_INPUT_DIMENSIONS | BRW_NEW_REDUCED_PRIMITIVE), - .cache = 0 + .cache = CACHE_NEW_VS_PROG, }, .prepare = brw_prepare_wm_prog }; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index ded079695e..872b1f3ecf 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -38,6 +38,8 @@ #include "brw_context.h" #include "brw_eu.h" +#define SATURATE (1<<5) + /* A big lookup table is used to figure out which and how many * additional regs will inserted before the main payload in the WM * program execution. These mainly relate to depth and stencil @@ -60,20 +62,23 @@ struct brw_wm_prog_key { GLuint aa_dest_stencil_reg:3; GLuint dest_depth_reg:3; GLuint nr_depth_regs:3; - GLuint projtex_mask:8; - GLuint shadowtex_mask:8; GLuint computes_depth:1; /* could be derived from program string */ GLuint source_depth_to_render_target:1; GLuint flat_shade:1; + GLuint linear_color:1; /**< linear interpolation vs perspective interp */ GLuint runtime_check_aads_emit:1; - GLuint yuvtex_mask:8; - GLuint yuvtex_swap_mask:8; /* UV swaped */ - GLuint pad1:16; + GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ + GLuint shadowtex_mask:16; + GLuint yuvtex_mask:16; + GLuint yuvtex_swap_mask:16; /* UV swaped */ + + GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; GLuint program_string_id:32; GLuint origin_x, origin_y; GLuint drawable_height; + GLuint vp_outputs_written; }; @@ -142,13 +147,12 @@ struct brw_wm_instruction { GLuint writemask:4; GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ + GLuint tex_shadow:1; /* do shadow comparison? */ GLuint eot:1; /* End of thread indicator for FB_WRITE*/ GLuint target:10; /* target binding table index for FB_WRITE*/ }; -#define PROGRAM_INTERNAL_PARAM - #define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) #define BRW_WM_MAX_GRF 128 /* hardware limit */ #define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) @@ -172,7 +176,8 @@ struct brw_wm_instruction { #define WM_CINTERP (MAX_OPCODE + 5) #define WM_WPOSXY (MAX_OPCODE + 6) #define WM_FB_WRITE (MAX_OPCODE + 7) -#define MAX_WM_OPCODE (MAX_OPCODE + 8) +#define WM_FRONTFACING (MAX_OPCODE + 8) +#define MAX_WM_OPCODE (MAX_OPCODE + 9) #define PROGRAM_PAYLOAD (PROGRAM_FILE_MAX) #define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) @@ -200,7 +205,6 @@ struct brw_wm_compile { GLuint fp_temp; GLuint fp_interp_emitted; GLuint fp_fragcolor_emitted; - GLuint fp_deriv_emitted; struct prog_src_register pixel_xy; struct prog_src_register delta_xy; @@ -239,17 +243,31 @@ struct brw_wm_compile { GLuint max_wm_grf; GLuint last_scratch; + GLuint cur_inst; /**< index of current instruction */ + + GLboolean out_of_regs; /**< ran out of GRF registers? */ + + /** Mapping from Mesa registers to hardware registers */ struct { - GLboolean inited; - struct brw_reg reg; + GLboolean inited; + struct brw_reg reg; } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + + GLboolean used_grf[BRW_WM_MAX_GRF]; + GLuint first_free_grf; struct brw_reg stack; struct brw_reg emit_mask_reg; - GLuint reg_index; GLuint tmp_regs[BRW_WM_MAX_GRF]; GLuint tmp_index; GLuint tmp_max; GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; + GLuint dispatch_width; + + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; }; @@ -276,8 +294,16 @@ void brw_wm_print_program( struct brw_wm_compile *c, void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, + GLboolean ps_uses_depth, struct brw_wm_prog_key *key ); GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); + +void emit_ddxy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLboolean is_ddx, + const struct brw_reg *arg0); + #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_debug.c b/src/mesa/drivers/dri/i965/brw_wm_debug.c index 8f07f89ebc..220821087c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_debug.c +++ b/src/mesa/drivers/dri/i965/brw_wm_debug.c @@ -130,6 +130,9 @@ void brw_wm_print_insn( struct brw_wm_compile *c, case WM_FB_WRITE: _mesa_printf(" = FB_WRITE"); break; + case WM_FRONTFACING: + _mesa_printf(" = FRONTFACING"); + break; default: _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode)); break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index b5050a3e40..bf80a2942a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -34,8 +34,6 @@ #include "brw_context.h" #include "brw_wm.h" -#define SATURATE (1<<5) - /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. */ @@ -65,8 +63,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) static void emit_pixel_xy(struct brw_compile *p, const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) + GLuint mask) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); @@ -98,8 +95,7 @@ static void emit_pixel_xy(struct brw_compile *p, static void emit_delta_xy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) + const struct brw_reg *arg0) { struct brw_reg r1 = brw_vec1_grf(1, 0); @@ -254,6 +250,107 @@ static void emit_cinterp( struct brw_compile *p, } } +/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ +static void emit_frontfacing( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask ) +{ + struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); + GLuint i; + + if (!(mask & WRITEMASK_XYZW)) + return; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], brw_imm_f(0.0)); + } + } + + /* bit 31 is "primitive is back face", so checking < (1 << 31) gives + * us front face + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31)); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_MOV(p, dst[i], brw_imm_f(1.0)); + } + } + brw_set_predicate_control_flag_value(p, 0xff); +} + +/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input + * looking like: + * + * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br + * + * and we're trying to produce: + * + * DDX DDY + * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) + * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) + * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) + * (ss0.br - ss0.bl) (ss0.tr - ss0.br) + * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) + * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) + * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) + * (ss1.br - ss1.bl) (ss1.tr - ss1.br) + * + * and add another set of two more subspans if in 16-pixel dispatch mode. + * + * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result + * for each pair, and vertstride = 2 jumps us 2 elements after processing a + * pair. But for DDY, it's harder, as we want to produce the pairs swizzled + * between each other. We could probably do it like ddx and swizzle the right + * order later, but bail for now and just produce + * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) + */ +void emit_ddxy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLboolean is_ddx, + const struct brw_reg *arg0) +{ + int i; + struct brw_reg src0, src1; + + if (mask & SATURATE) + brw_set_saturate(p, 1); + for (i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + if (is_ddx) { + src0 = brw_reg(arg0[i].file, arg0[i].nr, 1, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + src1 = brw_reg(arg0[i].file, arg0[i].nr, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + } else { + src0 = brw_reg(arg0[i].file, arg0[i].nr, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + src1 = brw_reg(arg0[i].file, arg0[i].nr, 2, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + } + brw_ADD(p, dst[i], src0, negate(src1)); + } + } + if (mask & SATURATE) + brw_set_saturate(p, 0); +} static void emit_alu1( struct brw_compile *p, struct brw_instruction *(*func)(struct brw_compile *, @@ -325,6 +422,19 @@ static void emit_mad( struct brw_compile *p, } } +static void emit_trunc( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_RNDZ(p, dst[i], arg0[i]); + } + } +} static void emit_lrp( struct brw_compile *p, const struct brw_reg *dst, @@ -504,16 +614,18 @@ static void emit_dp3( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MAC(p, dst[0], arg0[2], arg1[2]); + brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]); brw_set_saturate(p, 0); } @@ -524,17 +636,19 @@ static void emit_dp4( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_MAC(p, dst[0], arg0[3], arg1[3]); + brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]); brw_set_saturate(p, 0); } @@ -545,17 +659,19 @@ static void emit_dph( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { + const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); - brw_MAC(p, dst[0], arg0[2], arg1[2]); + brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]); brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); - brw_ADD(p, dst[0], dst[0], arg1[3]); + brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]); brw_set_saturate(p, 0); } @@ -591,18 +707,19 @@ static void emit_math1( struct brw_compile *p, GLuint mask, const struct brw_reg *arg0 ) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X || - // function == BRW_MATH_FUNCTION_SINCOS); - + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + brw_MOV(p, brw_message_reg(2), arg0[0]); /* Send two messages to perform all 16 operations: */ brw_math_16(p, - dst[0], + dst[dst_chan], function, (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, @@ -618,10 +735,12 @@ static void emit_math2( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1) { + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert((mask & WRITEMASK_XYZW) == WRITEMASK_X); + assert(is_power_of_two(mask & WRITEMASK_XYZW)); brw_push_insn_state(p); @@ -640,7 +759,7 @@ static void emit_math2( struct brw_compile *p, */ brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, - dst[0], + dst[dst_chan], function, (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 2, @@ -650,7 +769,7 @@ static void emit_math2( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_math(p, - offset(dst[0],1), + offset(dst[dst_chan],1), function, (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, 4, @@ -671,9 +790,9 @@ static void emit_tex( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; GLuint msgLength, responseLength; - GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0; GLuint i, nr; GLuint emit; + GLuint msg_type; /* How many input regs are there? */ @@ -687,13 +806,17 @@ static void emit_tex( struct brw_wm_compile *c, emit = WRITEMASK_XY; nr = 2; break; - default: + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: emit = WRITEMASK_XYZ; nr = 3; break; + default: + /* unexpected target */ + abort(); } - if (shadow) { + if (inst->tex_shadow) { nr = 4; emit |= WRITEMASK_W; } @@ -711,19 +834,31 @@ static void emit_tex( struct brw_wm_compile *c, responseLength = 8; /* always */ + if (BRW_IS_IGDNG(p->brw)) { + if (inst->tex_shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG; + } else { + if (inst->tex_shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; + } + brw_SAMPLE(p, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, - (shadow ? - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE : - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE), + msg_type, responseLength, msgLength, - 0); + 0, + 1, + BRW_SAMPLER_SIMD_MODE_SIMD16); } @@ -735,7 +870,7 @@ static void emit_txb( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; GLuint msgLength; - + GLuint msg_type; /* Shadow ignored for txb. */ switch (inst->tex_idx) { @@ -750,27 +885,38 @@ static void emit_txb( struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(4), arg[1]); brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); break; - default: + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: brw_MOV(p, brw_message_reg(2), arg[0]); brw_MOV(p, brw_message_reg(4), arg[1]); brw_MOV(p, brw_message_reg(6), arg[2]); break; + default: + /* unexpected target */ + abort(); } brw_MOV(p, brw_message_reg(8), arg[3]); msgLength = 9; + if (BRW_IS_IGDNG(p->brw)) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + brw_SAMPLE(p, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ + SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, + msg_type, 8, /* responseLength */ msgLength, - 0); + 0, + 1, + BRW_SAMPLER_SIMD_MODE_SIMD16); } @@ -833,6 +979,20 @@ static void emit_kil( struct brw_wm_compile *c, } } +/* KIL_NV kills the pixels that are currently executing, not based on a test + * of the arguments. + */ +static void emit_kil_nv( struct brw_wm_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, r0uw, c->emit_mask_reg, r0uw); + brw_pop_insn_state(p); +} static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, @@ -886,6 +1046,9 @@ static void emit_aa( struct brw_wm_compile *c, /* Post-fragment-program processing. Send the results to the * framebuffer. + * \param arg0 the fragment color + * \param arg1 the pass-through depth value + * \param arg2 the shader-computed depth value */ static void emit_fb_write( struct brw_wm_compile *c, struct brw_reg *arg0, @@ -979,7 +1142,7 @@ static void emit_fb_write( struct brw_wm_compile *c, get_element_ud(brw_vec8_grf(1,0), 6), brw_imm_ud(1<<26)); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { emit_aa(c, arg1, 2); fire_fb_write(c, 0, nr, target, eot); @@ -994,8 +1157,8 @@ static void emit_fb_write( struct brw_wm_compile *c, } -/* Post-fragment-program processing. Send the results to the - * framebuffer. +/** + * Move a GPR to scratch memory. */ static void emit_spill( struct brw_wm_compile *c, struct brw_reg reg, @@ -1014,11 +1177,13 @@ static void emit_spill( struct brw_wm_compile *c, */ brw_dp_WRITE_16(p, retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW), - 1, slot); } +/** + * Load a GPR from scratch memory. + */ static void emit_unspill( struct brw_wm_compile *c, struct brw_reg reg, GLuint slot ) @@ -1039,13 +1204,13 @@ static void emit_unspill( struct brw_wm_compile *c, brw_dp_READ_16(p, retype(vec16(reg), BRW_REGISTER_TYPE_UW), - 1, slot); } /** - * Retrieve upto 4 GEN4 register pairs for the given wm reg: + * Retrieve up to 4 GEN4 register pairs for the given wm reg: + * Args with unspill_reg != 0 will be loaded from scratch memory. */ static void get_argument_regs( struct brw_wm_compile *c, struct brw_wm_ref *arg[], @@ -1055,13 +1220,12 @@ static void get_argument_regs( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (arg[i]) { - - if (arg[i]->unspill_reg) - emit_unspill(c, + if (arg[i]->unspill_reg) + emit_unspill(c, brw_vec8_grf(arg[i]->unspill_reg, 0), arg[i]->value->spill_slot); - regs[i] = arg[i]->hw_reg; + regs[i] = arg[i]->hw_reg; } else { regs[i] = brw_null_reg(); @@ -1070,6 +1234,9 @@ static void get_argument_regs( struct brw_wm_compile *c, } +/** + * For values that have a spill_slot!=0, write those regs to scratch memory. + */ static void spill_values( struct brw_wm_compile *c, struct brw_wm_value *values, GLuint nr ) @@ -1127,11 +1294,11 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Generated instructions for calculating triangle interpolants: */ case WM_PIXELXY: - emit_pixel_xy(p, dst, dst_flags, args[0]); + emit_pixel_xy(p, dst, dst_flags); break; case WM_DELTAXY: - emit_delta_xy(p, dst, dst_flags, args[0], args[1]); + emit_delta_xy(p, dst, dst_flags, args[0]); break; case WM_WPOSXY: @@ -1158,6 +1325,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot); break; + case WM_FRONTFACING: + emit_frontfacing(p, dst, dst_flags); + break; + /* Straightforward arithmetic: */ case OPCODE_ADD: @@ -1172,6 +1343,14 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; + case OPCODE_DDX: + emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]); + break; + + case OPCODE_DDY: + emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]); + break; + case OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; @@ -1184,6 +1363,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_dph(p, dst, dst_flags, args[0], args[1]); break; + case OPCODE_TRUNC: + emit_trunc(p, dst, dst_flags, args[0]); + break; + case OPCODE_LRP: emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); break; @@ -1297,6 +1480,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_kil(c, args[0]); break; + case OPCODE_KIL_NV: + emit_kil_nv(c); + break; + default: _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n", inst->opcode, inst->opcode < MAX_OPCODE ? @@ -1310,4 +1497,13 @@ void brw_wm_emit( struct brw_wm_compile *c ) inst->dst[i]->hw_reg, inst->dst[i]->spill_slot); } + + if (INTEL_DEBUG & DEBUG_WM) { + int i; + + _mesa_printf("wm-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); + } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 6df2c95d80..4e3edfbbff 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -42,6 +42,12 @@ #include "shader/prog_statevars.h" +/** An invalid texture target */ +#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS + +/** An invalid texture unit */ +#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT + #define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS #define X 0 @@ -58,7 +64,8 @@ static const char *wm_opcode_strings[] = { "PINTERP", "CINTERP", "WPOSXY", - "FB_WRITE" + "FB_WRITE", + "FRONTFACING", }; #if 0 @@ -79,9 +86,8 @@ static struct prog_src_register src_reg(GLuint file, GLuint idx) reg.Index = idx; reg.Swizzle = SWIZZLE_NOOP; reg.RelAddr = 0; - reg.NegateBase = 0; + reg.Negate = NEGATE_NONE; reg.Abs = 0; - reg.NegateAbs = 0; return reg; } @@ -111,6 +117,12 @@ static struct prog_src_register src_swizzle1( struct prog_src_register reg, int return src_swizzle(reg, x, x, x, x); } +static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle ) +{ + reg.Swizzle = swizzle; + return reg; +} + /*********************************************************************** * Dest regs @@ -123,7 +135,7 @@ static struct prog_dst_register dst_reg(GLuint file, GLuint idx) reg.Index = idx; reg.WriteMask = WRITEMASK_XYZW; reg.RelAddr = 0; - reg.CondMask = 0; + reg.CondMask = COND_TR; reg.CondSwizzle = 0; reg.CondSrc = 0; reg.pad = 0; @@ -177,22 +189,31 @@ static struct prog_instruction *emit_insn(struct brw_wm_compile *c, { struct prog_instruction *inst = get_fp_inst(c); *inst = *inst0; - inst->Data = (void *)inst0; return inst; } -static struct prog_instruction * emit_op(struct brw_wm_compile *c, +static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, GLuint op, struct prog_dst_register dest, GLuint saturate, GLuint tex_src_unit, GLuint tex_src_target, + GLuint tex_shadow, struct prog_src_register src0, struct prog_src_register src1, struct prog_src_register src2 ) { struct prog_instruction *inst = get_fp_inst(c); + assert(tex_src_unit < BRW_MAX_TEX_UNIT || + tex_src_unit == TEX_UNIT_NONE); + assert(tex_src_target < NUM_TEXTURE_TARGETS || + tex_src_target == TEX_TARGET_NONE); + + /* update mask of which texture units are referenced by this program */ + if (tex_src_unit != TEX_UNIT_NONE) + c->fp->tex_units_used |= (1 << tex_src_unit); + memset(inst, 0, sizeof(*inst)); inst->Opcode = op; @@ -200,6 +221,7 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, inst->SaturateMode = saturate; inst->TexSrcUnit = tex_src_unit; inst->TexSrcTarget = tex_src_target; + inst->TexShadow = tex_shadow; inst->SrcReg[0] = src0; inst->SrcReg[1] = src1; inst->SrcReg[2] = src2; @@ -207,6 +229,53 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, } +static struct prog_instruction * emit_op(struct brw_wm_compile *c, + GLuint op, + struct prog_dst_register dest, + GLuint saturate, + struct prog_src_register src0, + struct prog_src_register src1, + struct prog_src_register src2 ) +{ + return emit_tex_op(c, op, dest, saturate, + TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */ + src0, src1, src2); +} + + +/* Many Mesa opcodes produce the same value across all the result channels. + * We'd rather not have to support that splatting in the opcode implementations, + * and brw_wm_pass*.c wants to optimize them out by shuffling references around + * anyway. We can easily get both by emitting the opcode to one channel, and + * then MOVing it to the others, which brw_wm_pass*.c already understands. + */ +static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, + const struct prog_instruction *inst0) +{ + struct prog_instruction *inst; + unsigned int dst_chan; + unsigned int other_channel_mask; + + if (inst0->DstReg.WriteMask == 0) + return NULL; + + dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1; + inst = get_fp_inst(c); + *inst = *inst0; + inst->DstReg.WriteMask = 1 << dst_chan; + + other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); + if (other_channel_mask != 0) { + inst = emit_op(c, + OPCODE_MOV, + dst_mask(inst0->DstReg, other_channel_mask), + 0, + src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), + src_undef(), + src_undef()); + } + return inst; +} /*********************************************************************** @@ -228,7 +297,7 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) emit_op(c, WM_PIXELXY, dst_mask(pixel_xy, WRITEMASK_XY), - 0, 0, 0, + 0, payload_r0_depth, src_undef(), src_undef()); @@ -251,7 +320,7 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) emit_op(c, WM_DELTAXY, dst_mask(delta_xy, WRITEMASK_XY), - 0, 0, 0, + 0, pixel_xy, payload_r0_depth, src_undef()); @@ -268,14 +337,13 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) struct prog_dst_register pixel_w = get_temp(c); struct prog_src_register deltas = get_delta_xy(c); struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); - - + /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x */ emit_op(c, WM_PIXELW, dst_mask(pixel_w, WRITEMASK_W), - 0, 0, 0, + 0, interp_wpos, deltas, src_undef()); @@ -293,24 +361,19 @@ static void emit_interp( struct brw_wm_compile *c, struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); struct prog_src_register deltas = get_delta_xy(c); - struct prog_src_register arg2; - GLuint opcode; - + /* Need to use PINTERP on attributes which have been * multiplied by 1/W in the SF program, and LINTERP on those * which have not: */ switch (idx) { case FRAG_ATTRIB_WPOS: - opcode = WM_LINTERP; - arg2 = src_undef(); - /* Have to treat wpos.xy specially: */ emit_op(c, WM_WPOSXY, dst_mask(dst, WRITEMASK_XY), - 0, 0, 0, + 0, get_pixel_xy(c), src_undef(), src_undef()); @@ -322,10 +385,10 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, WM_LINTERP, dst, - 0, 0, 0, + 0, interp, deltas, - arg2); + src_undef()); break; case FRAG_ATTRIB_COL0: case FRAG_ATTRIB_COL1: @@ -333,26 +396,95 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, WM_CINTERP, dst, - 0, 0, 0, + 0, interp, src_undef(), src_undef()); } else { - emit_op(c, - WM_LINTERP, - dst, - 0, 0, 0, - interp, - deltas, - src_undef()); + if (c->key.linear_color) { + emit_op(c, + WM_LINTERP, + dst, + 0, + interp, + deltas, + src_undef()); + } + else { + /* perspective-corrected color interpolation */ + emit_op(c, + WM_PINTERP, + dst, + 0, + interp, + deltas, + get_pixel_w(c)); + } } break; + case FRAG_ATTRIB_FOGC: + /* Interpolate the fog coordinate */ + emit_op(c, + WM_PINTERP, + dst_mask(dst, WRITEMASK_X), + 0, + interp, + deltas, + get_pixel_w(c)); + + emit_op(c, + OPCODE_MOV, + dst_mask(dst, WRITEMASK_YZW), + 0, + src_swizzle(interp, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ONE), + src_undef(), + src_undef()); + break; + + case FRAG_ATTRIB_FACE: + /* XXX review/test this case */ + emit_op(c, + WM_FRONTFACING, + dst_mask(dst, WRITEMASK_X), + 0, + src_undef(), + src_undef(), + src_undef()); + break; + + case FRAG_ATTRIB_PNTC: + /* XXX review/test this case */ + emit_op(c, + WM_PINTERP, + dst_mask(dst, WRITEMASK_XY), + 0, + interp, + deltas, + get_pixel_w(c)); + + emit_op(c, + OPCODE_MOV, + dst_mask(dst, WRITEMASK_ZW), + 0, + src_swizzle(interp, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ONE), + src_undef(), + src_undef()); + break; + default: emit_op(c, WM_PINTERP, dst, - 0, 0, 0, + 0, interp, deltas, get_pixel_w(c)); @@ -362,38 +494,6 @@ static void emit_interp( struct brw_wm_compile *c, c->fp_interp_emitted |= 1<<idx; } -static void emit_ddx( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - GLuint idx = inst->SrcReg[0].Index; - struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - - c->fp_deriv_emitted |= 1<<idx; - emit_op(c, - OPCODE_DDX, - inst->DstReg, - 0, 0, 0, - interp, - get_pixel_w(c), - src_undef()); -} - -static void emit_ddy( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - GLuint idx = inst->SrcReg[0].Index; - struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - - c->fp_deriv_emitted |= 1<<idx; - emit_op(c, - OPCODE_DDY, - inst->DstReg, - 0, 0, 0, - interp, - get_pixel_w(c), - src_undef()); -} - /*********************************************************************** * Hacks to extend the program parameter and constant lists. */ @@ -483,13 +583,12 @@ static void precalc_dst( struct brw_wm_compile *c, emit_op(c, OPCODE_MUL, dst_mask(dst, WRITEMASK_Y), - inst->SaturateMode, 0, 0, + inst->SaturateMode, src0, src1, src_undef()); } - if (dst.WriteMask & WRITEMASK_XZ) { struct prog_instruction *swz; GLuint z = GET_SWZ(src0.Swizzle, Z); @@ -499,12 +598,12 @@ static void precalc_dst( struct brw_wm_compile *c, swz = emit_op(c, OPCODE_SWZ, dst_mask(dst, WRITEMASK_XZ), - inst->SaturateMode, 0, 0, + inst->SaturateMode, src_swizzle(src0, SWIZZLE_ONE, z, z, z), src_undef(), src_undef()); /* Avoid letting negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].NegateBase &= ~NEGATE_X; + swz->SrcReg[0].Negate &= ~NEGATE_X; } if (dst.WriteMask & WRITEMASK_W) { /* dst.w = mov src1.w @@ -512,7 +611,7 @@ static void precalc_dst( struct brw_wm_compile *c, emit_op(c, OPCODE_MOV, dst_mask(dst, WRITEMASK_W), - inst->SaturateMode, 0, 0, + inst->SaturateMode, src1, src_undef(), src_undef()); @@ -534,32 +633,40 @@ static void precalc_lit( struct brw_wm_compile *c, swz = emit_op(c, OPCODE_SWZ, dst_mask(dst, WRITEMASK_XW), - 0, 0, 0, + 0, src_swizzle1(src0, SWIZZLE_ONE), src_undef(), src_undef()); /* Avoid letting the negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].NegateBase = 0; + swz->SrcReg[0].Negate = NEGATE_NONE; } - if (dst.WriteMask & WRITEMASK_YZ) { emit_op(c, OPCODE_LIT, dst_mask(dst, WRITEMASK_YZ), - inst->SaturateMode, 0, 0, + inst->SaturateMode, src0, src_undef(), src_undef()); } } + +/** + * Some TEX instructions require extra code, cube map coordinate + * normalization, or coordinate scaling for RECT textures, etc. + * This function emits those extra instructions and the TEX + * instruction itself. + */ static void precalc_tex( struct brw_wm_compile *c, const struct prog_instruction *inst ) { struct prog_src_register coord; struct prog_dst_register tmpcoord; - GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + + assert(unit < BRW_MAX_TEX_UNIT); if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { struct prog_instruction *out; @@ -569,49 +676,56 @@ static void precalc_tex( struct brw_wm_compile *c, struct prog_src_register tmp1src = src_reg_from_dst(tmp1); struct prog_src_register src0 = inst->SrcReg[0]; + /* find longest component of coord vector and normalize it */ tmpcoord = get_temp(c); coord = src_reg_from_dst(tmpcoord); + /* tmpcoord = src0 (i.e.: coord = src0) */ out = emit_op(c, OPCODE_MOV, tmpcoord, - 0, 0, 0, + 0, src0, src_undef(), src_undef()); - out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; + /* tmp0 = MAX(coord.X, coord.Y) */ emit_op(c, OPCODE_MAX, tmp0, - 0, 0, 0, + 0, src_swizzle1(coord, X), src_swizzle1(coord, Y), src_undef()); + /* tmp1 = MAX(tmp0, coord.Z) */ emit_op(c, OPCODE_MAX, tmp1, - 0, 0, 0, + 0, tmp0src, src_swizzle1(coord, Z), src_undef()); + /* tmp0 = 1 / tmp1 */ emit_op(c, OPCODE_RCP, - tmp0, - 0, 0, 0, + dst_mask(tmp0, WRITEMASK_X), + 0, tmp1src, src_undef(), src_undef()); + /* tmpCoord = src0 * tmp0 */ emit_op(c, OPCODE_MUL, tmpcoord, - 0, 0, 0, + 0, src0, - tmp0src, + src_swizzle1(tmp0src, SWIZZLE_X), src_undef()); release_temp(c, tmp0); release_temp(c, tmp1); - } else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { + } + else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { struct prog_src_register scale = search_or_add_param5( c, STATE_INTERNAL, @@ -626,9 +740,13 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, OPCODE_MUL, tmpcoord, - 0, 0, 0, + 0, inst->SrcReg[0], - scale, + src_swizzle(scale, + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_ONE, + SWIZZLE_ONE), src_undef()); coord = src_reg_from_dst(tmpcoord); @@ -642,19 +760,9 @@ static void precalc_tex( struct brw_wm_compile *c, * conversion requires allocating a temporary variable which we * don't have the facility to do that late in the compilation. */ - if (!(c->key.yuvtex_mask & (1<<unit))) { - emit_op(c, - OPCODE_TEX, - inst->DstReg, - inst->SaturateMode, - unit, - inst->TexSrcTarget, - coord, - src_undef(), - src_undef()); - } - else { - GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit); + if (c->key.yuvtex_mask & (1 << unit)) { + /* convert ycbcr to RGBA */ + GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit); /* CONST C0 = { -.5, -.0625, -.5, 1.164 } @@ -676,22 +784,23 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp = TEX ... */ - emit_op(c, - OPCODE_TEX, - tmp, - inst->SaturateMode, - unit, - inst->TexSrcTarget, - coord, - src_undef(), - src_undef()); + emit_tex_op(c, + OPCODE_TEX, + tmp, + inst->SaturateMode, + unit, + inst->TexSrcTarget, + inst->TexShadow, + coord, + src_undef(), + src_undef()); /* tmp.xyz = ADD TMP, C0 */ emit_op(c, OPCODE_ADD, dst_mask(tmp, WRITEMASK_XYZ), - 0, 0, 0, + 0, tmpsrc, C0, src_undef()); @@ -702,7 +811,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, OPCODE_MUL, dst_mask(tmp, WRITEMASK_Y), - 0, 0, 0, + 0, tmpsrc, src_swizzle1(C0, W), src_undef()); @@ -717,7 +826,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, OPCODE_MAD, dst_mask(dst, WRITEMASK_XYZ), - 0, 0, 0, + 0, swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), C1, src_swizzle1(tmpsrc, Y)); @@ -727,13 +836,38 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, OPCODE_MAD, dst_mask(dst, WRITEMASK_Y), - 0, 0, 0, + 0, src_swizzle1(tmpsrc, Z), src_swizzle1(C1, W), src_swizzle1(src_reg_from_dst(dst), Y)); release_temp(c, tmp); } + else { + /* ordinary RGBA tex instruction */ + emit_tex_op(c, + OPCODE_TEX, + inst->DstReg, + inst->SaturateMode, + unit, + inst->TexSrcTarget, + inst->TexShadow, + coord, + src_undef(), + src_undef()); + } + + /* For GL_EXT_texture_swizzle: */ + if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) { + /* swizzle the result of the TEX instruction */ + struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg); + emit_op(c, OPCODE_SWZ, + inst->DstReg, + SATURATE_OFF, /* saturate already done above */ + src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), + src_undef(), + src_undef()); + } if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) @@ -741,10 +875,16 @@ static void precalc_tex( struct brw_wm_compile *c, } +/** + * Check if the given TXP instruction really needs the divide-by-W step. + */ static GLboolean projtex( struct brw_wm_compile *c, const struct prog_instruction *inst ) { - struct prog_src_register src = inst->SrcReg[0]; + const struct prog_src_register src = inst->SrcReg[0]; + GLboolean retVal; + + assert(inst->Opcode == OPCODE_TXP); /* Only try to detect the simplest cases. Could detect (later) * cases where we are trying to emit code like RCP {1.0}, MUL x, @@ -754,16 +894,21 @@ static GLboolean projtex( struct brw_wm_compile *c, * user-provided fragment programs anyway: */ if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) - return 0; /* ut2004 gun rendering !?! */ + retVal = GL_FALSE; /* ut2004 gun rendering !?! */ else if (src.File == PROGRAM_INPUT && GET_SWZ(src.Swizzle, W) == W && - (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0) - return 0; + (c->key.proj_attrib_mask & (1 << src.Index)) == 0) + retVal = GL_FALSE; else - return 1; + retVal = GL_TRUE; + + return retVal; } +/** + * Emit code for TXP. + */ static void precalc_txp( struct brw_wm_compile *c, const struct prog_instruction *inst ) { @@ -778,7 +923,7 @@ static void precalc_txp( struct brw_wm_compile *c, emit_op(c, OPCODE_RCP, dst_mask(tmp, WRITEMASK_W), - 0, 0, 0, + 0, src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), src_undef(), src_undef()); @@ -788,7 +933,7 @@ static void precalc_txp( struct brw_wm_compile *c, emit_op(c, OPCODE_MUL, dst_mask(tmp, WRITEMASK_XYZ), - 0, 0, 0, + 0, src0, src_swizzle1(src_reg_from_dst(tmp), W), src_undef()); @@ -814,42 +959,41 @@ static void precalc_txp( struct brw_wm_compile *c, static void emit_fb_write( struct brw_wm_compile *c ) { struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); - struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR); + struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); struct prog_src_register outcolor; GLuint i; struct prog_instruction *inst, *last_inst; struct brw_context *brw = c->func.brw; - /* inst->Sampler is not used by backend, - use it for fb write target and eot */ - - if (brw->state.nr_draw_regions > 1) { - for (i = 0 ; i < brw->state.nr_draw_regions; i++) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); - last_inst = inst = emit_op(c, - WM_FB_WRITE, dst_mask(dst_undef(),0), 0, 0, 0, - outcolor, payload_r0_depth, outdepth); - inst->Sampler = (i<<1); - if (c->fp_fragcolor_emitted) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); - last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, 0, 0, outcolor, payload_r0_depth, outdepth); - inst->Sampler = (i<<1); - } - } - last_inst->Sampler |= 1; //eot + /* The inst->Aux field is used for FB write target and the EOT marker */ + + if (brw->state.nr_color_regions > 1) { + for (i = 0 ; i < brw->state.nr_color_regions; i++) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); + last_inst = inst = emit_op(c, + WM_FB_WRITE, dst_mask(dst_undef(),0), 0, + outcolor, payload_r0_depth, outdepth); + inst->Aux = (i<<1); + if (c->fp_fragcolor_emitted) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); + last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = (i<<1); + } + } + last_inst->Aux |= 1; //eot } else { /* if gl_FragData[0] is written, use it, else use gl_FragColor */ if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0)) outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); else - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, 0, 0, outcolor, payload_r0_depth, outdepth); - inst->Sampler = 1|(0<<1); + inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = 1|(0<<1); } } @@ -880,9 +1024,9 @@ static void validate_dst_regs( struct brw_wm_compile *c, const struct prog_instruction *inst ) { if (inst->DstReg.File == PROGRAM_OUTPUT) { - GLuint idx = inst->DstReg.Index; - if (idx == FRAG_RESULT_COLR) - c->fp_fragcolor_emitted = 1; + GLuint idx = inst->DstReg.Index; + if (idx == FRAG_RESULT_COLOR) + c->fp_fragcolor_emitted = 1; } } @@ -902,11 +1046,15 @@ static void print_insns( const struct prog_instruction *insn, 3); } else - _mesa_printf("UNKNOWN\n"); - + _mesa_printf("965 Opcode %d\n", insn->Opcode); } } + +/** + * Initial pass for fragment program code generation. + * This function is used by both the GLSL and non-GLSL paths. + */ void brw_wm_pass_fp( struct brw_wm_compile *c ) { struct brw_fragment_program *fp = c->fp; @@ -922,16 +1070,21 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) c->delta_xy = src_undef(); c->pixel_w = src_undef(); c->nr_fp_insns = 0; + c->fp->tex_units_used = 0x0; - /* Emit preamble instructions: + /* Emit preamble instructions. This is where special instructions such as + * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to + * compute shader inputs from varying vars. */ - - for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; validate_src_regs(c, inst); validate_dst_regs(c, inst); } + + /* Loop over all instructions doing assorted simplifications and + * transformations. + */ for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; struct prog_instruction *out; @@ -940,7 +1093,6 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) * necessary: */ - switch (inst->Opcode) { case OPCODE_SWZ: out = emit_insn(c, inst); @@ -950,14 +1102,14 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) case OPCODE_ABS: out = emit_insn(c, inst); out->Opcode = OPCODE_MOV; - out->SrcReg[0].NegateBase = 0; + out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; break; case OPCODE_SUB: out = emit_insn(c, inst); out->Opcode = OPCODE_ADD; - out->SrcReg[1].NegateBase ^= 0xf; + out->SrcReg[1].Negate ^= NEGATE_XYZW; break; case OPCODE_SCS: @@ -986,6 +1138,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) case OPCODE_TXB: out = emit_insn(c, inst); out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); break; case OPCODE_XPD: @@ -1001,28 +1154,24 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) */ out->DstReg.WriteMask = 0; break; - case OPCODE_DDX: - emit_ddx(c, inst); - break; - case OPCODE_DDY: - emit_ddy(c, inst); - break; case OPCODE_END: emit_fb_write(c); break; case OPCODE_PRINT: break; - default: - emit_insn(c, inst); + if (brw_wm_is_scalar_result(inst->Opcode)) + emit_scalar_insn(c, inst); + else + emit_insn(c, inst); break; } } if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pass_fp:\n"); - print_insns( c->prog_instructions, c->nr_fp_insns ); - _mesa_printf("\n"); + _mesa_printf("pass_fp:\n"); + print_insns( c->prog_instructions, c->nr_fp_insns ); + _mesa_printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 8fd776ac39..c9fe1dd8ad 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1,5 +1,7 @@ #include "main/macros.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_optimize.h" #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" @@ -8,21 +10,28 @@ enum _subroutine { SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 }; -/* Only guess, need a flag in gl_fragment_program later */ +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint component); + +/** + * Determine if the given fragment program uses GLSL features such + * as flow conditionals, loops, subroutines. + * Some GLSL shaders may use these features, others might not. + */ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) { int i; + for (i = 0; i < fp->Base.NumInstructions; i++) { - struct prog_instruction *inst = &fp->Base.Instructions[i]; + const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { + case OPCODE_ARL: case OPCODE_IF: - case OPCODE_TRUNC: case OPCODE_ENDIF: case OPCODE_CAL: case OPCODE_BRK: case OPCODE_RET: - case OPCODE_DDX: - case OPCODE_DDY: case OPCODE_NOISE1: case OPCODE_NOISE2: case OPCODE_NOISE3: @@ -36,6 +45,87 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) return GL_FALSE; } + + +static void +reclaim_temps(struct brw_wm_compile *c); + + +/** Mark GRF register as used. */ +static void +prealloc_grf(struct brw_wm_compile *c, int r) +{ + c->used_grf[r] = GL_TRUE; +} + + +/** Mark given GRF register as not in use. */ +static void +release_grf(struct brw_wm_compile *c, int r) +{ + /*assert(c->used_grf[r]);*/ + c->used_grf[r] = GL_FALSE; + c->first_free_grf = MIN2(c->first_free_grf, r); +} + + +/** Return index of a free GRF, mark it as used. */ +static int +alloc_grf(struct brw_wm_compile *c) +{ + GLuint r; + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + /* no free temps, try to reclaim some */ + reclaim_temps(c); + c->first_free_grf = 0; + + /* try alloc again */ + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + for (r = 0; r < BRW_WM_MAX_GRF; r++) { + assert(c->used_grf[r]); + } + + /* really, no free GRF regs found */ + if (!c->out_of_regs) { + /* print warning once per compilation */ + _mesa_warning(NULL, "i965: ran out of registers for fragment program"); + c->out_of_regs = GL_TRUE; + } + + return -1; +} + + +/** Return number of GRF registers used */ +static int +num_grf_used(const struct brw_wm_compile *c) +{ + int r; + for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) + if (c->used_grf[r]) + return r + 1; + return 0; +} + + + +/** + * Record the mapping of a Mesa register to a hardware register. + */ static void set_reg(struct brw_wm_compile *c, int file, int index, int component, struct brw_reg reg) { @@ -43,25 +133,32 @@ static void set_reg(struct brw_wm_compile *c, int file, int index, c->wm_regs[file][index][component].inited = GL_TRUE; } -static int get_scalar_dst_index(struct prog_instruction *inst) -{ - int i; - for (i = 0; i < 4; i++) - if (inst->DstReg.WriteMask & (1<<i)) - break; - return i; -} - static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { struct brw_reg reg; - if(c->tmp_index == c->tmp_max) - c->tmp_regs[ c->tmp_max++ ] = c->reg_index++; - + + /* if we need to allocate another temp, grow the tmp_regs[] array */ + if (c->tmp_index == c->tmp_max) { + int r = alloc_grf(c); + if (r < 0) { + /*printf("Out of temps in %s\n", __FUNCTION__);*/ + r = 50; /* XXX random register! */ + } + c->tmp_regs[ c->tmp_max++ ] = r; + } + + /* form the GRF register */ reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); + /*printf("alloc_temp %d\n", reg.nr);*/ + assert(reg.nr < BRW_WM_MAX_GRF); return reg; + } +/** + * Save current temp register info. + * There must be a matching call to release_tmps(). + */ static int mark_tmps(struct brw_wm_compile *c) { return c->tmp_index; @@ -77,8 +174,22 @@ static void release_tmps(struct brw_wm_compile *c, int mark) c->tmp_index = mark; } +/** + * Convert Mesa src register to brw register. + * + * Since we're running in SOA mode each Mesa register corresponds to four + * hardware registers. We allocate the hardware registers as needed here. + * + * \param file register file, one of PROGRAM_x + * \param index register number + * \param component src component (X=0, Y=1, Z=2, W=3) + * \param nr not used?!? + * \param neg negate value? + * \param abs take absolute value? + */ static struct brw_reg -get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs) +get_reg(struct brw_wm_compile *c, int file, int index, int component, + int nr, GLuint neg, GLuint abs) { struct brw_reg reg; switch (file) { @@ -89,21 +200,40 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL break; case PROGRAM_UNDEFINED: return brw_null_reg(); - default: + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + case PROGRAM_PAYLOAD: break; + default: + _mesa_problem(NULL, "Unexpected file in get_reg()"); + return brw_null_reg(); } - if(c->wm_regs[file][index][component].inited) - reg = c->wm_regs[file][index][component].reg; - else - reg = brw_vec8_grf(c->reg_index, 0); + assert(index < 256); + assert(component < 4); - if(!c->wm_regs[file][index][component].inited) { - set_reg(c, file, index, component, reg); - c->reg_index++; + /* see if we've already allocated a HW register for this Mesa register */ + if (c->wm_regs[file][index][component].inited) { + /* yes, re-use */ + reg = c->wm_regs[file][index][component].reg; + } + else { + /* no, allocate new register */ + int grf = alloc_grf(c); + /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/ + if (grf < 0) { + /* totally out of temps */ + grf = 51; /* XXX random register! */ + } + + reg = brw_vec8_grf(grf, 0); + /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ + + set_reg(c, file, index, component, reg); } - if (neg & (1<< component)) { + if (neg & (1 << component)) { reg = negate(reg); } if (abs) @@ -111,78 +241,360 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL return reg; } + + +/** + * This is called if we run out of GRF registers. Examine the live intervals + * of temp regs in the program and free those which won't be used again. + */ +static void +reclaim_temps(struct brw_wm_compile *c) +{ + GLint intBegin[MAX_PROGRAM_TEMPS]; + GLint intEnd[MAX_PROGRAM_TEMPS]; + int index; + + /*printf("Reclaim temps:\n");*/ + + _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, + intBegin, intEnd); + + for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { + if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { + /* program temp[i] can be freed */ + int component; + /*printf(" temp[%d] is dead\n", index);*/ + for (component = 0; component < 4; component++) { + if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { + int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; + release_grf(c, r); + /* + printf(" Reclaim temp %d, reg %d at inst %d\n", + index, r, c->cur_inst); + */ + c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; + } + } + } + } +} + + + + +/** + * Preallocate registers. This sets up the Mesa to hardware register + * mapping for certain registers, such as constants (uniforms/state vars) + * and shader inputs. + */ static void prealloc_reg(struct brw_wm_compile *c) { int i, j; struct brw_reg reg; - int nr_interp_regs = 0; - GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; + int urb_read_length = 0; + GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; + GLuint reg_index = 0; + + memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); + c->first_free_grf = 0; for (i = 0; i < 4; i++) { - reg = (i < c->key.nr_depth_regs) - ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0); + if (i < c->key.nr_depth_regs) + reg = brw_vec8_grf(i * 2, 0); + else + reg = brw_vec8_grf(0, 0); set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); } - c->reg_index += 2*c->key.nr_depth_regs; + reg_index += 2 * c->key.nr_depth_regs; + + /* constants */ { - int nr_params = c->fp->program.Base.Parameters->NumParameters; - struct gl_program_parameter_list *plist = - c->fp->program.Base.Parameters; - int index = 0; - c->prog_data.nr_params = 4*nr_params; - for (i = 0; i < nr_params; i++) { - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index/8, - index%8); - c->prog_data.param[index] = - &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); - } - } - c->nr_creg = 2*((4*nr_params+15)/16); - c->reg_index += c->nr_creg; + const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_temps = c->fp->program.Base.NumTemporaries; + + /* use a real constant buffer, or just use a section of the GRF? */ + /* XXX this heuristic may need adjustment... */ + if ((nr_params + nr_temps) * 4 + reg_index > 80) + c->fp->use_const_buffer = GL_TRUE; + else + c->fp->use_const_buffer = GL_FALSE; + /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/ + + if (c->fp->use_const_buffer) { + /* We'll use a real constant buffer and fetch constants from + * it with a dataport read message. + */ + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 0; + } + else { + const struct gl_program_parameter_list *plist = + c->fp->program.Base.Parameters; + int index = 0; + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 4 * nr_params; + + /* loop over program constants (float[4]) */ + for (i = 0; i < nr_params; i++) { + /* loop over XYZW channels */ + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(reg_index + index / 8, index % 8); + /* Save pointer to parameter/constant value. + * Constants will be copied in prepare_constant_buffer() + */ + c->prog_data.param[index] = &plist->ParameterValues[i][j]; + set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + } + } + /* number of constant regs used (each reg is float[8]) */ + c->nr_creg = 2 * ((4 * nr_params + 15) / 16); + reg_index += c->nr_creg; + } } - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - if (inputs & (1<<i)) { - nr_interp_regs++; - reg = brw_vec8_grf(c->reg_index, 0); - for (j = 0; j < 4; j++) - set_reg(c, PROGRAM_PAYLOAD, i, j, reg); - c->reg_index += 2; - } + /* fragment shader inputs */ + for (i = 0; i < VERT_RESULT_MAX; i++) { + int fp_input; + + if (i >= VERT_RESULT_VAR0) + fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; + else if (i <= VERT_RESULT_TEX7) + fp_input = i; + else + fp_input = -1; + + if (fp_input >= 0 && inputs & (1 << fp_input)) { + urb_read_length = reg_index; + reg = brw_vec8_grf(reg_index, 0); + for (j = 0; j < 4; j++) + set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); + } + if (c->key.vp_outputs_written & (1 << i)) { + reg_index += 2; + } } + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = nr_interp_regs * 2; + c->prog_data.urb_read_length = urb_read_length; c->prog_data.curb_read_length = c->nr_creg; - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index++; - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index += 2; + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index++; + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index += 2; + + /* mark GRF regs [0..reg_index-1] as in-use */ + for (i = 0; i < reg_index; i++) + prealloc_grf(c, i); + + /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */ + prealloc_grf(c, 126); + prealloc_grf(c, 127); + + for (i = 0; i < c->nr_fp_insns; i++) { + const struct prog_instruction *inst = &c->prog_instructions[i]; + struct brw_reg dst[4]; + + switch (inst->Opcode) { + case OPCODE_TEX: + case OPCODE_TXB: + /* Allocate the channels of texture results contiguously, + * since they are written out that way by the sampler unit. + */ + for (j = 0; j < 4; j++) { + dst[j] = get_dst_reg(c, inst, j); + if (j != 0) + assert(dst[j].nr == dst[j - 1].nr + 1); + } + break; + default: + break; + } + } + + /* An instruction may reference up to three constants. + * They'll be found in these registers. + * XXX alloc these on demand! + */ + if (c->fp->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); + } + } +#if 0 + printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); + printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index); +#endif +} + + +/** + * Check if any of the instruction's src registers are constants, uniforms, + * or statevars. If so, fetch any constants that we don't already have in + * the three GRF slots. + */ +static void fetch_constants(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint i; + + /* loop over instruction src regs */ + for (i = 0; i < 3; i++) { + const struct prog_src_register *src = &inst->SrcReg[i]; + if (src->File == PROGRAM_STATE_VAR || + src->File == PROGRAM_CONSTANT || + src->File == PROGRAM_UNIFORM) { + c->current_const[i].index = src->Index; + +#if 0 + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, i, c->current_const[i].reg.nr); +#endif + + /* need to fetch the constant now */ + brw_dp_READ_4(p, + c->current_const[i].reg, /* writeback dest */ + src->RelAddr, /* relative indexing? */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ + ); + } + } } + +/** + * Convert Mesa dst register to brw register. + */ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - struct prog_instruction *inst, int component, int nr) + const struct prog_instruction *inst, + GLuint component) { + const int nr = 1; return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, 0, 0); } + +static struct brw_reg +get_src_reg_const(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint component) +{ + /* We should have already fetched the constant from the constant + * buffer in fetch_constants(). Now we just have to return a + * register description that extracts the needed component and + * smears it across all eight vector components. + */ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + struct brw_reg const_reg; + + assert(component < 4); + assert(srcRegIndex < 3); + assert(c->current_const[srcRegIndex].index != -1); + const_reg = c->current_const[srcRegIndex].reg; + + /* extract desired float from the const_reg, and smear */ + const_reg = stride(const_reg, 0, 1, 0); + const_reg.subnr = component * 4; + + if (src->Negate & (1 << component)) + const_reg = negate(const_reg); + if (src->Abs) + const_reg = brw_abs(const_reg); + +#if 0 + printf(" form const[%d].%d for arg %d, reg %d\n", + c->current_const[srcRegIndex].index, + component, + srcRegIndex, + const_reg.nr); +#endif + + return const_reg; +} + + +/** + * Convert Mesa src register to brw register. + */ static struct brw_reg get_src_reg(struct brw_wm_compile *c, - struct prog_src_register *src, int index, int nr) + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint channel) { - int component = GET_SWZ(src->Swizzle, index); - return get_reg(c, src->File, src->Index, component, nr, - src->NegateBase, src->Abs); + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + const GLuint nr = 1; + const GLuint component = GET_SWZ(src->Swizzle, channel); + + /* Extended swizzle terms */ + if (component == SWIZZLE_ZERO) { + return brw_imm_f(0.0F); + } + else if (component == SWIZZLE_ONE) { + return brw_imm_f(1.0F); + } + + if (c->fp->use_const_buffer && + (src->File == PROGRAM_STATE_VAR || + src->File == PROGRAM_CONSTANT || + src->File == PROGRAM_UNIFORM)) { + return get_src_reg_const(c, inst, srcRegIndex, component); + } + else { + /* other type of source register */ + return get_reg(c, src->File, src->Index, component, nr, + src->Negate, src->Abs); + } } -/* Subroutines are minimal support for resusable instruction sequences. - They are implemented as simply as possible to minimise overhead: there - is no explicit support for communication between the caller and callee - other than saving the return address in a temporary register, nor is - there any automatic local storage. This implies that great care is - required before attempting reentrancy or any kind of nested - subroutine invocations. */ + +/** + * Same as \sa get_src_reg() but if the register is a literal, emit + * a brw_reg encoding the literal. + * Note that a brw instruction only allows one src operand to be a literal. + * For instructions with more than one operand, only the second can be a + * literal. This means that we treat some literals as constants/uniforms + * (which why PROGRAM_CONSTANT is checked in fetch_constants()). + * + */ +static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint channel) +{ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + if (src->File == PROGRAM_CONSTANT) { + /* a literal */ + const int component = GET_SWZ(src->Swizzle, channel); + const GLfloat *param = + c->fp->program.Base.Parameters->ParameterValues[src->Index]; + GLfloat value = param[component]; + if (src->Negate & (1 << channel)) + value = -value; + if (src->Abs) + value = FABSF(value); +#if 0 + printf(" form immed value %f for chan %d\n", value, channel); +#endif + return brw_imm_f(value); + } + else { + return get_src_reg(c, inst, srcRegIndex, channel); + } +} + + +/** + * Subroutines are minimal support for resusable instruction sequences. + * They are implemented as simply as possible to minimise overhead: there + * is no explicit support for communication between the caller and callee + * other than saving the return address in a temporary register, nor is + * there any automatic local storage. This implies that great care is + * required before attempting reentrancy or any kind of nested + * subroutine invocations. + */ static void invoke_subroutine( struct brw_wm_compile *c, enum _subroutine subroutine, void (*emit)( struct brw_wm_compile * ) ) @@ -238,25 +650,8 @@ static void invoke_subroutine( struct brw_wm_compile *c, } } -static void emit_abs( struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (inst->DstReg.WriteMask & (1<<i)) { - struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1); - src = get_src_reg(c, &inst->SrcReg[0], i, 1); - brw_MOV(p, dst, brw_abs(src)); - } - } - brw_set_saturate(p, 0); -} - static void emit_trunc( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -265,8 +660,8 @@ static void emit_trunc( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1) ; - src = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, inst, 0, i); brw_RNDZ(p, dst, src); } } @@ -274,7 +669,7 @@ static void emit_trunc( struct brw_wm_compile *c, } static void emit_mov( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -283,8 +678,10 @@ static void emit_mov( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i, 1); - src = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + /* XXX some moves from immediate value don't work reliably!!! */ + /*src = get_src_reg_imm(c, inst, 0, i);*/ + src = get_src_reg(c, inst, 0, i); brw_MOV(p, dst, src); } } @@ -292,7 +689,7 @@ static void emit_mov( struct brw_wm_compile *c, } static void emit_pixel_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); @@ -301,8 +698,8 @@ static void emit_pixel_xy(struct brw_wm_compile *c, struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ @@ -319,21 +716,20 @@ static void emit_pixel_xy(struct brw_wm_compile *c, stride(suboffset(r1_uw, 5), 2, 4, 0), brw_imm_v(0x11001100)); } - } static void emit_delta_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg dst0, dst1, src0, src1; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - dst0 = get_dst_reg(c, inst, 0, 1); - dst1 = get_dst_reg(c, inst, 1, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1); + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); + src0 = get_src_reg(c, inst, 0, 0); + src1 = get_src_reg(c, inst, 0, 1); /* Calc delta X,Y by subtracting origin in r1 from the pixel * centers. */ @@ -351,10 +747,8 @@ static void emit_delta_xy(struct brw_wm_compile *c, negate(suboffset(r1,1))); } - } - static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, GLuint nr, @@ -385,7 +779,7 @@ static void fire_fb_write( struct brw_wm_compile *c, } static void emit_fb_write(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; int nr = 2; @@ -397,38 +791,63 @@ static void emit_fb_write(struct brw_wm_compile *c, */ if (c->key.aa_dest_stencil_reg) nr += 1; - { - brw_push_insn_state(p); - for (channel = 0; channel < 4; channel++) { - src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_MOV(p, brw_message_reg(nr + channel), src0); - } - /* skip over the regs populated above: */ - nr += 8; - brw_pop_insn_state(p); + + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + src0 = get_src_reg(c, inst, 0, channel); + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); - if (c->key.source_depth_to_render_target) - { - if (c->key.computes_depth) { - src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } else { - src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } + if (c->key.source_depth_to_render_target) { + if (c->key.computes_depth) { + src0 = get_src_reg(c, inst, 2, 2); + brw_MOV(p, brw_message_reg(nr), src0); + } + else { + src0 = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } + + nr += 2; + } + + if (c->key.dest_depth_reg) { + const GLuint comp = c->key.dest_depth_reg / 2; + const GLuint off = c->key.dest_depth_reg % 2; + + if (off != 0) { + /* XXX this code needs review/testing */ + struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp); + struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1); - nr += 2; + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1)); + /* 2nd half? */ + brw_MOV(p, brw_message_reg(nr+1), arg1_1); + brw_pop_insn_state(p); + } + else + { + struct brw_reg src = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src); + } + nr += 2; } - target = inst->Sampler >> 1; - eot = inst->Sampler & 1; + + target = inst->Aux >> 1; + eot = inst->Aux & 1; fire_fb_write(c, 0, nr, target, eot); } static void emit_pixel_w( struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -436,10 +855,10 @@ static void emit_pixel_w( struct brw_wm_compile *c, struct brw_reg dst, src0, delta0, delta1; struct brw_reg interp3; - dst = get_dst_reg(c, inst, 3, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + dst = get_dst_reg(c, inst, 3); + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); interp3 = brw_vec1_grf(src0.nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the @@ -458,19 +877,19 @@ static void emit_pixel_w( struct brw_wm_compile *c, } static void emit_linterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg interp[4]; struct brw_reg dst, delta0, delta1; struct brw_reg src0; + GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - GLuint nr = src0.nr; - int i; + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -479,7 +898,7 @@ static void emit_linterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_LINE(p, brw_null_reg(), interp[i], delta0); brw_MAC(p, dst, suboffset(interp[i],1), delta1); } @@ -487,17 +906,17 @@ static void emit_linterp(struct brw_wm_compile *c, } static void emit_cinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg interp[4]; struct brw_reg dst, src0; + GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - GLuint nr = src0.nr; - int i; + src0 = get_src_reg(c, inst, 0, 0); + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -506,14 +925,14 @@ static void emit_cinterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV(p, dst, suboffset(interp[i],3)); } } } static void emit_pinterp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -521,13 +940,13 @@ static void emit_pinterp(struct brw_wm_compile *c, struct brw_reg interp[4]; struct brw_reg dst, delta0, delta1; struct brw_reg src0, w; + GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); - delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - w = get_src_reg(c, &inst->SrcReg[2], 3, 1); - GLuint nr = src0.nr; - int i; + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + w = get_src_reg(c, inst, 2, 3); + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -536,7 +955,7 @@ static void emit_pinterp(struct brw_wm_compile *c, for(i = 0; i < 4; i++ ) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_LINE(p, brw_null_reg(), interp[i], delta0); brw_MAC(p, dst, suboffset(interp[i],1), delta1); @@ -545,8 +964,38 @@ static void emit_pinterp(struct brw_wm_compile *c, } } +/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ +static void emit_frontfacing(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); + struct brw_reg dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, brw_imm_f(0.0)); + } + } + + /* bit 31 is "primitive is back face", so checking < (1 << 31) gives + * us front face + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31)); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, brw_imm_f(1.0)); + } + } + brw_set_predicate_control_flag_value(p, 0xff); +} + static void emit_xpd(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -556,12 +1005,12 @@ static void emit_xpd(struct brw_wm_compile *c, GLuint i1 = (i+1)%3; if (mask & (1<<i)) { struct brw_reg src0, src1, dst; - dst = get_dst_reg(c, inst, i, 1); - src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1)); - src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1); + dst = get_dst_reg(c, inst, i); + src0 = negate(get_src_reg(c, inst, 0, i2)); + src1 = get_src_reg_imm(c, inst, 1, i1); brw_MUL(p, brw_null_reg(), src0, src1); - src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1); + src0 = get_src_reg(c, inst, 0, i1); + src1 = get_src_reg_imm(c, inst, 1, i2); brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); brw_MAC(p, dst, src0, src1); brw_set_saturate(p, 0); @@ -571,17 +1020,25 @@ static void emit_xpd(struct brw_wm_compile *c, } static void emit_dp3(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[3], src1[3], dst; int i; struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + for (i = 0; i < 3; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); @@ -590,16 +1047,24 @@ static void emit_dp3(struct brw_wm_compile *c, } static void emit_dp4(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, brw_null_reg(), src0[2], src1[2]); @@ -609,16 +1074,24 @@ static void emit_dp4(struct brw_wm_compile *c, } static void emit_dph(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1); + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); } - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + dst = get_dst_reg(c, inst, dst_chan); brw_MUL(p, brw_null_reg(), src0[0], src1[0]); brw_MAC(p, brw_null_reg(), src0[1], src1[1]); brw_MAC(p, dst, src0[2], src1[2]); @@ -627,63 +1100,77 @@ static void emit_dph(struct brw_wm_compile *c, brw_set_saturate(p, 0); } +/** + * Emit a scalar instruction, like RCP, RSQ, LOG, EXP. + * Note that the result of the function is smeared across the dest + * register's X, Y, Z and W channels (subject to writemasking of course). + */ static void emit_math1(struct brw_wm_compile *c, - struct prog_instruction *inst, GLuint func) + const struct prog_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + /* Get first component of source register */ + dst = get_dst_reg(c, inst, dst_chan); + src0 = get_src_reg(c, inst, 0, 0); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); brw_MOV(p, brw_message_reg(2), src0); brw_math(p, - dst, - func, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); + dst, + func, + (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); } static void emit_rcp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_INV); } static void emit_rsq(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); } static void emit_sin(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); } static void emit_cos(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_COS); } static void emit_ex2(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); } static void emit_lg2(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); } static void emit_add(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -692,36 +1179,31 @@ static void emit_add(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_ADD(p, dst, src0, src1); } } brw_set_saturate(p, 0); } -static void emit_sub(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_arl(struct brw_wm_compile *c, + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; + struct brw_reg src0, addr_reg; brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_ADD(p, dst, src0, negate(src1)); - } - } + addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, 0); + src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */ + brw_MOV(p, addr_reg, src0); brw_set_saturate(p, 0); } + static void emit_mul(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -730,9 +1212,9 @@ static void emit_mul(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_MUL(p, dst, src0, src1); } } @@ -740,7 +1222,7 @@ static void emit_mul(struct brw_wm_compile *c, } static void emit_frc(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -749,8 +1231,8 @@ static void emit_frc(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); brw_FRC(p, dst, src0); } } @@ -759,7 +1241,7 @@ static void emit_frc(struct brw_wm_compile *c, } static void emit_flr(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -768,78 +1250,79 @@ static void emit_flr(struct brw_wm_compile *c, brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg_imm(c, inst, 0, i); brw_RNDD(p, dst, src0); } } brw_set_saturate(p, 0); } -static void emit_max(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg src0, src1, dst; - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - } - } - brw_pop_insn_state(p); -} -static void emit_min(struct brw_wm_compile *c, - struct prog_instruction *inst) +static void emit_min_max(struct brw_wm_compile *c, + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg src0, src1, dst; + const GLuint mask = inst->DstReg.WriteMask; + const int mark = mark_tmps(c); int i; brw_push_insn_state(p); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + struct brw_reg real_dst = get_dst_reg(c, inst, i); + struct brw_reg src0 = get_src_reg(c, inst, 0, i); + struct brw_reg src1 = get_src_reg(c, inst, 1, i); + struct brw_reg dst; + /* if dst==src0 or dst==src1 we need to use a temp reg */ + GLboolean use_temp = brw_same_reg(dst, src0) || + brw_same_reg(dst, src1); + if (use_temp) + dst = alloc_tmp(c); + else + dst = real_dst; + + /* + printf(" Min/max: dst %d src0 %d src1 %d\n", + dst.nr, src0.nr, src1.nr); + */ brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_MOV(p, dst, src0); brw_set_saturate(p, 0); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + if (inst->Opcode == OPCODE_MIN) + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + else + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_MOV(p, dst, src1); brw_set_saturate(p, 0); brw_set_predicate_control_flag_value(p, 0xff); + if (use_temp) + brw_MOV(p, real_dst, dst); } } brw_pop_insn_state(p); + release_tmps(c, mark); } static void emit_pow(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1); + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + dst = get_dst_reg(c, inst, dst_chan); + src0 = get_src_reg_imm(c, inst, 0, 0); + src1 = get_src_reg_imm(c, inst, 1, 0); brw_MOV(p, brw_message_reg(2), src0); brw_MOV(p, brw_message_reg(3), src1); @@ -855,7 +1338,7 @@ static void emit_pow(struct brw_wm_compile *c, } static void emit_lrp(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -864,10 +1347,10 @@ static void emit_lrp(struct brw_wm_compile *c, int mark = mark_tmps(c); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + src1 = get_src_reg_imm(c, inst, 1, i); if (src1.nr == dst.nr) { tmp1 = alloc_tmp(c); @@ -875,7 +1358,7 @@ static void emit_lrp(struct brw_wm_compile *c, } else tmp1 = src1; - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + src2 = get_src_reg(c, inst, 2, i); if (src2.nr == dst.nr) { tmp2 = alloc_tmp(c); brw_MOV(p, tmp2, src2); @@ -908,7 +1391,7 @@ static void emit_kil(struct brw_wm_compile *c) } static void emit_mad(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -917,10 +1400,10 @@ static void emit_mad(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); - src2 = get_src_reg(c, &inst->SrcReg[2], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); + src2 = get_src_reg_imm(c, inst, 2, i); brw_MUL(p, dst, src0, src1); brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); @@ -931,7 +1414,7 @@ static void emit_mad(struct brw_wm_compile *c, } static void emit_sop(struct brw_wm_compile *c, - struct prog_instruction *inst, GLuint cond) + const struct prog_instruction *inst, GLuint cond) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -940,9 +1423,9 @@ static void emit_sop(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - src0 = get_src_reg(c, &inst->SrcReg[0], i, 1); - src1 = get_src_reg(c, &inst->SrcReg[1], i, 1); + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, inst, 0, i); + src1 = get_src_reg_imm(c, inst, 1, i); brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), cond, src0, src1); brw_set_predicate_control(p, BRW_PREDICATE_NONE); @@ -955,113 +1438,58 @@ static void emit_sop(struct brw_wm_compile *c, } static void emit_slt(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_L); } static void emit_sle(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_LE); } static void emit_sgt(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_G); } static void emit_sge(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_GE); } static void emit_seq(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_EQ); } static void emit_sne(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_NEQ); } -static void emit_ddx(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - GLuint nr, i; - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); - nr = src0.nr; - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - brw_MOV(p, dst, interp[i]); - brw_MUL(p, dst, dst, w); - } - } - brw_set_saturate(p, 0); -} - -static void emit_ddy(struct brw_wm_compile *c, - struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - GLuint nr, i; - - src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - nr = src0.nr; - w = get_src_reg(c, &inst->SrcReg[1], 3, 1); - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); - brw_MOV(p, dst, suboffset(interp[i], 1)); - brw_MUL(p, dst, dst, w); - } - } - brw_set_saturate(p, 0); -} - -static __inline struct brw_reg high_words( struct brw_reg reg ) +static INLINE struct brw_reg high_words( struct brw_reg reg ) { return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), 0, 8, 2 ); } -static __inline struct brw_reg low_words( struct brw_reg reg ) +static INLINE struct brw_reg low_words( struct brw_reg reg ) { return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); } -static __inline struct brw_reg even_bytes( struct brw_reg reg ) +static INLINE struct brw_reg even_bytes( struct brw_reg reg ) { return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 ); } -static __inline struct brw_reg odd_bytes( struct brw_reg reg ) +static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) { return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ), 0, 16, 2 ); @@ -1161,7 +1589,7 @@ static void noise1_sub( struct brw_wm_compile *c ) { } static void emit_noise1( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src, param, dst; @@ -1171,7 +1599,7 @@ static void emit_noise1( struct brw_wm_compile *c, assert( mark == 0 ); - src = get_src_reg( c, inst->SrcReg, 0, 1 ); + src = get_src_reg( c, inst, 0, 0 ); param = alloc_tmp( c ); @@ -1183,7 +1611,7 @@ static void emit_noise1( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param ); } } @@ -1331,7 +1759,7 @@ static void noise2_sub( struct brw_wm_compile *c ) { } static void emit_noise2( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, param0, param1, dst; @@ -1341,8 +1769,8 @@ static void emit_noise2( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -1356,7 +1784,7 @@ static void emit_noise2( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -1366,9 +1794,11 @@ static void emit_noise2( struct brw_wm_compile *c, release_tmps( c, mark ); } -/* The three-dimensional case is much like the one- and two- versions above, - but since the number of corners is rapidly growing we now pack 16 16-bit - hashes into each register to extract more parallelism from the EUs. */ +/** + * The three-dimensional case is much like the one- and two- versions above, + * but since the number of corners is rapidly growing we now pack 16 16-bit + * hashes into each register to extract more parallelism from the EUs. + */ static void noise3_sub( struct brw_wm_compile *c ) { struct brw_compile *p = &c->func; @@ -1632,7 +2062,7 @@ static void noise3_sub( struct brw_wm_compile *c ) { } static void emit_noise3( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, src2, param0, param1, param2, dst; @@ -1642,9 +2072,9 @@ static void emit_noise3( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); - src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + src2 = get_src_reg( c, inst, 0, 2 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -1660,7 +2090,7 @@ static void emit_noise3( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -1670,13 +2100,15 @@ static void emit_noise3( struct brw_wm_compile *c, release_tmps( c, mark ); } -/* For the four-dimensional case, the little micro-optimisation benefits - we obtain by unrolling all the loops aren't worth the massive bloat it - now causes. Instead, we loop twice around performing a similar operation - to noise3, once for the w=0 cube and once for the w=1, with a bit more - code to glue it all together. */ -static void noise4_sub( struct brw_wm_compile *c ) { - +/** + * For the four-dimensional case, the little micro-optimisation benefits + * we obtain by unrolling all the loops aren't worth the massive bloat it + * now causes. Instead, we loop twice around performing a similar operation + * to noise3, once for the w=0 cube and once for the w=1, with a bit more + * code to glue it all together. + */ +static void noise4_sub( struct brw_wm_compile *c ) +{ struct brw_compile *p = &c->func; struct brw_reg param[ 4 ], x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ @@ -2053,7 +2485,7 @@ static void noise4_sub( struct brw_wm_compile *c ) { } static void emit_noise4( struct brw_wm_compile *c, - struct prog_instruction *inst ) + const struct prog_instruction *inst ) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst; @@ -2063,10 +2495,10 @@ static void emit_noise4( struct brw_wm_compile *c, assert( mark == 0 ); - src0 = get_src_reg( c, inst->SrcReg, 0, 1 ); - src1 = get_src_reg( c, inst->SrcReg, 1, 1 ); - src2 = get_src_reg( c, inst->SrcReg, 2, 1 ); - src3 = get_src_reg( c, inst->SrcReg, 3, 1 ); + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + src2 = get_src_reg( c, inst, 0, 2 ); + src3 = get_src_reg( c, inst, 0, 3 ); param0 = alloc_tmp( c ); param1 = alloc_tmp( c ); @@ -2084,7 +2516,7 @@ static void emit_noise4( struct brw_wm_compile *c, brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); for (i = 0 ; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i, 1); + dst = get_dst_reg(c, inst, i); brw_MOV( p, dst, param0 ); } } @@ -2095,17 +2527,17 @@ static void emit_noise4( struct brw_wm_compile *c, } static void emit_wpos_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; struct brw_reg src0[2], dst[2]; - dst[0] = get_dst_reg(c, inst, 0, 1); - dst[1] = get_dst_reg(c, inst, 1, 1); + dst[0] = get_dst_reg(c, inst, 0); + dst[1] = get_dst_reg(c, inst, 1); - src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1); - src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1); + src0[0] = get_src_reg(c, inst, 0, 0); + src0[1] = get_src_reg(c, inst, 0, 1); /* Calculate the pixel offset from window bottom left into destination * X and Y channels. @@ -2128,27 +2560,32 @@ static void emit_wpos_xy(struct brw_wm_compile *c, } /* TODO - BIAS on SIMD8 not workind yet... + BIAS on SIMD8 not working yet... */ static void emit_txb(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; - GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - + /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ + const GLuint unit = inst->TexSrcUnit; GLuint i; + GLuint msg_type; + + assert(unit < BRW_MAX_TEX_UNIT); + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); + dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); + src[i] = get_src_reg(c, inst, 0, i); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */ + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */ + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */ break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: @@ -2156,46 +2593,63 @@ static void emit_txb(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(3), src[1]); brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); break; - default: + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: brw_MOV(p, brw_message_reg(2), src[0]); brw_MOV(p, brw_message_reg(3), src[1]); brw_MOV(p, brw_message_reg(4), src[2]); break; + default: + /* invalid target */ + abort(); } - brw_MOV(p, brw_message_reg(5), src[3]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ + + if (BRW_IS_IGDNG(p->brw)) { + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG; + } else { + /* Does it work well on SIMD8? */ + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + } + brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - unit + MAX_DRAW_BUFFERS, /* surface */ - unit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, - 4, - 4, - 0); + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + SURF_INDEX_TEXTURE(unit), + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + msg_type, /* msg_type */ + 4, /* response_length */ + 4, /* msg_length */ + 0, /* eot */ + 1, + BRW_SAMPLER_SIMD_MODE_SIMD8); } + static void emit_tex(struct brw_wm_compile *c, - struct prog_instruction *inst) + const struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; - GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - + /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ + const GLuint unit = inst->TexSrcUnit; GLuint msg_len; GLuint i, nr; GLuint emit; GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0; + GLuint msg_type; + + assert(unit < BRW_MAX_TEX_UNIT); payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i, 1); + dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1); - + src[i] = get_src_reg(c, inst, 0, i); switch (inst->TexSrcTarget) { case TEXTURE_1D_INDEX: @@ -2207,13 +2661,18 @@ static void emit_tex(struct brw_wm_compile *c, emit = WRITEMASK_XY; nr = 2; break; - default: + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: emit = WRITEMASK_XYZ; nr = 3; break; + default: + /* invalid target */ + abort(); } msg_len = 1; + /* move/load S, T, R coords */ for (i = 0; i < nr; i++) { static const GLuint swz[4] = {0,1,2,2}; if (emit & (1<<i)) @@ -2224,77 +2683,103 @@ static void emit_tex(struct brw_wm_compile *c, } if (shadow) { - brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(6), src[2]); + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */ + brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ } + if (BRW_IS_IGDNG(p->brw)) { + if (shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG; + } else { + /* Does it work for shadow on SIMD8 ? */ + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; + } + brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), - 1, - retype(payload_reg, BRW_REGISTER_TYPE_UW), - unit + MAX_DRAW_BUFFERS, /* surface */ - unit, /* sampler */ - inst->DstReg.WriteMask, - BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, - 4, - shadow ? 6 : 4, - 0); + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + SURF_INDEX_TEXTURE(unit), + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + msg_type, /* msg_type */ + 4, /* response_length */ + shadow ? 6 : 4, /* msg_length */ + 0, /* eot */ + 1, + BRW_SAMPLER_SIMD_MODE_SIMD8); if (shadow) brw_MOV(p, dst[3], brw_imm_f(1.0)); } + +/** + * Resolve subroutine calls after code emit is done. + */ static void post_wm_emit( struct brw_wm_compile *c ) { - GLuint nr_insns = c->fp->program.Base.NumInstructions; - GLuint insn, target_insn; - struct prog_instruction *inst1, *inst2; - struct brw_instruction *brw_inst1, *brw_inst2; - int offset; - for (insn = 0; insn < nr_insns; insn++) { - inst1 = &c->fp->program.Base.Instructions[insn]; - brw_inst1 = inst1->Data; - switch (inst1->Opcode) { - case OPCODE_CAL: - target_insn = inst1->BranchTarget; - inst2 = &c->fp->program.Base.Instructions[target_insn]; - brw_inst2 = inst2->Data; - offset = brw_inst2 - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - default: - break; - } + brw_resolve_cals(&c->func); +} + +static void +get_argument_regs(struct brw_wm_compile *c, + const struct prog_instruction *inst, + int index, + struct brw_reg *regs, + int mask) +{ + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1 << i)) + regs[i] = get_src_reg(c, inst, index, i); } } static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) { -#define MAX_IFSN 32 +#define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 - struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH]; - struct brw_instruction *inst0, *inst1; - int i, if_insn = 0, loop_insn = 0; + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + GLuint i, if_depth = 0, loop_depth = 0; struct brw_compile *p = &c->func; struct brw_indirect stack_index = brw_indirect(0, 0); - c->reg_index = 0; + c->out_of_regs = GL_FALSE; + prealloc_reg(c); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (i = 0; i < c->nr_fp_insns; i++) { - struct prog_instruction *inst = &c->prog_instructions[i]; - struct prog_instruction *orig_inst; + const struct prog_instruction *inst = &c->prog_instructions[i]; + int dst_flags; + struct brw_reg args[3][4], dst[4]; + int j; - if ((orig_inst = inst->Data) != 0) - orig_inst->Data = current_insn(p); + c->cur_inst = i; + +#if 0 + _mesa_printf("Inst %d: ", i); + _mesa_print_instruction(inst); +#endif + + /* fetch any constants that this instruction needs */ + if (c->fp->use_const_buffer) + fetch_constants(c, inst); if (inst->CondUpdate) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); else brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + dst_flags = inst->DstReg.WriteMask; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + dst_flags |= SATURATE; + switch (inst->Opcode) { case WM_PIXELXY: emit_pixel_xy(c, inst); @@ -2320,14 +2805,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case WM_FB_WRITE: emit_fb_write(c, inst); break; - case OPCODE_ABS: - emit_abs(c, inst); + case WM_FRONTFACING: + emit_frontfacing(c, inst); break; case OPCODE_ADD: emit_add(c, inst); break; - case OPCODE_SUB: - emit_sub(c, inst); + case OPCODE_ARL: + emit_arl(c, inst); break; case OPCODE_FRC: emit_frc(c, inst); @@ -2342,6 +2827,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_trunc(c, inst); break; case OPCODE_MOV: + case OPCODE_SWZ: emit_mov(c, inst); break; case OPCODE_DP3: @@ -2374,17 +2860,21 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_LG2: emit_lg2(c, inst); break; - case OPCODE_MAX: - emit_max(c, inst); - break; case OPCODE_MIN: - emit_min(c, inst); + case OPCODE_MAX: + emit_min_max(c, inst); break; case OPCODE_DDX: - emit_ddx(c, inst); - break; case OPCODE_DDY: - emit_ddy(c, inst); + for (j = 0; j < 4; j++) { + if (inst->DstReg.WriteMask & (1 << j)) + dst[j] = get_dst_reg(c, inst, j); + else + dst[j] = brw_null_reg(); + } + get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW); + emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX), + args[0]); break; case OPCODE_SLT: emit_slt(c, inst); @@ -2435,18 +2925,21 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_kil(c); break; case OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); break; case OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); break; case OPCODE_BGNSUB: + brw_save_label(p, inst->Comment, p->nr_insn); + break; case OPCODE_ENDSUB: + /* no-op */ break; case OPCODE_CAL: brw_push_insn_state(p); @@ -2456,8 +2949,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_access_mode(p, BRW_ALIGN_16); brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(4)); - orig_inst = inst->Data; - orig_inst->Data = &p->store[p->nr_insn]; + brw_save_call(&c->func, inst->Comment, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); brw_pop_insn_state(p); break; @@ -2474,7 +2966,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) break; case OPCODE_BGNLOOP: - loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); + /* XXX may need to invalidate the current_constant regs */ + loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: brw_BREAK(p); @@ -2485,39 +2978,69 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_ENDLOOP: - loop_insn--; - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]); - /* patch all the BREAK instructions from - last BEGINLOOP */ - while (inst0 > loop_inst[loop_insn]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK) { - inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; + { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + if (BRW_IS_IGDNG(brw)) + br = 2; + + loop_depth--; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; - } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { - inst0->bits3.if_else.jump_count = inst1 - inst0; + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; - } - } - break; + } + } + } + break; default: _mesa_printf("unsupported IR in fragment shader %d\n", inst->Opcode); } + if (inst->CondUpdate) brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); else brw_set_predicate_control(p, BRW_PREDICATE_NONE); } post_wm_emit(c); - for (i = 0; i < c->fp->program.Base.NumInstructions; i++) - c->fp->program.Base.Instructions[i].Data = NULL; + + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("wm-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); + } } +/** + * Do GPU code generation for shaders that use GLSL features such as + * flow control. Other shaders will be compiled with the + */ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) { + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("brw_wm_glsl_emit:\n"); + } + + /* initial instruction translation/simplification */ brw_wm_pass_fp(c); + + /* actual code generation */ brw_wm_emit_glsl(brw, c); - c->prog_data.total_grf = c->reg_index; + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "brw_wm_glsl_emit done"); + } + + c->prog_data.total_grf = num_grf_used(c); c->prog_data.total_scratch = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c index bd60ac9b31..5e399ac62a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.c +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -116,8 +116,13 @@ const struct { { C, 0, 1, 1, 1 } }; +/** + * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES + * \param lookup bitmask of IZ_* flags + */ void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, + GLboolean ps_uses_depth, struct brw_wm_prog_key *key ) { GLuint reg = 2; @@ -127,7 +132,7 @@ void brw_wm_lookup_iz( GLuint line_aa, if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) key->computes_depth = 1; - if (wm_iz_table[lookup].sd_present) { + if (wm_iz_table[lookup].sd_present || ps_uses_depth) { key->source_depth_reg = reg; reg += 2; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 205a7160d3..6279258339 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -51,6 +51,7 @@ static struct brw_wm_value *get_value( struct brw_wm_compile *c) return &c->vreg[c->nr_vreg++]; } +/** return pointer to a newly allocated instruction */ static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) { assert(c->nr_insns < BRW_WM_MAX_INSN); @@ -60,6 +61,7 @@ static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) /*********************************************************************** */ +/** Init the "undef" register */ static void pass0_init_undef( struct brw_wm_compile *c) { struct brw_wm_ref *ref = &c->undef_ref; @@ -69,6 +71,7 @@ static void pass0_init_undef( struct brw_wm_compile *c) ref->prevuse = NULL; } +/** Set a FP register to a value */ static void pass0_set_fpreg_value( struct brw_wm_compile *c, GLuint file, GLuint idx, @@ -83,6 +86,7 @@ static void pass0_set_fpreg_value( struct brw_wm_compile *c, c->pass0_fp_reg[file][idx][component] = ref; } +/** Set a FP register to a ref */ static void pass0_set_fpreg_ref( struct brw_wm_compile *c, GLuint file, GLuint idx, @@ -115,12 +119,13 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, ref->value = &c->creg[i/16]; ref->insn = 0; ref->prevuse = NULL; - + return ref; } } +/** Return a ref to a constant/literal value */ static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c, const GLfloat *constval ) { @@ -142,7 +147,7 @@ static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c, */ c->constref[i].constval = *constval; c->constref[i].ref = get_param_ref(c, constval); - + return c->constref[i].ref; } else { @@ -187,7 +192,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, /* There's something really hokey about parameters parsed in * arb programs - they all end up in here, whether they be - * state values, paramters or constants. This duplicates the + * state values, parameters or constants. This duplicates the * structure above & also seems to subvert the limits set for * each type of constant/param. */ @@ -198,7 +203,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, */ ref = get_const_ref(c, &plist->ParameterValues[idx][component]); break; - + case PROGRAM_STATE_VAR: case PROGRAM_UNIFORM: /* These may change from run to run: @@ -229,14 +234,13 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, - /*********************************************************************** * Straight translation to internal instruction format */ static void pass0_set_dst( struct brw_wm_compile *c, - struct brw_wm_instruction *out, - const struct prog_instruction *inst, + struct brw_wm_instruction *out, + const struct prog_instruction *inst, GLuint writemask ) { const struct prog_dst_register *dst = &inst->DstReg; @@ -245,44 +249,14 @@ static void pass0_set_dst( struct brw_wm_compile *c, for (i = 0; i < 4; i++) { if (writemask & (1<<i)) { out->dst[i] = get_value(c); - pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]); } } - - out->writemask = writemask; -} - -static void pass0_set_dst_scalar( struct brw_wm_compile *c, - struct brw_wm_instruction *out, - const struct prog_instruction *inst, - GLuint writemask ) -{ - if (writemask) { - const struct prog_dst_register *dst = &inst->DstReg; - GLuint i; - - /* Compute only the first (X) value: - */ - out->writemask = WRITEMASK_X; - out->dst[0] = get_value(c); - - /* Update our tracking register file for all the components in - * writemask: - */ - for (i = 0; i < 4; i++) { - if (writemask & (1<<i)) { - pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[0]); - } - } - } - else - out->writemask = 0; + out->writemask = writemask; } - static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, struct prog_src_register src, GLuint i ) @@ -292,14 +266,13 @@ static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, static const GLfloat const_zero = 0.0; static const GLfloat const_one = 1.0; - if (component == SWIZZLE_ZERO) src_ref = get_const_ref(c, &const_zero); else if (component == SWIZZLE_ONE) src_ref = get_const_ref(c, &const_one); else src_ref = pass0_get_reg(c, src.File, src.Index, component); - + return src_ref; } @@ -311,19 +284,19 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, { const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i); struct brw_wm_ref *newref = get_ref(c); - + newref->value = ref->value; newref->hw_reg = ref->hw_reg; - if (insn) { + if (insn) { newref->insn = insn - c->instruction; newref->prevuse = newref->value->lastuse; newref->value->lastuse = newref; } - if (src.NegateBase & (1<<i)) + if (src.Negate & (1 << i)) newref->hw_reg.negate ^= 1; - + if (src.Abs) { newref->hw_reg.negate = 0; newref->hw_reg.abs = 1; @@ -333,9 +306,9 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, } - -static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, - const struct prog_instruction *inst ) +static void +translate_insn(struct brw_wm_compile *c, + const struct prog_instruction *inst) { struct brw_wm_instruction *out = get_instruction(c); GLuint writemask = inst->DstReg.WriteMask; @@ -348,8 +321,9 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, out->saturate = (inst->SaturateMode != SATURATE_OFF); out->tex_unit = inst->TexSrcUnit; out->tex_idx = inst->TexSrcTarget; - out->eot = inst->Sampler & 1; - out->target = inst->Sampler>>1; + out->tex_shadow = inst->TexShadow; + out->eot = inst->Aux & 1; + out->target = inst->Aux >> 1; /* Args: */ @@ -361,12 +335,7 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, /* Dst: */ - if (brw_wm_is_scalar_result(out->opcode)) - pass0_set_dst_scalar(c, out, inst, writemask); - else - pass0_set_dst(c, out, inst, writemask); - - return out; + pass0_set_dst(c, out, inst, writemask); } @@ -379,14 +348,22 @@ static void pass0_precalc_mov( struct brw_wm_compile *c, { const struct prog_dst_register *dst = &inst->DstReg; GLuint writemask = inst->DstReg.WriteMask; + struct brw_wm_ref *refs[4]; GLuint i; /* Get the effect of a MOV by manipulating our register table: + * First get all refs, then assign refs. This ensures that "in-place" + * swizzles such as: + * MOV t, t.xxyx + * are handled correctly. Previously, these two steps were done in + * one loop and the above case was incorrectly handled. */ for (i = 0; i < 4; i++) { - if (writemask & (1<<i)) { - pass0_set_fpreg_ref( c, dst->File, dst->Index, i, - get_new_ref(c, inst->SrcReg[0], i, NULL)); + refs[i] = get_new_ref(c, inst->SrcReg[0], i, NULL); + } + for (i = 0; i < 4; i++) { + if (writemask & (1 << i)) { + pass0_set_fpreg_ref( c, dst->File, dst->Index, i, refs[i]); } } } @@ -418,6 +395,7 @@ static void pass0_init_payload( struct brw_wm_compile *c ) &c->payload.input_interp[i] ); } + /*********************************************************************** * PASS 0 * @@ -440,7 +418,6 @@ void brw_wm_pass0( struct brw_wm_compile *c ) for (insn = 0; insn < c->nr_fp_insns; insn++) { const struct prog_instruction *inst = &c->prog_instructions[insn]; - /* Optimize away moves, otherwise emit translated instruction: */ switch (inst->Opcode) { @@ -453,8 +430,6 @@ void brw_wm_pass0( struct brw_wm_compile *c ) translate_insn(c, inst); } break; - - default: translate_insn(c, inst); break; @@ -465,4 +440,3 @@ void brw_wm_pass0( struct brw_wm_compile *c ) brw_wm_print_program(c, "pass0"); } } - diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index f6f3a38e9e..b449394029 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -58,7 +58,8 @@ static void unlink_ref(struct brw_wm_ref *ref) if (ref == value->lastuse) { value->lastuse = ref->prevuse; - } else { + } + else { struct brw_wm_ref *i = value->lastuse; while (i->prevuse != ref) i = i->prevuse; i->prevuse = ref->prevuse; @@ -75,8 +76,9 @@ static void track_arg(struct brw_wm_compile *c, for (i = 0; i < 4; i++) { struct brw_wm_ref *ref = inst->src[arg][i]; if (ref) { - if (readmask & (1<<i)) + if (readmask & (1<<i)) { ref->value->contributes_to_output = 1; + } else { unlink_ref(ref); inst->src[arg][i] = NULL; @@ -88,15 +90,21 @@ static void track_arg(struct brw_wm_compile *c, static GLuint get_texcoord_mask( GLuint tex_idx ) { switch (tex_idx) { - case TEXTURE_1D_INDEX: return WRITEMASK_X; - case TEXTURE_2D_INDEX: return WRITEMASK_XY; - case TEXTURE_3D_INDEX: return WRITEMASK_XYZ; - case TEXTURE_CUBE_INDEX: return WRITEMASK_XYZ; - case TEXTURE_RECT_INDEX: return WRITEMASK_XY; + case TEXTURE_1D_INDEX: + return WRITEMASK_X; + case TEXTURE_2D_INDEX: + return WRITEMASK_XY; + case TEXTURE_3D_INDEX: + return WRITEMASK_XYZ; + case TEXTURE_CUBE_INDEX: + return WRITEMASK_XYZ; + case TEXTURE_RECT_INDEX: + return WRITEMASK_XY; default: return 0; } } + /* Step two: Basically this is dead code elimination. * * Iterate backwards over instructions, noting which values @@ -151,6 +159,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_FRC: case OPCODE_MOV: case OPCODE_SWZ: + case OPCODE_TRUNC: read0 = writemask; break; @@ -169,6 +178,11 @@ void brw_wm_pass1( struct brw_wm_compile *c ) read1 = writemask; break; + case OPCODE_DDX: + case OPCODE_DDY: + read0 = writemask; + break; + case OPCODE_MAD: case OPCODE_CMP: case OPCODE_LRP: @@ -202,9 +216,10 @@ void brw_wm_pass1( struct brw_wm_compile *c ) break; case OPCODE_TEX: + case OPCODE_TXP: read0 = get_texcoord_mask(inst->tex_idx); - if (c->key.shadowtex_mask & (1<<inst->tex_unit)) + if (inst->tex_shadow) read0 |= WRITEMASK_Z; break; @@ -259,7 +274,8 @@ void brw_wm_pass1( struct brw_wm_compile *c ) break; case OPCODE_DST: - case OPCODE_TXP: + case WM_FRONTFACING: + case OPCODE_KIL_NV: default: break; } @@ -273,6 +289,3 @@ void brw_wm_pass1( struct brw_wm_compile *c ) brw_wm_print_program(c, "pass1"); } } - - - diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c index 6fca9ad220..6faea018fb 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -69,8 +69,6 @@ static void prealloc_reg(struct brw_wm_compile *c, */ static void init_registers( struct brw_wm_compile *c ) { - struct brw_context *brw = c->func.brw; - GLuint inputs = (brw->vs.prog_data->outputs_written & DO_SETUP_BITS); GLuint nr_interp_regs = 0; GLuint i = 0; GLuint j; @@ -84,18 +82,22 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->nr_creg; j++) prealloc_reg(c, &c->creg[j], i++); - for (j = 0; j < FRAG_ATTRIB_MAX; j++) - if (inputs & (1<<j)) { - /* index for vs output and ps input are not the same - in shader varying */ - GLuint index; - if (j > FRAG_ATTRIB_VAR0) - index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + for (j = 0; j < FRAG_ATTRIB_MAX; j++) { + if (c->key.vp_outputs_written & (1<<j)) { + int fp_index; + + if (j >= VERT_RESULT_VAR0) + fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + else if (j <= VERT_RESULT_TEX7) + fp_index = j; else - index = j; + fp_index = -1; + nr_interp_regs++; - prealloc_reg(c, &c->payload.input_interp[index], i++); + if (fp_index >= 0) + prealloc_reg(c, &c->payload.input_interp[fp_index], i++); } + } assert(nr_interp_regs >= 1); @@ -120,7 +122,7 @@ static void update_register_usage(struct brw_wm_compile *c, /* Only search those which can change: */ if (grf->nextuse < thisinsn) { - struct brw_wm_ref *ref = grf->value->lastuse; + const struct brw_wm_ref *ref = grf->value->lastuse; /* Has last use of value been passed? */ @@ -148,7 +150,7 @@ static void spill_value(struct brw_wm_compile *c, /* Allocate a spill slot. Note that allocations start from 0x40 - * the first slot is reserved to mean "undef" in brw_wm_emit.c */ - if (!value->spill_slot) { + if (!value->spill_slot) { c->last_scratch += 0x40; value->spill_slot = c->last_scratch; } @@ -189,7 +191,7 @@ static GLuint search_contiguous_regs(struct brw_wm_compile *c, if (grf[i+j].nextuse < group_nextuse) group_nextuse = grf[i+j].nextuse; } - + if (group_nextuse > furthest) { furthest = group_nextuse; reg = i; @@ -197,7 +199,7 @@ static GLuint search_contiguous_regs(struct brw_wm_compile *c, } assert(furthest != thisinsn); - + /* Any non-empty regs will need to be spilled: */ for (j = 0; j < nr; j++) @@ -243,7 +245,7 @@ static void alloc_contiguous_dest(struct brw_wm_compile *c, static void load_args(struct brw_wm_compile *c, struct brw_wm_instruction *inst) -{ +{ GLuint thisinsn = inst - c->instruction; GLuint i,j; @@ -258,17 +260,17 @@ static void load_args(struct brw_wm_compile *c, * register allocation and mark the ref as requiring a fill. */ GLuint reg = search_contiguous_regs(c, 1, thisinsn); - + c->pass2_grf[reg].value = ref->value; c->pass2_grf[reg].nextuse = thisinsn; - + ref->value->resident = &c->pass2_grf[reg]; /* Note that a fill is required: */ ref->unspill_reg = reg*2; } - + /* Adjust the hw_reg to point at the value's current location: */ assert(ref->value == ref->value->resident->value); @@ -294,7 +296,7 @@ void brw_wm_pass2( struct brw_wm_compile *c ) for (insn = 0; insn < c->nr_insns; insn++) { struct brw_wm_instruction *inst = &c->instruction[insn]; - + /* Update registers' nextuse values: */ update_register_usage(c, insn); @@ -322,11 +324,11 @@ void brw_wm_pass2( struct brw_wm_compile *c ) break; } - if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) + if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) { for (i = 0; i < 4; i++) if (inst->dst[i]) spill_value(c, inst->dst[i]); - + } } if (INTEL_DEBUG & DEBUG_WM) { @@ -339,6 +341,3 @@ void brw_wm_pass2( struct brw_wm_compile *c ) brw_wm_print_program(c, "pass2/done"); } } - - - diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 8c9cb78945..dff466587a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -95,6 +95,7 @@ struct wm_sampler_key { int sampler_count; struct wm_sampler_entry { + GLenum tex_target; GLenum wrap_r, wrap_s, wrap_t; float maxlod, minlod; float lod_bias; @@ -102,6 +103,10 @@ struct wm_sampler_key { GLenum minfilter, magfilter; GLenum comparemode, comparefunc; dri_bo *sdc_bo; + + /** If target is cubemap, take context setting. + */ + GLboolean seamless_cube_map; } sampler[BRW_MAX_TEX_UNIT]; }; @@ -151,7 +156,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; if (key->max_aniso > 2.0) { - sampler->ss3.max_aniso = MAX2((key->max_aniso - 2) / 2, + sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2, BRW_ANISORATIO_16); } } @@ -172,15 +177,29 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); - /* Fulsim complains if I don't do this. Hardware doesn't mind: + /* Cube-maps on 965 and later must use the same wrap mode for all 3 + * coordinate dimensions. Futher, only CUBE and CLAMP are valid. */ -#if 0 - if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) { - sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; - sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; - sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; + if (key->tex_target == GL_TEXTURE_CUBE_MAP) { + if (key->seamless_cube_map && + (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) { + sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; + } else { + sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + } + } else if (key->tex_target == GL_TEXTURE_1D) { + /* There's a bug in 1D texture sampling - it actually pays + * attention to the wrap_t value, though it should not. + * Override the wrap_t value here to GL_REPEAT to keep + * any nonexistent border pixels from floating in. + */ + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; } -#endif + /* Set shadow function: */ @@ -215,24 +234,31 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ } + /** Sets up the cache key for sampler state for all texture units */ static void brw_wm_sampler_populate_key(struct brw_context *brw, struct wm_sampler_key *key) { + GLcontext *ctx = &brw->intel.ctx; int unit; memset(key, 0, sizeof(*key)); for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { - if (brw->attribs.Texture->Unit[unit]._ReallyEnabled) { + if (ctx->Texture.Unit[unit]._ReallyEnabled) { struct wm_sampler_entry *entry = &key->sampler[unit]; - struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; struct gl_texture_object *texObj = texUnit->_Current; struct intel_texture_object *intelObj = intel_texture_object(texObj); struct gl_texture_image *firstImage = texObj->Image[0][intelObj->firstLevel]; + entry->tex_target = texObj->Target; + + entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP) + ? ctx->Texture.CubeMapSeamless : GL_FALSE; + entry->wrap_r = texObj->WrapR; entry->wrap_s = texObj->WrapS; entry->wrap_t = texObj->WrapT; @@ -274,6 +300,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw, */ static void upload_wm_samplers( struct brw_context *brw ) { + GLcontext *ctx = &brw->intel.ctx; struct wm_sampler_key key; int i; @@ -317,7 +344,7 @@ static void upload_wm_samplers( struct brw_context *brw ) /* Emit SDC relocations */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - if (!brw->attribs.Texture->Unit[i]._ReallyEnabled) + if (!ctx->Texture.Unit[i]._ReallyEnabled) continue; dri_bo_emit_reloc(brw->wm.sampler_bo, diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 5302405eda..361f91292b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -60,7 +60,9 @@ struct brw_wm_unit_key { static void wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { + GLcontext *ctx = &brw->intel.ctx; const struct gl_fragment_program *fp = brw->fragment_program; + const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp; struct intel_context *intel = &brw->intel; memset(key, 0, sizeof(*key)); @@ -69,7 +71,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->max_threads = 1; else { /* WM maximum threads is number of EUs times number of threads per EU. */ - if (BRW_IS_G4X(brw)) + if (BRW_IS_IGDNG(brw)) + key->max_threads = 12 * 6; + else if (BRW_IS_G4X(brw)) key->max_threads = 10 * 5; else key->max_threads = 8 * 4; @@ -95,31 +99,43 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->sampler_count = brw->wm.sampler_count; /* _NEW_POLYGONSTIPPLE */ - key->polygon_stipple = brw->attribs.Polygon->StippleFlag; + key->polygon_stipple = ctx->Polygon.StippleFlag; /* BRW_NEW_FRAGMENT_PROGRAM */ key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; /* as far as we can tell */ key->computes_depth = - (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0; + (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0; + /* BRW_NEW_DEPTH_BUFFER + * Override for NULL depthbuffer case, required by the Pixel Shader Computed + * Depth field. + */ + if (brw->state.depth_region == NULL) + key->computes_depth = 0; /* _NEW_COLOR */ - key->uses_kill = fp->UsesKill || brw->attribs.Color->AlphaEnabled; - key->is_glsl = brw_wm_is_glsl(fp); + key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; + key->is_glsl = bfp->isGLSL; + + /* temporary sanity check assertion */ + ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); - /* XXX: This needs a flag to indicate when it changes. */ + /* _NEW_DEPTH */ key->stats_wm = intel->stats_wm; /* _NEW_LINE */ - key->line_stipple = brw->attribs.Line->StippleFlag; + key->line_stipple = ctx->Line.StippleFlag; /* _NEW_POLYGON */ - key->offset_enable = brw->attribs.Polygon->OffsetFill; - key->offset_units = brw->attribs.Polygon->OffsetUnits; - key->offset_factor = brw->attribs.Polygon->OffsetFactor; + key->offset_enable = ctx->Polygon.OffsetFill; + key->offset_units = ctx->Polygon.OffsetUnits; + key->offset_factor = ctx->Polygon.OffsetFactor; } +/** + * Setup wm hardware state. See page 225 of Volume 2 + */ static dri_bo * wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, dri_bo **reloc_bufs) @@ -133,11 +149,15 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ wm.thread1.depth_coef_urb_read_offset = 1; wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - wm.thread1.binding_table_entry_count = key->nr_surfaces; + + if (BRW_IS_IGDNG(brw)) + wm.thread1.binding_table_entry_count = 0; /* hardware requirement */ + else + wm.thread1.binding_table_entry_count = key->nr_surfaces; if (key->total_scratch != 0) { wm.thread2.scratch_space_base_pointer = - brw->wm.scratch_buffer->offset >> 10; /* reloc */ + brw->wm.scratch_bo->offset >> 10; /* reloc */ wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1; } else { wm.thread2.scratch_space_base_pointer = 0; @@ -146,11 +166,15 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg; wm.thread3.urb_entry_read_length = key->urb_entry_read_length; + wm.thread3.urb_entry_read_offset = 0; wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - wm.thread3.urb_entry_read_offset = 0; - wm.wm4.sampler_count = (key->sampler_count + 1) / 4; + if (BRW_IS_IGDNG(brw)) + wm.wm4.sampler_count = 0; /* hardware requirement */ + else + wm.wm4.sampler_count = (key->sampler_count + 1) / 4; + if (brw->wm.sampler_bo != NULL) { /* reloc */ wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5; @@ -215,7 +239,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, 0, 0, wm.thread2.per_thread_scratch_space, offsetof(struct brw_wm_unit_state, thread2), - brw->wm.scratch_buffer); + brw->wm.scratch_bo); } /* Emit sampler state relocation */ @@ -246,20 +270,20 @@ static void upload_wm_unit( struct brw_context *brw ) if (key.total_scratch) { GLuint total = key.total_scratch * key.max_threads; - if (brw->wm.scratch_buffer && total > brw->wm.scratch_buffer->size) { - dri_bo_unreference(brw->wm.scratch_buffer); - brw->wm.scratch_buffer = NULL; + if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) { + dri_bo_unreference(brw->wm.scratch_bo); + brw->wm.scratch_bo = NULL; } - if (brw->wm.scratch_buffer == NULL) { - brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr, - "wm scratch", - total, - 4096); + if (brw->wm.scratch_bo == NULL) { + brw->wm.scratch_bo = dri_bo_alloc(intel->bufmgr, + "wm scratch", + total, + 4096); } } reloc_bufs[0] = brw->wm.prog_bo; - reloc_bufs[1] = brw->wm.scratch_buffer; + reloc_bufs[1] = brw->wm.scratch_bo; reloc_bufs[2] = brw->wm.sampler_bo; dri_bo_unreference(brw->wm.state_bo); @@ -277,11 +301,13 @@ const struct brw_tracked_state brw_wm_unit = { .mesa = (_NEW_POLYGON | _NEW_POLYGONSTIPPLE | _NEW_LINE | - _NEW_COLOR), + _NEW_COLOR | + _NEW_DEPTH), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | - BRW_NEW_NR_SURFACES), + BRW_NEW_DEPTH_BUFFER | + BRW_NEW_NR_WM_SURFACES), .cache = (CACHE_NEW_WM_PROG | CACHE_NEW_SAMPLER) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 06e71e6d69..51539ac1e7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -33,11 +33,12 @@ #include "main/mtypes.h" #include "main/texformat.h" #include "main/texstore.h" +#include "shader/prog_parameter.h" #include "intel_mipmap_tree.h" #include "intel_batchbuffer.h" #include "intel_tex.h" - +#include "intel_fbo.h" #include "brw_context.h" #include "brw_state.h" @@ -69,7 +70,8 @@ static GLuint translate_tex_target( GLenum target ) } -static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) +static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, + GLenum depth_mode ) { switch( mesa_format ) { case MESA_FORMAT_L8: @@ -89,10 +91,16 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) return BRW_SURFACEFORMAT_R8G8B8_UNORM; case MESA_FORMAT_ARGB8888: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; + else + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; case MESA_FORMAT_RGBA8888_REV: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; + else + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; case MESA_FORMAT_RGB565: return BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -133,13 +141,34 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) case MESA_FORMAT_RGBA_DXT5: return BRW_SURFACEFORMAT_BC3_UNORM; - case MESA_FORMAT_SRGBA8: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; + case MESA_FORMAT_SARGB8: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; + + case MESA_FORMAT_SLA8: + return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB; + + case MESA_FORMAT_SL8: + return BRW_SURFACEFORMAT_L8_UNORM_SRGB; + case MESA_FORMAT_SRGB_DXT1: return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; case MESA_FORMAT_S8_Z24: - return BRW_SURFACEFORMAT_I24X8_UNORM; + /* XXX: these different surface formats don't seem to + * make any difference for shadow sampler/compares. + */ + if (depth_mode == GL_INTENSITY) + return BRW_SURFACEFORMAT_I24X8_UNORM; + else if (depth_mode == GL_ALPHA) + return BRW_SURFACEFORMAT_A24X8_UNORM; + else + return BRW_SURFACEFORMAT_L24X8_UNORM; + + case MESA_FORMAT_DUDV8: + return BRW_SURFACEFORMAT_R8G8_SNORM; + + case MESA_FORMAT_SIGNED_RGBA8888_REV: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; default: assert(0); @@ -147,17 +176,6 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) } } -struct brw_wm_surface_key { - GLenum target, depthmode; - dri_bo *bo; - GLint format; - GLint first_level, last_level; - GLint width, height, depth; - GLint pitch, cpp; - uint32_t tiling; - GLuint offset; -}; - static void brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) { @@ -179,7 +197,7 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) static dri_bo * brw_create_texture_surface( struct brw_context *brw, - struct brw_wm_surface_key *key ) + struct brw_surface_key *key ) { struct brw_surface_state surf; dri_bo *bo; @@ -188,9 +206,11 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; surf.ss0.surface_type = translate_tex_target(key->target); - - if (key->bo) - surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode); + if (key->bo) { + surf.ss0.surface_format = translate_tex_format(key->format, + key->internal_format, + key->depthmode); + } else { switch (key->depth) { case 32: @@ -232,7 +252,7 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.cube_neg_z = 1; } - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -253,10 +273,11 @@ static void brw_update_texture_surface( GLcontext *ctx, GLuint unit ) { struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current; + struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; - struct brw_wm_surface_key key; + struct brw_surface_key key; + const GLuint surf = SURF_INDEX_TEXTURE(unit); memset(&key, 0, sizeof(key)); @@ -267,6 +288,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.offset = intelObj->textureOffset; } else { key.format = firstImage->TexFormat->MesaFormat; + key.internal_format = firstImage->InternalFormat; key.pitch = intelObj->mt->pitch; key.depth = firstImage->Depth; key.bo = intelObj->mt->region->buffer; @@ -282,33 +304,221 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.cpp = intelObj->mt->cpp; key.tiling = intelObj->mt->region->tiling; - dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]); - brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) { - brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key); + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); + } +} + + + +/** + * Create the constant buffer surface. Vertex/fragment shader constants will be + * read from this buffer with Data Port Read instructions/messages. + */ +dri_bo * +brw_create_constant_surface( struct brw_context *brw, + struct brw_surface_key *key ) +{ + const GLint w = key->width - 1; + struct brw_surface_state surf; + dri_bo *bo; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = BRW_SURFACE_BUFFER; + surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + assert(key->bo); + if (key->bo) + surf.ss1.base_addr = key->bo->offset; /* reloc */ + else + surf.ss1.base_addr = key->offset; + + surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ + surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ + surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ + surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ + brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ + + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, + key, sizeof(*key), + &key->bo, key->bo ? 1 : 0, + &surf, sizeof(surf), + NULL, NULL); + + if (key->bo) { + /* Emit relocation to surface contents */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); + } + + return bo; +} + +/* Creates a new WM constant buffer reflecting the current fragment program's + * constants, if needed by the fragment program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom. + */ +static drm_intel_bo * +brw_wm_update_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = fp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + drm_intel_bo *const_buffer; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (!fp->use_const_buffer) + return NULL; + + const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", + size, 64); + + /* _NEW_PROGRAM_CONSTANTS */ + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + return const_buffer; +} + +/** + * Update the surface state for a WM constant buffer. + * The constant buffer will be (re)allocated here if needed. + */ +static void +brw_update_wm_constant_surface( GLcontext *ctx, + GLuint surf) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = + fp->program.Base.Parameters; + + /* If we're in this state update atom, we need to update WM constants, so + * free the old buffer and create a new one for the new contents. + */ + dri_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (fp->const_buffer == 0) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + return; + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = fp->const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; +} + +/** + * Updates surface / buffer for fragment shader constant buffer, if + * one is required. + * + * This consumes the state updates for the constant buffer, and produces + * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for + * inclusion in the binding table. + */ +static void prepare_wm_constant_surface(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; + + drm_intel_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (fp->const_buffer == 0) { + if (brw->wm.surf_bo[surf] != NULL) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + } + return; } + + brw_update_wm_constant_surface(ctx, surf); } +const struct brw_tracked_state brw_wm_constant_surface = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_FRAGMENT_PROGRAM), + .cache = 0 + }, + .prepare = prepare_wm_constant_surface, +}; + + /** * Sets up a surface state structure to point at the given region. * While it is only used for the front/back buffer currently, it should be * usable for further buffers when doing ARB_draw_buffer support. */ static void -brw_update_region_surface(struct brw_context *brw, struct intel_region *region, - unsigned int unit, GLboolean cached) +brw_update_renderbuffer_surface(struct brw_context *brw, + struct gl_renderbuffer *rb, + unsigned int unit) { + GLcontext *ctx = &brw->intel.ctx; dri_bo *region_bo = NULL; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct intel_region *region = irb ? irb->region : NULL; struct { unsigned int surface_type; unsigned int surface_format; - unsigned int width, height, cpp; + unsigned int width, height, pitch, cpp; GLubyte color_mask[4]; GLboolean color_blend; uint32_t tiling; + uint32_t draw_offset; } key; memset(&key, 0, sizeof(key)); @@ -317,34 +527,54 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, region_bo = region->buffer; key.surface_type = BRW_SURFACE_2D; - if (region->cpp == 4) + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - else + break; + case MESA_FORMAT_RGB565: key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + break; + case MESA_FORMAT_ARGB1555: + key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + break; + case MESA_FORMAT_ARGB4444: + key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + break; + default: + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + } key.tiling = region->tiling; - key.width = region->pitch; /* XXX: not really! */ - key.height = region->height; + if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) { + key.width = rb->Width; + key.height = rb->Height; + } else { + key.width = region->width; + key.height = region->height; + } + key.pitch = region->pitch; key.cpp = region->cpp; + key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */ } else { key.surface_type = BRW_SURFACE_NULL; key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - key.tiling = 0; + key.tiling = I915_TILING_X; key.width = 1; key.height = 1; key.cpp = 4; + key.draw_offset = 0; } - memcpy(key.color_mask, brw->attribs.Color->ColorMask, + memcpy(key.color_mask, ctx->Color.ColorMask, sizeof(key.color_mask)); - key.color_blend = (!brw->attribs.Color->_LogicOpEnabled && - brw->attribs.Color->BlendEnabled); + key.color_blend = (!ctx->Color._LogicOpEnabled && + ctx->Color.BlendEnabled); dri_bo_unreference(brw->wm.surf_bo[unit]); - brw->wm.surf_bo[unit] = NULL; - if (cached) - brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - ®ion_bo, 1, - NULL); + brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + ®ion_bo, 1, + NULL); if (brw->wm.surf_bo[unit] == NULL) { struct brw_surface_state surf; @@ -353,13 +583,34 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, surf.ss0.surface_format = key.surface_format; surf.ss0.surface_type = key.surface_type; + if (key.tiling == I915_TILING_NONE) { + surf.ss1.base_addr = key.draw_offset; + } else { + uint32_t tile_offset = key.draw_offset % 4096; + + surf.ss1.base_addr = key.draw_offset - tile_offset; + + assert(BRW_IS_G4X(brw) || tile_offset == 0); + if (BRW_IS_G4X(brw)) { + if (key.tiling == I915_TILING_X) { + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. + */ + surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4; + surf.ss5.y_offset = tile_offset / 512 / 2; + } else { + surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4; + surf.ss5.y_offset = tile_offset / 128 / 2; + } + } + } if (region_bo != NULL) - surf.ss1.base_addr = region_bo->offset; /* reloc */ + surf.ss1.base_addr += region_bo->offset; /* reloc */ surf.ss2.width = key.width - 1; surf.ss2.height = key.height - 1; brw_set_surface_tiling(&surf, key.tiling); - surf.ss3.pitch = (key.width * key.cpp) - 1; + surf.ss3.pitch = (key.pitch * key.cpp) - 1; /* _NEW_COLOR */ surf.ss0.color_blend = key.color_blend; @@ -369,8 +620,9 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, surf.ss0.writedisable_alpha = !key.color_mask[3]; /* Key size will never match key size for textures, so we're safe. */ - brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), + brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), ®ion_bo, 1, &surf, sizeof(surf), NULL, NULL); @@ -379,13 +631,12 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, * them both. We might be able to figure out from other state * a more restrictive relocation to emit. */ - dri_bo_emit_reloc(brw->wm.surf_bo[unit], - I915_GEM_DOMAIN_RENDER | - I915_GEM_DOMAIN_SAMPLER, - I915_GEM_DOMAIN_RENDER, - 0, - offsetof(struct brw_surface_state, ss1), - region_bo); + drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], + offsetof(struct brw_surface_state, ss1), + region_bo, + surf.ss1.base_addr - region_bo->offset, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER); } } } @@ -400,7 +651,9 @@ brw_wm_get_binding_table(struct brw_context *brw) { dri_bo *bind_bo; - bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); + + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, NULL); @@ -416,7 +669,7 @@ brw_wm_get_binding_table(struct brw_context *brw) else data[i] = 0; - bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, data, data_size, @@ -446,51 +699,62 @@ static void prepare_wm_surfaces(struct brw_context *brw ) GLuint i; int old_nr_surfaces; - if (brw->state.nr_draw_regions > 1) { - for (i = 0; i < brw->state.nr_draw_regions; i++) { - brw_update_region_surface(brw, brw->state.draw_regions[i], i, - GL_FALSE); + /* _NEW_BUFFERS */ + /* Update surfaces for drawing buffers */ + if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + brw_update_renderbuffer_surface(brw, + ctx->DrawBuffer->_ColorDrawBuffers[i], + i); } - }else { - brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE); + } else { + brw_update_renderbuffer_surface(brw, NULL, 0); } old_nr_surfaces = brw->wm.nr_surfaces; brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; + if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL) + brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; + + /* Update surfaces for textures */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i]; + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; + const GLuint surf = SURF_INDEX_TEXTURE(i); /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ - if(texUnit->_ReallyEnabled) { + if (texUnit->_ReallyEnabled) { if (texUnit->_Current == intel->frame_buffer_texobj) { - dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); - brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = brw->wm.surf_bo[0]; - dri_bo_reference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); - brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; + /* render to texture */ + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw->wm.surf_bo[0]; + dri_bo_reference(brw->wm.surf_bo[surf]); + brw->wm.nr_surfaces = surf + 1; } else { + /* regular texture */ brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1; + brw->wm.nr_surfaces = surf + 1; } } else { - dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]); - brw->wm.surf_bo[i+MAX_DRAW_BUFFERS] = NULL; + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; } - } dri_bo_unreference(brw->wm.bind_bo); brw->wm.bind_bo = brw_wm_get_binding_table(brw); if (brw->wm.nr_surfaces != old_nr_surfaces) - brw->state.dirty.brw |= BRW_NEW_NR_SURFACES; + brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; } - const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS, - .brw = BRW_NEW_CONTEXT, + .mesa = (_NEW_COLOR | + _NEW_TEXTURE | + _NEW_BUFFERS), + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_WM_SURFACES), .cache = 0 }, .prepare = prepare_wm_surfaces, diff --git a/src/mesa/drivers/dri/i965/intel_clear.c b/src/mesa/drivers/dri/i965/intel_clear.c new file mode 120000 index 0000000000..9a2a742a0d --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_clear.c @@ -0,0 +1 @@ +../intel/intel_clear.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_depthstencil.c b/src/mesa/drivers/dri/i965/intel_depthstencil.c deleted file mode 120000 index 4ac4ae690a..0000000000 --- a/src/mesa/drivers/dri/i965/intel_depthstencil.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_depthstencil.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c new file mode 120000 index 0000000000..a2f3e8cd20 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -0,0 +1 @@ +../intel/intel_extensions.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_generatemipmap.c b/src/mesa/drivers/dri/i965/intel_generatemipmap.c new file mode 120000 index 0000000000..4c6b37ada0 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_generatemipmap.c @@ -0,0 +1 @@ +../intel/intel_generatemipmap.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c new file mode 120000 index 0000000000..cc4589f4d4 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -0,0 +1 @@ +../intel/intel_pixel_read.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_state.c b/src/mesa/drivers/dri/i965/intel_state.c index 67ef5f78c1..519672fc35 100644..120000 --- a/src/mesa/drivers/dri/i965/intel_state.c +++ b/src/mesa/drivers/dri/i965/intel_state.c @@ -1,225 +1 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "main/glheader.h" -#include "main/context.h" -#include "main/macros.h" -#include "main/enums.h" -#include "main/colormac.h" -#include "main/dd.h" - -#include "intel_screen.h" -#include "intel_context.h" -#include "intel_regions.h" -#include "swrast/swrast.h" - -int intel_translate_shadow_compare_func( GLenum func ) -{ - switch(func) { - case GL_NEVER: - return COMPAREFUNC_ALWAYS; - case GL_LESS: - return COMPAREFUNC_LEQUAL; - case GL_LEQUAL: - return COMPAREFUNC_LESS; - case GL_GREATER: - return COMPAREFUNC_GEQUAL; - case GL_GEQUAL: - return COMPAREFUNC_GREATER; - case GL_NOTEQUAL: - return COMPAREFUNC_EQUAL; - case GL_EQUAL: - return COMPAREFUNC_NOTEQUAL; - case GL_ALWAYS: - return COMPAREFUNC_NEVER; - } - - fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); - return COMPAREFUNC_NEVER; -} - -int intel_translate_compare_func( GLenum func ) -{ - switch(func) { - case GL_NEVER: - return COMPAREFUNC_NEVER; - case GL_LESS: - return COMPAREFUNC_LESS; - case GL_LEQUAL: - return COMPAREFUNC_LEQUAL; - case GL_GREATER: - return COMPAREFUNC_GREATER; - case GL_GEQUAL: - return COMPAREFUNC_GEQUAL; - case GL_NOTEQUAL: - return COMPAREFUNC_NOTEQUAL; - case GL_EQUAL: - return COMPAREFUNC_EQUAL; - case GL_ALWAYS: - return COMPAREFUNC_ALWAYS; - } - - fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); - return COMPAREFUNC_ALWAYS; -} - -int intel_translate_stencil_op( GLenum op ) -{ - switch(op) { - case GL_KEEP: - return STENCILOP_KEEP; - case GL_ZERO: - return STENCILOP_ZERO; - case GL_REPLACE: - return STENCILOP_REPLACE; - case GL_INCR: - return STENCILOP_INCRSAT; - case GL_DECR: - return STENCILOP_DECRSAT; - case GL_INCR_WRAP: - return STENCILOP_INCR; - case GL_DECR_WRAP: - return STENCILOP_DECR; - case GL_INVERT: - return STENCILOP_INVERT; - default: - return STENCILOP_ZERO; - } -} - -int intel_translate_blend_factor( GLenum factor ) -{ - switch(factor) { - case GL_ZERO: - return BLENDFACT_ZERO; - case GL_SRC_ALPHA: - return BLENDFACT_SRC_ALPHA; - case GL_ONE: - return BLENDFACT_ONE; - case GL_SRC_COLOR: - return BLENDFACT_SRC_COLR; - case GL_ONE_MINUS_SRC_COLOR: - return BLENDFACT_INV_SRC_COLR; - case GL_DST_COLOR: - return BLENDFACT_DST_COLR; - case GL_ONE_MINUS_DST_COLOR: - return BLENDFACT_INV_DST_COLR; - case GL_ONE_MINUS_SRC_ALPHA: - return BLENDFACT_INV_SRC_ALPHA; - case GL_DST_ALPHA: - return BLENDFACT_DST_ALPHA; - case GL_ONE_MINUS_DST_ALPHA: - return BLENDFACT_INV_DST_ALPHA; - case GL_SRC_ALPHA_SATURATE: - return BLENDFACT_SRC_ALPHA_SATURATE; - case GL_CONSTANT_COLOR: - return BLENDFACT_CONST_COLOR; - case GL_ONE_MINUS_CONSTANT_COLOR: - return BLENDFACT_INV_CONST_COLOR; - case GL_CONSTANT_ALPHA: - return BLENDFACT_CONST_ALPHA; - case GL_ONE_MINUS_CONSTANT_ALPHA: - return BLENDFACT_INV_CONST_ALPHA; - } - - fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, factor); - return BLENDFACT_ZERO; -} - -int intel_translate_logic_op( GLenum opcode ) -{ - switch(opcode) { - case GL_CLEAR: - return LOGICOP_CLEAR; - case GL_AND: - return LOGICOP_AND; - case GL_AND_REVERSE: - return LOGICOP_AND_RVRSE; - case GL_COPY: - return LOGICOP_COPY; - case GL_COPY_INVERTED: - return LOGICOP_COPY_INV; - case GL_AND_INVERTED: - return LOGICOP_AND_INV; - case GL_NOOP: - return LOGICOP_NOOP; - case GL_XOR: - return LOGICOP_XOR; - case GL_OR: - return LOGICOP_OR; - case GL_OR_INVERTED: - return LOGICOP_OR_INV; - case GL_NOR: - return LOGICOP_NOR; - case GL_EQUIV: - return LOGICOP_EQUIV; - case GL_INVERT: - return LOGICOP_INV; - case GL_OR_REVERSE: - return LOGICOP_OR_RVRSE; - case GL_NAND: - return LOGICOP_NAND; - case GL_SET: - return LOGICOP_SET; - default: - return LOGICOP_SET; - } -} - - -static void intelClearColor(GLcontext *ctx, const GLfloat color[4]) -{ - struct intel_context *intel = intel_context(ctx); - - UNCLAMPED_FLOAT_TO_RGBA_CHAN(intel->clear_chan, color); - - intel->ClearColor8888 = INTEL_PACKCOLOR8888(intel->clear_chan[0], - intel->clear_chan[1], - intel->clear_chan[2], - intel->clear_chan[3]); - intel->ClearColor565 = INTEL_PACKCOLOR565(intel->clear_chan[0], - intel->clear_chan[1], - intel->clear_chan[2]); -} - - - -/* Fallback to swrast for select and feedback. - */ -static void intelRenderMode( GLcontext *ctx, GLenum mode ) -{ - struct intel_context *intel = intel_context(ctx); - FALLBACK( intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER) ); -} - - -void intelInitStateFuncs( struct dd_function_table *functions ) -{ - functions->RenderMode = intelRenderMode; - functions->ClearColor = intelClearColor; -} +../intel/intel_state.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_swapbuffers.c b/src/mesa/drivers/dri/i965/intel_swapbuffers.c new file mode 120000 index 0000000000..148d5215aa --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_swapbuffers.c @@ -0,0 +1 @@ +../intel/intel_swapbuffers.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_syncobj.c b/src/mesa/drivers/dri/i965/intel_syncobj.c new file mode 120000 index 0000000000..0b2e56ab24 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_syncobj.c @@ -0,0 +1 @@ +../intel/intel_syncobj.c
\ No newline at end of file |