diff options
Diffstat (limited to 'src/mesa/drivers')
68 files changed, 1025 insertions, 381 deletions
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index ebfaa2f07b..ca5eb5c755 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -47,6 +47,9 @@ #if FEATURE_ARB_sync #include "main/syncobj.h" #endif +#if FEATURE_EXT_transform_feedback +#include "main/transformfeedback.h" +#endif #include "shader/program.h" #include "shader/shader_api.h" @@ -178,14 +181,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->TexParameter = NULL; driver->Viewport = NULL; - /* state queries */ - driver->GetBooleanv = NULL; - driver->GetDoublev = NULL; - driver->GetFloatv = NULL; - driver->GetIntegerv = NULL; - driver->GetInteger64v = NULL; - driver->GetPointerv = NULL; - /* buffer objects */ _mesa_init_buffer_object_functions(driver); @@ -213,6 +208,10 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->DeleteArrayObject = _mesa_delete_array_object; driver->BindArrayObject = NULL; +#if FEATURE_EXT_transform_feedback + _mesa_init_transform_feedback_functions(driver); +#endif + /* T&L stuff */ driver->NeedValidate = GL_FALSE; driver->ValidateTnlModule = NULL; diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template index 4b9a0c1786..8cb25439e4 100644 --- a/src/mesa/drivers/dri/Makefile.template +++ b/src/mesa/drivers/dri/Makefile.template @@ -26,6 +26,7 @@ SHARED_INCLUDES = \ -I$(TOP)/src/mesa/drivers/dri/common \ -Iserver \ -I$(TOP)/include \ + -I$(TOP)/src/mapi \ -I$(TOP)/src/mesa \ -I$(TOP)/src/egl/main \ -I$(TOP)/src/egl/drivers/dri \ diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index 360c524754..c3d1f2c454 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -736,6 +736,8 @@ setupLoaderExtensions(__DRIscreen *psp, psp->dri2.loader = (__DRIdri2LoaderExtension *) extensions[i]; if (strcmp(extensions[i]->name, __DRI_IMAGE_LOOKUP) == 0) psp->dri2.image = (__DRIimageLookupExtension *) extensions[i]; + if (strcmp(extensions[i]->name, __DRI_USE_INVALIDATE) == 0) + psp->dri2.useInvalidate = (__DRIuseInvalidateExtension *) extensions[i]; } } diff --git a/src/mesa/drivers/dri/common/dri_util.h b/src/mesa/drivers/dri/common/dri_util.h index ab6c6e57af..e4c590b132 100644 --- a/src/mesa/drivers/dri/common/dri_util.h +++ b/src/mesa/drivers/dri/common/dri_util.h @@ -527,6 +527,7 @@ struct __DRIscreenRec { int enabled; __DRIdri2LoaderExtension *loader; __DRIimageLookupExtension *image; + __DRIuseInvalidateExtension *useInvalidate; } dri2; /* The lock actually in use, old sarea or DRI2 */ diff --git a/src/mesa/drivers/dri/i915/i830_state.c b/src/mesa/drivers/dri/i915/i830_state.c index 3b9b3ae329..38e524e183 100644 --- a/src/mesa/drivers/dri/i915/i830_state.c +++ b/src/mesa/drivers/dri/i915/i830_state.c @@ -453,8 +453,6 @@ i830Viewport(GLcontext * ctx, GLint x, GLint y, GLsizei width, GLsizei height) { intelCalcViewport(ctx); - - intel_viewport(ctx, x, y, width, height); } diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 91b228d52b..26d387f383 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -394,8 +394,6 @@ i915Viewport(GLcontext * ctx, GLint x, GLint y, GLsizei width, GLsizei height) { intelCalcViewport(ctx); - - intel_viewport(ctx, x, y, width, height); } diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index 9449a158dc..7aecf68e4a 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -1208,7 +1208,7 @@ intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode) if (mode) { intel->Fallback |= bit; if (oldfallback == 0) { - intelFlush(ctx); + intel_flush(ctx); if (INTEL_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "ENTER FALLBACK %x: %s\n", bit, getFallbackString(bit)); diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 029a16500b..49ef859e45 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -42,7 +42,6 @@ #include "brw_state.h" #include "brw_clip.h" - #define FRONT_UNFILLED_BIT 0x1 #define BACK_UNFILLED_BIT 0x2 @@ -127,6 +126,14 @@ static void compile_clip_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); + if (INTEL_DEBUG & DEBUG_CLIP) { + printf("clip:\n"); + for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) + brw_disasm(stdout, &((struct brw_instruction *)program)[i], + intel->gen); + printf("\n"); + } + /* Upload */ dri_bo_unreference(brw->clip.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h index d71bac7f61..68222c6c27 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.h +++ b/src/mesa/drivers/dri/i965/brw_clip.h @@ -114,8 +114,6 @@ struct brw_clip_compile { GLboolean need_direction; - GLuint last_mrf; - GLuint header_position_offset; GLuint offset[VERT_ATTRIB_MAX]; }; diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index b27fe654ca..916a99ea00 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -177,7 +177,7 @@ void brw_clip_tri_init_vertices( struct brw_clip_compile *c ) void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; - struct brw_instruction *is_poly; + struct brw_instruction *is_poly, *is_trifan; struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); @@ -195,8 +195,22 @@ void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) is_poly = brw_ELSE(p, is_poly); { if (c->key.pv_first) { - brw_clip_copy_colors(c, 1, 0); - brw_clip_copy_colors(c, 2, 0); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_TRIFAN)); + is_trifan = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_copy_colors(c, 0, 1); + brw_clip_copy_colors(c, 2, 1); + } + is_trifan = brw_ELSE(p, is_trifan); + { + brw_clip_copy_colors(c, 1, 0); + brw_clip_copy_colors(c, 2, 0); + } + brw_ENDIF(p, is_trifan); } else { brw_clip_copy_colors(c, 0, 2); diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c index 34a966a47a..2148bc8244 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_util.c +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c @@ -211,27 +211,14 @@ void brw_clip_emit_vue(struct brw_clip_compile *c, GLuint header) { struct brw_compile *p = &c->func; - GLuint start = c->last_mrf; brw_clip_ff_sync(c); assert(!(allocate && eot)); - - /* Cycle through mrf regs - probably futile as we have to wait for - * the allocation response anyway. Also, the order this function - * is invoked doesn't correspond to the order the instructions will - * be executed, so it won't have any effect in many cases. - */ -#if 0 - if (start + c->nr_regs + 1 >= MAX_MRF) - start = 0; - c->last_mrf = start + c->nr_regs + 1; -#endif - /* Copy the vertex from vertn into m1..mN+1: */ - brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs); + brw_copy_from_indirect(p, brw_message_reg(1), vert, c->nr_regs); /* Overwrite PrimType and PrimStart in the message header, for * each vertex in turn: @@ -247,7 +234,7 @@ void brw_clip_emit_vue(struct brw_clip_compile *c, */ brw_urb_WRITE(p, allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), - start, + 0, c->reg.R0, allocate, 1, /* used */ @@ -370,18 +357,13 @@ void brw_clip_ff_sync(struct brw_clip_compile *c) need_ff_sync = brw_IF(p, BRW_EXECUTE_1); { brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1)); - brw_ff_sync(p, - c->reg.R0, - 0, - c->reg.R0, - 1, - 1, /* used */ - 1, /* msg length */ - 1, /* response length */ - 0, /* eot */ - 1, /* write compelete */ - 0, /* urb offset */ - BRW_URB_SWIZZLE_NONE); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, /* allocate */ + 1, /* response length */ + 0 /* eot */); } brw_ENDIF(p, need_ff_sync); brw_set_predicate_control(p, BRW_PREDICATE_NONE); diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 523a11aea3..dc4bd5802d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -64,8 +64,6 @@ static void brwInitDriverFunctions( struct dd_function_table *functions ) brwInitFragProgFuncs( functions ); brwInitProgFuncs( functions ); brw_init_queryobj_functions(functions); - - functions->Viewport = intel_viewport; } GLboolean brwCreateContext( int api, @@ -194,8 +192,6 @@ GLboolean brwCreateContext( int api, ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - make_empty_list(&brw->query.active_head); - brw_draw_init( brw ); return GL_TRUE; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1f09651126..a97fcb0f4d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -418,18 +418,12 @@ struct brw_vertex_info { struct brw_query_object { struct gl_query_object Base; - /** Doubly linked list of active query objects in the context. */ - struct brw_query_object *prev, *next; - /** Last query BO associated with this query. */ dri_bo *bo; /** First index in bo with query data for this object. */ int first_index; /** Last index in bo with query data for this object. */ int last_index; - - /* Total count of pixels from previous BOs */ - unsigned int count; }; @@ -664,7 +658,7 @@ struct brw_context } cc; struct { - struct brw_query_object active_head; + struct brw_query_object *obj; dri_bo *bo; int index; GLboolean active; @@ -726,7 +720,7 @@ void brw_upload_urb_fence(struct brw_context *brw); void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_disasm.c */ -int brw_disasm (FILE *file, struct brw_instruction *inst); +int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); /*====================================================================== * Inline conversion functions. These are better-typed than the diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index f26a13fc3c..2d3556b805 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -783,7 +783,7 @@ #define CMD_BINDING_TABLE_PTRS 0x7801 # define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) # define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) -# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 10) +# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12) #define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */ # define PS_SAMPLER_STATE_CHANGE (1 << 12) diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index db3fc50a63..ff12daf497 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -323,6 +323,11 @@ char *math_precision[2] = { [1] = "partial_precision" }; +char *urb_opcode[2] = { + [0] = "urb_write", + [1] = "ff_sync", +}; + char *urb_swizzle[4] = { [BRW_URB_SWIZZLE_NONE] = "", [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", @@ -774,7 +779,7 @@ static int src1 (FILE *file, struct brw_instruction *inst) } } -int brw_disasm (FILE *file, struct brw_instruction *inst) +int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) { int err = 0; int space = 0; @@ -829,12 +834,20 @@ int brw_disasm (FILE *file, struct brw_instruction *inst) } if (inst->header.opcode == BRW_OPCODE_SEND) { + int target; + + if (gen >= 5) + target = inst->bits2.send_gen5.sfid; + else + target = inst->bits3.generic.msg_target; + newline (file); pad (file, 16); space = 0; err |= control (file, "target function", target_function, - inst->bits3.generic.msg_target, &space); - switch (inst->bits3.generic.msg_target) { + target, &space); + + switch (target) { case BRW_MESSAGE_TARGET_MATH: err |= control (file, "math function", math_function, inst->bits3.math.function, &space); @@ -864,8 +877,17 @@ int brw_disasm (FILE *file, struct brw_instruction *inst) inst->bits3.dp_write.send_commit_msg); break; case BRW_MESSAGE_TARGET_URB: - format (file, " %d", inst->bits3.urb.offset); + if (gen >= 5) { + format (file, " %d", inst->bits3.urb_gen5.offset); + } else { + format (file, " %d", inst->bits3.urb.offset); + } + space = 1; + if (gen >= 5) { + err |= control (file, "urb opcode", urb_opcode, + inst->bits3.urb_gen5.opcode, &space); + } err |= control (file, "urb swizzle", urb_swizzle, inst->bits3.urb.swizzle_control, &space); err |= control (file, "urb allocate", urb_allocate, diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index e348d4686b..fe633d3e25 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -77,32 +77,41 @@ static const GLenum reduced_prim[GL_POLYGON+1] = { * programs be immune to the active primitive (ie. cope with all * possibilities). That may not be realistic however. */ -static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) +static GLuint brw_set_prim(struct brw_context *brw, + const struct _mesa_prim *prim) { GLcontext *ctx = &brw->intel.ctx; + GLenum mode = prim->mode; if (INTEL_DEBUG & DEBUG_PRIMS) - printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); - + printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode)); + /* Slight optimization to avoid the GS program when not needed: */ - if (prim == GL_QUAD_STRIP && + if (mode == GL_QUAD_STRIP && ctx->Light.ShadeModel != GL_FLAT && ctx->Polygon.FrontMode == GL_FILL && ctx->Polygon.BackMode == GL_FILL) - prim = GL_TRIANGLE_STRIP; + mode = GL_TRIANGLE_STRIP; + + if (prim->mode == GL_QUADS && prim->count == 4 && + ctx->Light.ShadeModel != GL_FLAT && + ctx->Polygon.FrontMode == GL_FILL && + ctx->Polygon.BackMode == GL_FILL) { + mode = GL_TRIANGLE_FAN; + } - if (prim != brw->primitive) { - brw->primitive = prim; + if (mode != brw->primitive) { + brw->primitive = mode; brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; - if (reduced_prim[prim] != brw->intel.reduced_primitive) { - brw->intel.reduced_primitive = reduced_prim[prim]; + if (reduced_prim[mode] != brw->intel.reduced_primitive) { + brw->intel.reduced_primitive = reduced_prim[mode]; brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; } } - return prim_to_hw_prim[prim]; + return prim_to_hw_prim[mode]; } @@ -351,7 +360,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, */ intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4); - hw_prim = brw_set_prim(brw, prim[i].mode); + hw_prim = brw_set_prim(brw, &prim[i]); if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) { first_time = GL_FALSE; diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 8247faa36d..9cbff24863 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -59,7 +59,7 @@ static GLuint half_float_types[5] = { 0, BRW_SURFACEFORMAT_R16_FLOAT, BRW_SURFACEFORMAT_R16G16_FLOAT, - 0, /* can't seem to render this one */ + BRW_SURFACEFORMAT_R16G16B16A16_FLOAT, BRW_SURFACEFORMAT_R16G16B16A16_FLOAT }; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 4f55158e8f..3a32ad26c1 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -822,13 +822,8 @@ void brw_ff_sync(struct brw_compile *p, GLuint msg_reg_nr, struct brw_reg src0, GLboolean allocate, - GLboolean used, - GLuint msg_length, GLuint response_length, - GLboolean eot, - GLboolean writes_complete, - GLuint offset, - GLuint swizzle); + GLboolean eot); void brw_fb_WRITE(struct brw_compile *p, struct brw_reg dest, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 785d382a00..175899b026 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -280,28 +280,23 @@ static void brw_set_math_message( struct brw_context *brw, } -static void brw_set_ff_sync_message( struct brw_context *brw, - struct brw_instruction *insn, - GLboolean allocate, - GLboolean used, - GLuint msg_length, - GLuint response_length, - GLboolean end_of_thread, - GLboolean complete, - GLuint offset, - GLuint swizzle_control ) +static void brw_set_ff_sync_message(struct brw_context *brw, + struct brw_instruction *insn, + GLboolean allocate, + GLuint response_length, + GLboolean end_of_thread) { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.urb_gen5.opcode = 1; - insn->bits3.urb_gen5.offset = offset; - insn->bits3.urb_gen5.swizzle_control = swizzle_control; + insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ + insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ + insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ insn->bits3.urb_gen5.allocate = allocate; - insn->bits3.urb_gen5.used = used; - insn->bits3.urb_gen5.complete = complete; + insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ + insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ insn->bits3.urb_gen5.header_present = 1; - insn->bits3.urb_gen5.response_length = response_length; - insn->bits3.urb_gen5.msg_length = msg_length; + insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */ + insn->bits3.urb_gen5.msg_length = 1; insn->bits3.urb_gen5.end_of_thread = end_of_thread; insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; insn->bits2.send_gen5.end_of_thread = end_of_thread; @@ -1451,18 +1446,11 @@ void brw_ff_sync(struct brw_compile *p, GLuint msg_reg_nr, struct brw_reg src0, GLboolean allocate, - GLboolean used, - GLuint msg_length, GLuint response_length, - GLboolean eot, - GLboolean writes_complete, - GLuint offset, - GLuint swizzle) + GLboolean eot) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - assert(msg_length < 16); - brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); @@ -1470,13 +1458,8 @@ void brw_ff_sync(struct brw_compile *p, insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_ff_sync_message(p->brw, - insn, - allocate, - used, - msg_length, - response_length, - eot, - writes_complete, - offset, - swizzle); + insn, + allocate, + response_length, + eot); } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 4b13494ecf..94d93f3aa6 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -122,6 +122,16 @@ static void compile_gs_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); + if (INTEL_DEBUG & DEBUG_GS) { + int i; + + printf("gs:\n"); + for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) + brw_disasm(stdout, &((struct brw_instruction *)program)[i], + intel->gen); + printf("\n"); + } + /* Upload */ dri_bo_unreference(brw->gs.prog_bo); @@ -163,6 +173,12 @@ static void populate_key( struct brw_context *brw, /* _NEW_LIGHT */ key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); + if (key->primitive == GL_QUADS && ctx->Light.ShadeModel != GL_FLAT) { + /* Provide consistent primitive order with brw_set_prim's + * optimization of single quads to trifans. + */ + key->pv_first = GL_TRUE; + } key->need_gs_prog = (key->hint_gs_always || brw->primitive == GL_QUADS || diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index dd7b057d62..99a6f6be11 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -104,18 +104,13 @@ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) { struct brw_compile *p = &c->func; brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim)); - brw_ff_sync(p, - c->reg.R0, - 0, - c->reg.R0, - 1, - 1, /* used */ - 1, /* msg length */ - 1, /* response length */ - 0, /* eot */ - 1, /* write compelete */ - 0, /* urb offset */ - BRW_URB_SWIZZLE_NONE); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, /* allocate */ + 1, /* response length */ + 0 /* eot */); } diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index 6cce7e5089..3f47a68049 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -38,7 +38,6 @@ * required for handling queries, so that we can be sure that we won't * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT. */ -#include "main/simple_list.h" #include "main/imports.h" #include "brw_context.h" @@ -105,7 +104,7 @@ brw_begin_query(GLcontext *ctx, struct gl_query_object *q) query->first_index = -1; query->last_index = -1; - insert_at_head(&brw->query.active_head, query); + brw->query.obj = query; intel->stats_wm++; } @@ -131,7 +130,7 @@ brw_end_query(GLcontext *ctx, struct gl_query_object *q) brw->query.bo = NULL; } - remove_from_list(query); + brw->query.obj = NULL; intel->stats_wm--; } @@ -161,7 +160,7 @@ brw_prepare_query_begin(struct brw_context *brw) struct intel_context *intel = &brw->intel; /* Skip if we're not doing any queries. */ - if (is_empty_list(&brw->query.active_head)) + if (!brw->query.obj) return; /* Get a new query BO if we're going to need it. */ @@ -182,10 +181,10 @@ void brw_emit_query_begin(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - struct brw_query_object *query; + struct brw_query_object *query = brw->query.obj; /* Skip if we're not doing any queries, or we've emitted the start. */ - if (brw->query.active || is_empty_list(&brw->query.active_head)) + if (!query || brw->query.active) return; BEGIN_BATCH(4); @@ -205,16 +204,14 @@ brw_emit_query_begin(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); - foreach(query, &brw->query.active_head) { - if (query->bo != brw->query.bo) { - if (query->bo != NULL) - brw_queryobj_get_results(query); - dri_bo_reference(brw->query.bo); - query->bo = brw->query.bo; - query->first_index = brw->query.index; - } - query->last_index = brw->query.index; + if (query->bo != brw->query.bo) { + if (query->bo != NULL) + brw_queryobj_get_results(query); + dri_bo_reference(brw->query.bo); + query->bo = brw->query.bo; + query->first_index = brw->query.index; } + query->last_index = brw->query.index; brw->query.active = GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 57d1c29ade..b0dd1ff3af 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -46,6 +46,7 @@ static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { + struct intel_context *intel = &brw->intel; struct brw_sf_compile c; const GLuint *program; GLuint program_size; @@ -107,6 +108,14 @@ static void compile_sf_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); + if (INTEL_DEBUG & DEBUG_SF) { + printf("sf:\n"); + for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) + brw_disasm(stdout, &((struct brw_instruction *)program)[i], + intel->gen); + printf("\n"); + } + /* Upload */ dri_bo_unreference(brw->sf.prog_bo); @@ -154,6 +163,7 @@ static void upload_sf_prog(struct brw_context *brw) break; } + /* _NEW_POINT */ key.do_point_sprite = ctx->Point.PointSprite; if (key.do_point_sprite) { int i; diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 44b085e214..57ffb2d89e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -48,6 +48,7 @@ static void do_vs_prog( struct brw_context *brw, const GLuint *program; struct brw_vs_compile c; int aux_size; + int i; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); @@ -63,6 +64,17 @@ static void do_vs_prog( struct brw_context *brw, c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG; } + /* Put dummy slots into the VUE for the SF to put the replaced + * point sprite coords in. We shouldn't need these dummy slots, + * which take up precious URB space, but it would mean that the SF + * doesn't get nice aligned pairs of input coords into output + * coords, which would be a pain to handle. + */ + for (i = 0; i < 8; i++) { + if (c.key.point_coord_replace & (1 << i)) + c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i); + } + if (0) _mesa_print_program(&c.vp->program.Base); @@ -106,6 +118,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) struct brw_vs_prog_key key; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; + int i; memset(&key, 0, sizeof(key)); @@ -117,6 +130,14 @@ static void brw_upload_vs_prog(struct brw_context *brw) key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL); + /* _NEW_POINT */ + if (ctx->Point.PointSprite) { + for (i = 0; i < 8; i++) { + if (ctx->Point.CoordReplace[i]) + key.point_coord_replace |= (1 << i); + } + } + /* Make an early check for the key. */ dri_bo_unreference(brw->vs.prog_bo); @@ -135,7 +156,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_POLYGON, + .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 95e0501b1e..6493744f3e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -43,7 +43,7 @@ struct brw_vs_prog_key { GLuint program_string_id; GLuint nr_userclip:4; GLuint copy_edgeflag:1; - GLuint pad:26; + GLuint point_coord_replace:8; }; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index dc6ab81c4a..0b44deeb63 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1882,7 +1882,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) printf("vs-native:\n"); for (i = 0; i < p->nr_insn; i++) - brw_disasm(stderr, &p->store[i]); + brw_disasm(stderr, &p->store[i], intel->gen); printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 375e795391..323cfac8fa 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1717,7 +1717,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) - brw_disasm(stderr, &p->store[i]); + brw_disasm(stderr, &p->store[i], p->brw->intel.gen); printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 88b885cb94..fe3c89b721 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -2111,7 +2111,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) if (INTEL_DEBUG & DEBUG_WM) { printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) - brw_disasm(stderr, &p->store[i]); + brw_disasm(stderr, &p->store[i], intel->gen); printf("\n"); } } diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 9768b0deee..ca8e344836 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -130,8 +130,7 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, struct intel_context *intel = batch->intel; GLuint used = batch->ptr - batch->map; - if (!intel->using_dri2_swapbuffers && - intel->first_post_swapbuffers_batch == NULL) { + if (intel->first_post_swapbuffers_batch == NULL) { intel->first_post_swapbuffers_batch = intel->batch->buf; drm_intel_bo_reference(intel->first_post_swapbuffers_batch); } diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 7d9f302dca..a590c799ad 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -353,6 +353,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) OUT_BATCH(clear_val); ADVANCE_BATCH(); + if (intel->always_flush_cache) + intel_batchbuffer_emit_mi_flush(intel->batch); + if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); else diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 103aaf2b95..c38551bf95 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -277,7 +277,7 @@ intel_bufferobj_map(GLcontext * ctx, /* Flush any existing batchbuffer that might reference this data. */ if (drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) - intelFlush(ctx); + intel_flush(ctx); if (intel_obj->region) intel_bufferobj_cow(intel, intel_obj); @@ -349,7 +349,7 @@ intel_bufferobj_map_range(GLcontext * ctx, */ if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) && drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) - intelFlush(ctx); + intel_flush(ctx); if (intel_obj->buffer == NULL) { obj->Pointer = NULL; diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 8ee9a292a1..05d4998654 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -177,6 +177,30 @@ intelGetString(GLcontext * ctx, GLenum name) } } +static void +intel_flush_front(GLcontext *ctx) +{ + struct intel_context *intel = intel_context(ctx); + __DRIcontext *driContext = intel->driContext; + __DRIscreen *const screen = intel->intelScreen->driScrnPriv; + + if ((ctx->DrawBuffer->Name == 0) && intel->front_buffer_dirty) { + if (screen->dri2.loader && + (screen->dri2.loader->base.version >= 2) + && (screen->dri2.loader->flushFrontBuffer != NULL) && + driContext->driDrawablePriv && + driContext->driDrawablePriv->loaderPrivate) { + (*screen->dri2.loader->flushFrontBuffer)(driContext->driDrawablePriv, + driContext->driDrawablePriv->loaderPrivate); + + /* We set the dirty bit in intel_prepare_render() if we're + * front buffer rendering once we get there. + */ + intel->front_buffer_dirty = GL_FALSE; + } + } +} + static unsigned intel_bits_per_pixel(const struct intel_renderbuffer *rb) { @@ -203,8 +227,10 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) * real front buffer contents will get copied to the new fake front * buffer. */ - if (intel->is_front_buffer_rendering) - intel_flush(&intel->ctx, GL_FALSE); + if (intel->is_front_buffer_rendering) { + intel_flush(&intel->ctx); + intel_flush_front(&intel->ctx); + } /* Set this up front, so that in case our buffers get invalidated * while we're getting new buffers, we don't clobber the stamp and @@ -362,7 +388,7 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) if (buffers[i].attachment == __DRI_BUFFER_DEPTH) depth_region = region; - intel_renderbuffer_set_region(rb, region); + intel_renderbuffer_set_region(intel, rb, region); intel_region_release(®ion); if (buffers[i].attachment == __DRI_BUFFER_DEPTH_STENCIL) { @@ -374,7 +400,7 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) continue; intel_region_reference(&stencil_region, region); - intel_renderbuffer_set_region(rb, stencil_region); + intel_renderbuffer_set_region(intel, rb, stencil_region); intel_region_release(&stencil_region); } } @@ -383,6 +409,10 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) driUpdateFramebufferSize(&intel->ctx, drawable); } +/** + * intel_prepare_render should be called anywhere that curent read/drawbuffer + * state is required. + */ void intel_prepare_render(struct intel_context *intel) { @@ -403,16 +433,47 @@ intel_prepare_render(struct intel_context *intel) intel_update_renderbuffers(driContext, drawable); driContext->dri2.read_stamp = drawable->dri2.stamp; } + + /* If we're currently rendering to the front buffer, the rendering + * that will happen next will probably dirty the front buffer. So + * mark it as dirty here. + */ + if (intel->is_front_buffer_rendering) + intel->front_buffer_dirty = GL_TRUE; + + /* Wait for the swapbuffers before the one we just emitted, so we + * don't get too many swaps outstanding for apps that are GPU-heavy + * but not CPU-heavy. + * + * We're using intelDRI2Flush (called from the loader before + * swapbuffer) and glFlush (for front buffer rendering) as the + * indicator that a frame is done and then throttle when we get + * here as we prepare to render the next frame. At this point for + * round trips for swap/copy and getting new buffers are done and + * we'll spend less time waiting on the GPU. + * + * Unfortunately, we don't have a handle to the batch containing + * the swap, and getting our hands on that doesn't seem worth it, + * so we just us the first batch we emitted after the last swap. + */ + if (intel->need_throttle && intel->first_post_swapbuffers_batch) { + drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); + drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; + intel->need_throttle = GL_FALSE; + } } -void +static void intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) { struct intel_context *intel = intel_context(ctx); __DRIcontext *driContext = intel->driContext; - if (!intel->using_dri2_swapbuffers && - !intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) { + if (intel->saved_viewport) + intel->saved_viewport(ctx, x, y, w, h); + + if (!intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) { dri2InvalidateDrawable(driContext->driDrawablePriv); dri2InvalidateDrawable(driContext->driReadablePriv); } @@ -431,12 +492,12 @@ static const struct dri_debug_control debug_control[] = { { "buf", DEBUG_BUFMGR}, { "reg", DEBUG_REGION}, { "fbo", DEBUG_FBO}, - { "lock", DEBUG_LOCK}, + { "gs", DEBUG_GS}, { "sync", DEBUG_SYNC}, { "prim", DEBUG_PRIMS }, { "vert", DEBUG_VERTS }, { "dri", DEBUG_DRI }, - { "dma", DEBUG_DMA }, + { "sf", DEBUG_SF }, { "san", DEBUG_SANITY }, { "sleep", DEBUG_SLEEP }, { "stats", DEBUG_STATS }, @@ -447,6 +508,7 @@ static const struct dri_debug_control debug_control[] = { { "glsl_force", DEBUG_GLSL_FORCE }, { "urb", DEBUG_URB }, { "vs", DEBUG_VS }, + { "clip", DEBUG_CLIP }, { NULL, 0 } }; @@ -469,10 +531,9 @@ intelInvalidateState(GLcontext * ctx, GLuint new_state) } void -intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) +intel_flush(GLcontext *ctx) { struct intel_context *intel = intel_context(ctx); - __DRIcontext *driContext = intel->driContext; if (intel->Fallback) _swrast_flush(ctx); @@ -482,35 +543,6 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush) if (intel->batch->map != intel->batch->ptr) intel_batchbuffer_flush(intel->batch); - - if ((ctx->DrawBuffer->Name == 0) && intel->front_buffer_dirty) { - __DRIscreen *const screen = intel->intelScreen->driScrnPriv; - - if (screen->dri2.loader && - (screen->dri2.loader->base.version >= 2) - && (screen->dri2.loader->flushFrontBuffer != NULL) && - driContext->driDrawablePriv && - driContext->driDrawablePriv->loaderPrivate) { - (*screen->dri2.loader->flushFrontBuffer)(driContext->driDrawablePriv, - driContext->driDrawablePriv->loaderPrivate); - - /* Only clear the dirty bit if front-buffer rendering is no longer - * enabled. This is done so that the dirty bit can only be set in - * glDrawBuffer. Otherwise the dirty bit would have to be set at - * each of N places that do rendering. This has worse performances, - * but it is much easier to get correct. - */ - if (!intel->is_front_buffer_rendering) { - intel->front_buffer_dirty = GL_FALSE; - } - } - } -} - -void -intelFlush(GLcontext * ctx) -{ - intel_flush(ctx, GL_FALSE); } static void @@ -518,26 +550,9 @@ intel_glFlush(GLcontext *ctx) { struct intel_context *intel = intel_context(ctx); - intel_flush(ctx, GL_TRUE); - - /* We're using glFlush as an indicator that a frame is done, which is - * what DRI2 does before calling SwapBuffers (and means we should catch - * people doing front-buffer rendering, as well).. - * - * Wait for the swapbuffers before the one we just emitted, so we don't - * get too many swaps outstanding for apps that are GPU-heavy but not - * CPU-heavy. - * - * Unfortunately, we don't have a handle to the batch containing the swap, - * and getting our hands on that doesn't seem worth it, so we just us the - * first batch we emitted after the last swap. - */ - if (!intel->using_dri2_swapbuffers && - intel->first_post_swapbuffers_batch != NULL) { - drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); - drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); - intel->first_post_swapbuffers_batch = NULL; - } + intel_flush(ctx); + intel_flush_front(ctx); + intel->need_throttle = GL_TRUE; } void @@ -546,7 +561,8 @@ intelFinish(GLcontext * ctx) struct gl_framebuffer *fb = ctx->DrawBuffer; int i; - intelFlush(ctx); + intel_flush(ctx); + intel_flush_front(ctx); for (i = 0; i < fb->_NumColorDrawBuffers; i++) { struct intel_renderbuffer *irb; @@ -602,6 +618,12 @@ intelInitContext(struct intel_context *intel, if (intelScreen->bufmgr == NULL) return GL_FALSE; + /* Can't rely on invalidate events, fall back to glViewport hack */ + if (!driContextPriv->driScreenPriv->dri2.useInvalidate) { + intel->saved_viewport = functions->Viewport; + functions->Viewport = intel_viewport; + } + if (!_mesa_initialize_context_for_api(&intel->ctx, api, mesaVis, shareCtx, functions, (void *) intel)) { printf("%s: failed to init mesa context\n", __FUNCTION__); @@ -886,6 +908,12 @@ intelMakeCurrent(__DRIcontext * driContextPriv, driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; intel_prepare_render(intel); _mesa_make_current(&intel->ctx, fb, readFb); + + /* We do this in intel_prepare_render() too, but intel->ctx.DrawBuffer + * is NULL at that point. We can't call _mesa_makecurrent() + * first, since we need the buffer size for the initial + * viewport. So just call intel_draw_buffer() again here. */ + intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer); } else { _mesa_make_current(NULL, NULL, NULL); diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 02bb4d0d64..04d5fc92a2 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -150,8 +150,8 @@ struct intel_context struct intel_batchbuffer *batch; drm_intel_bo *first_post_swapbuffers_batch; + GLboolean need_throttle; GLboolean no_batch_wrap; - GLboolean using_dri2_swapbuffers; struct { @@ -243,6 +243,8 @@ struct intel_context __DRIcontext *driContext; struct intel_screen *intelScreen; + void (*saved_viewport)(GLcontext * ctx, + GLint x, GLint y, GLsizei width, GLsizei height); /** * Configuration cache @@ -324,12 +326,12 @@ extern int INTEL_DEBUG; #define DEBUG_BUFMGR 0x200 #define DEBUG_REGION 0x400 #define DEBUG_FBO 0x800 -#define DEBUG_LOCK 0x1000 +#define DEBUG_GS 0x1000 #define DEBUG_SYNC 0x2000 #define DEBUG_PRIMS 0x4000 #define DEBUG_VERTS 0x8000 #define DEBUG_DRI 0x10000 -#define DEBUG_DMA 0x20000 +#define DEBUG_SF 0x20000 #define DEBUG_SANITY 0x40000 #define DEBUG_SLEEP 0x80000 #define DEBUG_STATS 0x100000 @@ -339,6 +341,7 @@ extern int INTEL_DEBUG; #define DEBUG_URB 0x1000000 #define DEBUG_VS 0x2000000 #define DEBUG_GLSL_FORCE 0x4000000 +#define DEBUG_CLIP 0x8000000 #define DBG(...) do { \ if (INTEL_DEBUG & FILE_DEBUG_FLAG) \ @@ -371,8 +374,7 @@ extern GLboolean intelInitContext(struct intel_context *intel, struct dd_function_table *functions); extern void intelFinish(GLcontext * ctx); -extern void intelFlush(GLcontext * ctx); -extern void intel_flush(GLcontext * ctx, GLboolean needs_mi_flush); +extern void intel_flush(GLcontext * ctx); extern void intelInitDriverFunctions(struct dd_function_table *functions); @@ -447,9 +449,6 @@ extern int intel_translate_stencil_op(GLenum op); extern int intel_translate_blend_factor(GLenum factor); extern int intel_translate_logic_op(GLenum opcode); -void intel_viewport(GLcontext * ctx, GLint x, GLint y, - GLsizei width, GLsizei height); - void intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable); void intel_prepare_render(struct intel_context *intel); diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 8278d12bb9..217be7ef6c 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -42,7 +42,9 @@ #include "intel_fbo.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" - +#ifndef I915 +#include "brw_state.h" +#endif #define FILE_DEBUG_FLAG DEBUG_FBO @@ -166,7 +168,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat); cpp = _mesa_get_format_bytes(rb->Format); - intelFlush(ctx); + intel_flush(ctx); /* free old region */ if (irb->region) { @@ -280,7 +282,8 @@ intel_nop_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, void -intel_renderbuffer_set_region(struct intel_renderbuffer *rb, +intel_renderbuffer_set_region(struct intel_context *intel, + struct intel_renderbuffer *rb, struct intel_region *region) { struct intel_region *old; @@ -288,6 +291,12 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb, old = rb->region; rb->region = NULL; intel_region_reference(&rb->region, region); +#ifndef I915 + if (old) { + brw_state_cache_bo_delete(&brw_context(&intel->ctx)->surface_cache, + old->buffer); + } +#endif intel_region_release(&old); } @@ -411,7 +420,7 @@ intel_framebuffer_renderbuffer(GLcontext * ctx, { DBG("Intel FramebufferRenderbuffer %u %u\n", fb->Name, rb ? rb->Name : 0); - intelFlush(ctx); + intel_flush(ctx); _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); intel_draw_buffer(ctx, fb); diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h index 72413f7369..028f657d12 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.h +++ b/src/mesa/drivers/dri/intel/intel_fbo.h @@ -82,7 +82,8 @@ intel_get_renderbuffer(struct gl_framebuffer *fb, int attIndex) extern void -intel_renderbuffer_set_region(struct intel_renderbuffer *irb, +intel_renderbuffer_set_region(struct intel_context *intel, + struct intel_renderbuffer *irb, struct intel_region *region); diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index ef1966ea7e..71ef7a8e39 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -116,17 +116,16 @@ intel_miptree_create(struct intel_context *intel, GLboolean expect_accelerated_upload) { struct intel_mipmap_tree *mt; - uint32_t tiling; + uint32_t tiling = I915_TILING_NONE; if (intel->use_texture_tiling && compress_byte == 0) { if (intel->gen >= 4 && (base_format == GL_DEPTH_COMPONENT || base_format == GL_DEPTH_STENCIL_EXT)) tiling = I915_TILING_Y; - else + else if (width0 >= 64) tiling = I915_TILING_X; - } else - tiling = I915_TILING_NONE; + } mt = intel_miptree_create_internal(intel, target, internal_format, first_level, last_level, width0, diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index 56faf076c7..2008a4c2be 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -142,7 +142,7 @@ do_blit_copypixels(GLcontext * ctx, if (!src || !dst) return GL_FALSE; - intelFlush(&intel->ctx); + intel_flush(&intel->ctx); /* Clip to destination buffer. */ orig_dstx = dstx; diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index bd1dd13fb7..a40b232fff 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -167,7 +167,7 @@ intel_stencil_drawpixels(GLcontext * ctx, irb = intel_create_renderbuffer(MESA_FORMAT_ARGB8888); irb->Base.Width = depth_irb->Base.Width; irb->Base.Height = depth_irb->Base.Height; - intel_renderbuffer_set_region(irb, depth_irb->region); + intel_renderbuffer_set_region(intel, irb, depth_irb->region); /* Create a name for our renderbuffer, which lets us use other mesa * rb functions for convenience. diff --git a/src/mesa/drivers/dri/intel/intel_pixel_read.c b/src/mesa/drivers/dri/intel/intel_pixel_read.c index 2ac3da7f42..21d2a7a93e 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_read.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_read.c @@ -170,11 +170,19 @@ intelReadPixels(GLcontext * ctx, GLenum format, GLenum type, const struct gl_pixelstore_attrib *pack, GLvoid * pixels) { + struct intel_context *intel = intel_context(ctx); + GLboolean dirty; + if (INTEL_DEBUG & DEBUG_PIXEL) fprintf(stderr, "%s\n", __FUNCTION__); - intelFlush(ctx); - intel_prepare_render(intel_context(ctx)); + intel_flush(ctx); + + /* glReadPixels() wont dirty the front buffer, so reset the dirty + * flag after calling intel_prepare_render(). */ + dirty = intel->front_buffer_dirty; + intel_prepare_render(intel); + intel->front_buffer_dirty = dirty; if (do_blit_readpixels (ctx, x, y, width, height, format, type, pack, pixels)) @@ -193,4 +201,7 @@ intelReadPixels(GLcontext * ctx, _mesa_update_state(ctx); _swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels); + + /* There's an intel_prepare_render() call in intelSpanRenderStart(). */ + intel->front_buffer_dirty = dirty; } diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 1172de90b1..8cdeaf608c 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -111,7 +111,7 @@ debug_backtrace(void) GLubyte * intel_region_map(struct intel_context *intel, struct intel_region *region) { - intelFlush(&intel->ctx); + intel_flush(&intel->ctx); _DBG("%s %p\n", __FUNCTION__, region); if (!region->map_refcount++) { diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 3aed253e24..15a465c640 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -110,23 +110,16 @@ intelDRI2Flush(__DRIdrawable *drawable) if (intel->gen < 4) INTEL_FIREVERTICES(intel); + intel->need_throttle = GL_TRUE; + if (intel->batch->map != intel->batch->ptr) intel_batchbuffer_flush(intel->batch); } -static void -intelDRI2Invalidate(__DRIdrawable *drawable) -{ - struct intel_context *intel = drawable->driContextPriv->driverPrivate; - - intel->using_dri2_swapbuffers = GL_TRUE; - dri2InvalidateDrawable(drawable); -} - static const struct __DRI2flushExtensionRec intelFlushExtension = { { __DRI2_FLUSH, __DRI2_FLUSH_VERSION }, intelDRI2Flush, - intelDRI2Invalidate, + dri2InvalidateDrawable, }; static __DRIimage * diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index c1e15d1b0f..059f76f289 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -244,7 +244,7 @@ intelSpanRenderStart(GLcontext * ctx) struct intel_context *intel = intel_context(ctx); GLuint i; - intelFlush(&intel->ctx); + intel_flush(&intel->ctx); intel_prepare_render(intel); for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { diff --git a/src/mesa/drivers/dri/intel/intel_syncobj.c b/src/mesa/drivers/dri/intel/intel_syncobj.c index d67f0cb4a6..c2d86432ff 100644 --- a/src/mesa/drivers/dri/intel/intel_syncobj.c +++ b/src/mesa/drivers/dri/intel/intel_syncobj.c @@ -77,7 +77,7 @@ intel_fence_sync(GLcontext *ctx, struct gl_sync_object *s, sync->bo = intel->batch->buf; drm_intel_bo_reference(sync->bo); - intelFlush(ctx); + intel_flush(ctx); } /* We ignore the user-supplied timeout. This is weaselly -- we're allowed to diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 62e1e78f59..549a4acc7d 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -108,7 +108,7 @@ do_copy_texsubimage(struct intel_context *intel, return GL_FALSE; } - /* intelFlush(ctx); */ + /* intel_flush(ctx); */ intel_prepare_render(intel); { drm_intel_bo *dst_bo = intel_region_buffer(intel, diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c index 7be5231eae..610a169beb 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_format.c +++ b/src/mesa/drivers/dri/intel/intel_tex_format.c @@ -1,7 +1,7 @@ #include "intel_context.h" #include "intel_tex.h" #include "main/enums.h" - +#include "main/formats.h" /** * Choose hardware texture format given the user's glTexImage parameters. @@ -208,22 +208,11 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, int intel_compressed_num_bytes(GLuint mesaFormat) { - int bytes = 0; - switch(mesaFormat) { - - case MESA_FORMAT_RGB_FXT1: - case MESA_FORMAT_RGBA_FXT1: - case MESA_FORMAT_RGB_DXT1: - case MESA_FORMAT_RGBA_DXT1: - bytes = 2; - break; - - case MESA_FORMAT_RGBA_DXT3: - case MESA_FORMAT_RGBA_DXT5: - bytes = 4; - default: - break; - } - - return bytes; + GLuint bw, bh; + GLuint block_size; + + block_size = _mesa_get_format_bytes(mesaFormat); + _mesa_get_format_block_size(mesaFormat, &bw, &bh); + + return block_size / bh; } diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 9db96acdc0..06bf262704 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -239,8 +239,8 @@ try_pbo_upload(struct intel_context *intel, dst_stride = intelImage->mt->region->pitch; if (drm_intel_bo_references(intel->batch->buf, dst_buffer)) - intelFlush(&intel->ctx); - intel_prepare_render(intel); + intel_flush(&intel->ctx); + { dri_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ); @@ -473,14 +473,12 @@ intelTexImage(GLcontext * ctx, pixels, unpack, "glTexImage"); } - intel_prepare_render(intel); - if (intelImage->mt) { if (pixels != NULL) { /* Flush any queued rendering with the texture before mapping. */ if (drm_intel_bo_references(intel->batch->buf, intelImage->mt->region->buffer)) { - intelFlush(ctx); + intel_flush(ctx); } texImage->Data = intel_miptree_image_map(intel, intelImage->mt, @@ -638,7 +636,7 @@ intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level, * make sure rendering is complete. * We could probably predicate this on texObj->_RenderToTexture */ - intelFlush(ctx); + intel_flush(ctx); /* Map */ if (intelImage->mt) { @@ -728,7 +726,8 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, if (!intelObj) return; - if (dPriv->lastStamp != dPriv->dri2.stamp) + if (dPriv->lastStamp != dPriv->dri2.stamp || + !pDRICtx->driScreenPriv->dri2.useInvalidate) intel_update_renderbuffers(pDRICtx, dPriv); rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c index c35d2e8757..4f5c26acf2 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c @@ -59,7 +59,7 @@ intelTexSubimage(GLcontext * ctx, _mesa_lookup_enum_by_nr(target), level, xoffset, yoffset, width, height); - intelFlush(ctx); + intel_flush(ctx); if (compressed) pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize, diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index e432afc3d4..34d22b4559 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -21,6 +21,7 @@ C_SOURCES = \ radeon_dataflow.c \ radeon_dataflow_deadcode.c \ radeon_dataflow_swizzles.c \ + radeon_optimize.c \ r3xx_fragprog.c \ r300_fragprog.c \ r300_fragprog_swizzle.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index cfa48a59e3..5d5de2f1b2 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -56,7 +56,8 @@ static const struct swizzle_data native_swizzles[] = { {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1}, {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1}, {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0}, - {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0} + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0} }; static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); @@ -221,6 +222,7 @@ unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF; default: return R300_ALU_ARGA_ONE; } } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 25bf373b6f..3e88ccbc46 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -152,6 +152,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "after deadcode"); + rc_optimize(&c->Base); + + debug_program_log(c, "after dataflow optimize"); + rc_dataflow_swizzles(&c->Base); if (c->Base.Error) return; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c index 853b2becd1..0eab18c344 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c @@ -146,7 +146,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da unsigned comp; for(comp = 0; comp < c->Constants[index].Size; ++comp) { if (c->Constants[index].u.Immediate[comp] == data) { - *swizzle = RC_MAKE_SWIZZLE(comp, comp, comp, comp); + *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); return index; } } @@ -159,7 +159,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da if (free_index >= 0) { unsigned comp = c->Constants[free_index].Size++; c->Constants[free_index].u.Immediate[comp] = data; - *swizzle = RC_MAKE_SWIZZLE(comp, comp, comp, comp); + *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp); return free_index; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index 27274f0712..1979e7e4e4 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -132,7 +132,7 @@ struct r300_fragment_program_external_state { * 2 - GL_ALPHA * depending on the depth texture mode. */ - unsigned depth_texture_mode : 2; + unsigned depth_texture_swizzle:12; /** * If the sampler is used as a shadow sampler, @@ -144,6 +144,12 @@ struct r300_fragment_program_external_state { unsigned texture_compare_func : 3; /** + * No matter what the sampler type is, + * this field turns it into a shadow sampler. + */ + unsigned compare_mode_enabled : 1; + + /** * If the sampler needs to fake NPOT, this field is set. */ unsigned fake_npot : 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index 16e2f3a218..0e6c62541f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -30,7 +30,7 @@ #include "radeon_program.h" -static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata) { struct rc_sub_instruction * inst = &fullinst->U.I; const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); @@ -46,18 +46,15 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, refmask &= RC_MASK_XYZW; - for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_BIT(refmask, chan)) { - cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan); - } - } + if (refmask) + cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask); if (refmask && inst->SrcReg[src].RelAddr) cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X); } } -static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) { struct rc_pair_instruction * inst = &fullinst->U.P; unsigned int refmasks[3] = { 0, 0, 0 }; @@ -84,27 +81,23 @@ static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, v } for(unsigned int src = 0; src < 3; ++src) { - if (inst->RGB.Src[src].Used) { - for(unsigned int chan = 0; chan < 3; ++chan) { - if (GET_BIT(refmasks[src], chan)) - cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan); - } - } + if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ)) + cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, + refmasks[src] & RC_MASK_XYZ); - if (inst->Alpha.Src[src].Used) { - if (GET_BIT(refmasks[src], 3)) - cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3); - } + if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) + cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W); } } /** - * Calls a callback function for all sourced register channels. + * Calls a callback function for all register reads. * - * This is conservative, i.e. channels may be called multiple times, - * and the writemask of the instruction is not taken into account. + * This is conservative, i.e. if the same register is referenced multiple times, + * the callback may also be called multiple times. + * Also, the writemask of the instruction is not taken into account. */ -void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) { if (inst->Type == RC_INSTRUCTION_NORMAL) { reads_normal(inst, cb, userdata); @@ -115,44 +108,39 @@ void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * -static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) { struct rc_sub_instruction * inst = &fullinst->U.I; const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - if (opcode->HasDstReg) { - for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_BIT(inst->DstReg.WriteMask, chan)) - cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan); - } - } + if (opcode->HasDstReg && inst->DstReg.WriteMask) + cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask); if (inst->WriteALUResult) - cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); } -static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) { struct rc_pair_instruction * inst = &fullinst->U.P; - for(unsigned int chan = 0; chan < 3; ++chan) { - if (GET_BIT(inst->RGB.WriteMask, chan)) - cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan); - } + if (inst->RGB.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask); if (inst->Alpha.WriteMask) - cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, 3); + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W); if (inst->WriteALUResult) - cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); } /** - * Calls a callback function for all written register channels. + * Calls a callback function for all register writes in the instruction, + * reporting writemasks to the callback function. * * \warning Does not report output registers for paired instructions! */ -void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) { if (inst->Type == RC_INSTRUCTION_NORMAL) { writes_normal(inst, cb, userdata); @@ -162,6 +150,48 @@ void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * } +struct mask_to_chan_data { + void * UserData; + rc_read_write_chan_fn Fn; +}; + +static void mask_to_chan_cb(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct mask_to_chan_data * d = data; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(mask, chan)) + d->Fn(d->UserData, inst, file, index, chan); + } +} + +/** + * Calls a callback function for all sourced register channels. + * + * This is conservative, i.e. channels may be called multiple times, + * and the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); +} + +/** + * Calls a callback function for all written register channels. + * + * \warning Does not report output registers for paired instructions! + */ +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); +} + static void remap_normal_instruction(struct rc_instruction * fullinst, rc_remap_register_fn cb, void * userdata) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index 62cda20eea..60a6e192a9 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -39,10 +39,15 @@ struct rc_swizzle_caps; * Help analyze and modify the register accesses of instructions. */ /*@{*/ -typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst, +typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int chan); -void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); -void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); + +typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask); +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst, rc_register_file * pfile, unsigned int * pindex); @@ -60,4 +65,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f void rc_dataflow_swizzles(struct radeon_compiler * c); /*@}*/ +void rc_optimize(struct radeon_compiler * c); + #endif /* RADEON_DATAFLOW_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c index d889612f4f..863654cf68 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c @@ -150,7 +150,7 @@ static void allocate_and_insert_proxies(struct emulate_branch_state * s, sap.Proxies = proxies; for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { - rc_for_all_writes(inst, scan_write, &sap); + rc_for_all_writes_mask(inst, scan_write, &sap); rc_remap_registers(inst, remap_proxy_function, &sap); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c new file mode 100644 index 0000000000..21d7210888 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -0,0 +1,446 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + + +static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) +{ + struct rc_src_register combine; + combine.File = inner.File; + combine.Index = inner.Index; + combine.RelAddr = inner.RelAddr; + if (outer.Abs) { + combine.Abs = 1; + combine.Negate = outer.Negate; + } else { + combine.Abs = inner.Abs; + combine.Negate = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(outer.Swizzle, chan); + if (swz < 4) + combine.Negate |= GET_BIT(inner.Negate, swz) << chan; + } + combine.Negate ^= outer.Negate; + } + combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); + return combine; +} + +struct peephole_state { + struct radeon_compiler * C; + struct rc_instruction * Mov; + unsigned int Conflict:1; + + /** Whether Mov's source has been clobbered */ + unsigned int SourceClobbered:1; + + /** Which components of Mov's destination register are still from that Mov? */ + unsigned int MovMask:4; + + /** Which components of Mov's destination register are clearly *not* from that Mov */ + unsigned int DefinedMask:4; + + /** Which components of Mov's source register are sourced */ + unsigned int SourcedMask:4; + + /** Branch depth beyond Mov; negative value indicates we left the Mov's block */ + int BranchDepth; +}; + +static void peephole_scan_read(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct peephole_state * s = data; + + if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index) + return; + + /* These instructions cannot read from the constants file. + * see radeonTransformTEX() + */ + if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && + s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT && + (inst->U.I.Opcode == RC_OPCODE_TEX || + inst->U.I.Opcode == RC_OPCODE_TXB || + inst->U.I.Opcode == RC_OPCODE_TXP || + inst->U.I.Opcode == RC_OPCODE_KIL)){ + s->Conflict = 1; + return; + } + if ((mask & s->MovMask) == mask) { + if (s->SourceClobbered) { + s->Conflict = 1; + } + } else if ((mask & s->DefinedMask) == mask) { + /* read from something entirely written by other instruction: this is okay */ + } else { + /* read from component combination that is not well-defined without + * the MOV: cannot remove it */ + s->Conflict = 1; + } +} + +static void peephole_scan_write(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct peephole_state * s = data; + + if (s->BranchDepth < 0) + return; + + if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) { + s->MovMask &= ~mask; + if (s->BranchDepth == 0) + s->DefinedMask |= mask; + else + s->DefinedMask &= ~mask; + } + if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) { + if (mask & s->SourcedMask) + s->SourceClobbered = 1; + } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) { + s->SourceClobbered = 1; + } +} + +static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov) +{ + struct peephole_state s; + + if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult) + return; + + memset(&s, 0, sizeof(s)); + s.C = c; + s.Mov = inst_mov; + s.MovMask = inst_mov->U.I.DstReg.WriteMask; + s.DefinedMask = RC_MASK_XYZW & ~s.MovMask; + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan); + s.SourcedMask |= (1 << swz) & RC_MASK_XYZW; + } + + /* 1st pass: Check whether all subsequent readers can be changed */ + for(struct rc_instruction * inst = inst_mov->Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads_mask(inst, peephole_scan_read, &s); + rc_for_all_writes_mask(inst, peephole_scan_write, &s); + if (s.Conflict) + return; + + if (s.BranchDepth >= 0) { + if (inst->U.I.Opcode == RC_OPCODE_IF) { + s.BranchDepth++; + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + s.BranchDepth--; + if (s.BranchDepth < 0) { + s.DefinedMask &= ~s.MovMask; + s.MovMask = 0; + } + } + } + } + + if (s.Conflict) + return; + + /* 2nd pass: We can satisfy all readers, so switch them over all at once */ + s.MovMask = inst_mov->U.I.DstReg.WriteMask; + s.BranchDepth = 0; + + for(struct rc_instruction * inst = inst_mov->Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) { + unsigned int refmask = 0; + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); + refmask |= (1 << swz) & RC_MASK_XYZW; + } + + if ((refmask & s.MovMask) == refmask) + inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]); + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY && + inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) { + s.MovMask &= ~inst->U.I.DstReg.WriteMask; + } + } + + if (s.BranchDepth >= 0) { + if (inst->U.I.Opcode == RC_OPCODE_IF) { + s.BranchDepth++; + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + s.BranchDepth--; + if (s.BranchDepth < 0) + break; /* no more readers after this point */ + } + } + } + + /* Finally, remove the original MOV instruction */ + rc_remove_instruction(inst_mov); +} + +/** + * Check if a source register is actually always the same + * swizzle constant. + */ +static int is_src_uniform_constant(struct rc_src_register src, + rc_swizzle * pswz, unsigned int * pnegate) +{ + int have_used = 0; + + if (src.File != RC_FILE_NONE) { + *pswz = 0; + return 0; + } + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz < 4) { + *pswz = 0; + return 0; + } + if (swz == RC_SWIZZLE_UNUSED) + continue; + + if (!have_used) { + *pswz = swz; + *pnegate = GET_BIT(src.Negate, chan); + have_used = 1; + } else { + if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { + *pswz = 0; + return 0; + } + } + } + + return 1; +} + + +static void constant_folding_mad(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MUL; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } +} + +static void constant_folding_mul(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } +} + +static void constant_folding_add(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + return; + } + } +} + + +/** + * Replace 0.0, 1.0 and 0.5 immediate constants by their + * respective swizzles. Simplify instructions like ADD dst, src, 0; + */ +static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || + inst->U.I.SrcReg[src].RelAddr || + inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) + continue; + + struct rc_constant * constant = + &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; + + if (constant->Type != RC_CONSTANT_IMMEDIATE) + continue; + + struct rc_src_register newsrc = inst->U.I.SrcReg[src]; + int have_real_reference = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); + if (swz >= 4) + continue; + + unsigned int newswz; + float imm = constant->u.Immediate[swz]; + float baseimm = imm; + if (imm < 0.0) + baseimm = -baseimm; + + if (baseimm == 0.0) { + newswz = RC_SWIZZLE_ZERO; + } else if (baseimm == 1.0) { + newswz = RC_SWIZZLE_ONE; + } else if (baseimm == 0.5) { + newswz = RC_SWIZZLE_HALF; + } else { + have_real_reference = 1; + continue; + } + + SET_SWZ(newsrc.Swizzle, chan, newswz); + if (imm < 0.0 && !newsrc.Abs) + newsrc.Negate ^= 1 << chan; + } + + if (!have_real_reference) { + newsrc.File = RC_FILE_NONE; + newsrc.Index = 0; + } + + /* don't make the swizzle worse */ + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && + c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + continue; + + inst->U.I.SrcReg[src] = newsrc; + } + + /* Simplify instructions based on constants */ + if (inst->U.I.Opcode == RC_OPCODE_MAD) + constant_folding_mad(inst); + + /* note: MAD can simplify to MUL or ADD */ + if (inst->U.I.Opcode == RC_OPCODE_MUL) + constant_folding_mul(inst); + else if (inst->U.I.Opcode == RC_OPCODE_ADD) + constant_folding_add(inst); +} + +void rc_optimize(struct radeon_compiler * c) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + while(inst != &c->Program.Instructions) { + struct rc_instruction * cur = inst; + inst = inst->Next; + + constant_folding(c, cur); + + if (cur->U.I.Opcode == RC_OPCODE_MOV) { + peephole(c, cur); + /* cur may no longer be part of the program */ + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index fdfee86701..8a912da461 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -159,7 +159,7 @@ static int try_add_live_intervals(struct regalloc_state * s, } static void scan_callback(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int chan) + rc_register_file file, unsigned int index, unsigned int mask) { struct regalloc_state * s = data; struct register_info * reg; @@ -191,8 +191,8 @@ static void compute_live_intervals(struct regalloc_state * s) for(struct rc_instruction * inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions; inst = inst->Next) { - rc_for_all_reads(inst, scan_callback, s); - rc_for_all_writes(inst, scan_callback, s); + rc_for_all_reads_mask(inst, scan_callback, s); + rc_for_all_writes_mask(inst, scan_callback, s); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index df67aafe02..a279549ff8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -448,8 +448,8 @@ static void schedule_block(struct r300_fragment_program_compiler * c, * counter-intuitive, to account for the case where an * instruction writes to the same register as it reads * from. */ - rc_for_all_writes(inst, &scan_write, &s); - rc_for_all_reads(inst, &scan_read, &s); + rc_for_all_writes_chan(inst, &scan_write, &s); + rc_for_all_reads_chan(inst, &scan_read, &s); DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 05b874ba7c..5ba2c29408 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -556,6 +556,29 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, rc_remove_instruction(inst); } +static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + unsigned constant_swizzle; + int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, + 0.0000000000000000001, + &constant_swizzle); + + /* MOV dst, src */ + emit1(c, inst->Prev, RC_OPCODE_MOV, 0, + dstreg(RC_FILE_TEMPORARY, tempreg), + inst->U.I.SrcReg[0]); + + /* MAX dst.z, src, 0.00...001 */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + dstregtmpmask(tempreg, RC_MASK_Y), + srcreg(RC_FILE_TEMPORARY, tempreg), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + + inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg); +} + /** * For use with radeonLocalTransform, this transforms non-native ALU * instructions of the r300 up to r500 vertex engine. @@ -572,6 +595,7 @@ int r300_transform_vertex_alu( case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1; case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; @@ -789,7 +813,7 @@ int radeonTransformDeriv(struct radeon_compiler* c, if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) return 0; - inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE); + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111; inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; return 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h index 842012def0..2ddf60b677 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h @@ -115,6 +115,7 @@ typedef enum { #define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) #define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z) #define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X) #define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y) #define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z) diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index b4ba0b3f87..8336e58d55 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -97,7 +97,8 @@ int radeonTransformTEX( /* ARB_shadow & EXT_shadow_funcs */ if (inst->U.I.Opcode != RC_OPCODE_KIL && - c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) { + ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) || + (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) { rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { @@ -113,52 +114,75 @@ int radeonTransformTEX( return 1; } else { rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode; - struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); - struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); - struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); - int pass, fail; + struct rc_instruction * inst_rcp = NULL; + struct rc_instruction * inst_mad; + struct rc_instruction * inst_cmp; + unsigned tmp_texsample = rc_find_free_temporary(c); + unsigned tmp_sum = rc_find_free_temporary(c); + unsigned tmp_recip_w = 0; + int pass, fail, tex; - inst_rcp->U.I.Opcode = RC_OPCODE_RCP; - inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c); - inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + /* Save the output register. */ + struct rc_dst_register output_reg = inst->U.I.DstReg; - inst_cmp->U.I.DstReg = inst->U.I.DstReg; + /* Redirect TEX to a new temp. */ inst->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst->U.I.DstReg.Index = rc_find_free_temporary(c); + inst->U.I.DstReg.Index = tmp_texsample; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - inst_mad->U.I.Opcode = RC_OPCODE_MAD; + if (inst->U.I.Opcode == RC_OPCODE_TXP) { + tmp_recip_w = rc_find_free_temporary(c); + + /* Compute 1/W. */ + inst_rcp = rc_insert_new_instruction(c, inst); + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tmp_recip_w; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + } + + /* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). */ + inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->U.I.DstReg.Index = tmp_sum; inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; - inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index; - inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index; - if (depthmode == 0) /* GL_LUMINANCE */ - inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); - else if (depthmode == 2) /* GL_ALPHA */ - inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; + if (inst->U.I.Opcode == RC_OPCODE_TXP) { + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[1].Index = tmp_recip_w; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + tex = 2; + } else { + inst_mad->U.I.Opcode = RC_OPCODE_ADD; + tex = 1; + } + inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[tex].Index = tmp_texsample; + inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle; - /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: - * r < tex <=> -tex+r < 0 - * r >= tex <=> not (-tex+r < 0 */ + /* Fake EQUAL/NOTEQUAL, it seems to pass some tests suprisingly. */ + if (comparefunc == RC_COMPARE_FUNC_EQUAL) { + comparefunc = RC_COMPARE_FUNC_GEQUAL; + } else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { + comparefunc = RC_COMPARE_FUNC_LESS; + } + + /* Recall that SrcReg[0] is r, SrcReg[tex] is tex and: + * LESS: r < tex <=> -tex+r < 0 + * GEQUAL: r >= tex <=> not (-tex+r < 0) + * GREATER: r > tex <=> tex-r < 0 + * LEQUAL: r <= tex <=> not ( tex-r < 0) + * + * This negates either r or tex: */ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) - inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW; + inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW; else inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; - inst_cmp->U.I.Opcode = RC_OPCODE_CMP; - /* DstReg has been filled out above */ - inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; - inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; - + /* This negates the whole expresion: */ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { pass = 1; fail = 2; @@ -167,6 +191,11 @@ int radeonTransformTEX( fail = 1; } + inst_cmp = rc_insert_new_instruction(c, inst_mad); + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; + inst_cmp->U.I.DstReg = output_reg; + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = tmp_sum; inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index 2b7c93a957..e678a42ca2 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -46,13 +46,13 @@ #include "radeon_mesa_to_rc.h" -static GLuint build_dtm(GLuint depthmode) +static GLuint build_dts(GLuint depthmode) { switch(depthmode) { default: - case GL_LUMINANCE: return 0; - case GL_INTENSITY: return 1; - case GL_ALPHA: return 2; + case GL_LUMINANCE: return RC_SWIZZLE_XYZZ; + case GL_INTENSITY: return RC_SWIZZLE_XYZW; + case GL_ALPHA: return RC_SWIZZLE_WWWW; } } @@ -78,7 +78,7 @@ static void build_state( if (fp->Base.ShadowSamplers & (1 << unit)) { struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; - state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].depth_texture_swizzle = build_dts(tex->DepthMode); state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); } } diff --git a/src/mesa/drivers/fbdev/Makefile b/src/mesa/drivers/fbdev/Makefile index ee73f29a46..5120e1ac9e 100644 --- a/src/mesa/drivers/fbdev/Makefile +++ b/src/mesa/drivers/fbdev/Makefile @@ -11,10 +11,11 @@ OBJECTS = $(SOURCES:.c=.o) INCLUDE_DIRS = \ -I$(TOP)/include \ + -I$(TOP)/src/mapi \ -I$(TOP)/src/mesa \ -I$(TOP)/src/mesa/main -CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mesa/libglapi.a +CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mapi/glapi/libglapi.a .c.o: diff --git a/src/mesa/drivers/glslcompiler/Makefile b/src/mesa/drivers/glslcompiler/Makefile index 7dcf9a6541..6da9f93f59 100644 --- a/src/mesa/drivers/glslcompiler/Makefile +++ b/src/mesa/drivers/glslcompiler/Makefile @@ -11,11 +11,12 @@ OBJECTS = \ glslcompiler.o \ ../common/driverfuncs.o \ ../../libmesa.a \ - ../../libglapi.a + $(TOP)/src/mapi/glapi/libglapi.a INCLUDES = \ -I$(TOP)/include \ -I$(TOP)/include/GL/internal \ + -I$(TOP)/src/mapi \ -I$(TOP)/src/mesa \ -I$(TOP)/src/mesa/main \ -I$(TOP)/src/mesa/glapi \ diff --git a/src/mesa/drivers/osmesa/Makefile b/src/mesa/drivers/osmesa/Makefile index 9010bbd130..ea49a89659 100644 --- a/src/mesa/drivers/osmesa/Makefile +++ b/src/mesa/drivers/osmesa/Makefile @@ -16,6 +16,7 @@ OBJECTS = $(SOURCES:.c=.o) INCLUDE_DIRS = \ -I$(TOP)/include \ + -I$(TOP)/src/mapi \ -I$(TOP)/src/mesa \ -I$(TOP)/src/mesa/main @@ -23,7 +24,7 @@ INCLUDE_DIRS = \ ifeq ($(DRIVER_DIRS), osmesa) CORE_MESA = \ $(TOP)/src/mesa/libmesa.a \ - $(TOP)/src/mesa/libglapi.a \ + $(TOP)/src/mapi/glapi/libglapi.a \ $(TOP)/src/glsl/cl/libglslcl.a \ $(TOP)/src/glsl/pp/libglslpp.a else diff --git a/src/mesa/drivers/osmesa/osmesa.c b/src/mesa/drivers/osmesa/osmesa.c index e20507ae92..ead4050397 100644 --- a/src/mesa/drivers/osmesa/osmesa.c +++ b/src/mesa/drivers/osmesa/osmesa.c @@ -1004,7 +1004,20 @@ new_osmesa_renderbuffer(GLcontext *ctx, GLenum format, GLenum type) rb->AllocStorage = osmesa_renderbuffer_storage; rb->InternalFormat = GL_RGBA; - rb->Format = MESA_FORMAT_RGBA8888; + switch (type) { + case GL_UNSIGNED_BYTE: + rb->Format = MESA_FORMAT_RGBA8888; + break; + case GL_UNSIGNED_SHORT: + rb->Format = MESA_FORMAT_RGBA_16; + break; + case GL_FLOAT: + rb->Format = MESA_FORMAT_RGBA_FLOAT32; + break; + default: + assert(0 && "Unexpected type in new_osmesa_renderbuffer()"); + rb->Format = MESA_FORMAT_RGBA8888; + } rb->_BaseFormat = GL_RGBA; rb->DataType = type; } @@ -1048,7 +1061,6 @@ OSMesaCreateContextExt( GLenum format, GLint depthBits, GLint stencilBits, struct dd_function_table functions; GLint rind, gind, bind, aind; GLint redBits = 0, greenBits = 0, blueBits = 0, alphaBits =0; - GLenum type = CHAN_TYPE; rind = gind = bind = aind = 0; if (format==OSMESA_RGBA) { @@ -1167,11 +1179,9 @@ OSMesaCreateContextExt( GLenum format, GLint depthBits, GLint stencilBits, return NULL; } - /* create front color buffer in user-provided memory (no back buffer) */ - osmesa->rb = new_osmesa_renderbuffer(&osmesa->mesa, format, type); - _mesa_add_renderbuffer(osmesa->gl_buffer, BUFFER_FRONT_LEFT, osmesa->rb); - assert(osmesa->rb->RefCount == 2); - + /* Create depth/stencil/accum buffers. We'll create the color + * buffer later in OSMesaMakeCurrent(). + */ _mesa_add_soft_renderbuffers(osmesa->gl_buffer, GL_FALSE, /* color */ osmesa->gl_visual->haveDepthBuffer, @@ -1308,11 +1318,23 @@ OSMesaMakeCurrent( OSMesaContext osmesa, void *buffer, GLenum type, */ _glapi_check_multithread(); + + /* Create a front/left color buffer which wraps the user-provided buffer. + * There is no back color buffer. + * If the user tries to use a 8, 16 or 32-bit/channel buffer that + * doesn't match what Mesa was compiled for (CHAN_BITS) the + * _mesa_add_renderbuffer() function will create a "wrapper" renderbuffer + * that converts rendering from CHAN_BITS to the user-requested channel + * size. + */ + osmesa->rb = new_osmesa_renderbuffer(&osmesa->mesa, osmesa->format, type); + _mesa_add_renderbuffer(osmesa->gl_buffer, BUFFER_FRONT_LEFT, osmesa->rb); + assert(osmesa->rb->RefCount == 2); + /* Set renderbuffer fields. Set width/height = 0 to force * osmesa_renderbuffer_storage() being called by _mesa_resize_framebuffer() */ osmesa->rb->Data = buffer; - osmesa->rb->DataType = type; osmesa->rb->Width = osmesa->rb->Height = 0; /* Set the framebuffer's size. This causes the diff --git a/src/mesa/drivers/x11/Makefile b/src/mesa/drivers/x11/Makefile index 5e427d2d5c..b5b0c1f11a 100644 --- a/src/mesa/drivers/x11/Makefile +++ b/src/mesa/drivers/x11/Makefile @@ -40,11 +40,12 @@ OBJECTS = $(SOURCES:.c=.o) INCLUDE_DIRS = \ -I$(TOP)/include \ + -I$(TOP)/src/mapi \ -I$(TOP)/src/mesa \ -I$(TOP)/src/mesa/main \ $(X11_INCLUDES) -CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mesa/libglapi.a +CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mapi/glapi/libglapi.a diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index a1723fa37b..dac1668cfe 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -210,7 +210,7 @@ gamma_adjust( GLfloat gamma, GLint value, GLint max ) } else { double x = (double) value / (double) max; - return IROUND_POS((GLfloat) max * _mesa_pow(x, 1.0F/gamma)); + return IROUND_POS((GLfloat) max * pow(x, 1.0F/gamma)); } } @@ -846,19 +846,19 @@ setup_8bit_hpcr(XMesaVisual v) g = 1.0 / v->RedGamma; for (i=0; i<256; i++) { - GLint red = IROUND_POS(255.0 * _mesa_pow( hpcr_rgbTbl[0][i]/255.0, g )); + GLint red = IROUND_POS(255.0 * pow( hpcr_rgbTbl[0][i]/255.0, g )); v->hpcr_rgbTbl[0][i] = CLAMP( red, 16, 239 ); } g = 1.0 / v->GreenGamma; for (i=0; i<256; i++) { - GLint green = IROUND_POS(255.0 * _mesa_pow( hpcr_rgbTbl[1][i]/255.0, g )); + GLint green = IROUND_POS(255.0 * pow( hpcr_rgbTbl[1][i]/255.0, g )); v->hpcr_rgbTbl[1][i] = CLAMP( green, 16, 239 ); } g = 1.0 / v->BlueGamma; for (i=0; i<256; i++) { - GLint blue = IROUND_POS(255.0 * _mesa_pow( hpcr_rgbTbl[2][i]/255.0, g )); + GLint blue = IROUND_POS(255.0 * pow( hpcr_rgbTbl[2][i]/255.0, g )); v->hpcr_rgbTbl[2][i] = CLAMP( blue, 32, 223 ); } v->undithered_pf = PF_HPCR; /* can't really disable dithering for now */ |