diff options
Diffstat (limited to 'src/mesa')
42 files changed, 760 insertions, 197 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 029a16500b..49ef859e45 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -42,7 +42,6 @@ #include "brw_state.h" #include "brw_clip.h" - #define FRONT_UNFILLED_BIT 0x1 #define BACK_UNFILLED_BIT 0x2 @@ -127,6 +126,14 @@ static void compile_clip_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); + if (INTEL_DEBUG & DEBUG_CLIP) { + printf("clip:\n"); + for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) + brw_disasm(stdout, &((struct brw_instruction *)program)[i], + intel->gen); + printf("\n"); + } + /* Upload */ dri_bo_unreference(brw->clip.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index b27fe654ca..916a99ea00 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -177,7 +177,7 @@ void brw_clip_tri_init_vertices( struct brw_clip_compile *c ) void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) { struct brw_compile *p = &c->func; - struct brw_instruction *is_poly; + struct brw_instruction *is_poly, *is_trifan; struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); @@ -195,8 +195,22 @@ void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) is_poly = brw_ELSE(p, is_poly); { if (c->key.pv_first) { - brw_clip_copy_colors(c, 1, 0); - brw_clip_copy_colors(c, 2, 0); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_TRIFAN)); + is_trifan = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_copy_colors(c, 0, 1); + brw_clip_copy_colors(c, 2, 1); + } + is_trifan = brw_ELSE(p, is_trifan); + { + brw_clip_copy_colors(c, 1, 0); + brw_clip_copy_colors(c, 2, 0); + } + brw_ENDIF(p, is_trifan); } else { brw_clip_copy_colors(c, 0, 2); diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c index 34a966a47a..a730664f9b 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_util.c +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c @@ -370,18 +370,13 @@ void brw_clip_ff_sync(struct brw_clip_compile *c) need_ff_sync = brw_IF(p, BRW_EXECUTE_1); { brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1)); - brw_ff_sync(p, - c->reg.R0, - 0, - c->reg.R0, - 1, - 1, /* used */ - 1, /* msg length */ - 1, /* response length */ - 0, /* eot */ - 1, /* write compelete */ - 0, /* urb offset */ - BRW_URB_SWIZZLE_NONE); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, /* allocate */ + 1, /* response length */ + 0 /* eot */); } brw_ENDIF(p, need_ff_sync); brw_set_predicate_control(p, BRW_PREDICATE_NONE); diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 6b04ad9ec6..dc4bd5802d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -192,8 +192,6 @@ GLboolean brwCreateContext( int api, ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - make_empty_list(&brw->query.active_head); - brw_draw_init( brw ); return GL_TRUE; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1f09651126..a97fcb0f4d 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -418,18 +418,12 @@ struct brw_vertex_info { struct brw_query_object { struct gl_query_object Base; - /** Doubly linked list of active query objects in the context. */ - struct brw_query_object *prev, *next; - /** Last query BO associated with this query. */ dri_bo *bo; /** First index in bo with query data for this object. */ int first_index; /** Last index in bo with query data for this object. */ int last_index; - - /* Total count of pixels from previous BOs */ - unsigned int count; }; @@ -664,7 +658,7 @@ struct brw_context } cc; struct { - struct brw_query_object active_head; + struct brw_query_object *obj; dri_bo *bo; int index; GLboolean active; @@ -726,7 +720,7 @@ void brw_upload_urb_fence(struct brw_context *brw); void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_disasm.c */ -int brw_disasm (FILE *file, struct brw_instruction *inst); +int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); /*====================================================================== * Inline conversion functions. These are better-typed than the diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index f26a13fc3c..2d3556b805 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -783,7 +783,7 @@ #define CMD_BINDING_TABLE_PTRS 0x7801 # define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) # define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) -# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 10) +# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12) #define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */ # define PS_SAMPLER_STATE_CHANGE (1 << 12) diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index db3fc50a63..ff12daf497 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -323,6 +323,11 @@ char *math_precision[2] = { [1] = "partial_precision" }; +char *urb_opcode[2] = { + [0] = "urb_write", + [1] = "ff_sync", +}; + char *urb_swizzle[4] = { [BRW_URB_SWIZZLE_NONE] = "", [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", @@ -774,7 +779,7 @@ static int src1 (FILE *file, struct brw_instruction *inst) } } -int brw_disasm (FILE *file, struct brw_instruction *inst) +int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) { int err = 0; int space = 0; @@ -829,12 +834,20 @@ int brw_disasm (FILE *file, struct brw_instruction *inst) } if (inst->header.opcode == BRW_OPCODE_SEND) { + int target; + + if (gen >= 5) + target = inst->bits2.send_gen5.sfid; + else + target = inst->bits3.generic.msg_target; + newline (file); pad (file, 16); space = 0; err |= control (file, "target function", target_function, - inst->bits3.generic.msg_target, &space); - switch (inst->bits3.generic.msg_target) { + target, &space); + + switch (target) { case BRW_MESSAGE_TARGET_MATH: err |= control (file, "math function", math_function, inst->bits3.math.function, &space); @@ -864,8 +877,17 @@ int brw_disasm (FILE *file, struct brw_instruction *inst) inst->bits3.dp_write.send_commit_msg); break; case BRW_MESSAGE_TARGET_URB: - format (file, " %d", inst->bits3.urb.offset); + if (gen >= 5) { + format (file, " %d", inst->bits3.urb_gen5.offset); + } else { + format (file, " %d", inst->bits3.urb.offset); + } + space = 1; + if (gen >= 5) { + err |= control (file, "urb opcode", urb_opcode, + inst->bits3.urb_gen5.opcode, &space); + } err |= control (file, "urb swizzle", urb_swizzle, inst->bits3.urb.swizzle_control, &space); err |= control (file, "urb allocate", urb_allocate, diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 8247faa36d..9cbff24863 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -59,7 +59,7 @@ static GLuint half_float_types[5] = { 0, BRW_SURFACEFORMAT_R16_FLOAT, BRW_SURFACEFORMAT_R16G16_FLOAT, - 0, /* can't seem to render this one */ + BRW_SURFACEFORMAT_R16G16B16A16_FLOAT, BRW_SURFACEFORMAT_R16G16B16A16_FLOAT }; diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 4f55158e8f..3a32ad26c1 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -822,13 +822,8 @@ void brw_ff_sync(struct brw_compile *p, GLuint msg_reg_nr, struct brw_reg src0, GLboolean allocate, - GLboolean used, - GLuint msg_length, GLuint response_length, - GLboolean eot, - GLboolean writes_complete, - GLuint offset, - GLuint swizzle); + GLboolean eot); void brw_fb_WRITE(struct brw_compile *p, struct brw_reg dest, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 785d382a00..175899b026 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -280,28 +280,23 @@ static void brw_set_math_message( struct brw_context *brw, } -static void brw_set_ff_sync_message( struct brw_context *brw, - struct brw_instruction *insn, - GLboolean allocate, - GLboolean used, - GLuint msg_length, - GLuint response_length, - GLboolean end_of_thread, - GLboolean complete, - GLuint offset, - GLuint swizzle_control ) +static void brw_set_ff_sync_message(struct brw_context *brw, + struct brw_instruction *insn, + GLboolean allocate, + GLuint response_length, + GLboolean end_of_thread) { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.urb_gen5.opcode = 1; - insn->bits3.urb_gen5.offset = offset; - insn->bits3.urb_gen5.swizzle_control = swizzle_control; + insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ + insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ + insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ insn->bits3.urb_gen5.allocate = allocate; - insn->bits3.urb_gen5.used = used; - insn->bits3.urb_gen5.complete = complete; + insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ + insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ insn->bits3.urb_gen5.header_present = 1; - insn->bits3.urb_gen5.response_length = response_length; - insn->bits3.urb_gen5.msg_length = msg_length; + insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */ + insn->bits3.urb_gen5.msg_length = 1; insn->bits3.urb_gen5.end_of_thread = end_of_thread; insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB; insn->bits2.send_gen5.end_of_thread = end_of_thread; @@ -1451,18 +1446,11 @@ void brw_ff_sync(struct brw_compile *p, GLuint msg_reg_nr, struct brw_reg src0, GLboolean allocate, - GLboolean used, - GLuint msg_length, GLuint response_length, - GLboolean eot, - GLboolean writes_complete, - GLuint offset, - GLuint swizzle) + GLboolean eot) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - assert(msg_length < 16); - brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); @@ -1470,13 +1458,8 @@ void brw_ff_sync(struct brw_compile *p, insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_ff_sync_message(p->brw, - insn, - allocate, - used, - msg_length, - response_length, - eot, - writes_complete, - offset, - swizzle); + insn, + allocate, + response_length, + eot); } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 4b13494ecf..94d93f3aa6 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -122,6 +122,16 @@ static void compile_gs_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); + if (INTEL_DEBUG & DEBUG_GS) { + int i; + + printf("gs:\n"); + for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) + brw_disasm(stdout, &((struct brw_instruction *)program)[i], + intel->gen); + printf("\n"); + } + /* Upload */ dri_bo_unreference(brw->gs.prog_bo); @@ -163,6 +173,12 @@ static void populate_key( struct brw_context *brw, /* _NEW_LIGHT */ key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); + if (key->primitive == GL_QUADS && ctx->Light.ShadeModel != GL_FLAT) { + /* Provide consistent primitive order with brw_set_prim's + * optimization of single quads to trifans. + */ + key->pv_first = GL_TRUE; + } key->need_gs_prog = (key->hint_gs_always || brw->primitive == GL_QUADS || diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index dd7b057d62..99a6f6be11 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -104,18 +104,13 @@ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) { struct brw_compile *p = &c->func; brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim)); - brw_ff_sync(p, - c->reg.R0, - 0, - c->reg.R0, - 1, - 1, /* used */ - 1, /* msg length */ - 1, /* response length */ - 0, /* eot */ - 1, /* write compelete */ - 0, /* urb offset */ - BRW_URB_SWIZZLE_NONE); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, /* allocate */ + 1, /* response length */ + 0 /* eot */); } diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index 6cce7e5089..3f47a68049 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -38,7 +38,6 @@ * required for handling queries, so that we can be sure that we won't * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT. */ -#include "main/simple_list.h" #include "main/imports.h" #include "brw_context.h" @@ -105,7 +104,7 @@ brw_begin_query(GLcontext *ctx, struct gl_query_object *q) query->first_index = -1; query->last_index = -1; - insert_at_head(&brw->query.active_head, query); + brw->query.obj = query; intel->stats_wm++; } @@ -131,7 +130,7 @@ brw_end_query(GLcontext *ctx, struct gl_query_object *q) brw->query.bo = NULL; } - remove_from_list(query); + brw->query.obj = NULL; intel->stats_wm--; } @@ -161,7 +160,7 @@ brw_prepare_query_begin(struct brw_context *brw) struct intel_context *intel = &brw->intel; /* Skip if we're not doing any queries. */ - if (is_empty_list(&brw->query.active_head)) + if (!brw->query.obj) return; /* Get a new query BO if we're going to need it. */ @@ -182,10 +181,10 @@ void brw_emit_query_begin(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - struct brw_query_object *query; + struct brw_query_object *query = brw->query.obj; /* Skip if we're not doing any queries, or we've emitted the start. */ - if (brw->query.active || is_empty_list(&brw->query.active_head)) + if (!query || brw->query.active) return; BEGIN_BATCH(4); @@ -205,16 +204,14 @@ brw_emit_query_begin(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); - foreach(query, &brw->query.active_head) { - if (query->bo != brw->query.bo) { - if (query->bo != NULL) - brw_queryobj_get_results(query); - dri_bo_reference(brw->query.bo); - query->bo = brw->query.bo; - query->first_index = brw->query.index; - } - query->last_index = brw->query.index; + if (query->bo != brw->query.bo) { + if (query->bo != NULL) + brw_queryobj_get_results(query); + dri_bo_reference(brw->query.bo); + query->bo = brw->query.bo; + query->first_index = brw->query.index; } + query->last_index = brw->query.index; brw->query.active = GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 57d1c29ade..b0dd1ff3af 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -46,6 +46,7 @@ static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { + struct intel_context *intel = &brw->intel; struct brw_sf_compile c; const GLuint *program; GLuint program_size; @@ -107,6 +108,14 @@ static void compile_sf_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); + if (INTEL_DEBUG & DEBUG_SF) { + printf("sf:\n"); + for (i = 0; i < program_size / sizeof(struct brw_instruction); i++) + brw_disasm(stdout, &((struct brw_instruction *)program)[i], + intel->gen); + printf("\n"); + } + /* Upload */ dri_bo_unreference(brw->sf.prog_bo); @@ -154,6 +163,7 @@ static void upload_sf_prog(struct brw_context *brw) break; } + /* _NEW_POINT */ key.do_point_sprite = ctx->Point.PointSprite; if (key.do_point_sprite) { int i; diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 44b085e214..57ffb2d89e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -48,6 +48,7 @@ static void do_vs_prog( struct brw_context *brw, const GLuint *program; struct brw_vs_compile c; int aux_size; + int i; memset(&c, 0, sizeof(c)); memcpy(&c.key, key, sizeof(*key)); @@ -63,6 +64,17 @@ static void do_vs_prog( struct brw_context *brw, c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG; } + /* Put dummy slots into the VUE for the SF to put the replaced + * point sprite coords in. We shouldn't need these dummy slots, + * which take up precious URB space, but it would mean that the SF + * doesn't get nice aligned pairs of input coords into output + * coords, which would be a pain to handle. + */ + for (i = 0; i < 8; i++) { + if (c.key.point_coord_replace & (1 << i)) + c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i); + } + if (0) _mesa_print_program(&c.vp->program.Base); @@ -106,6 +118,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) struct brw_vs_prog_key key; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; + int i; memset(&key, 0, sizeof(key)); @@ -117,6 +130,14 @@ static void brw_upload_vs_prog(struct brw_context *brw) key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL); + /* _NEW_POINT */ + if (ctx->Point.PointSprite) { + for (i = 0; i < 8; i++) { + if (ctx->Point.CoordReplace[i]) + key.point_coord_replace |= (1 << i); + } + } + /* Make an early check for the key. */ dri_bo_unreference(brw->vs.prog_bo); @@ -135,7 +156,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_POLYGON, + .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 95e0501b1e..6493744f3e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -43,7 +43,7 @@ struct brw_vs_prog_key { GLuint program_string_id; GLuint nr_userclip:4; GLuint copy_edgeflag:1; - GLuint pad:26; + GLuint point_coord_replace:8; }; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index dc6ab81c4a..0b44deeb63 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1882,7 +1882,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) printf("vs-native:\n"); for (i = 0; i < p->nr_insn; i++) - brw_disasm(stderr, &p->store[i]); + brw_disasm(stderr, &p->store[i], intel->gen); printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 375e795391..323cfac8fa 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1717,7 +1717,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) - brw_disasm(stderr, &p->store[i]); + brw_disasm(stderr, &p->store[i], p->brw->intel.gen); printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 88b885cb94..fe3c89b721 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -2111,7 +2111,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) if (INTEL_DEBUG & DEBUG_WM) { printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) - brw_disasm(stderr, &p->store[i]); + brw_disasm(stderr, &p->store[i], intel->gen); printf("\n"); } } diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 7d9f302dca..a590c799ad 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -353,6 +353,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) OUT_BATCH(clear_val); ADVANCE_BATCH(); + if (intel->always_flush_cache) + intel_batchbuffer_emit_mi_flush(intel->batch); + if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); else diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 0369942b39..a94f6886f9 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -471,12 +471,12 @@ static const struct dri_debug_control debug_control[] = { { "buf", DEBUG_BUFMGR}, { "reg", DEBUG_REGION}, { "fbo", DEBUG_FBO}, - { "lock", DEBUG_LOCK}, + { "gs", DEBUG_GS}, { "sync", DEBUG_SYNC}, { "prim", DEBUG_PRIMS }, { "vert", DEBUG_VERTS }, { "dri", DEBUG_DRI }, - { "dma", DEBUG_DMA }, + { "sf", DEBUG_SF }, { "san", DEBUG_SANITY }, { "sleep", DEBUG_SLEEP }, { "stats", DEBUG_STATS }, @@ -487,6 +487,7 @@ static const struct dri_debug_control debug_control[] = { { "glsl_force", DEBUG_GLSL_FORCE }, { "urb", DEBUG_URB }, { "vs", DEBUG_VS }, + { "clip", DEBUG_CLIP }, { NULL, 0 } }; @@ -905,6 +906,12 @@ intelMakeCurrent(__DRIcontext * driContextPriv, driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; intel_prepare_render(intel); _mesa_make_current(&intel->ctx, fb, readFb); + + /* We do this in intel_prepare_render() too, but intel->ctx.DrawBuffer + * is NULL at that point. We can't call _mesa_makecurrent() + * first, since we need the buffer size for the initial + * viewport. So just call intel_draw_buffer() again here. */ + intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer); } else { _mesa_make_current(NULL, NULL, NULL); diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index db244e5872..dae5896a5c 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -326,12 +326,12 @@ extern int INTEL_DEBUG; #define DEBUG_BUFMGR 0x200 #define DEBUG_REGION 0x400 #define DEBUG_FBO 0x800 -#define DEBUG_LOCK 0x1000 +#define DEBUG_GS 0x1000 #define DEBUG_SYNC 0x2000 #define DEBUG_PRIMS 0x4000 #define DEBUG_VERTS 0x8000 #define DEBUG_DRI 0x10000 -#define DEBUG_DMA 0x20000 +#define DEBUG_SF 0x20000 #define DEBUG_SANITY 0x40000 #define DEBUG_SLEEP 0x80000 #define DEBUG_STATS 0x100000 @@ -341,6 +341,7 @@ extern int INTEL_DEBUG; #define DEBUG_URB 0x1000000 #define DEBUG_VS 0x2000000 #define DEBUG_GLSL_FORCE 0x4000000 +#define DEBUG_CLIP 0x8000000 #define DBG(...) do { \ if (INTEL_DEBUG & FILE_DEBUG_FLAG) \ diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c index 7be5231eae..610a169beb 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_format.c +++ b/src/mesa/drivers/dri/intel/intel_tex_format.c @@ -1,7 +1,7 @@ #include "intel_context.h" #include "intel_tex.h" #include "main/enums.h" - +#include "main/formats.h" /** * Choose hardware texture format given the user's glTexImage parameters. @@ -208,22 +208,11 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, int intel_compressed_num_bytes(GLuint mesaFormat) { - int bytes = 0; - switch(mesaFormat) { - - case MESA_FORMAT_RGB_FXT1: - case MESA_FORMAT_RGBA_FXT1: - case MESA_FORMAT_RGB_DXT1: - case MESA_FORMAT_RGBA_DXT1: - bytes = 2; - break; - - case MESA_FORMAT_RGBA_DXT3: - case MESA_FORMAT_RGBA_DXT5: - bytes = 4; - default: - break; - } - - return bytes; + GLuint bw, bh; + GLuint block_size; + + block_size = _mesa_get_format_bytes(mesaFormat); + _mesa_get_format_block_size(mesaFormat, &bw, &bh); + + return block_size / bh; } diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index e432afc3d4..34d22b4559 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -21,6 +21,7 @@ C_SOURCES = \ radeon_dataflow.c \ radeon_dataflow_deadcode.c \ radeon_dataflow_swizzles.c \ + radeon_optimize.c \ r3xx_fragprog.c \ r300_fragprog.c \ r300_fragprog_swizzle.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index cfa48a59e3..5d5de2f1b2 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -56,7 +56,8 @@ static const struct swizzle_data native_swizzles[] = { {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1}, {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1}, {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0}, - {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0} + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0} }; static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); @@ -221,6 +222,7 @@ unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF; default: return R300_ALU_ARGA_ONE; } } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 25bf373b6f..3e88ccbc46 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -152,6 +152,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "after deadcode"); + rc_optimize(&c->Base); + + debug_program_log(c, "after dataflow optimize"); + rc_dataflow_swizzles(&c->Base); if (c->Base.Error) return; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index 16e2f3a218..0e6c62541f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -30,7 +30,7 @@ #include "radeon_program.h" -static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata) { struct rc_sub_instruction * inst = &fullinst->U.I; const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); @@ -46,18 +46,15 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, refmask &= RC_MASK_XYZW; - for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_BIT(refmask, chan)) { - cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan); - } - } + if (refmask) + cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask); if (refmask && inst->SrcReg[src].RelAddr) cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X); } } -static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) { struct rc_pair_instruction * inst = &fullinst->U.P; unsigned int refmasks[3] = { 0, 0, 0 }; @@ -84,27 +81,23 @@ static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, v } for(unsigned int src = 0; src < 3; ++src) { - if (inst->RGB.Src[src].Used) { - for(unsigned int chan = 0; chan < 3; ++chan) { - if (GET_BIT(refmasks[src], chan)) - cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan); - } - } + if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ)) + cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, + refmasks[src] & RC_MASK_XYZ); - if (inst->Alpha.Src[src].Used) { - if (GET_BIT(refmasks[src], 3)) - cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3); - } + if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W)) + cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W); } } /** - * Calls a callback function for all sourced register channels. + * Calls a callback function for all register reads. * - * This is conservative, i.e. channels may be called multiple times, - * and the writemask of the instruction is not taken into account. + * This is conservative, i.e. if the same register is referenced multiple times, + * the callback may also be called multiple times. + * Also, the writemask of the instruction is not taken into account. */ -void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) { if (inst->Type == RC_INSTRUCTION_NORMAL) { reads_normal(inst, cb, userdata); @@ -115,44 +108,39 @@ void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * -static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) { struct rc_sub_instruction * inst = &fullinst->U.I; const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); - if (opcode->HasDstReg) { - for(unsigned int chan = 0; chan < 4; ++chan) { - if (GET_BIT(inst->DstReg.WriteMask, chan)) - cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan); - } - } + if (opcode->HasDstReg && inst->DstReg.WriteMask) + cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask); if (inst->WriteALUResult) - cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); } -static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata) { struct rc_pair_instruction * inst = &fullinst->U.P; - for(unsigned int chan = 0; chan < 3; ++chan) { - if (GET_BIT(inst->RGB.WriteMask, chan)) - cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan); - } + if (inst->RGB.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask); if (inst->Alpha.WriteMask) - cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, 3); + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W); if (inst->WriteALUResult) - cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X); } /** - * Calls a callback function for all written register channels. + * Calls a callback function for all register writes in the instruction, + * reporting writemasks to the callback function. * * \warning Does not report output registers for paired instructions! */ -void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata) { if (inst->Type == RC_INSTRUCTION_NORMAL) { writes_normal(inst, cb, userdata); @@ -162,6 +150,48 @@ void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * } +struct mask_to_chan_data { + void * UserData; + rc_read_write_chan_fn Fn; +}; + +static void mask_to_chan_cb(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct mask_to_chan_data * d = data; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(mask, chan)) + d->Fn(d->UserData, inst, file, index, chan); + } +} + +/** + * Calls a callback function for all sourced register channels. + * + * This is conservative, i.e. channels may be called multiple times, + * and the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); +} + +/** + * Calls a callback function for all written register channels. + * + * \warning Does not report output registers for paired instructions! + */ +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata) +{ + struct mask_to_chan_data d; + d.UserData = userdata; + d.Fn = cb; + rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); +} + static void remap_normal_instruction(struct rc_instruction * fullinst, rc_remap_register_fn cb, void * userdata) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index 62cda20eea..60a6e192a9 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -39,10 +39,15 @@ struct rc_swizzle_caps; * Help analyze and modify the register accesses of instructions. */ /*@{*/ -typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst, +typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int chan); -void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); -void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); +void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); +void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata); + +typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask); +void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); +void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata); typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst, rc_register_file * pfile, unsigned int * pindex); @@ -60,4 +65,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f void rc_dataflow_swizzles(struct radeon_compiler * c); /*@}*/ +void rc_optimize(struct radeon_compiler * c); + #endif /* RADEON_DATAFLOW_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c index d889612f4f..863654cf68 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c @@ -150,7 +150,7 @@ static void allocate_and_insert_proxies(struct emulate_branch_state * s, sap.Proxies = proxies; for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { - rc_for_all_writes(inst, scan_write, &sap); + rc_for_all_writes_mask(inst, scan_write, &sap); rc_remap_registers(inst, remap_proxy_function, &sap); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c new file mode 100644 index 0000000000..21d7210888 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -0,0 +1,446 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + + +static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) +{ + struct rc_src_register combine; + combine.File = inner.File; + combine.Index = inner.Index; + combine.RelAddr = inner.RelAddr; + if (outer.Abs) { + combine.Abs = 1; + combine.Negate = outer.Negate; + } else { + combine.Abs = inner.Abs; + combine.Negate = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(outer.Swizzle, chan); + if (swz < 4) + combine.Negate |= GET_BIT(inner.Negate, swz) << chan; + } + combine.Negate ^= outer.Negate; + } + combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); + return combine; +} + +struct peephole_state { + struct radeon_compiler * C; + struct rc_instruction * Mov; + unsigned int Conflict:1; + + /** Whether Mov's source has been clobbered */ + unsigned int SourceClobbered:1; + + /** Which components of Mov's destination register are still from that Mov? */ + unsigned int MovMask:4; + + /** Which components of Mov's destination register are clearly *not* from that Mov */ + unsigned int DefinedMask:4; + + /** Which components of Mov's source register are sourced */ + unsigned int SourcedMask:4; + + /** Branch depth beyond Mov; negative value indicates we left the Mov's block */ + int BranchDepth; +}; + +static void peephole_scan_read(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct peephole_state * s = data; + + if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index) + return; + + /* These instructions cannot read from the constants file. + * see radeonTransformTEX() + */ + if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && + s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT && + (inst->U.I.Opcode == RC_OPCODE_TEX || + inst->U.I.Opcode == RC_OPCODE_TXB || + inst->U.I.Opcode == RC_OPCODE_TXP || + inst->U.I.Opcode == RC_OPCODE_KIL)){ + s->Conflict = 1; + return; + } + if ((mask & s->MovMask) == mask) { + if (s->SourceClobbered) { + s->Conflict = 1; + } + } else if ((mask & s->DefinedMask) == mask) { + /* read from something entirely written by other instruction: this is okay */ + } else { + /* read from component combination that is not well-defined without + * the MOV: cannot remove it */ + s->Conflict = 1; + } +} + +static void peephole_scan_write(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + struct peephole_state * s = data; + + if (s->BranchDepth < 0) + return; + + if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) { + s->MovMask &= ~mask; + if (s->BranchDepth == 0) + s->DefinedMask |= mask; + else + s->DefinedMask &= ~mask; + } + if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) { + if (mask & s->SourcedMask) + s->SourceClobbered = 1; + } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) { + s->SourceClobbered = 1; + } +} + +static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov) +{ + struct peephole_state s; + + if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult) + return; + + memset(&s, 0, sizeof(s)); + s.C = c; + s.Mov = inst_mov; + s.MovMask = inst_mov->U.I.DstReg.WriteMask; + s.DefinedMask = RC_MASK_XYZW & ~s.MovMask; + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan); + s.SourcedMask |= (1 << swz) & RC_MASK_XYZW; + } + + /* 1st pass: Check whether all subsequent readers can be changed */ + for(struct rc_instruction * inst = inst_mov->Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads_mask(inst, peephole_scan_read, &s); + rc_for_all_writes_mask(inst, peephole_scan_write, &s); + if (s.Conflict) + return; + + if (s.BranchDepth >= 0) { + if (inst->U.I.Opcode == RC_OPCODE_IF) { + s.BranchDepth++; + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + s.BranchDepth--; + if (s.BranchDepth < 0) { + s.DefinedMask &= ~s.MovMask; + s.MovMask = 0; + } + } + } + } + + if (s.Conflict) + return; + + /* 2nd pass: We can satisfy all readers, so switch them over all at once */ + s.MovMask = inst_mov->U.I.DstReg.WriteMask; + s.BranchDepth = 0; + + for(struct rc_instruction * inst = inst_mov->Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY && + inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) { + unsigned int refmask = 0; + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); + refmask |= (1 << swz) & RC_MASK_XYZW; + } + + if ((refmask & s.MovMask) == refmask) + inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]); + } + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY && + inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) { + s.MovMask &= ~inst->U.I.DstReg.WriteMask; + } + } + + if (s.BranchDepth >= 0) { + if (inst->U.I.Opcode == RC_OPCODE_IF) { + s.BranchDepth++; + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + s.BranchDepth--; + if (s.BranchDepth < 0) + break; /* no more readers after this point */ + } + } + } + + /* Finally, remove the original MOV instruction */ + rc_remove_instruction(inst_mov); +} + +/** + * Check if a source register is actually always the same + * swizzle constant. + */ +static int is_src_uniform_constant(struct rc_src_register src, + rc_swizzle * pswz, unsigned int * pnegate) +{ + int have_used = 0; + + if (src.File != RC_FILE_NONE) { + *pswz = 0; + return 0; + } + + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(src.Swizzle, chan); + if (swz < 4) { + *pswz = 0; + return 0; + } + if (swz == RC_SWIZZLE_UNUSED) + continue; + + if (!have_used) { + *pswz = swz; + *pnegate = GET_BIT(src.Negate, chan); + have_used = 1; + } else { + if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { + *pswz = 0; + return 0; + } + } + } + + return 1; +} + + +static void constant_folding_mad(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MUL; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_ADD; + if (negate) + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + return; + } + } +} + +static void constant_folding_mul(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ONE) { + inst->U.I.Opcode = RC_OPCODE_MOV; + if (negate) + inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; + return; + } else if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; + return; + } + } +} + +static void constant_folding_add(struct rc_instruction * inst) +{ + rc_swizzle swz; + unsigned int negate; + + if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + return; + } + } + + if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { + if (swz == RC_SWIZZLE_ZERO) { + inst->U.I.Opcode = RC_OPCODE_MOV; + return; + } + } +} + + +/** + * Replace 0.0, 1.0 and 0.5 immediate constants by their + * respective swizzles. Simplify instructions like ADD dst, src, 0; + */ +static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || + inst->U.I.SrcReg[src].RelAddr || + inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) + continue; + + struct rc_constant * constant = + &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; + + if (constant->Type != RC_CONSTANT_IMMEDIATE) + continue; + + struct rc_src_register newsrc = inst->U.I.SrcReg[src]; + int have_real_reference = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); + if (swz >= 4) + continue; + + unsigned int newswz; + float imm = constant->u.Immediate[swz]; + float baseimm = imm; + if (imm < 0.0) + baseimm = -baseimm; + + if (baseimm == 0.0) { + newswz = RC_SWIZZLE_ZERO; + } else if (baseimm == 1.0) { + newswz = RC_SWIZZLE_ONE; + } else if (baseimm == 0.5) { + newswz = RC_SWIZZLE_HALF; + } else { + have_real_reference = 1; + continue; + } + + SET_SWZ(newsrc.Swizzle, chan, newswz); + if (imm < 0.0 && !newsrc.Abs) + newsrc.Negate ^= 1 << chan; + } + + if (!have_real_reference) { + newsrc.File = RC_FILE_NONE; + newsrc.Index = 0; + } + + /* don't make the swizzle worse */ + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && + c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + continue; + + inst->U.I.SrcReg[src] = newsrc; + } + + /* Simplify instructions based on constants */ + if (inst->U.I.Opcode == RC_OPCODE_MAD) + constant_folding_mad(inst); + + /* note: MAD can simplify to MUL or ADD */ + if (inst->U.I.Opcode == RC_OPCODE_MUL) + constant_folding_mul(inst); + else if (inst->U.I.Opcode == RC_OPCODE_ADD) + constant_folding_add(inst); +} + +void rc_optimize(struct radeon_compiler * c) +{ + struct rc_instruction * inst = c->Program.Instructions.Next; + while(inst != &c->Program.Instructions) { + struct rc_instruction * cur = inst; + inst = inst->Next; + + constant_folding(c, cur); + + if (cur->U.I.Opcode == RC_OPCODE_MOV) { + peephole(c, cur); + /* cur may no longer be part of the program */ + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index fdfee86701..8a912da461 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -159,7 +159,7 @@ static int try_add_live_intervals(struct regalloc_state * s, } static void scan_callback(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int chan) + rc_register_file file, unsigned int index, unsigned int mask) { struct regalloc_state * s = data; struct register_info * reg; @@ -191,8 +191,8 @@ static void compute_live_intervals(struct regalloc_state * s) for(struct rc_instruction * inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions; inst = inst->Next) { - rc_for_all_reads(inst, scan_callback, s); - rc_for_all_writes(inst, scan_callback, s); + rc_for_all_reads_mask(inst, scan_callback, s); + rc_for_all_writes_mask(inst, scan_callback, s); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index df67aafe02..a279549ff8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -448,8 +448,8 @@ static void schedule_block(struct r300_fragment_program_compiler * c, * counter-intuitive, to account for the case where an * instruction writes to the same register as it reads * from. */ - rc_for_all_writes(inst, &scan_write, &s); - rc_for_all_reads(inst, &scan_read, &s); + rc_for_all_writes_chan(inst, &scan_write, &s); + rc_for_all_reads_chan(inst, &scan_read, &s); DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index 42c08cd550..8336e58d55 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -119,7 +119,7 @@ int radeonTransformTEX( struct rc_instruction * inst_cmp; unsigned tmp_texsample = rc_find_free_temporary(c); unsigned tmp_sum = rc_find_free_temporary(c); - unsigned tmp_recip_w; + unsigned tmp_recip_w = 0; int pass, fail, tex; /* Save the output register. */ diff --git a/src/mesa/main/dlopen.c b/src/mesa/main/dlopen.c index 658ac9e40c..57a33292ed 100644 --- a/src/mesa/main/dlopen.c +++ b/src/mesa/main/dlopen.c @@ -67,22 +67,27 @@ _mesa_dlopen(const char *libname, int flags) GenericFunc _mesa_dlsym(void *handle, const char *fname) { + union { + void *v; + GenericFunc f; + } u; #if defined(__blrts) - return (GenericFunc) NULL; + u.v = NULL; #elif defined(__DJGPP__) /* need '_' prefix on symbol names */ char fname2[1000]; fname2[0] = '_'; strncpy(fname2 + 1, fname, 998); fname2[999] = 0; - return (GenericFunc) dlsym(handle, fname2); + u.v = dlsym(handle, fname2); #elif defined(_GNU_SOURCE) - return (GenericFunc) dlsym(handle, fname); + u.v = dlsym(handle, fname); #elif defined(__MINGW32__) - return (GenericFunc) GetProcAddress(handle, fname); + u.v = (void *) GetProcAddress(handle, fname); #else - return (GenericFunc) NULL; + u.v = NULL; #endif + return u.f; } diff --git a/src/mesa/main/drawtex.c b/src/mesa/main/drawtex.c index 86d5b555e0..c2ad5f2386 100644 --- a/src/mesa/main/drawtex.c +++ b/src/mesa/main/drawtex.c @@ -25,8 +25,6 @@ #include "main/state.h" #include "main/imports.h" -#include "main/dispatch.h" - #if FEATURE_OES_draw_texture diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index d2dcddddf2..12d046b075 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -25,6 +25,7 @@ #include "glheader.h" #include "context.h" #include "enable.h" +#include "enums.h" #include "extensions.h" #include "get.h" #include "macros.h" @@ -135,8 +136,8 @@ enum value_extra { struct value_desc { GLenum pname; - enum value_location location : 8; - enum value_type type : 8; + GLubyte location; /**< enum value_location */ + GLubyte type; /**< enum value_type */ int offset; const int *extra; }; @@ -1678,7 +1679,8 @@ check_extra(GLcontext *ctx, const char *func, const struct value_desc *d) } if (total > 0 && enabled == 0) { - _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, d->pname); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func, + _mesa_lookup_enum_by_nr(d->pname)); return GL_FALSE; } @@ -1727,7 +1729,8 @@ find_value(const char *func, GLenum pname, void **p, union value *v) /* If the enum isn't valid, the hash walk ends with index 0, * which is the API mask entry at the beginning of values[]. */ if (d->type == TYPE_API_MASK) { - _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, pname); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func, + _mesa_lookup_enum_by_nr(pname)); return &error_value; } hash += prime_step; @@ -2256,10 +2259,12 @@ find_value_indexed(const char *func, GLenum pname, int index, union value *v) } invalid_enum: - _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, pname); + _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func, + _mesa_lookup_enum_by_nr(pname)); return TYPE_INVALID; invalid_value: - _mesa_error(ctx, GL_INVALID_VALUE, "%s(pname=0x%x)", func, pname); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(pname=%s)", func, + _mesa_lookup_enum_by_nr(pname)); return TYPE_INVALID; } diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c index e5c08a6414..a6b04e9551 100644 --- a/src/mesa/main/querymatrix.c +++ b/src/mesa/main/querymatrix.c @@ -70,7 +70,7 @@ fpclassify(double x) } } -#elif defined(__APPLE__) || defined(__CYGWIN__) +#elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) /* fpclassify is available. */ diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index cd3dd9b38c..050ebf0270 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -190,7 +190,8 @@ _mesa_free_transform_feedback(GLcontext *ctx) /* Delete the default feedback object */ assert(ctx->Driver.DeleteTransformFeedback); - ctx->Driver.DeleteTransformFeedback(ctx, ctx->TransformFeedback.DefaultObject); + ctx->Driver.DeleteTransformFeedback(ctx, + ctx->TransformFeedback.DefaultObject); ctx->TransformFeedback.CurrentObject = NULL; } @@ -749,7 +750,7 @@ _mesa_BindTransformFeedback(GLenum target, GLuint name) if (ctx->TransformFeedback.CurrentObject->Active && !ctx->TransformFeedback.CurrentObject->Paused) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glBindTransformFeedback(transform is active, or not paused)"); + "glBindTransformFeedback(transform is active, or not paused)"); return; } @@ -844,7 +845,7 @@ _mesa_ResumeTransformFeedback(void) if (!obj->Active || !obj->Paused) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glPauseTransformFeedback(feedback not active or not paused)"); + "glPauseTransformFeedback(feedback not active or not paused)"); return; } @@ -871,6 +872,11 @@ _mesa_DrawTransformFeedback(GLenum mode, GLuint name) struct gl_transform_feedback_object *obj = lookup_transform_feedback_object(ctx, name); + if (mode > GL_POLYGON) { + _mesa_error(ctx, GL_INVALID_ENUM, + "glDrawTransformFeedback(mode=0x%x)", mode); + return; + } if (!obj) { _mesa_error(ctx, GL_INVALID_VALUE, "glDrawTransformFeedback(name = %u)", name); diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c index 505c7bb46f..f47f213ac8 100644 --- a/src/mesa/shader/shader_api.c +++ b/src/mesa/shader/shader_api.c @@ -1753,7 +1753,8 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program, /* check that the sampler (tex unit index) is legal */ if (texUnit >= ctx->Const.MaxTextureImageUnits) { _mesa_error(ctx, GL_INVALID_VALUE, - "glUniform1(invalid sampler/tex unit index)"); + "glUniform1(invalid sampler/tex unit index for '%s')", + param->Name); return; } @@ -1801,7 +1802,8 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program, /* non-array: count must be at most one; count == 0 is handled by the loop below */ if (count > 1) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glUniform(uniform is not an array)"); + "glUniform(uniform '%s' is not an array)", + param->Name); return; } } @@ -1864,14 +1866,15 @@ _mesa_uniform(GLcontext *ctx, GLint location, GLsizei count, return; /* The standard specifies this as a no-op */ if (location < -1) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(location)"); + _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(location=%d)", + location); return; } split_location_offset(&location, &offset); if (location < 0 || location >= (GLint) shProg->Uniforms->NumUniforms) { - _mesa_error(ctx, GL_INVALID_VALUE, "glUniform(location)"); + _mesa_error(ctx, GL_INVALID_VALUE, "glUniform(location=%d)", location); return; } diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c index a8bd5db630..e423d9d8a5 100644 --- a/src/mesa/state_tracker/st_cb_queryobj.c +++ b/src/mesa/state_tracker/st_cb_queryobj.c @@ -94,6 +94,9 @@ st_BeginQuery(GLcontext *ctx, struct gl_query_object *q) case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: type = PIPE_QUERY_PRIMITIVES_EMITTED; break; + case GL_TIME_ELAPSED_EXT: + type = PIPE_QUERY_TIME_ELAPSED; + break; default: assert(0 && "unexpected query target in st_BeginQuery()"); return; diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 12d3c99a35..b8493dab93 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -46,6 +46,7 @@ #include "st_debug.h" #include "st_context.h" #include "st_atom.h" +#include "st_cb_bitmap.h" #include "st_cb_readpixels.h" #include "st_cb_fbo.h" @@ -344,6 +345,8 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, return; } + st_flush_bitmap_cache(st); + dest = _mesa_map_pbo_dest(ctx, &clippedPacking, dest); if (!dest) return; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 0cd80fa59f..459e924cca 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -296,6 +296,9 @@ void st_init_extensions(struct st_context *st) if (screen->get_param(screen, PIPE_CAP_OCCLUSION_QUERY)) { ctx->Extensions.ARB_occlusion_query = GL_TRUE; } + if (screen->get_param(screen, PIPE_CAP_TIMER_QUERY)) { + ctx->Extensions.EXT_timer_query = GL_TRUE; + } if (screen->get_param(screen, PIPE_CAP_TEXTURE_SHADOW_MAP)) { ctx->Extensions.ARB_depth_texture = GL_TRUE; |