summary refs log tree commit diff
path: root/src/mesa
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip.c9
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_tri.c20
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_util.c36
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h10
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_disasm.c30
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw_upload.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h7
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c51
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs.c16
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_emit.c19
-rw-r--r--src/mesa/drivers/dri/i965/brw_queryobj.c27
-rw-r--r--src/mesa/drivers/dri/i965/brw_sf.c10
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c23
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c2
-rw-r--r--src/mesa/drivers/dri/intel/intel_batchbuffer.c3
-rw-r--r--src/mesa/drivers/dri/intel/intel_blit.c3
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c57
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.h7
-rw-r--r--src/mesa/drivers/dri/intel/intel_screen.c13
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_format.c27
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c106
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h13
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c446
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c6
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c2
-rw-r--r--src/mesa/main/dlopen.c15
-rw-r--r--src/mesa/main/drawtex.c2
-rw-r--r--src/mesa/main/get.c17
-rw-r--r--src/mesa/main/querymatrix.c3
-rw-r--r--src/mesa/main/transformfeedback.c12
-rw-r--r--src/mesa/shader/shader_api.c11
-rw-r--r--src/mesa/state_tracker/st_cb_queryobj.c3
-rw-r--r--src/mesa/state_tracker/st_cb_readpixels.c3
-rw-r--r--src/mesa/state_tracker/st_extensions.c3
45 files changed, 792 insertions, 249 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
index 029a16500b..49ef859e45 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -42,7 +42,6 @@
#include "brw_state.h"
#include "brw_clip.h"
-
#define FRONT_UNFILLED_BIT 0x1
#define BACK_UNFILLED_BIT 0x2
@@ -127,6 +126,14 @@ static void compile_clip_prog( struct brw_context *brw,
*/
program = brw_get_program(&c.func, &program_size);
+ if (INTEL_DEBUG & DEBUG_CLIP) {
+ printf("clip:\n");
+ for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
+ brw_disasm(stdout, &((struct brw_instruction *)program)[i],
+ intel->gen);
+ printf("\n");
+ }
+
/* Upload
*/
dri_bo_unreference(brw->clip.prog_bo);
diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h
index d71bac7f61..68222c6c27 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.h
+++ b/src/mesa/drivers/dri/i965/brw_clip.h
@@ -114,8 +114,6 @@ struct brw_clip_compile {
GLboolean need_direction;
- GLuint last_mrf;
-
GLuint header_position_offset;
GLuint offset[VERT_ATTRIB_MAX];
};
diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c
index b27fe654ca..916a99ea00 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_tri.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c
@@ -177,7 +177,7 @@ void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
- struct brw_instruction *is_poly;
+ struct brw_instruction *is_poly, *is_trifan;
struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
@@ -195,8 +195,22 @@ void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
is_poly = brw_ELSE(p, is_poly);
{
if (c->key.pv_first) {
- brw_clip_copy_colors(c, 1, 0);
- brw_clip_copy_colors(c, 2, 0);
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_TRIFAN));
+ is_trifan = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_copy_colors(c, 0, 1);
+ brw_clip_copy_colors(c, 2, 1);
+ }
+ is_trifan = brw_ELSE(p, is_trifan);
+ {
+ brw_clip_copy_colors(c, 1, 0);
+ brw_clip_copy_colors(c, 2, 0);
+ }
+ brw_ENDIF(p, is_trifan);
}
else {
brw_clip_copy_colors(c, 0, 2);
diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c
index 34a966a47a..2148bc8244 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -211,27 +211,14 @@ void brw_clip_emit_vue(struct brw_clip_compile *c,
GLuint header)
{
struct brw_compile *p = &c->func;
- GLuint start = c->last_mrf;
brw_clip_ff_sync(c);
assert(!(allocate && eot));
-
- /* Cycle through mrf regs - probably futile as we have to wait for
- * the allocation response anyway. Also, the order this function
- * is invoked doesn't correspond to the order the instructions will
- * be executed, so it won't have any effect in many cases.
- */
-#if 0
- if (start + c->nr_regs + 1 >= MAX_MRF)
- start = 0;
- c->last_mrf = start + c->nr_regs + 1;
-#endif
-
/* Copy the vertex from vertn into m1..mN+1:
*/
- brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs);
+ brw_copy_from_indirect(p, brw_message_reg(1), vert, c->nr_regs);
/* Overwrite PrimType and PrimStart in the message header, for
* each vertex in turn:
@@ -247,7 +234,7 @@ void brw_clip_emit_vue(struct brw_clip_compile *c,
*/
brw_urb_WRITE(p,
allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
- start,
+ 0,
c->reg.R0,
allocate,
1, /* used */
@@ -370,18 +357,13 @@ void brw_clip_ff_sync(struct brw_clip_compile *c)
need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
{
brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
- brw_ff_sync(p,
- c->reg.R0,
- 0,
- c->reg.R0,
- 1,
- 1, /* used */
- 1, /* msg length */
- 1, /* response length */
- 0, /* eot */
- 1, /* write compelete */
- 0, /* urb offset */
- BRW_URB_SWIZZLE_NONE);
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1, /* allocate */
+ 1, /* response length */
+ 0 /* eot */);
}
brw_ENDIF(p, need_ff_sync);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 6b04ad9ec6..dc4bd5802d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -192,8 +192,6 @@ GLboolean brwCreateContext( int api,
ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
- make_empty_list(&brw->query.active_head);
-
brw_draw_init( brw );
return GL_TRUE;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 1f09651126..a97fcb0f4d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -418,18 +418,12 @@ struct brw_vertex_info {
struct brw_query_object {
struct gl_query_object Base;
- /** Doubly linked list of active query objects in the context. */
- struct brw_query_object *prev, *next;
-
/** Last query BO associated with this query. */
dri_bo *bo;
/** First index in bo with query data for this object. */
int first_index;
/** Last index in bo with query data for this object. */
int last_index;
-
- /* Total count of pixels from previous BOs */
- unsigned int count;
};
@@ -664,7 +658,7 @@ struct brw_context
} cc;
struct {
- struct brw_query_object active_head;
+ struct brw_query_object *obj;
dri_bo *bo;
int index;
GLboolean active;
@@ -726,7 +720,7 @@ void brw_upload_urb_fence(struct brw_context *brw);
void brw_upload_cs_urb_state(struct brw_context *brw);
/* brw_disasm.c */
-int brw_disasm (FILE *file, struct brw_instruction *inst);
+int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
/*======================================================================
* Inline conversion functions. These are better-typed than the
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index f26a13fc3c..2d3556b805 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -783,7 +783,7 @@
#define CMD_BINDING_TABLE_PTRS 0x7801
# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8)
# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9)
-# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 10)
+# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12)
#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */
# define PS_SAMPLER_STATE_CHANGE (1 << 12)
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index db3fc50a63..ff12daf497 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -323,6 +323,11 @@ char *math_precision[2] = {
[1] = "partial_precision"
};
+char *urb_opcode[2] = {
+ [0] = "urb_write",
+ [1] = "ff_sync",
+};
+
char *urb_swizzle[4] = {
[BRW_URB_SWIZZLE_NONE] = "",
[BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
@@ -774,7 +779,7 @@ static int src1 (FILE *file, struct brw_instruction *inst)
}
}
-int brw_disasm (FILE *file, struct brw_instruction *inst)
+int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
{
int err = 0;
int space = 0;
@@ -829,12 +834,20 @@ int brw_disasm (FILE *file, struct brw_instruction *inst)
}
if (inst->header.opcode == BRW_OPCODE_SEND) {
+ int target;
+
+ if (gen >= 5)
+ target = inst->bits2.send_gen5.sfid;
+ else
+ target = inst->bits3.generic.msg_target;
+
newline (file);
pad (file, 16);
space = 0;
err |= control (file, "target function", target_function,
- inst->bits3.generic.msg_target, &space);
- switch (inst->bits3.generic.msg_target) {
+ target, &space);
+
+ switch (target) {
case BRW_MESSAGE_TARGET_MATH:
err |= control (file, "math function", math_function,
inst->bits3.math.function, &space);
@@ -864,8 +877,17 @@ int brw_disasm (FILE *file, struct brw_instruction *inst)
inst->bits3.dp_write.send_commit_msg);
break;
case BRW_MESSAGE_TARGET_URB:
- format (file, " %d", inst->bits3.urb.offset);
+ if (gen >= 5) {
+ format (file, " %d", inst->bits3.urb_gen5.offset);
+ } else {
+ format (file, " %d", inst->bits3.urb.offset);
+ }
+
space = 1;
+ if (gen >= 5) {
+ err |= control (file, "urb opcode", urb_opcode,
+ inst->bits3.urb_gen5.opcode, &space);
+ }
err |= control (file, "urb swizzle", urb_swizzle,
inst->bits3.urb.swizzle_control, &space);
err |= control (file, "urb allocate", urb_allocate,
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 8247faa36d..9cbff24863 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -59,7 +59,7 @@ static GLuint half_float_types[5] = {
0,
BRW_SURFACEFORMAT_R16_FLOAT,
BRW_SURFACEFORMAT_R16G16_FLOAT,
- 0, /* can't seem to render this one */
+ BRW_SURFACEFORMAT_R16G16B16A16_FLOAT,
BRW_SURFACEFORMAT_R16G16B16A16_FLOAT
};
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 4f55158e8f..3a32ad26c1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -822,13 +822,8 @@ void brw_ff_sync(struct brw_compile *p,
GLuint msg_reg_nr,
struct brw_reg src0,
GLboolean allocate,
- GLboolean used,
- GLuint msg_length,
GLuint response_length,
- GLboolean eot,
- GLboolean writes_complete,
- GLuint offset,
- GLuint swizzle);
+ GLboolean eot);
void brw_fb_WRITE(struct brw_compile *p,
struct brw_reg dest,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 785d382a00..175899b026 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -280,28 +280,23 @@ static void brw_set_math_message( struct brw_context *brw,
}
-static void brw_set_ff_sync_message( struct brw_context *brw,
- struct brw_instruction *insn,
- GLboolean allocate,
- GLboolean used,
- GLuint msg_length,
- GLuint response_length,
- GLboolean end_of_thread,
- GLboolean complete,
- GLuint offset,
- GLuint swizzle_control )
+static void brw_set_ff_sync_message(struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLboolean allocate,
+ GLuint response_length,
+ GLboolean end_of_thread)
{
brw_set_src1(insn, brw_imm_d(0));
- insn->bits3.urb_gen5.opcode = 1;
- insn->bits3.urb_gen5.offset = offset;
- insn->bits3.urb_gen5.swizzle_control = swizzle_control;
+ insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
+ insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
+ insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
insn->bits3.urb_gen5.allocate = allocate;
- insn->bits3.urb_gen5.used = used;
- insn->bits3.urb_gen5.complete = complete;
+ insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
+ insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
insn->bits3.urb_gen5.header_present = 1;
- insn->bits3.urb_gen5.response_length = response_length;
- insn->bits3.urb_gen5.msg_length = msg_length;
+ insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
+ insn->bits3.urb_gen5.msg_length = 1;
insn->bits3.urb_gen5.end_of_thread = end_of_thread;
insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
insn->bits2.send_gen5.end_of_thread = end_of_thread;
@@ -1451,18 +1446,11 @@ void brw_ff_sync(struct brw_compile *p,
GLuint msg_reg_nr,
struct brw_reg src0,
GLboolean allocate,
- GLboolean used,
- GLuint msg_length,
GLuint response_length,
- GLboolean eot,
- GLboolean writes_complete,
- GLuint offset,
- GLuint swizzle)
+ GLboolean eot)
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
- assert(msg_length < 16);
-
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, brw_imm_d(0));
@@ -1470,13 +1458,8 @@ void brw_ff_sync(struct brw_compile *p,
insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_ff_sync_message(p->brw,
- insn,
- allocate,
- used,
- msg_length,
- response_length,
- eot,
- writes_complete,
- offset,
- swizzle);
+ insn,
+ allocate,
+ response_length,
+ eot);
}
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 4b13494ecf..94d93f3aa6 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -122,6 +122,16 @@ static void compile_gs_prog( struct brw_context *brw,
*/
program = brw_get_program(&c.func, &program_size);
+ if (INTEL_DEBUG & DEBUG_GS) {
+ int i;
+
+ printf("gs:\n");
+ for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
+ brw_disasm(stdout, &((struct brw_instruction *)program)[i],
+ intel->gen);
+ printf("\n");
+ }
+
/* Upload
*/
dri_bo_unreference(brw->gs.prog_bo);
@@ -163,6 +173,12 @@ static void populate_key( struct brw_context *brw,
/* _NEW_LIGHT */
key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
+ if (key->primitive == GL_QUADS && ctx->Light.ShadeModel != GL_FLAT) {
+ /* Provide consistent primitive order with brw_set_prim's
+ * optimization of single quads to trifans.
+ */
+ key->pv_first = GL_TRUE;
+ }
key->need_gs_prog = (key->hint_gs_always ||
brw->primitive == GL_QUADS ||
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c
index dd7b057d62..99a6f6be11 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -104,18 +104,13 @@ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
{
struct brw_compile *p = &c->func;
brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
- brw_ff_sync(p,
- c->reg.R0,
- 0,
- c->reg.R0,
- 1,
- 1, /* used */
- 1, /* msg length */
- 1, /* response length */
- 0, /* eot */
- 1, /* write compelete */
- 0, /* urb offset */
- BRW_URB_SWIZZLE_NONE);
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1, /* allocate */
+ 1, /* response length */
+ 0 /* eot */);
}
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
index 6cce7e5089..3f47a68049 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -38,7 +38,6 @@
* required for handling queries, so that we can be sure that we won't
* have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT.
*/
-#include "main/simple_list.h"
#include "main/imports.h"
#include "brw_context.h"
@@ -105,7 +104,7 @@ brw_begin_query(GLcontext *ctx, struct gl_query_object *q)
query->first_index = -1;
query->last_index = -1;
- insert_at_head(&brw->query.active_head, query);
+ brw->query.obj = query;
intel->stats_wm++;
}
@@ -131,7 +130,7 @@ brw_end_query(GLcontext *ctx, struct gl_query_object *q)
brw->query.bo = NULL;
}
- remove_from_list(query);
+ brw->query.obj = NULL;
intel->stats_wm--;
}
@@ -161,7 +160,7 @@ brw_prepare_query_begin(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
/* Skip if we're not doing any queries. */
- if (is_empty_list(&brw->query.active_head))
+ if (!brw->query.obj)
return;
/* Get a new query BO if we're going to need it. */
@@ -182,10 +181,10 @@ void
brw_emit_query_begin(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- struct brw_query_object *query;
+ struct brw_query_object *query = brw->query.obj;
/* Skip if we're not doing any queries, or we've emitted the start. */
- if (brw->query.active || is_empty_list(&brw->query.active_head))
+ if (!query || brw->query.active)
return;
BEGIN_BATCH(4);
@@ -205,16 +204,14 @@ brw_emit_query_begin(struct brw_context *brw)
OUT_BATCH(0);
ADVANCE_BATCH();
- foreach(query, &brw->query.active_head) {
- if (query->bo != brw->query.bo) {
- if (query->bo != NULL)
- brw_queryobj_get_results(query);
- dri_bo_reference(brw->query.bo);
- query->bo = brw->query.bo;
- query->first_index = brw->query.index;
- }
- query->last_index = brw->query.index;
+ if (query->bo != brw->query.bo) {
+ if (query->bo != NULL)
+ brw_queryobj_get_results(query);
+ dri_bo_reference(brw->query.bo);
+ query->bo = brw->query.bo;
+ query->first_index = brw->query.index;
}
+ query->last_index = brw->query.index;
brw->query.active = GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index 57d1c29ade..b0dd1ff3af 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -46,6 +46,7 @@
static void compile_sf_prog( struct brw_context *brw,
struct brw_sf_prog_key *key )
{
+ struct intel_context *intel = &brw->intel;
struct brw_sf_compile c;
const GLuint *program;
GLuint program_size;
@@ -107,6 +108,14 @@ static void compile_sf_prog( struct brw_context *brw,
*/
program = brw_get_program(&c.func, &program_size);
+ if (INTEL_DEBUG & DEBUG_SF) {
+ printf("sf:\n");
+ for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
+ brw_disasm(stdout, &((struct brw_instruction *)program)[i],
+ intel->gen);
+ printf("\n");
+ }
+
/* Upload
*/
dri_bo_unreference(brw->sf.prog_bo);
@@ -154,6 +163,7 @@ static void upload_sf_prog(struct brw_context *brw)
break;
}
+ /* _NEW_POINT */
key.do_point_sprite = ctx->Point.PointSprite;
if (key.do_point_sprite) {
int i;
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 44b085e214..57ffb2d89e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -48,6 +48,7 @@ static void do_vs_prog( struct brw_context *brw,
const GLuint *program;
struct brw_vs_compile c;
int aux_size;
+ int i;
memset(&c, 0, sizeof(c));
memcpy(&c.key, key, sizeof(*key));
@@ -63,6 +64,17 @@ static void do_vs_prog( struct brw_context *brw,
c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
}
+ /* Put dummy slots into the VUE for the SF to put the replaced
+ * point sprite coords in. We shouldn't need these dummy slots,
+ * which take up precious URB space, but it would mean that the SF
+ * doesn't get nice aligned pairs of input coords into output
+ * coords, which would be a pain to handle.
+ */
+ for (i = 0; i < 8; i++) {
+ if (c.key.point_coord_replace & (1 << i))
+ c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i);
+ }
+
if (0)
_mesa_print_program(&c.vp->program.Base);
@@ -106,6 +118,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
struct brw_vs_prog_key key;
struct brw_vertex_program *vp =
(struct brw_vertex_program *)brw->vertex_program;
+ int i;
memset(&key, 0, sizeof(key));
@@ -117,6 +130,14 @@ static void brw_upload_vs_prog(struct brw_context *brw)
key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
+ /* _NEW_POINT */
+ if (ctx->Point.PointSprite) {
+ for (i = 0; i < 8; i++) {
+ if (ctx->Point.CoordReplace[i])
+ key.point_coord_replace |= (1 << i);
+ }
+ }
+
/* Make an early check for the key.
*/
dri_bo_unreference(brw->vs.prog_bo);
@@ -135,7 +156,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
*/
const struct brw_tracked_state brw_vs_prog = {
.dirty = {
- .mesa = _NEW_TRANSFORM | _NEW_POLYGON,
+ .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT,
.brw = BRW_NEW_VERTEX_PROGRAM,
.cache = 0
},
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 95e0501b1e..6493744f3e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -43,7 +43,7 @@ struct brw_vs_prog_key {
GLuint program_string_id;
GLuint nr_userclip:4;
GLuint copy_edgeflag:1;
- GLuint pad:26;
+ GLuint point_coord_replace:8;
};
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index dc6ab81c4a..0b44deeb63 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1882,7 +1882,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
printf("vs-native:\n");
for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stderr, &p->store[i]);
+ brw_disasm(stderr, &p->store[i], intel->gen);
printf("\n");
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 375e795391..323cfac8fa 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1717,7 +1717,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
printf("wm-native:\n");
for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stderr, &p->store[i]);
+ brw_disasm(stderr, &p->store[i], p->brw->intel.gen);
printf("\n");
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 88b885cb94..fe3c89b721 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -2111,7 +2111,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
if (INTEL_DEBUG & DEBUG_WM) {
printf("wm-native:\n");
for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stderr, &p->store[i]);
+ brw_disasm(stderr, &p->store[i], intel->gen);
printf("\n");
}
}
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 9768b0deee..ca8e344836 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -130,8 +130,7 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
struct intel_context *intel = batch->intel;
GLuint used = batch->ptr - batch->map;
- if (!intel->using_dri2_swapbuffers &&
- intel->first_post_swapbuffers_batch == NULL) {
+ if (intel->first_post_swapbuffers_batch == NULL) {
intel->first_post_swapbuffers_batch = intel->batch->buf;
drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
}
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 7d9f302dca..a590c799ad 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -353,6 +353,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
OUT_BATCH(clear_val);
ADVANCE_BATCH();
+ if (intel->always_flush_cache)
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+
if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL)
mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
else
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 0369942b39..05d4998654 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -440,6 +440,28 @@ intel_prepare_render(struct intel_context *intel)
*/
if (intel->is_front_buffer_rendering)
intel->front_buffer_dirty = GL_TRUE;
+
+ /* Wait for the swapbuffers before the one we just emitted, so we
+ * don't get too many swaps outstanding for apps that are GPU-heavy
+ * but not CPU-heavy.
+ *
+ * We're using intelDRI2Flush (called from the loader before
+ * swapbuffer) and glFlush (for front buffer rendering) as the
+ * indicator that a frame is done and then throttle when we get
+ * here as we prepare to render the next frame. At this point for
+ * round trips for swap/copy and getting new buffers are done and
+ * we'll spend less time waiting on the GPU.
+ *
+ * Unfortunately, we don't have a handle to the batch containing
+ * the swap, and getting our hands on that doesn't seem worth it,
+ * so we just use the first batch we emitted after the last swap.
+ */
+ if (intel->need_throttle && intel->first_post_swapbuffers_batch) {
+ drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
+ drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
+ intel->first_post_swapbuffers_batch = NULL;
+ intel->need_throttle = GL_FALSE;
+ }
}
static void
@@ -451,8 +473,7 @@ intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
if (intel->saved_viewport)
intel->saved_viewport(ctx, x, y, w, h);
- if (!intel->using_dri2_swapbuffers &&
- !intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) {
+ if (!intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) {
dri2InvalidateDrawable(driContext->driDrawablePriv);
dri2InvalidateDrawable(driContext->driReadablePriv);
}
@@ -471,12 +492,12 @@ static const struct dri_debug_control debug_control[] = {
{ "buf", DEBUG_BUFMGR},
{ "reg", DEBUG_REGION},
{ "fbo", DEBUG_FBO},
- { "lock", DEBUG_LOCK},
+ { "gs", DEBUG_GS},
{ "sync", DEBUG_SYNC},
{ "prim", DEBUG_PRIMS },
{ "vert", DEBUG_VERTS },
{ "dri", DEBUG_DRI },
- { "dma", DEBUG_DMA },
+ { "sf", DEBUG_SF },
{ "san", DEBUG_SANITY },
{ "sleep", DEBUG_SLEEP },
{ "stats", DEBUG_STATS },
@@ -487,6 +508,7 @@ static const struct dri_debug_control debug_control[] = {
{ "glsl_force", DEBUG_GLSL_FORCE },
{ "urb", DEBUG_URB },
{ "vs", DEBUG_VS },
+ { "clip", DEBUG_CLIP },
{ NULL, 0 }
};
@@ -529,27 +551,8 @@ intel_glFlush(GLcontext *ctx)
struct intel_context *intel = intel_context(ctx);
intel_flush(ctx);
-
intel_flush_front(ctx);
-
- /* We're using glFlush as an indicator that a frame is done, which is
- * what DRI2 does before calling SwapBuffers (and means we should catch
- * people doing front-buffer rendering, as well)..
- *
- * Wait for the swapbuffers before the one we just emitted, so we don't
- * get too many swaps outstanding for apps that are GPU-heavy but not
- * CPU-heavy.
- *
- * Unfortunately, we don't have a handle to the batch containing the swap,
- * and getting our hands on that doesn't seem worth it, so we just us the
- * first batch we emitted after the last swap.
- */
- if (!intel->using_dri2_swapbuffers &&
- intel->first_post_swapbuffers_batch != NULL) {
- drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
- drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
- intel->first_post_swapbuffers_batch = NULL;
- }
+ intel->need_throttle = GL_TRUE;
}
void
@@ -905,6 +908,12 @@ intelMakeCurrent(__DRIcontext * driContextPriv,
driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
intel_prepare_render(intel);
_mesa_make_current(&intel->ctx, fb, readFb);
+
+ /* We do this in intel_prepare_render() too, but intel->ctx.DrawBuffer
+ * is NULL at that point. We can't call _mesa_make_current()
+ * first, since we need the buffer size for the initial
+ * viewport. So just call intel_draw_buffer() again here. */
+ intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
}
else {
_mesa_make_current(NULL, NULL, NULL);
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index db244e5872..04d5fc92a2 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -150,8 +150,8 @@ struct intel_context
struct intel_batchbuffer *batch;
drm_intel_bo *first_post_swapbuffers_batch;
+ GLboolean need_throttle;
GLboolean no_batch_wrap;
- GLboolean using_dri2_swapbuffers;
struct
{
@@ -326,12 +326,12 @@ extern int INTEL_DEBUG;
#define DEBUG_BUFMGR 0x200
#define DEBUG_REGION 0x400
#define DEBUG_FBO 0x800
-#define DEBUG_LOCK 0x1000
+#define DEBUG_GS 0x1000
#define DEBUG_SYNC 0x2000
#define DEBUG_PRIMS 0x4000
#define DEBUG_VERTS 0x8000
#define DEBUG_DRI 0x10000
-#define DEBUG_DMA 0x20000
+#define DEBUG_SF 0x20000
#define DEBUG_SANITY 0x40000
#define DEBUG_SLEEP 0x80000
#define DEBUG_STATS 0x100000
@@ -341,6 +341,7 @@ extern int INTEL_DEBUG;
#define DEBUG_URB 0x1000000
#define DEBUG_VS 0x2000000
#define DEBUG_GLSL_FORCE 0x4000000
+#define DEBUG_CLIP 0x8000000
#define DBG(...) do { \
if (INTEL_DEBUG & FILE_DEBUG_FLAG) \
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 3aed253e24..15a465c640 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -110,23 +110,16 @@ intelDRI2Flush(__DRIdrawable *drawable)
if (intel->gen < 4)
INTEL_FIREVERTICES(intel);
+ intel->need_throttle = GL_TRUE;
+
if (intel->batch->map != intel->batch->ptr)
intel_batchbuffer_flush(intel->batch);
}
-static void
-intelDRI2Invalidate(__DRIdrawable *drawable)
-{
- struct intel_context *intel = drawable->driContextPriv->driverPrivate;
-
- intel->using_dri2_swapbuffers = GL_TRUE;
- dri2InvalidateDrawable(drawable);
-}
-
static const struct __DRI2flushExtensionRec intelFlushExtension = {
{ __DRI2_FLUSH, __DRI2_FLUSH_VERSION },
intelDRI2Flush,
- intelDRI2Invalidate,
+ dri2InvalidateDrawable,
};
static __DRIimage *
diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c
index 7be5231eae..610a169beb 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_format.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_format.c
@@ -1,7 +1,7 @@
#include "intel_context.h"
#include "intel_tex.h"
#include "main/enums.h"
-
+#include "main/formats.h"
/**
* Choose hardware texture format given the user's glTexImage parameters.
@@ -208,22 +208,11 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
int intel_compressed_num_bytes(GLuint mesaFormat)
{
- int bytes = 0;
- switch(mesaFormat) {
-
- case MESA_FORMAT_RGB_FXT1:
- case MESA_FORMAT_RGBA_FXT1:
- case MESA_FORMAT_RGB_DXT1:
- case MESA_FORMAT_RGBA_DXT1:
- bytes = 2;
- break;
-
- case MESA_FORMAT_RGBA_DXT3:
- case MESA_FORMAT_RGBA_DXT5:
- bytes = 4;
- default:
- break;
- }
-
- return bytes;
+ GLuint bw, bh;
+ GLuint block_size;
+
+ block_size = _mesa_get_format_bytes(mesaFormat);
+ _mesa_get_format_block_size(mesaFormat, &bw, &bh);
+
+ return block_size / bh;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index e432afc3d4..34d22b4559 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -21,6 +21,7 @@ C_SOURCES = \
radeon_dataflow.c \
radeon_dataflow_deadcode.c \
radeon_dataflow_swizzles.c \
+ radeon_optimize.c \
r3xx_fragprog.c \
r300_fragprog.c \
r300_fragprog_swizzle.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index cfa48a59e3..5d5de2f1b2 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -56,7 +56,8 @@ static const struct swizzle_data native_swizzles[] = {
{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1},
{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1},
{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0},
- {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0}
+ {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0},
+ {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0}
};
static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
@@ -221,6 +222,7 @@ unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
+ case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF;
default: return R300_ALU_ARGA_ONE;
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 25bf373b6f..3e88ccbc46 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -152,6 +152,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "after deadcode");
+ rc_optimize(&c->Base);
+
+ debug_program_log(c, "after dataflow optimize");
+
rc_dataflow_swizzles(&c->Base);
if (c->Base.Error)
return;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
index 16e2f3a218..0e6c62541f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -30,7 +30,7 @@
#include "radeon_program.h"
-static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata)
{
struct rc_sub_instruction * inst = &fullinst->U.I;
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
@@ -46,18 +46,15 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb,
refmask &= RC_MASK_XYZW;
- for(unsigned int chan = 0; chan < 4; ++chan) {
- if (GET_BIT(refmask, chan)) {
- cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan);
- }
- }
+ if (refmask)
+ cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask);
if (refmask && inst->SrcReg[src].RelAddr)
cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
}
}
-static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
struct rc_pair_instruction * inst = &fullinst->U.P;
unsigned int refmasks[3] = { 0, 0, 0 };
@@ -84,27 +81,23 @@ static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, v
}
for(unsigned int src = 0; src < 3; ++src) {
- if (inst->RGB.Src[src].Used) {
- for(unsigned int chan = 0; chan < 3; ++chan) {
- if (GET_BIT(refmasks[src], chan))
- cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan);
- }
- }
+ if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ))
+ cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index,
+ refmasks[src] & RC_MASK_XYZ);
- if (inst->Alpha.Src[src].Used) {
- if (GET_BIT(refmasks[src], 3))
- cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3);
- }
+ if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W))
+ cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W);
}
}
/**
- * Calls a callback function for all sourced register channels.
+ * Calls a callback function for all register reads.
*
- * This is conservative, i.e. channels may be called multiple times,
- * and the writemask of the instruction is not taken into account.
+ * This is conservative, i.e. if the same register is referenced multiple times,
+ * the callback may also be called multiple times.
+ * Also, the writemask of the instruction is not taken into account.
*/
-void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata)
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
{
if (inst->Type == RC_INSTRUCTION_NORMAL) {
reads_normal(inst, cb, userdata);
@@ -115,44 +108,39 @@ void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void *
-static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
struct rc_sub_instruction * inst = &fullinst->U.I;
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
- if (opcode->HasDstReg) {
- for(unsigned int chan = 0; chan < 4; ++chan) {
- if (GET_BIT(inst->DstReg.WriteMask, chan))
- cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan);
- }
- }
+ if (opcode->HasDstReg && inst->DstReg.WriteMask)
+ cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask);
if (inst->WriteALUResult)
- cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
+ cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
}
-static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
struct rc_pair_instruction * inst = &fullinst->U.P;
- for(unsigned int chan = 0; chan < 3; ++chan) {
- if (GET_BIT(inst->RGB.WriteMask, chan))
- cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan);
- }
+ if (inst->RGB.WriteMask)
+ cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask);
if (inst->Alpha.WriteMask)
- cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, 3);
+ cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W);
if (inst->WriteALUResult)
- cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
+ cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
}
/**
- * Calls a callback function for all written register channels.
+ * Calls a callback function for all register writes in the instruction,
+ * reporting writemasks to the callback function.
*
* \warning Does not report output registers for paired instructions!
*/
-void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata)
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
{
if (inst->Type == RC_INSTRUCTION_NORMAL) {
writes_normal(inst, cb, userdata);
@@ -162,6 +150,48 @@ void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void *
}
+/**
+ * Closure passed as userdata to mask_to_chan_cb: the per-channel callback
+ * to invoke and the opaque pointer handed to it as its first argument.
+ */
+struct mask_to_chan_data {
+ void * UserData;
+ rc_read_write_chan_fn Fn;
+};
+
+/**
+ * Adapter with the mask-callback signature: calls the wrapped per-channel
+ * callback once for every bit set in \p mask among channels 0..3, forwarding
+ * the instruction, register file and register index unchanged.
+ *
+ * \param data points to the struct mask_to_chan_data describing the callback
+ */
+static void mask_to_chan_cb(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	const struct mask_to_chan_data * adapter = data;
+	unsigned int chan;
+
+	for(chan = 0; chan < 4; ++chan) {
+		if (!GET_BIT(mask, chan))
+			continue;
+
+		adapter->Fn(adapter->UserData, inst, file, index, chan);
+	}
+}
+
+/**
+ * Per-channel flavour of rc_for_all_reads_mask: \p cb is invoked once for
+ * each channel contained in every read mask that rc_for_all_reads_mask
+ * reports for \p inst.
+ *
+ * This is conservative: the same channel may be reported more than once,
+ * and the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+	/* Wrap the caller's callback so the mask iterator can drive it. */
+	struct mask_to_chan_data adapter = { .UserData = userdata, .Fn = cb };
+
+	rc_for_all_reads_mask(inst, &mask_to_chan_cb, &adapter);
+}
+
+/**
+ * Per-channel flavour of rc_for_all_writes_mask: \p cb is invoked once for
+ * each channel contained in every write mask that rc_for_all_writes_mask
+ * reports for \p inst.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+	/* Wrap the caller's callback so the mask iterator can drive it. */
+	struct mask_to_chan_data adapter = { .UserData = userdata, .Fn = cb };
+
+	rc_for_all_writes_mask(inst, &mask_to_chan_cb, &adapter);
+}
+
static void remap_normal_instruction(struct rc_instruction * fullinst,
rc_remap_register_fn cb, void * userdata)
{
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
index 62cda20eea..60a6e192a9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -39,10 +39,15 @@ struct rc_swizzle_caps;
* Help analyze and modify the register accesses of instructions.
*/
/*@{*/
-typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst,
+typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int chan);
-void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
-void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+
+typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask);
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file * pfile, unsigned int * pindex);
@@ -60,4 +65,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
void rc_dataflow_swizzles(struct radeon_compiler * c);
/*@}*/
+void rc_optimize(struct radeon_compiler * c);
+
#endif /* RADEON_DATAFLOW_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
index d889612f4f..863654cf68 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
@@ -150,7 +150,7 @@ static void allocate_and_insert_proxies(struct emulate_branch_state * s,
sap.Proxies = proxies;
for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
- rc_for_all_writes(inst, scan_write, &sap);
+ rc_for_all_writes_mask(inst, scan_write, &sap);
rc_remap_registers(inst, remap_proxy_function, &sap);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
new file mode 100644
index 0000000000..21d7210888
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+
+/**
+ * Compose two source operands into one.
+ *
+ * \p outer is an operand that reads a register whose contents were produced
+ * by reading \p inner (peephole() passes a reader's operand and the MOV's
+ * source). The result addresses inner's register directly:
+ *  - File, Index and RelAddr are taken from \p inner;
+ *  - the swizzle is combine_swizzles(inner.Swizzle, outer.Swizzle);
+ *  - if \p outer takes the absolute value, inner's sign information is
+ *    irrelevant and outer's Negate is used as is;
+ *  - otherwise inner's Abs is kept and inner's per-channel negate bits are
+ *    routed through outer's swizzle, then toggled by outer's Negate.
+ */
+static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
+{
+	struct rc_src_register result;
+	unsigned int routed_negate = 0;
+
+	result.File = inner.File;
+	result.Index = inner.Index;
+	result.RelAddr = inner.RelAddr;
+	result.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
+
+	if (outer.Abs) {
+		result.Abs = 1;
+		result.Negate = outer.Negate;
+		return result;
+	}
+
+	result.Abs = inner.Abs;
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		const unsigned int selected = GET_SWZ(outer.Swizzle, chan);
+
+		/* selectors >= 4 do not pick a register channel, so there is
+		 * no inner negate bit to carry over */
+		if (selected >= 4)
+			continue;
+
+		routed_negate |= GET_BIT(inner.Negate, selected) << chan;
+	}
+	result.Negate = routed_negate ^ outer.Negate;
+
+	return result;
+}
+
+/**
+ * Working state for one peephole() attempt; handed to peephole_scan_read
+ * and peephole_scan_write through their userdata pointer.
+ */
+struct peephole_state {
+ struct radeon_compiler * C; /**< compiler passed to peephole() */
+ struct rc_instruction * Mov; /**< the MOV that peephole() tries to remove */
+ unsigned int Conflict:1; /**< set by the scans when the MOV has to stay */
+
+ /** Whether Mov's source has been clobbered */
+ unsigned int SourceClobbered:1;
+
+ /** Which components of Mov's destination register are still from that Mov? */
+ unsigned int MovMask:4;
+
+ /** Which components of Mov's destination register are clearly *not* from that Mov */
+ unsigned int DefinedMask:4;
+
+ /** Which components of Mov's source register are sourced */
+ unsigned int SourcedMask:4;
+
+ /** Branch depth beyond Mov; negative value indicates we left the Mov's block */
+ int BranchDepth;
+};
+
+/**
+ * Read callback for the first peephole pass.
+ *
+ * Looks at one read of the MOV's destination temporary and raises
+ * peephole_state::Conflict when that read could not be redirected to the
+ * MOV's source. Reads of any other register are ignored.
+ *
+ * \param data  the struct peephole_state of the current attempt
+ * \param inst  instruction performing the read
+ * \param mask  channels of the register that are read
+ */
+static void peephole_scan_read(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	struct peephole_state * s = data;
+	const struct rc_sub_instruction * mov = &s->Mov->U.I;
+
+	if (file != RC_FILE_TEMPORARY)
+		return;
+	if (index != mov->DstReg.Index)
+		return;
+
+	/* These instructions cannot read from the constants file.
+	 * see radeonTransformTEX()
+	 */
+	switch(inst->U.I.Opcode) {
+	case RC_OPCODE_TEX:
+	case RC_OPCODE_TXB:
+	case RC_OPCODE_TXP:
+	case RC_OPCODE_KIL:
+		if (mov->SrcReg[0].File != RC_FILE_TEMPORARY &&
+		    mov->SrcReg[0].File != RC_FILE_INPUT) {
+			s->Conflict = 1;
+			return;
+		}
+		break;
+	default:
+		break;
+	}
+
+	const unsigned int not_from_mov = mask & ~(unsigned int)s->MovMask;
+	const unsigned int not_defined = mask & ~(unsigned int)s->DefinedMask;
+
+	if (not_from_mov == 0) {
+		/* every channel read is still the MOV's value: redirecting is
+		 * only possible while the MOV's source is intact */
+		if (s->SourceClobbered)
+			s->Conflict = 1;
+		return;
+	}
+
+	/* A read entirely of channels written by other instructions is okay.
+	 * Any other combination is not well-defined without the MOV, so the
+	 * MOV cannot be removed. */
+	if (not_defined != 0)
+		s->Conflict = 1;
+}
+
+/**
+ * Write callback for the first peephole pass.
+ *
+ * Tracks which channels of the MOV's destination are overwritten by later
+ * instructions and whether the MOV's source (or, for a relatively addressed
+ * source, the address register file) gets written. Does nothing once
+ * BranchDepth has gone negative.
+ *
+ * \param data  the struct peephole_state of the current attempt
+ * \param mask  channels of the register that are written
+ */
+static void peephole_scan_write(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	struct peephole_state * s = data;
+	const struct rc_sub_instruction * mov = &s->Mov->U.I;
+
+	if (s->BranchDepth < 0)
+		return;
+
+	if (file == mov->DstReg.File && index == mov->DstReg.Index) {
+		/* these channels no longer hold the MOV's value ... */
+		s->MovMask &= ~mask;
+		/* ... and only count as defined when the write happens at
+		 * the MOV's own branch depth */
+		s->DefinedMask = (s->BranchDepth == 0)
+			? (s->DefinedMask | mask)
+			: (s->DefinedMask & ~mask);
+	}
+
+	if (file != mov->SrcReg[0].File || index != mov->SrcReg[0].Index) {
+		/* not the source register itself; a write to the address
+		 * file still invalidates a relatively addressed source */
+		if (mov->SrcReg[0].RelAddr && file == RC_FILE_ADDRESS)
+			s->SourceClobbered = 1;
+		return;
+	}
+
+	if (mask & s->SourcedMask)
+		s->SourceClobbered = 1;
+}
+
+/**
+ * Try to eliminate \p inst_mov by making the later instructions that read
+ * its destination read its source operand instead.
+ *
+ * Pass 1 walks the instructions after the MOV and gives up (leaving the
+ * program untouched) as soon as a scan callback reports a conflict.
+ * Pass 2 rewrites the matching source operands with chain_srcregs() and
+ * the MOV is then removed from the program.
+ */
+static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov)
+{
+ struct peephole_state s;
+
+ /* Only MOVs into a temporary that do not also write the ALU result are candidates */
+ if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult)
+ return;
+
+ /* NOTE(review): memset needs <string.h>; it is not included directly by this file - presumably it arrives through one of the radeon headers, confirm */
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+ s.Mov = inst_mov;
+ s.MovMask = inst_mov->U.I.DstReg.WriteMask;
+ s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
+
+ /* Collect the source channels the MOV's swizzle selects; the mask with
+  * RC_MASK_XYZW drops the bits produced by selectors that are not
+  * register channels */
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
+ s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
+ }
+
+ /* 1st pass: Check whether all subsequent readers can be changed */
+ for(struct rc_instruction * inst = inst_mov->Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ /* reads are scanned before writes, so an instruction that reads and
+  * overwrites the same register is judged on the state before its own write */
+ rc_for_all_reads_mask(inst, peephole_scan_read, &s);
+ rc_for_all_writes_mask(inst, peephole_scan_write, &s);
+ if (s.Conflict)
+ return;
+
+ if (s.BranchDepth >= 0) {
+ if (inst->U.I.Opcode == RC_OPCODE_IF) {
+ s.BranchDepth++;
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ s.BranchDepth--;
+ if (s.BranchDepth < 0) {
+ /* left the block containing the MOV: channels still owned by
+  * the MOV stop counting as defined or as MOV-provided, so
+  * any later read of them is reported as a conflict */
+ s.DefinedMask &= ~s.MovMask;
+ s.MovMask = 0;
+ }
+ }
+ }
+ }
+
+ if (s.Conflict)
+ return;
+
+ /* 2nd pass: We can satisfy all readers, so switch them over all at once */
+ s.MovMask = inst_mov->U.I.DstReg.WriteMask;
+ s.BranchDepth = 0;
+
+ for(struct rc_instruction * inst = inst_mov->Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
+ unsigned int refmask = 0;
+
+ /* channels of the MOV's destination this operand's swizzle selects.
+  * NOTE(review): built from all four swizzle slots, whereas pass 1
+  * used the masks reported by rc_for_all_reads_mask - confirm the
+  * two always agree */
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+ refmask |= (1 << swz) & RC_MASK_XYZW;
+ }
+
+ /* rewrite only operands whose channels all still come from the MOV */
+ if ((refmask & s.MovMask) == refmask)
+ inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
+ }
+ }
+
+ /* channels overwritten here no longer carry the MOV's value for later readers */
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
+ inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
+ s.MovMask &= ~inst->U.I.DstReg.WriteMask;
+ }
+ }
+
+ if (s.BranchDepth >= 0) {
+ if (inst->U.I.Opcode == RC_OPCODE_IF) {
+ s.BranchDepth++;
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ s.BranchDepth--;
+ if (s.BranchDepth < 0)
+ break; /* no more readers after this point */
+ }
+ }
+ }
+
+ /* Finally, remove the original MOV instruction */
+ rc_remove_instruction(inst_mov);
+}
+
+/**
+ * Check if a source register is actually always the same
+ * swizzle constant.
+ *
+ * A source qualifies when its file is RC_FILE_NONE and every channel that is
+ * not RC_SWIZZLE_UNUSED selects the same constant swizzle with the same
+ * negate bit.
+ *
+ * \param src      source operand to inspect
+ * \param pswz     receives the common swizzle constant; 0 when the result is 0
+ * \param pnegate  receives the common negate bit; 0 when the result is 0
+ * \return 1 if \p src is a uniform constant, 0 otherwise. A source without
+ *         any used channel is reported as 0, so callers never act on
+ *         outputs that were not derived from a real channel.
+ */
+static int is_src_uniform_constant(struct rc_src_register src,
+		rc_swizzle * pswz, unsigned int * pnegate)
+{
+	int have_used = 0;
+
+	/* Give both outputs a defined value on every path. */
+	*pswz = 0;
+	*pnegate = 0;
+
+	if (src.File != RC_FILE_NONE)
+		return 0;
+
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		unsigned int swz = GET_SWZ(src.Swizzle, chan);
+		if (swz < 4) {
+			/* selects a register channel, not a constant */
+			*pswz = 0;
+			*pnegate = 0;
+			return 0;
+		}
+		if (swz == RC_SWIZZLE_UNUSED)
+			continue;
+
+		if (!have_used) {
+			*pswz = swz;
+			*pnegate = GET_BIT(src.Negate, chan);
+			have_used = 1;
+		} else if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
+			/* channels disagree on the constant or on its sign */
+			*pswz = 0;
+			*pnegate = 0;
+			return 0;
+		}
+	}
+
+	/* With no used channel there is no constant to report; the outputs
+	 * would otherwise never have been written before the caller reads
+	 * them. */
+	return have_used;
+}
+
+
+/**
+ * Simplify a MAD whose operands are uniform 0/1 swizzle constants.
+ *
+ *  - addend (SrcReg[2]) is zero: the instruction becomes a MUL;
+ *  - a factor is one: it becomes an ADD of the other factor and the addend,
+ *    with the other factor's negate toggled if the one was negated;
+ *  - a factor is zero: it becomes a MOV of the addend.
+ *
+ * The addend is examined first, then SrcReg[1], then SrcReg[0]; the first
+ * rule that applies wins.
+ */
+static void constant_folding_mad(struct rc_instruction * inst)
+{
+	struct rc_sub_instruction * mad = &inst->U.I;
+	rc_swizzle swz;
+	unsigned int negate;
+
+	if (is_src_uniform_constant(mad->SrcReg[2], &swz, &negate) &&
+	    swz == RC_SWIZZLE_ZERO) {
+		mad->Opcode = RC_OPCODE_MUL;
+		return;
+	}
+
+	for(unsigned int step = 0; step < 2; ++step) {
+		/* visits SrcReg[1] first, then SrcReg[0] */
+		const unsigned int factor = 1 - step;
+		const unsigned int other = step;
+
+		if (!is_src_uniform_constant(mad->SrcReg[factor], &swz, &negate))
+			continue;
+
+		if (swz == RC_SWIZZLE_ONE) {
+			mad->Opcode = RC_OPCODE_ADD;
+			if (negate)
+				mad->SrcReg[other].Negate ^= RC_MASK_XYZW;
+			/* the addend takes the slot of the constant factor */
+			mad->SrcReg[factor] = mad->SrcReg[2];
+			return;
+		}
+
+		if (swz == RC_SWIZZLE_ZERO) {
+			mad->Opcode = RC_OPCODE_MOV;
+			mad->SrcReg[0] = mad->SrcReg[2];
+			return;
+		}
+	}
+}
+
+/**
+ * Simplify a MUL with a uniform 0/1 swizzle constant operand into a MOV.
+ *
+ *  - one operand is one: MOV of the other operand, with its negate toggled
+ *    if the one was negated;
+ *  - one operand is zero: MOV whose source swizzle is RC_SWIZZLE_0000.
+ *
+ * SrcReg[0] is examined before SrcReg[1].
+ */
+static void constant_folding_mul(struct rc_instruction * inst)
+{
+	struct rc_sub_instruction * mul = &inst->U.I;
+	rc_swizzle swz;
+	unsigned int negate;
+
+	if (is_src_uniform_constant(mul->SrcReg[0], &swz, &negate)) {
+		switch(swz) {
+		case RC_SWIZZLE_ONE:
+			mul->Opcode = RC_OPCODE_MOV;
+			mul->SrcReg[0] = mul->SrcReg[1];
+			if (negate)
+				mul->SrcReg[0].Negate ^= RC_MASK_XYZW;
+			return;
+		case RC_SWIZZLE_ZERO:
+			mul->Opcode = RC_OPCODE_MOV;
+			mul->SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+			return;
+		default:
+			break;
+		}
+	}
+
+	if (is_src_uniform_constant(mul->SrcReg[1], &swz, &negate)) {
+		switch(swz) {
+		case RC_SWIZZLE_ONE:
+			/* SrcReg[0] already holds the surviving operand */
+			mul->Opcode = RC_OPCODE_MOV;
+			if (negate)
+				mul->SrcReg[0].Negate ^= RC_MASK_XYZW;
+			return;
+		case RC_SWIZZLE_ZERO:
+			mul->Opcode = RC_OPCODE_MOV;
+			mul->SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+			return;
+		default:
+			break;
+		}
+	}
+}
+
+/**
+ * Simplify an ADD with a uniform zero operand into a MOV of the other
+ * operand. SrcReg[0] is examined before SrcReg[1].
+ */
+static void constant_folding_add(struct rc_instruction * inst)
+{
+	struct rc_sub_instruction * add = &inst->U.I;
+	rc_swizzle swz;
+	unsigned int negate;
+
+	if (is_src_uniform_constant(add->SrcReg[0], &swz, &negate) &&
+	    swz == RC_SWIZZLE_ZERO) {
+		/* 0 + b: move b into the MOV's only source slot */
+		add->SrcReg[0] = add->SrcReg[1];
+		add->Opcode = RC_OPCODE_MOV;
+		return;
+	}
+
+	if (is_src_uniform_constant(add->SrcReg[1], &swz, &negate) &&
+	    swz == RC_SWIZZLE_ZERO) {
+		/* a + 0: SrcReg[0] is already the value to move */
+		add->Opcode = RC_OPCODE_MOV;
+	}
+}
+
+
+/**
+ * Replace 0.0, 1.0 and 0.5 immediate constants by their
+ * respective swizzles. Simplify instructions like ADD dst, src, 0;
+ *
+ * \param c    compiler owning the constant table and the swizzle capabilities
+ * \param inst instruction whose source operands are rewritten in place
+ */
+static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ /* only directly addressed, in-range entries of the constant file qualify */
+ if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
+ inst->U.I.SrcReg[src].RelAddr ||
+ inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
+ continue;
+
+ struct rc_constant * constant =
+ &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
+
+ if (constant->Type != RC_CONSTANT_IMMEDIATE)
+ continue;
+
+ /* build the rewritten operand on a copy; it is committed only at the end */
+ struct rc_src_register newsrc = inst->U.I.SrcReg[src];
+ int have_real_reference = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
+ /* selectors >= 4 do not read a component of the immediate */
+ if (swz >= 4)
+ continue;
+
+ unsigned int newswz;
+ float imm = constant->u.Immediate[swz];
+ /* match on the magnitude; the sign is handled through Negate below */
+ float baseimm = imm;
+ if (imm < 0.0)
+ baseimm = -baseimm;
+
+ if (baseimm == 0.0) {
+ newswz = RC_SWIZZLE_ZERO;
+ } else if (baseimm == 1.0) {
+ newswz = RC_SWIZZLE_ONE;
+ } else if (baseimm == 0.5) {
+ newswz = RC_SWIZZLE_HALF;
+ } else {
+ /* this channel still needs the value stored in the constant */
+ have_real_reference = 1;
+ continue;
+ }
+
+ SET_SWZ(newsrc.Swizzle, chan, newswz);
+ /* a negative immediate becomes the positive swizzle constant with the
+  * channel's negate toggled; skipped when Abs is set */
+ if (imm < 0.0 && !newsrc.Abs)
+ newsrc.Negate ^= 1 << chan;
+ }
+
+ /* no channel reads the stored constant any more: drop the register reference */
+ if (!have_real_reference) {
+ newsrc.File = RC_FILE_NONE;
+ newsrc.Index = 0;
+ }
+
+ /* don't make the swizzle worse */
+ if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
+ c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+ continue;
+
+ inst->U.I.SrcReg[src] = newsrc;
+ }
+
+ /* Simplify instructions based on constants */
+ if (inst->U.I.Opcode == RC_OPCODE_MAD)
+ constant_folding_mad(inst);
+
+ /* note: MAD can simplify to MUL or ADD */
+ if (inst->U.I.Opcode == RC_OPCODE_MUL)
+ constant_folding_mul(inst);
+ else if (inst->U.I.Opcode == RC_OPCODE_ADD)
+ constant_folding_add(inst);
+}
+
+/**
+ * Run the optimizations of this file over the whole program: constant
+ * folding on every instruction, followed by the MOV peephole on every
+ * instruction that is a MOV after folding.
+ */
+void rc_optimize(struct radeon_compiler * c)
+{
+	struct rc_instruction * const end = &c->Program.Instructions;
+	struct rc_instruction * next;
+
+	for(struct rc_instruction * inst = end->Next; inst != end; inst = next) {
+		/* fetch the successor first: peephole() may remove inst
+		 * from the program */
+		next = inst->Next;
+
+		constant_folding(c, inst);
+
+		if (inst->U.I.Opcode == RC_OPCODE_MOV)
+			peephole(c, inst);
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index fdfee86701..8a912da461 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -159,7 +159,7 @@ static int try_add_live_intervals(struct regalloc_state * s,
}
static void scan_callback(void * data, struct rc_instruction * inst,
- rc_register_file file, unsigned int index, unsigned int chan)
+ rc_register_file file, unsigned int index, unsigned int mask)
{
struct regalloc_state * s = data;
struct register_info * reg;
@@ -191,8 +191,8 @@ static void compute_live_intervals(struct regalloc_state * s)
for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
inst != &s->C->Program.Instructions;
inst = inst->Next) {
- rc_for_all_reads(inst, scan_callback, s);
- rc_for_all_writes(inst, scan_callback, s);
+ rc_for_all_reads_mask(inst, scan_callback, s);
+ rc_for_all_writes_mask(inst, scan_callback, s);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index df67aafe02..a279549ff8 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -448,8 +448,8 @@ static void schedule_block(struct r300_fragment_program_compiler * c,
* counter-intuitive, to account for the case where an
* instruction writes to the same register as it reads
* from. */
- rc_for_all_writes(inst, &scan_write, &s);
- rc_for_all_reads(inst, &scan_read, &s);
+ rc_for_all_writes_chan(inst, &scan_write, &s);
+ rc_for_all_reads_chan(inst, &scan_read, &s);
DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
index 42c08cd550..8336e58d55 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -119,7 +119,7 @@ int radeonTransformTEX(
struct rc_instruction * inst_cmp;
unsigned tmp_texsample = rc_find_free_temporary(c);
unsigned tmp_sum = rc_find_free_temporary(c);
- unsigned tmp_recip_w;
+ unsigned tmp_recip_w = 0;
int pass, fail, tex;
/* Save the output register. */
diff --git a/src/mesa/main/dlopen.c b/src/mesa/main/dlopen.c
index 658ac9e40c..57a33292ed 100644
--- a/src/mesa/main/dlopen.c
+++ b/src/mesa/main/dlopen.c
@@ -67,22 +67,27 @@ _mesa_dlopen(const char *libname, int flags)
GenericFunc
_mesa_dlsym(void *handle, const char *fname)
{
+ /* The symbol lookups below yield an object pointer; it is stored in
+  * u.v and read back through u.f so that no direct cast from an object
+  * pointer to a function pointer is needed. Every preprocessor branch
+  * only assigns u.v, and the function has a single return at the end.
+  */
+ union {
+ void *v;
+ GenericFunc f;
+ } u;
#if defined(__blrts)
- return (GenericFunc) NULL;
+ u.v = NULL;
#elif defined(__DJGPP__)
/* need '_' prefix on symbol names */
char fname2[1000];
fname2[0] = '_';
strncpy(fname2 + 1, fname, 998);
fname2[999] = 0;
- return (GenericFunc) dlsym(handle, fname2);
+ u.v = dlsym(handle, fname2);
#elif defined(_GNU_SOURCE)
- return (GenericFunc) dlsym(handle, fname);
+ u.v = dlsym(handle, fname);
#elif defined(__MINGW32__)
- return (GenericFunc) GetProcAddress(handle, fname);
+ u.v = (void *) GetProcAddress(handle, fname);
#else
- return (GenericFunc) NULL;
+ u.v = NULL;
#endif
+ return u.f;
}
diff --git a/src/mesa/main/drawtex.c b/src/mesa/main/drawtex.c
index 86d5b555e0..c2ad5f2386 100644
--- a/src/mesa/main/drawtex.c
+++ b/src/mesa/main/drawtex.c
@@ -25,8 +25,6 @@
#include "main/state.h"
#include "main/imports.h"
-#include "main/dispatch.h"
-
#if FEATURE_OES_draw_texture
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index d2dcddddf2..12d046b075 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -25,6 +25,7 @@
#include "glheader.h"
#include "context.h"
#include "enable.h"
+#include "enums.h"
#include "extensions.h"
#include "get.h"
#include "macros.h"
@@ -135,8 +136,8 @@ enum value_extra {
struct value_desc {
GLenum pname;
- enum value_location location : 8;
- enum value_type type : 8;
+ /* location and type are stored as plain bytes instead of 8-bit enum
+  * bit-fields; the trailing comments name the enum each byte holds. */
+ GLubyte location; /**< enum value_location */
+ GLubyte type; /**< enum value_type */
int offset;
const int *extra;
};
@@ -1678,7 +1679,8 @@ check_extra(GLcontext *ctx, const char *func, const struct value_desc *d)
}
if (total > 0 && enabled == 0) {
- _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, d->pname);
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func,
+ _mesa_lookup_enum_by_nr(d->pname));
return GL_FALSE;
}
@@ -1727,7 +1729,8 @@ find_value(const char *func, GLenum pname, void **p, union value *v)
/* If the enum isn't valid, the hash walk ends with index 0,
* which is the API mask entry at the beginning of values[]. */
if (d->type == TYPE_API_MASK) {
- _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, pname);
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func,
+ _mesa_lookup_enum_by_nr(pname));
return &error_value;
}
hash += prime_step;
@@ -2256,10 +2259,12 @@ find_value_indexed(const char *func, GLenum pname, int index, union value *v)
}
invalid_enum:
- _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=0x%x)", func, pname);
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)", func,
+ _mesa_lookup_enum_by_nr(pname));
return TYPE_INVALID;
invalid_value:
- _mesa_error(ctx, GL_INVALID_VALUE, "%s(pname=0x%x)", func, pname);
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s(pname=%s)", func,
+ _mesa_lookup_enum_by_nr(pname));
return TYPE_INVALID;
}
diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c
index e5c08a6414..ca292aa0e8 100644
--- a/src/mesa/main/querymatrix.c
+++ b/src/mesa/main/querymatrix.c
@@ -70,7 +70,8 @@ fpclassify(double x)
}
}
-#elif defined(__APPLE__) || defined(__CYGWIN__)
+#elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) || \
+ (defined(__sun) && defined(__C99FEATURES__))
/* fpclassify is available. */
diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c
index cd3dd9b38c..050ebf0270 100644
--- a/src/mesa/main/transformfeedback.c
+++ b/src/mesa/main/transformfeedback.c
@@ -190,7 +190,8 @@ _mesa_free_transform_feedback(GLcontext *ctx)
/* Delete the default feedback object */
assert(ctx->Driver.DeleteTransformFeedback);
- ctx->Driver.DeleteTransformFeedback(ctx, ctx->TransformFeedback.DefaultObject);
+ ctx->Driver.DeleteTransformFeedback(ctx,
+ ctx->TransformFeedback.DefaultObject);
ctx->TransformFeedback.CurrentObject = NULL;
}
@@ -749,7 +750,7 @@ _mesa_BindTransformFeedback(GLenum target, GLuint name)
if (ctx->TransformFeedback.CurrentObject->Active &&
!ctx->TransformFeedback.CurrentObject->Paused) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glBindTransformFeedback(transform is active, or not paused)");
+ "glBindTransformFeedback(transform is active, or not paused)");
return;
}
@@ -844,7 +845,7 @@ _mesa_ResumeTransformFeedback(void)
if (!obj->Active || !obj->Paused) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glPauseTransformFeedback(feedback not active or not paused)");
+ "glPauseTransformFeedback(feedback not active or not paused)");
return;
}
@@ -871,6 +872,11 @@ _mesa_DrawTransformFeedback(GLenum mode, GLuint name)
struct gl_transform_feedback_object *obj =
lookup_transform_feedback_object(ctx, name);
+ if (mode > GL_POLYGON) {
+ _mesa_error(ctx, GL_INVALID_ENUM,
+ "glDrawTransformFeedback(mode=0x%x)", mode);
+ return;
+ }
if (!obj) {
_mesa_error(ctx, GL_INVALID_VALUE,
"glDrawTransformFeedback(name = %u)", name);
diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c
index 505c7bb46f..f47f213ac8 100644
--- a/src/mesa/shader/shader_api.c
+++ b/src/mesa/shader/shader_api.c
@@ -1753,7 +1753,8 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program,
/* check that the sampler (tex unit index) is legal */
if (texUnit >= ctx->Const.MaxTextureImageUnits) {
_mesa_error(ctx, GL_INVALID_VALUE,
- "glUniform1(invalid sampler/tex unit index)");
+ "glUniform1(invalid sampler/tex unit index for '%s')",
+ param->Name);
return;
}
@@ -1801,7 +1802,8 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program,
/* non-array: count must be at most one; count == 0 is handled by the loop below */
if (count > 1) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glUniform(uniform is not an array)");
+ "glUniform(uniform '%s' is not an array)",
+ param->Name);
return;
}
}
@@ -1864,14 +1866,15 @@ _mesa_uniform(GLcontext *ctx, GLint location, GLsizei count,
return; /* The standard specifies this as a no-op */
if (location < -1) {
- _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(location)");
+ _mesa_error(ctx, GL_INVALID_OPERATION, "glUniform(location=%d)",
+ location);
return;
}
split_location_offset(&location, &offset);
if (location < 0 || location >= (GLint) shProg->Uniforms->NumUniforms) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glUniform(location)");
+ _mesa_error(ctx, GL_INVALID_VALUE, "glUniform(location=%d)", location);
return;
}
diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c
index a8bd5db630..e423d9d8a5 100644
--- a/src/mesa/state_tracker/st_cb_queryobj.c
+++ b/src/mesa/state_tracker/st_cb_queryobj.c
@@ -94,6 +94,9 @@ st_BeginQuery(GLcontext *ctx, struct gl_query_object *q)
case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
type = PIPE_QUERY_PRIMITIVES_EMITTED;
break;
+ case GL_TIME_ELAPSED_EXT:
+ type = PIPE_QUERY_TIME_ELAPSED;
+ break;
default:
assert(0 && "unexpected query target in st_BeginQuery()");
return;
diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c
index 12d3c99a35..b8493dab93 100644
--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -46,6 +46,7 @@
#include "st_debug.h"
#include "st_context.h"
#include "st_atom.h"
+#include "st_cb_bitmap.h"
#include "st_cb_readpixels.h"
#include "st_cb_fbo.h"
@@ -344,6 +345,8 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
return;
}
+ st_flush_bitmap_cache(st);
+
dest = _mesa_map_pbo_dest(ctx, &clippedPacking, dest);
if (!dest)
return;
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 0cd80fa59f..459e924cca 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -296,6 +296,9 @@ void st_init_extensions(struct st_context *st)
if (screen->get_param(screen, PIPE_CAP_OCCLUSION_QUERY)) {
ctx->Extensions.ARB_occlusion_query = GL_TRUE;
}
+ if (screen->get_param(screen, PIPE_CAP_TIMER_QUERY)) {
+ ctx->Extensions.EXT_timer_query = GL_TRUE;
+ }
if (screen->get_param(screen, PIPE_CAP_TEXTURE_SHADOW_MAP)) {
ctx->Extensions.ARB_depth_texture = GL_TRUE;