summary | refs | log | tree | commit | diff
path: root/src/mesa/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- src/mesa/drivers/common/driverfuncs.c 15
-rw-r--r-- src/mesa/drivers/dri/Makefile.template 1
-rw-r--r-- src/mesa/drivers/dri/common/dri_util.c 2
-rw-r--r-- src/mesa/drivers/dri/common/dri_util.h 1
-rw-r--r-- src/mesa/drivers/dri/i915/i830_state.c 2
-rw-r--r-- src/mesa/drivers/dri/i915/i915_state.c 2
-rw-r--r-- src/mesa/drivers/dri/i915/intel_tris.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_clip.c 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_clip.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_clip_tri.c 20
-rw-r--r-- src/mesa/drivers/dri/i965/brw_clip_util.c 36
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.c 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h 10
-rw-r--r-- src/mesa/drivers/dri/i965/brw_defines.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_disasm.c 30
-rw-r--r-- src/mesa/drivers/dri/i965/brw_draw.c 31
-rw-r--r-- src/mesa/drivers/dri/i965/brw_draw_upload.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu.h 7
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_emit.c 51
-rw-r--r-- src/mesa/drivers/dri/i965/brw_gs.c 16
-rw-r--r-- src/mesa/drivers/dri/i965/brw_gs_emit.c 19
-rw-r--r-- src/mesa/drivers/dri/i965/brw_queryobj.c 27
-rw-r--r-- src/mesa/drivers/dri/i965/brw_sf.c 10
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs.c 23
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs_emit.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_emit.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_glsl.c 2
-rw-r--r-- src/mesa/drivers/dri/intel/intel_batchbuffer.c 3
-rw-r--r-- src/mesa/drivers/dri/intel/intel_blit.c 3
-rw-r--r-- src/mesa/drivers/dri/intel/intel_buffer_objects.c 4
-rw-r--r-- src/mesa/drivers/dri/intel/intel_context.c 150
-rw-r--r-- src/mesa/drivers/dri/intel/intel_context.h 15
-rw-r--r-- src/mesa/drivers/dri/intel/intel_fbo.c 17
-rw-r--r-- src/mesa/drivers/dri/intel/intel_fbo.h 3
-rw-r--r-- src/mesa/drivers/dri/intel/intel_mipmap_tree.c 7
-rw-r--r-- src/mesa/drivers/dri/intel/intel_pixel_copy.c 2
-rw-r--r-- src/mesa/drivers/dri/intel/intel_pixel_draw.c 2
-rw-r--r-- src/mesa/drivers/dri/intel/intel_pixel_read.c 15
-rw-r--r-- src/mesa/drivers/dri/intel/intel_regions.c 2
-rw-r--r-- src/mesa/drivers/dri/intel/intel_screen.c 13
-rw-r--r-- src/mesa/drivers/dri/intel/intel_span.c 2
-rw-r--r-- src/mesa/drivers/dri/intel/intel_syncobj.c 2
-rw-r--r-- src/mesa/drivers/dri/intel/intel_tex_copy.c 2
-rw-r--r-- src/mesa/drivers/dri/intel/intel_tex_format.c 27
-rw-r--r-- src/mesa/drivers/dri/intel/intel_tex_image.c 13
-rw-r--r-- src/mesa/drivers/dri/intel/intel_tex_subimage.c 2
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/Makefile 1
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c 4
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c 4
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_code.c 4
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_code.h 8
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c 106
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h 13
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c 2
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_optimize.c 446
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c 6
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c 4
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c 26
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h 1
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c 97
-rw-r--r-- src/mesa/drivers/dri/r300/r300_fragprog_common.c 10
-rw-r--r-- src/mesa/drivers/fbdev/Makefile 3
-rw-r--r-- src/mesa/drivers/glslcompiler/Makefile 3
-rw-r--r-- src/mesa/drivers/osmesa/Makefile 3
-rw-r--r-- src/mesa/drivers/osmesa/osmesa.c 38
-rw-r--r-- src/mesa/drivers/x11/Makefile 3
-rw-r--r-- src/mesa/drivers/x11/xm_api.c 8
68 files changed, 1025 insertions, 381 deletions
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index ebfaa2f07b..ca5eb5c755 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -47,6 +47,9 @@
#if FEATURE_ARB_sync
#include "main/syncobj.h"
#endif
+#if FEATURE_EXT_transform_feedback
+#include "main/transformfeedback.h"
+#endif
#include "shader/program.h"
#include "shader/shader_api.h"
@@ -178,14 +181,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
driver->TexParameter = NULL;
driver->Viewport = NULL;
- /* state queries */
- driver->GetBooleanv = NULL;
- driver->GetDoublev = NULL;
- driver->GetFloatv = NULL;
- driver->GetIntegerv = NULL;
- driver->GetInteger64v = NULL;
- driver->GetPointerv = NULL;
-
/* buffer objects */
_mesa_init_buffer_object_functions(driver);
@@ -213,6 +208,10 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
driver->DeleteArrayObject = _mesa_delete_array_object;
driver->BindArrayObject = NULL;
+#if FEATURE_EXT_transform_feedback
+ _mesa_init_transform_feedback_functions(driver);
+#endif
+
/* T&L stuff */
driver->NeedValidate = GL_FALSE;
driver->ValidateTnlModule = NULL;
diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template
index 4b9a0c1786..8cb25439e4 100644
--- a/src/mesa/drivers/dri/Makefile.template
+++ b/src/mesa/drivers/dri/Makefile.template
@@ -26,6 +26,7 @@ SHARED_INCLUDES = \
-I$(TOP)/src/mesa/drivers/dri/common \
-Iserver \
-I$(TOP)/include \
+ -I$(TOP)/src/mapi \
-I$(TOP)/src/mesa \
-I$(TOP)/src/egl/main \
-I$(TOP)/src/egl/drivers/dri \
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c
index 360c524754..c3d1f2c454 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -736,6 +736,8 @@ setupLoaderExtensions(__DRIscreen *psp,
psp->dri2.loader = (__DRIdri2LoaderExtension *) extensions[i];
if (strcmp(extensions[i]->name, __DRI_IMAGE_LOOKUP) == 0)
psp->dri2.image = (__DRIimageLookupExtension *) extensions[i];
+ if (strcmp(extensions[i]->name, __DRI_USE_INVALIDATE) == 0)
+ psp->dri2.useInvalidate = (__DRIuseInvalidateExtension *) extensions[i];
}
}
diff --git a/src/mesa/drivers/dri/common/dri_util.h b/src/mesa/drivers/dri/common/dri_util.h
index ab6c6e57af..e4c590b132 100644
--- a/src/mesa/drivers/dri/common/dri_util.h
+++ b/src/mesa/drivers/dri/common/dri_util.h
@@ -527,6 +527,7 @@ struct __DRIscreenRec {
int enabled;
__DRIdri2LoaderExtension *loader;
__DRIimageLookupExtension *image;
+ __DRIuseInvalidateExtension *useInvalidate;
} dri2;
/* The lock actually in use, old sarea or DRI2 */
diff --git a/src/mesa/drivers/dri/i915/i830_state.c b/src/mesa/drivers/dri/i915/i830_state.c
index 3b9b3ae329..38e524e183 100644
--- a/src/mesa/drivers/dri/i915/i830_state.c
+++ b/src/mesa/drivers/dri/i915/i830_state.c
@@ -453,8 +453,6 @@ i830Viewport(GLcontext * ctx,
GLint x, GLint y, GLsizei width, GLsizei height)
{
intelCalcViewport(ctx);
-
- intel_viewport(ctx, x, y, width, height);
}
diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c
index 91b228d52b..26d387f383 100644
--- a/src/mesa/drivers/dri/i915/i915_state.c
+++ b/src/mesa/drivers/dri/i915/i915_state.c
@@ -394,8 +394,6 @@ i915Viewport(GLcontext * ctx,
GLint x, GLint y, GLsizei width, GLsizei height)
{
intelCalcViewport(ctx);
-
- intel_viewport(ctx, x, y, width, height);
}
diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c
index 9449a158dc..7aecf68e4a 100644
--- a/src/mesa/drivers/dri/i915/intel_tris.c
+++ b/src/mesa/drivers/dri/i915/intel_tris.c
@@ -1208,7 +1208,7 @@ intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode)
if (mode) {
intel->Fallback |= bit;
if (oldfallback == 0) {
- intelFlush(ctx);
+ intel_flush(ctx);
if (INTEL_DEBUG & DEBUG_FALLBACKS)
fprintf(stderr, "ENTER FALLBACK %x: %s\n",
bit, getFallbackString(bit));
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
index 029a16500b..49ef859e45 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -42,7 +42,6 @@
#include "brw_state.h"
#include "brw_clip.h"
-
#define FRONT_UNFILLED_BIT 0x1
#define BACK_UNFILLED_BIT 0x2
@@ -127,6 +126,14 @@ static void compile_clip_prog( struct brw_context *brw,
*/
program = brw_get_program(&c.func, &program_size);
+ if (INTEL_DEBUG & DEBUG_CLIP) {
+ printf("clip:\n");
+ for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
+ brw_disasm(stdout, &((struct brw_instruction *)program)[i],
+ intel->gen);
+ printf("\n");
+ }
+
/* Upload
*/
dri_bo_unreference(brw->clip.prog_bo);
diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h
index d71bac7f61..68222c6c27 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.h
+++ b/src/mesa/drivers/dri/i965/brw_clip.h
@@ -114,8 +114,6 @@ struct brw_clip_compile {
GLboolean need_direction;
- GLuint last_mrf;
-
GLuint header_position_offset;
GLuint offset[VERT_ATTRIB_MAX];
};
diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c
index b27fe654ca..916a99ea00 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_tri.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c
@@ -177,7 +177,7 @@ void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
- struct brw_instruction *is_poly;
+ struct brw_instruction *is_poly, *is_trifan;
struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
@@ -195,8 +195,22 @@ void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
is_poly = brw_ELSE(p, is_poly);
{
if (c->key.pv_first) {
- brw_clip_copy_colors(c, 1, 0);
- brw_clip_copy_colors(c, 2, 0);
+ brw_CMP(p,
+ vec1(brw_null_reg()),
+ BRW_CONDITIONAL_EQ,
+ tmp0,
+ brw_imm_ud(_3DPRIM_TRIFAN));
+ is_trifan = brw_IF(p, BRW_EXECUTE_1);
+ {
+ brw_clip_copy_colors(c, 0, 1);
+ brw_clip_copy_colors(c, 2, 1);
+ }
+ is_trifan = brw_ELSE(p, is_trifan);
+ {
+ brw_clip_copy_colors(c, 1, 0);
+ brw_clip_copy_colors(c, 2, 0);
+ }
+ brw_ENDIF(p, is_trifan);
}
else {
brw_clip_copy_colors(c, 0, 2);
diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c
index 34a966a47a..2148bc8244 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -211,27 +211,14 @@ void brw_clip_emit_vue(struct brw_clip_compile *c,
GLuint header)
{
struct brw_compile *p = &c->func;
- GLuint start = c->last_mrf;
brw_clip_ff_sync(c);
assert(!(allocate && eot));
-
- /* Cycle through mrf regs - probably futile as we have to wait for
- * the allocation response anyway. Also, the order this function
- * is invoked doesn't correspond to the order the instructions will
- * be executed, so it won't have any effect in many cases.
- */
-#if 0
- if (start + c->nr_regs + 1 >= MAX_MRF)
- start = 0;
- c->last_mrf = start + c->nr_regs + 1;
-#endif
-
/* Copy the vertex from vertn into m1..mN+1:
*/
- brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs);
+ brw_copy_from_indirect(p, brw_message_reg(1), vert, c->nr_regs);
/* Overwrite PrimType and PrimStart in the message header, for
* each vertex in turn:
@@ -247,7 +234,7 @@ void brw_clip_emit_vue(struct brw_clip_compile *c,
*/
brw_urb_WRITE(p,
allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
- start,
+ 0,
c->reg.R0,
allocate,
1, /* used */
@@ -370,18 +357,13 @@ void brw_clip_ff_sync(struct brw_clip_compile *c)
need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
{
brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
- brw_ff_sync(p,
- c->reg.R0,
- 0,
- c->reg.R0,
- 1,
- 1, /* used */
- 1, /* msg length */
- 1, /* response length */
- 0, /* eot */
- 1, /* write compelete */
- 0, /* urb offset */
- BRW_URB_SWIZZLE_NONE);
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1, /* allocate */
+ 1, /* response length */
+ 0 /* eot */);
}
brw_ENDIF(p, need_ff_sync);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 523a11aea3..dc4bd5802d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -64,8 +64,6 @@ static void brwInitDriverFunctions( struct dd_function_table *functions )
brwInitFragProgFuncs( functions );
brwInitProgFuncs( functions );
brw_init_queryobj_functions(functions);
-
- functions->Viewport = intel_viewport;
}
GLboolean brwCreateContext( int api,
@@ -194,8 +192,6 @@ GLboolean brwCreateContext( int api,
ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
- make_empty_list(&brw->query.active_head);
-
brw_draw_init( brw );
return GL_TRUE;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 1f09651126..a97fcb0f4d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -418,18 +418,12 @@ struct brw_vertex_info {
struct brw_query_object {
struct gl_query_object Base;
- /** Doubly linked list of active query objects in the context. */
- struct brw_query_object *prev, *next;
-
/** Last query BO associated with this query. */
dri_bo *bo;
/** First index in bo with query data for this object. */
int first_index;
/** Last index in bo with query data for this object. */
int last_index;
-
- /* Total count of pixels from previous BOs */
- unsigned int count;
};
@@ -664,7 +658,7 @@ struct brw_context
} cc;
struct {
- struct brw_query_object active_head;
+ struct brw_query_object *obj;
dri_bo *bo;
int index;
GLboolean active;
@@ -726,7 +720,7 @@ void brw_upload_urb_fence(struct brw_context *brw);
void brw_upload_cs_urb_state(struct brw_context *brw);
/* brw_disasm.c */
-int brw_disasm (FILE *file, struct brw_instruction *inst);
+int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
/*======================================================================
* Inline conversion functions. These are better-typed than the
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index f26a13fc3c..2d3556b805 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -783,7 +783,7 @@
#define CMD_BINDING_TABLE_PTRS 0x7801
# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8)
# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9)
-# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 10)
+# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12)
#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */
# define PS_SAMPLER_STATE_CHANGE (1 << 12)
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index db3fc50a63..ff12daf497 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -323,6 +323,11 @@ char *math_precision[2] = {
[1] = "partial_precision"
};
+char *urb_opcode[2] = {
+ [0] = "urb_write",
+ [1] = "ff_sync",
+};
+
char *urb_swizzle[4] = {
[BRW_URB_SWIZZLE_NONE] = "",
[BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
@@ -774,7 +779,7 @@ static int src1 (FILE *file, struct brw_instruction *inst)
}
}
-int brw_disasm (FILE *file, struct brw_instruction *inst)
+int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
{
int err = 0;
int space = 0;
@@ -829,12 +834,20 @@ int brw_disasm (FILE *file, struct brw_instruction *inst)
}
if (inst->header.opcode == BRW_OPCODE_SEND) {
+ int target;
+
+ if (gen >= 5)
+ target = inst->bits2.send_gen5.sfid;
+ else
+ target = inst->bits3.generic.msg_target;
+
newline (file);
pad (file, 16);
space = 0;
err |= control (file, "target function", target_function,
- inst->bits3.generic.msg_target, &space);
- switch (inst->bits3.generic.msg_target) {
+ target, &space);
+
+ switch (target) {
case BRW_MESSAGE_TARGET_MATH:
err |= control (file, "math function", math_function,
inst->bits3.math.function, &space);
@@ -864,8 +877,17 @@ int brw_disasm (FILE *file, struct brw_instruction *inst)
inst->bits3.dp_write.send_commit_msg);
break;
case BRW_MESSAGE_TARGET_URB:
- format (file, " %d", inst->bits3.urb.offset);
+ if (gen >= 5) {
+ format (file, " %d", inst->bits3.urb_gen5.offset);
+ } else {
+ format (file, " %d", inst->bits3.urb.offset);
+ }
+
space = 1;
+ if (gen >= 5) {
+ err |= control (file, "urb opcode", urb_opcode,
+ inst->bits3.urb_gen5.opcode, &space);
+ }
err |= control (file, "urb swizzle", urb_swizzle,
inst->bits3.urb.swizzle_control, &space);
err |= control (file, "urb allocate", urb_allocate,
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index e348d4686b..fe633d3e25 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -77,32 +77,41 @@ static const GLenum reduced_prim[GL_POLYGON+1] = {
* programs be immune to the active primitive (ie. cope with all
* possibilities). That may not be realistic however.
*/
-static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
+static GLuint brw_set_prim(struct brw_context *brw,
+ const struct _mesa_prim *prim)
{
GLcontext *ctx = &brw->intel.ctx;
+ GLenum mode = prim->mode;
if (INTEL_DEBUG & DEBUG_PRIMS)
- printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
-
+ printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
+
/* Slight optimization to avoid the GS program when not needed:
*/
- if (prim == GL_QUAD_STRIP &&
+ if (mode == GL_QUAD_STRIP &&
ctx->Light.ShadeModel != GL_FLAT &&
ctx->Polygon.FrontMode == GL_FILL &&
ctx->Polygon.BackMode == GL_FILL)
- prim = GL_TRIANGLE_STRIP;
+ mode = GL_TRIANGLE_STRIP;
+
+ if (prim->mode == GL_QUADS && prim->count == 4 &&
+ ctx->Light.ShadeModel != GL_FLAT &&
+ ctx->Polygon.FrontMode == GL_FILL &&
+ ctx->Polygon.BackMode == GL_FILL) {
+ mode = GL_TRIANGLE_FAN;
+ }
- if (prim != brw->primitive) {
- brw->primitive = prim;
+ if (mode != brw->primitive) {
+ brw->primitive = mode;
brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
- if (reduced_prim[prim] != brw->intel.reduced_primitive) {
- brw->intel.reduced_primitive = reduced_prim[prim];
+ if (reduced_prim[mode] != brw->intel.reduced_primitive) {
+ brw->intel.reduced_primitive = reduced_prim[mode];
brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
}
}
- return prim_to_hw_prim[prim];
+ return prim_to_hw_prim[mode];
}
@@ -351,7 +360,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
*/
intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4);
- hw_prim = brw_set_prim(brw, prim[i].mode);
+ hw_prim = brw_set_prim(brw, &prim[i]);
if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) {
first_time = GL_FALSE;
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 8247faa36d..9cbff24863 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -59,7 +59,7 @@ static GLuint half_float_types[5] = {
0,
BRW_SURFACEFORMAT_R16_FLOAT,
BRW_SURFACEFORMAT_R16G16_FLOAT,
- 0, /* can't seem to render this one */
+ BRW_SURFACEFORMAT_R16G16B16A16_FLOAT,
BRW_SURFACEFORMAT_R16G16B16A16_FLOAT
};
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 4f55158e8f..3a32ad26c1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -822,13 +822,8 @@ void brw_ff_sync(struct brw_compile *p,
GLuint msg_reg_nr,
struct brw_reg src0,
GLboolean allocate,
- GLboolean used,
- GLuint msg_length,
GLuint response_length,
- GLboolean eot,
- GLboolean writes_complete,
- GLuint offset,
- GLuint swizzle);
+ GLboolean eot);
void brw_fb_WRITE(struct brw_compile *p,
struct brw_reg dest,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 785d382a00..175899b026 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -280,28 +280,23 @@ static void brw_set_math_message( struct brw_context *brw,
}
-static void brw_set_ff_sync_message( struct brw_context *brw,
- struct brw_instruction *insn,
- GLboolean allocate,
- GLboolean used,
- GLuint msg_length,
- GLuint response_length,
- GLboolean end_of_thread,
- GLboolean complete,
- GLuint offset,
- GLuint swizzle_control )
+static void brw_set_ff_sync_message(struct brw_context *brw,
+ struct brw_instruction *insn,
+ GLboolean allocate,
+ GLuint response_length,
+ GLboolean end_of_thread)
{
brw_set_src1(insn, brw_imm_d(0));
- insn->bits3.urb_gen5.opcode = 1;
- insn->bits3.urb_gen5.offset = offset;
- insn->bits3.urb_gen5.swizzle_control = swizzle_control;
+ insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
+ insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
+ insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
insn->bits3.urb_gen5.allocate = allocate;
- insn->bits3.urb_gen5.used = used;
- insn->bits3.urb_gen5.complete = complete;
+ insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
+ insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
insn->bits3.urb_gen5.header_present = 1;
- insn->bits3.urb_gen5.response_length = response_length;
- insn->bits3.urb_gen5.msg_length = msg_length;
+ insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
+ insn->bits3.urb_gen5.msg_length = 1;
insn->bits3.urb_gen5.end_of_thread = end_of_thread;
insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
insn->bits2.send_gen5.end_of_thread = end_of_thread;
@@ -1451,18 +1446,11 @@ void brw_ff_sync(struct brw_compile *p,
GLuint msg_reg_nr,
struct brw_reg src0,
GLboolean allocate,
- GLboolean used,
- GLuint msg_length,
GLuint response_length,
- GLboolean eot,
- GLboolean writes_complete,
- GLuint offset,
- GLuint swizzle)
+ GLboolean eot)
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
- assert(msg_length < 16);
-
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
brw_set_src1(insn, brw_imm_d(0));
@@ -1470,13 +1458,8 @@ void brw_ff_sync(struct brw_compile *p,
insn->header.destreg__conditionalmod = msg_reg_nr;
brw_set_ff_sync_message(p->brw,
- insn,
- allocate,
- used,
- msg_length,
- response_length,
- eot,
- writes_complete,
- offset,
- swizzle);
+ insn,
+ allocate,
+ response_length,
+ eot);
}
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 4b13494ecf..94d93f3aa6 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -122,6 +122,16 @@ static void compile_gs_prog( struct brw_context *brw,
*/
program = brw_get_program(&c.func, &program_size);
+ if (INTEL_DEBUG & DEBUG_GS) {
+ int i;
+
+ printf("gs:\n");
+ for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
+ brw_disasm(stdout, &((struct brw_instruction *)program)[i],
+ intel->gen);
+ printf("\n");
+ }
+
/* Upload
*/
dri_bo_unreference(brw->gs.prog_bo);
@@ -163,6 +173,12 @@ static void populate_key( struct brw_context *brw,
/* _NEW_LIGHT */
key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
+ if (key->primitive == GL_QUADS && ctx->Light.ShadeModel != GL_FLAT) {
+ /* Provide consistent primitive order with brw_set_prim's
+ * optimization of single quads to trifans.
+ */
+ key->pv_first = GL_TRUE;
+ }
key->need_gs_prog = (key->hint_gs_always ||
brw->primitive == GL_QUADS ||
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c
index dd7b057d62..99a6f6be11 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -104,18 +104,13 @@ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
{
struct brw_compile *p = &c->func;
brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
- brw_ff_sync(p,
- c->reg.R0,
- 0,
- c->reg.R0,
- 1,
- 1, /* used */
- 1, /* msg length */
- 1, /* response length */
- 0, /* eot */
- 1, /* write compelete */
- 0, /* urb offset */
- BRW_URB_SWIZZLE_NONE);
+ brw_ff_sync(p,
+ c->reg.R0,
+ 0,
+ c->reg.R0,
+ 1, /* allocate */
+ 1, /* response length */
+ 0 /* eot */);
}
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
index 6cce7e5089..3f47a68049 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -38,7 +38,6 @@
* required for handling queries, so that we can be sure that we won't
* have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT.
*/
-#include "main/simple_list.h"
#include "main/imports.h"
#include "brw_context.h"
@@ -105,7 +104,7 @@ brw_begin_query(GLcontext *ctx, struct gl_query_object *q)
query->first_index = -1;
query->last_index = -1;
- insert_at_head(&brw->query.active_head, query);
+ brw->query.obj = query;
intel->stats_wm++;
}
@@ -131,7 +130,7 @@ brw_end_query(GLcontext *ctx, struct gl_query_object *q)
brw->query.bo = NULL;
}
- remove_from_list(query);
+ brw->query.obj = NULL;
intel->stats_wm--;
}
@@ -161,7 +160,7 @@ brw_prepare_query_begin(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
/* Skip if we're not doing any queries. */
- if (is_empty_list(&brw->query.active_head))
+ if (!brw->query.obj)
return;
/* Get a new query BO if we're going to need it. */
@@ -182,10 +181,10 @@ void
brw_emit_query_begin(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
- struct brw_query_object *query;
+ struct brw_query_object *query = brw->query.obj;
/* Skip if we're not doing any queries, or we've emitted the start. */
- if (brw->query.active || is_empty_list(&brw->query.active_head))
+ if (!query || brw->query.active)
return;
BEGIN_BATCH(4);
@@ -205,16 +204,14 @@ brw_emit_query_begin(struct brw_context *brw)
OUT_BATCH(0);
ADVANCE_BATCH();
- foreach(query, &brw->query.active_head) {
- if (query->bo != brw->query.bo) {
- if (query->bo != NULL)
- brw_queryobj_get_results(query);
- dri_bo_reference(brw->query.bo);
- query->bo = brw->query.bo;
- query->first_index = brw->query.index;
- }
- query->last_index = brw->query.index;
+ if (query->bo != brw->query.bo) {
+ if (query->bo != NULL)
+ brw_queryobj_get_results(query);
+ dri_bo_reference(brw->query.bo);
+ query->bo = brw->query.bo;
+ query->first_index = brw->query.index;
}
+ query->last_index = brw->query.index;
brw->query.active = GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index 57d1c29ade..b0dd1ff3af 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -46,6 +46,7 @@
static void compile_sf_prog( struct brw_context *brw,
struct brw_sf_prog_key *key )
{
+ struct intel_context *intel = &brw->intel;
struct brw_sf_compile c;
const GLuint *program;
GLuint program_size;
@@ -107,6 +108,14 @@ static void compile_sf_prog( struct brw_context *brw,
*/
program = brw_get_program(&c.func, &program_size);
+ if (INTEL_DEBUG & DEBUG_SF) {
+ printf("sf:\n");
+ for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
+ brw_disasm(stdout, &((struct brw_instruction *)program)[i],
+ intel->gen);
+ printf("\n");
+ }
+
/* Upload
*/
dri_bo_unreference(brw->sf.prog_bo);
@@ -154,6 +163,7 @@ static void upload_sf_prog(struct brw_context *brw)
break;
}
+ /* _NEW_POINT */
key.do_point_sprite = ctx->Point.PointSprite;
if (key.do_point_sprite) {
int i;
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 44b085e214..57ffb2d89e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -48,6 +48,7 @@ static void do_vs_prog( struct brw_context *brw,
const GLuint *program;
struct brw_vs_compile c;
int aux_size;
+ int i;
memset(&c, 0, sizeof(c));
memcpy(&c.key, key, sizeof(*key));
@@ -63,6 +64,17 @@ static void do_vs_prog( struct brw_context *brw,
c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
}
+ /* Put dummy slots into the VUE for the SF to put the replaced
+ * point sprite coords in. We shouldn't need these dummy slots,
+ * which take up precious URB space, but it would mean that the SF
+ * doesn't get nice aligned pairs of input coords into output
+ * coords, which would be a pain to handle.
+ */
+ for (i = 0; i < 8; i++) {
+ if (c.key.point_coord_replace & (1 << i))
+ c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i);
+ }
+
if (0)
_mesa_print_program(&c.vp->program.Base);
@@ -106,6 +118,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
struct brw_vs_prog_key key;
struct brw_vertex_program *vp =
(struct brw_vertex_program *)brw->vertex_program;
+ int i;
memset(&key, 0, sizeof(key));
@@ -117,6 +130,14 @@ static void brw_upload_vs_prog(struct brw_context *brw)
key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
+ /* _NEW_POINT */
+ if (ctx->Point.PointSprite) {
+ for (i = 0; i < 8; i++) {
+ if (ctx->Point.CoordReplace[i])
+ key.point_coord_replace |= (1 << i);
+ }
+ }
+
/* Make an early check for the key.
*/
dri_bo_unreference(brw->vs.prog_bo);
@@ -135,7 +156,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
*/
const struct brw_tracked_state brw_vs_prog = {
.dirty = {
- .mesa = _NEW_TRANSFORM | _NEW_POLYGON,
+ .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT,
.brw = BRW_NEW_VERTEX_PROGRAM,
.cache = 0
},
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 95e0501b1e..6493744f3e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -43,7 +43,7 @@ struct brw_vs_prog_key {
GLuint program_string_id;
GLuint nr_userclip:4;
GLuint copy_edgeflag:1;
- GLuint pad:26;
+ GLuint point_coord_replace:8;
};
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index dc6ab81c4a..0b44deeb63 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1882,7 +1882,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
printf("vs-native:\n");
for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stderr, &p->store[i]);
+ brw_disasm(stderr, &p->store[i], intel->gen);
printf("\n");
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 375e795391..323cfac8fa 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1717,7 +1717,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
printf("wm-native:\n");
for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stderr, &p->store[i]);
+ brw_disasm(stderr, &p->store[i], p->brw->intel.gen);
printf("\n");
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 88b885cb94..fe3c89b721 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -2111,7 +2111,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
if (INTEL_DEBUG & DEBUG_WM) {
printf("wm-native:\n");
for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stderr, &p->store[i]);
+ brw_disasm(stderr, &p->store[i], intel->gen);
printf("\n");
}
}
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 9768b0deee..ca8e344836 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -130,8 +130,7 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
struct intel_context *intel = batch->intel;
GLuint used = batch->ptr - batch->map;
- if (!intel->using_dri2_swapbuffers &&
- intel->first_post_swapbuffers_batch == NULL) {
+ if (intel->first_post_swapbuffers_batch == NULL) {
intel->first_post_swapbuffers_batch = intel->batch->buf;
drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
}
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 7d9f302dca..a590c799ad 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -353,6 +353,9 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
OUT_BATCH(clear_val);
ADVANCE_BATCH();
+ if (intel->always_flush_cache)
+ intel_batchbuffer_emit_mi_flush(intel->batch);
+
if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL)
mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
else
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 103aaf2b95..c38551bf95 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -277,7 +277,7 @@ intel_bufferobj_map(GLcontext * ctx,
/* Flush any existing batchbuffer that might reference this data. */
if (drm_intel_bo_references(intel->batch->buf, intel_obj->buffer))
- intelFlush(ctx);
+ intel_flush(ctx);
if (intel_obj->region)
intel_bufferobj_cow(intel, intel_obj);
@@ -349,7 +349,7 @@ intel_bufferobj_map_range(GLcontext * ctx,
*/
if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
drm_intel_bo_references(intel->batch->buf, intel_obj->buffer))
- intelFlush(ctx);
+ intel_flush(ctx);
if (intel_obj->buffer == NULL) {
obj->Pointer = NULL;
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 8ee9a292a1..05d4998654 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -177,6 +177,30 @@ intelGetString(GLcontext * ctx, GLenum name)
}
}
+static void
+intel_flush_front(GLcontext *ctx)
+{
+ struct intel_context *intel = intel_context(ctx);
+ __DRIcontext *driContext = intel->driContext;
+ __DRIscreen *const screen = intel->intelScreen->driScrnPriv;
+
+ if ((ctx->DrawBuffer->Name == 0) && intel->front_buffer_dirty) {
+ if (screen->dri2.loader &&
+ (screen->dri2.loader->base.version >= 2)
+ && (screen->dri2.loader->flushFrontBuffer != NULL) &&
+ driContext->driDrawablePriv &&
+ driContext->driDrawablePriv->loaderPrivate) {
+ (*screen->dri2.loader->flushFrontBuffer)(driContext->driDrawablePriv,
+ driContext->driDrawablePriv->loaderPrivate);
+
+ /* We set the dirty bit in intel_prepare_render() if we're
+ * front buffer rendering once we get there.
+ */
+ intel->front_buffer_dirty = GL_FALSE;
+ }
+ }
+}
+
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
@@ -203,8 +227,10 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
* real front buffer contents will get copied to the new fake front
* buffer.
*/
- if (intel->is_front_buffer_rendering)
- intel_flush(&intel->ctx, GL_FALSE);
+ if (intel->is_front_buffer_rendering) {
+ intel_flush(&intel->ctx);
+ intel_flush_front(&intel->ctx);
+ }
/* Set this up front, so that in case our buffers get invalidated
* while we're getting new buffers, we don't clobber the stamp and
@@ -362,7 +388,7 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
if (buffers[i].attachment == __DRI_BUFFER_DEPTH)
depth_region = region;
- intel_renderbuffer_set_region(rb, region);
+ intel_renderbuffer_set_region(intel, rb, region);
intel_region_release(&region);
if (buffers[i].attachment == __DRI_BUFFER_DEPTH_STENCIL) {
@@ -374,7 +400,7 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
continue;
intel_region_reference(&stencil_region, region);
- intel_renderbuffer_set_region(rb, stencil_region);
+ intel_renderbuffer_set_region(intel, rb, stencil_region);
intel_region_release(&stencil_region);
}
}
@@ -383,6 +409,10 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
driUpdateFramebufferSize(&intel->ctx, drawable);
}
+/**
+ * intel_prepare_render should be called anywhere that current read/drawbuffer
+ * state is required.
+ */
void
intel_prepare_render(struct intel_context *intel)
{
@@ -403,16 +433,47 @@ intel_prepare_render(struct intel_context *intel)
intel_update_renderbuffers(driContext, drawable);
driContext->dri2.read_stamp = drawable->dri2.stamp;
}
+
+ /* If we're currently rendering to the front buffer, the rendering
+ * that will happen next will probably dirty the front buffer. So
+ * mark it as dirty here.
+ */
+ if (intel->is_front_buffer_rendering)
+ intel->front_buffer_dirty = GL_TRUE;
+
+ /* Wait for the swapbuffers before the one we just emitted, so we
+ * don't get too many swaps outstanding for apps that are GPU-heavy
+ * but not CPU-heavy.
+ *
+ * We're using intelDRI2Flush (called from the loader before
+ * swapbuffer) and glFlush (for front buffer rendering) as the
+ * indicator that a frame is done and then throttle when we get
+ * here as we prepare to render the next frame. At this point the
+ * round trips for swap/copy and getting new buffers are done and
+ * we'll spend less time waiting on the GPU.
+ *
+ * Unfortunately, we don't have a handle to the batch containing
+ * the swap, and getting our hands on that doesn't seem worth it,
+ * so we just use the first batch we emitted after the last swap.
+ */
+ if (intel->need_throttle && intel->first_post_swapbuffers_batch) {
+ drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
+ drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
+ intel->first_post_swapbuffers_batch = NULL;
+ intel->need_throttle = GL_FALSE;
+ }
}
-void
+static void
intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
{
struct intel_context *intel = intel_context(ctx);
__DRIcontext *driContext = intel->driContext;
- if (!intel->using_dri2_swapbuffers &&
- !intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) {
+ if (intel->saved_viewport)
+ intel->saved_viewport(ctx, x, y, w, h);
+
+ if (!intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) {
dri2InvalidateDrawable(driContext->driDrawablePriv);
dri2InvalidateDrawable(driContext->driReadablePriv);
}
@@ -431,12 +492,12 @@ static const struct dri_debug_control debug_control[] = {
{ "buf", DEBUG_BUFMGR},
{ "reg", DEBUG_REGION},
{ "fbo", DEBUG_FBO},
- { "lock", DEBUG_LOCK},
+ { "gs", DEBUG_GS},
{ "sync", DEBUG_SYNC},
{ "prim", DEBUG_PRIMS },
{ "vert", DEBUG_VERTS },
{ "dri", DEBUG_DRI },
- { "dma", DEBUG_DMA },
+ { "sf", DEBUG_SF },
{ "san", DEBUG_SANITY },
{ "sleep", DEBUG_SLEEP },
{ "stats", DEBUG_STATS },
@@ -447,6 +508,7 @@ static const struct dri_debug_control debug_control[] = {
{ "glsl_force", DEBUG_GLSL_FORCE },
{ "urb", DEBUG_URB },
{ "vs", DEBUG_VS },
+ { "clip", DEBUG_CLIP },
{ NULL, 0 }
};
@@ -469,10 +531,9 @@ intelInvalidateState(GLcontext * ctx, GLuint new_state)
}
void
-intel_flush(GLcontext *ctx, GLboolean needs_mi_flush)
+intel_flush(GLcontext *ctx)
{
struct intel_context *intel = intel_context(ctx);
- __DRIcontext *driContext = intel->driContext;
if (intel->Fallback)
_swrast_flush(ctx);
@@ -482,35 +543,6 @@ intel_flush(GLcontext *ctx, GLboolean needs_mi_flush)
if (intel->batch->map != intel->batch->ptr)
intel_batchbuffer_flush(intel->batch);
-
- if ((ctx->DrawBuffer->Name == 0) && intel->front_buffer_dirty) {
- __DRIscreen *const screen = intel->intelScreen->driScrnPriv;
-
- if (screen->dri2.loader &&
- (screen->dri2.loader->base.version >= 2)
- && (screen->dri2.loader->flushFrontBuffer != NULL) &&
- driContext->driDrawablePriv &&
- driContext->driDrawablePriv->loaderPrivate) {
- (*screen->dri2.loader->flushFrontBuffer)(driContext->driDrawablePriv,
- driContext->driDrawablePriv->loaderPrivate);
-
- /* Only clear the dirty bit if front-buffer rendering is no longer
- * enabled. This is done so that the dirty bit can only be set in
- * glDrawBuffer. Otherwise the dirty bit would have to be set at
- * each of N places that do rendering. This has worse performances,
- * but it is much easier to get correct.
- */
- if (!intel->is_front_buffer_rendering) {
- intel->front_buffer_dirty = GL_FALSE;
- }
- }
- }
-}
-
-void
-intelFlush(GLcontext * ctx)
-{
- intel_flush(ctx, GL_FALSE);
}
static void
@@ -518,26 +550,9 @@ intel_glFlush(GLcontext *ctx)
{
struct intel_context *intel = intel_context(ctx);
- intel_flush(ctx, GL_TRUE);
-
- /* We're using glFlush as an indicator that a frame is done, which is
- * what DRI2 does before calling SwapBuffers (and means we should catch
- * people doing front-buffer rendering, as well)..
- *
- * Wait for the swapbuffers before the one we just emitted, so we don't
- * get too many swaps outstanding for apps that are GPU-heavy but not
- * CPU-heavy.
- *
- * Unfortunately, we don't have a handle to the batch containing the swap,
- * and getting our hands on that doesn't seem worth it, so we just us the
- * first batch we emitted after the last swap.
- */
- if (!intel->using_dri2_swapbuffers &&
- intel->first_post_swapbuffers_batch != NULL) {
- drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
- drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
- intel->first_post_swapbuffers_batch = NULL;
- }
+ intel_flush(ctx);
+ intel_flush_front(ctx);
+ intel->need_throttle = GL_TRUE;
}
void
@@ -546,7 +561,8 @@ intelFinish(GLcontext * ctx)
struct gl_framebuffer *fb = ctx->DrawBuffer;
int i;
- intelFlush(ctx);
+ intel_flush(ctx);
+ intel_flush_front(ctx);
for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
struct intel_renderbuffer *irb;
@@ -602,6 +618,12 @@ intelInitContext(struct intel_context *intel,
if (intelScreen->bufmgr == NULL)
return GL_FALSE;
+ /* Can't rely on invalidate events, fall back to glViewport hack */
+ if (!driContextPriv->driScreenPriv->dri2.useInvalidate) {
+ intel->saved_viewport = functions->Viewport;
+ functions->Viewport = intel_viewport;
+ }
+
if (!_mesa_initialize_context_for_api(&intel->ctx, api, mesaVis, shareCtx,
functions, (void *) intel)) {
printf("%s: failed to init mesa context\n", __FUNCTION__);
@@ -886,6 +908,12 @@ intelMakeCurrent(__DRIcontext * driContextPriv,
driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
intel_prepare_render(intel);
_mesa_make_current(&intel->ctx, fb, readFb);
+
+ /* We do this in intel_prepare_render() too, but intel->ctx.DrawBuffer
+ * is NULL at that point. We can't call _mesa_make_current()
+ * first, since we need the buffer size for the initial
+ * viewport. So just call intel_draw_buffer() again here. */
+ intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
}
else {
_mesa_make_current(NULL, NULL, NULL);
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index 02bb4d0d64..04d5fc92a2 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -150,8 +150,8 @@ struct intel_context
struct intel_batchbuffer *batch;
drm_intel_bo *first_post_swapbuffers_batch;
+ GLboolean need_throttle;
GLboolean no_batch_wrap;
- GLboolean using_dri2_swapbuffers;
struct
{
@@ -243,6 +243,8 @@ struct intel_context
__DRIcontext *driContext;
struct intel_screen *intelScreen;
+ void (*saved_viewport)(GLcontext * ctx,
+ GLint x, GLint y, GLsizei width, GLsizei height);
/**
* Configuration cache
@@ -324,12 +326,12 @@ extern int INTEL_DEBUG;
#define DEBUG_BUFMGR 0x200
#define DEBUG_REGION 0x400
#define DEBUG_FBO 0x800
-#define DEBUG_LOCK 0x1000
+#define DEBUG_GS 0x1000
#define DEBUG_SYNC 0x2000
#define DEBUG_PRIMS 0x4000
#define DEBUG_VERTS 0x8000
#define DEBUG_DRI 0x10000
-#define DEBUG_DMA 0x20000
+#define DEBUG_SF 0x20000
#define DEBUG_SANITY 0x40000
#define DEBUG_SLEEP 0x80000
#define DEBUG_STATS 0x100000
@@ -339,6 +341,7 @@ extern int INTEL_DEBUG;
#define DEBUG_URB 0x1000000
#define DEBUG_VS 0x2000000
#define DEBUG_GLSL_FORCE 0x4000000
+#define DEBUG_CLIP 0x8000000
#define DBG(...) do { \
if (INTEL_DEBUG & FILE_DEBUG_FLAG) \
@@ -371,8 +374,7 @@ extern GLboolean intelInitContext(struct intel_context *intel,
struct dd_function_table *functions);
extern void intelFinish(GLcontext * ctx);
-extern void intelFlush(GLcontext * ctx);
-extern void intel_flush(GLcontext * ctx, GLboolean needs_mi_flush);
+extern void intel_flush(GLcontext * ctx);
extern void intelInitDriverFunctions(struct dd_function_table *functions);
@@ -447,9 +449,6 @@ extern int intel_translate_stencil_op(GLenum op);
extern int intel_translate_blend_factor(GLenum factor);
extern int intel_translate_logic_op(GLenum opcode);
-void intel_viewport(GLcontext * ctx, GLint x, GLint y,
- GLsizei width, GLsizei height);
-
void intel_update_renderbuffers(__DRIcontext *context,
__DRIdrawable *drawable);
void intel_prepare_render(struct intel_context *intel);
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 8278d12bb9..217be7ef6c 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -42,7 +42,9 @@
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
-
+#ifndef I915
+#include "brw_state.h"
+#endif
#define FILE_DEBUG_FLAG DEBUG_FBO
@@ -166,7 +168,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat);
cpp = _mesa_get_format_bytes(rb->Format);
- intelFlush(ctx);
+ intel_flush(ctx);
/* free old region */
if (irb->region) {
@@ -280,7 +282,8 @@ intel_nop_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
void
-intel_renderbuffer_set_region(struct intel_renderbuffer *rb,
+intel_renderbuffer_set_region(struct intel_context *intel,
+ struct intel_renderbuffer *rb,
struct intel_region *region)
{
struct intel_region *old;
@@ -288,6 +291,12 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb,
old = rb->region;
rb->region = NULL;
intel_region_reference(&rb->region, region);
+#ifndef I915
+ if (old) {
+ brw_state_cache_bo_delete(&brw_context(&intel->ctx)->surface_cache,
+ old->buffer);
+ }
+#endif
intel_region_release(&old);
}
@@ -411,7 +420,7 @@ intel_framebuffer_renderbuffer(GLcontext * ctx,
{
DBG("Intel FramebufferRenderbuffer %u %u\n", fb->Name, rb ? rb->Name : 0);
- intelFlush(ctx);
+ intel_flush(ctx);
_mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb);
intel_draw_buffer(ctx, fb);
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index 72413f7369..028f657d12 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -82,7 +82,8 @@ intel_get_renderbuffer(struct gl_framebuffer *fb, int attIndex)
extern void
-intel_renderbuffer_set_region(struct intel_renderbuffer *irb,
+intel_renderbuffer_set_region(struct intel_context *intel,
+ struct intel_renderbuffer *irb,
struct intel_region *region);
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index ef1966ea7e..71ef7a8e39 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -116,17 +116,16 @@ intel_miptree_create(struct intel_context *intel,
GLboolean expect_accelerated_upload)
{
struct intel_mipmap_tree *mt;
- uint32_t tiling;
+ uint32_t tiling = I915_TILING_NONE;
if (intel->use_texture_tiling && compress_byte == 0) {
if (intel->gen >= 4 &&
(base_format == GL_DEPTH_COMPONENT ||
base_format == GL_DEPTH_STENCIL_EXT))
tiling = I915_TILING_Y;
- else
+ else if (width0 >= 64)
tiling = I915_TILING_X;
- } else
- tiling = I915_TILING_NONE;
+ }
mt = intel_miptree_create_internal(intel, target, internal_format,
first_level, last_level, width0,
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
index 56faf076c7..2008a4c2be 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
@@ -142,7 +142,7 @@ do_blit_copypixels(GLcontext * ctx,
if (!src || !dst)
return GL_FALSE;
- intelFlush(&intel->ctx);
+ intel_flush(&intel->ctx);
/* Clip to destination buffer. */
orig_dstx = dstx;
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
index bd1dd13fb7..a40b232fff 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
@@ -167,7 +167,7 @@ intel_stencil_drawpixels(GLcontext * ctx,
irb = intel_create_renderbuffer(MESA_FORMAT_ARGB8888);
irb->Base.Width = depth_irb->Base.Width;
irb->Base.Height = depth_irb->Base.Height;
- intel_renderbuffer_set_region(irb, depth_irb->region);
+ intel_renderbuffer_set_region(intel, irb, depth_irb->region);
/* Create a name for our renderbuffer, which lets us use other mesa
* rb functions for convenience.
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_read.c b/src/mesa/drivers/dri/intel/intel_pixel_read.c
index 2ac3da7f42..21d2a7a93e 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_read.c
@@ -170,11 +170,19 @@ intelReadPixels(GLcontext * ctx,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
{
+ struct intel_context *intel = intel_context(ctx);
+ GLboolean dirty;
+
if (INTEL_DEBUG & DEBUG_PIXEL)
fprintf(stderr, "%s\n", __FUNCTION__);
- intelFlush(ctx);
- intel_prepare_render(intel_context(ctx));
+ intel_flush(ctx);
+
+ /* glReadPixels() won't dirty the front buffer, so reset the dirty
+ * flag after calling intel_prepare_render(). */
+ dirty = intel->front_buffer_dirty;
+ intel_prepare_render(intel);
+ intel->front_buffer_dirty = dirty;
if (do_blit_readpixels
(ctx, x, y, width, height, format, type, pack, pixels))
@@ -193,4 +201,7 @@ intelReadPixels(GLcontext * ctx,
_mesa_update_state(ctx);
_swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels);
+
+ /* There's an intel_prepare_render() call in intelSpanRenderStart(). */
+ intel->front_buffer_dirty = dirty;
}
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index 1172de90b1..8cdeaf608c 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -111,7 +111,7 @@ debug_backtrace(void)
GLubyte *
intel_region_map(struct intel_context *intel, struct intel_region *region)
{
- intelFlush(&intel->ctx);
+ intel_flush(&intel->ctx);
_DBG("%s %p\n", __FUNCTION__, region);
if (!region->map_refcount++) {
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 3aed253e24..15a465c640 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -110,23 +110,16 @@ intelDRI2Flush(__DRIdrawable *drawable)
if (intel->gen < 4)
INTEL_FIREVERTICES(intel);
+ intel->need_throttle = GL_TRUE;
+
if (intel->batch->map != intel->batch->ptr)
intel_batchbuffer_flush(intel->batch);
}
-static void
-intelDRI2Invalidate(__DRIdrawable *drawable)
-{
- struct intel_context *intel = drawable->driContextPriv->driverPrivate;
-
- intel->using_dri2_swapbuffers = GL_TRUE;
- dri2InvalidateDrawable(drawable);
-}
-
static const struct __DRI2flushExtensionRec intelFlushExtension = {
{ __DRI2_FLUSH, __DRI2_FLUSH_VERSION },
intelDRI2Flush,
- intelDRI2Invalidate,
+ dri2InvalidateDrawable,
};
static __DRIimage *
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
index c1e15d1b0f..059f76f289 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -244,7 +244,7 @@ intelSpanRenderStart(GLcontext * ctx)
struct intel_context *intel = intel_context(ctx);
GLuint i;
- intelFlush(&intel->ctx);
+ intel_flush(&intel->ctx);
intel_prepare_render(intel);
for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
diff --git a/src/mesa/drivers/dri/intel/intel_syncobj.c b/src/mesa/drivers/dri/intel/intel_syncobj.c
index d67f0cb4a6..c2d86432ff 100644
--- a/src/mesa/drivers/dri/intel/intel_syncobj.c
+++ b/src/mesa/drivers/dri/intel/intel_syncobj.c
@@ -77,7 +77,7 @@ intel_fence_sync(GLcontext *ctx, struct gl_sync_object *s,
sync->bo = intel->batch->buf;
drm_intel_bo_reference(sync->bo);
- intelFlush(ctx);
+ intel_flush(ctx);
}
/* We ignore the user-supplied timeout. This is weaselly -- we're allowed to
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index 62e1e78f59..549a4acc7d 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -108,7 +108,7 @@ do_copy_texsubimage(struct intel_context *intel,
return GL_FALSE;
}
- /* intelFlush(ctx); */
+ /* intel_flush(ctx); */
intel_prepare_render(intel);
{
drm_intel_bo *dst_bo = intel_region_buffer(intel,
diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c
index 7be5231eae..610a169beb 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_format.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_format.c
@@ -1,7 +1,7 @@
#include "intel_context.h"
#include "intel_tex.h"
#include "main/enums.h"
-
+#include "main/formats.h"
/**
* Choose hardware texture format given the user's glTexImage parameters.
@@ -208,22 +208,11 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
int intel_compressed_num_bytes(GLuint mesaFormat)
{
- int bytes = 0;
- switch(mesaFormat) {
-
- case MESA_FORMAT_RGB_FXT1:
- case MESA_FORMAT_RGBA_FXT1:
- case MESA_FORMAT_RGB_DXT1:
- case MESA_FORMAT_RGBA_DXT1:
- bytes = 2;
- break;
-
- case MESA_FORMAT_RGBA_DXT3:
- case MESA_FORMAT_RGBA_DXT5:
- bytes = 4;
- default:
- break;
- }
-
- return bytes;
+ GLuint bw, bh;
+ GLuint block_size;
+
+ block_size = _mesa_get_format_bytes(mesaFormat);
+ _mesa_get_format_block_size(mesaFormat, &bw, &bh);
+
+ return block_size / bh;
}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index 9db96acdc0..06bf262704 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -239,8 +239,8 @@ try_pbo_upload(struct intel_context *intel,
dst_stride = intelImage->mt->region->pitch;
if (drm_intel_bo_references(intel->batch->buf, dst_buffer))
- intelFlush(&intel->ctx);
- intel_prepare_render(intel);
+ intel_flush(&intel->ctx);
+
{
dri_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ);
@@ -473,14 +473,12 @@ intelTexImage(GLcontext * ctx,
pixels, unpack, "glTexImage");
}
- intel_prepare_render(intel);
-
if (intelImage->mt) {
if (pixels != NULL) {
/* Flush any queued rendering with the texture before mapping. */
if (drm_intel_bo_references(intel->batch->buf,
intelImage->mt->region->buffer)) {
- intelFlush(ctx);
+ intel_flush(ctx);
}
texImage->Data = intel_miptree_image_map(intel,
intelImage->mt,
@@ -638,7 +636,7 @@ intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
* make sure rendering is complete.
* We could probably predicate this on texObj->_RenderToTexture
*/
- intelFlush(ctx);
+ intel_flush(ctx);
/* Map */
if (intelImage->mt) {
@@ -728,7 +726,8 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
if (!intelObj)
return;
- if (dPriv->lastStamp != dPriv->dri2.stamp)
+ if (dPriv->lastStamp != dPriv->dri2.stamp ||
+ !pDRICtx->driScreenPriv->dri2.useInvalidate)
intel_update_renderbuffers(pDRICtx, dPriv);
rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
index c35d2e8757..4f5c26acf2 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@@ -59,7 +59,7 @@ intelTexSubimage(GLcontext * ctx,
_mesa_lookup_enum_by_nr(target),
level, xoffset, yoffset, width, height);
- intelFlush(ctx);
+ intel_flush(ctx);
if (compressed)
pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize,
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index e432afc3d4..34d22b4559 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -21,6 +21,7 @@ C_SOURCES = \
radeon_dataflow.c \
radeon_dataflow_deadcode.c \
radeon_dataflow_swizzles.c \
+ radeon_optimize.c \
r3xx_fragprog.c \
r300_fragprog.c \
r300_fragprog_swizzle.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index cfa48a59e3..5d5de2f1b2 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -56,7 +56,8 @@ static const struct swizzle_data native_swizzles[] = {
{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1},
{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1},
{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0},
- {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0}
+ {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0},
+ {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0}
};
static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
@@ -221,6 +222,7 @@ unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
+ case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF;
default: return R300_ALU_ARGA_ONE;
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 25bf373b6f..3e88ccbc46 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -152,6 +152,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "after deadcode");
+ rc_optimize(&c->Base);
+
+ debug_program_log(c, "after dataflow optimize");
+
rc_dataflow_swizzles(&c->Base);
if (c->Base.Error)
return;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
index 853b2becd1..0eab18c344 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
@@ -146,7 +146,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da
unsigned comp;
for(comp = 0; comp < c->Constants[index].Size; ++comp) {
if (c->Constants[index].u.Immediate[comp] == data) {
- *swizzle = RC_MAKE_SWIZZLE(comp, comp, comp, comp);
+ *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
return index;
}
}
@@ -159,7 +159,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da
if (free_index >= 0) {
unsigned comp = c->Constants[free_index].Size++;
c->Constants[free_index].u.Immediate[comp] = data;
- *swizzle = RC_MAKE_SWIZZLE(comp, comp, comp, comp);
+ *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
return free_index;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 27274f0712..1979e7e4e4 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -132,7 +132,7 @@ struct r300_fragment_program_external_state {
* 2 - GL_ALPHA
* depending on the depth texture mode.
*/
- unsigned depth_texture_mode : 2;
+ unsigned depth_texture_swizzle:12;
/**
* If the sampler is used as a shadow sampler,
@@ -144,6 +144,12 @@ struct r300_fragment_program_external_state {
unsigned texture_compare_func : 3;
/**
+ * No matter what the sampler type is,
+ * this field turns it into a shadow sampler.
+ */
+ unsigned compare_mode_enabled : 1;
+
+ /**
* If the sampler needs to fake NPOT, this field is set.
*/
unsigned fake_npot : 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
index 16e2f3a218..0e6c62541f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -30,7 +30,7 @@
#include "radeon_program.h"
-static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata)
{
struct rc_sub_instruction * inst = &fullinst->U.I;
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
@@ -46,18 +46,15 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb,
refmask &= RC_MASK_XYZW;
- for(unsigned int chan = 0; chan < 4; ++chan) {
- if (GET_BIT(refmask, chan)) {
- cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan);
- }
- }
+ if (refmask)
+ cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask);
if (refmask && inst->SrcReg[src].RelAddr)
cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
}
}
-static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
struct rc_pair_instruction * inst = &fullinst->U.P;
unsigned int refmasks[3] = { 0, 0, 0 };
@@ -84,27 +81,23 @@ static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, v
}
for(unsigned int src = 0; src < 3; ++src) {
- if (inst->RGB.Src[src].Used) {
- for(unsigned int chan = 0; chan < 3; ++chan) {
- if (GET_BIT(refmasks[src], chan))
- cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan);
- }
- }
+ if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ))
+ cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index,
+ refmasks[src] & RC_MASK_XYZ);
- if (inst->Alpha.Src[src].Used) {
- if (GET_BIT(refmasks[src], 3))
- cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3);
- }
+ if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W))
+ cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W);
}
}
/**
- * Calls a callback function for all sourced register channels.
+ * Calls a callback function for all register reads.
*
- * This is conservative, i.e. channels may be called multiple times,
- * and the writemask of the instruction is not taken into account.
+ * This is conservative, i.e. if the same register is referenced multiple times,
+ * the callback may also be called multiple times.
+ * Also, the writemask of the instruction is not taken into account.
*/
-void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata)
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
{
if (inst->Type == RC_INSTRUCTION_NORMAL) {
reads_normal(inst, cb, userdata);
@@ -115,44 +108,39 @@ void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void *
-static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
struct rc_sub_instruction * inst = &fullinst->U.I;
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
- if (opcode->HasDstReg) {
- for(unsigned int chan = 0; chan < 4; ++chan) {
- if (GET_BIT(inst->DstReg.WriteMask, chan))
- cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan);
- }
- }
+ if (opcode->HasDstReg && inst->DstReg.WriteMask)
+ cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask);
if (inst->WriteALUResult)
- cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
+ cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
}
-static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
struct rc_pair_instruction * inst = &fullinst->U.P;
- for(unsigned int chan = 0; chan < 3; ++chan) {
- if (GET_BIT(inst->RGB.WriteMask, chan))
- cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan);
- }
+ if (inst->RGB.WriteMask)
+ cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask);
if (inst->Alpha.WriteMask)
- cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, 3);
+ cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W);
if (inst->WriteALUResult)
- cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
+ cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
}
/**
- * Calls a callback function for all written register channels.
+ * Calls a callback function for all register writes in the instruction,
+ * reporting writemasks to the callback function.
*
* \warning Does not report output registers for paired instructions!
*/
-void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata)
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
{
if (inst->Type == RC_INSTRUCTION_NORMAL) {
writes_normal(inst, cb, userdata);
@@ -162,6 +150,48 @@ void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void *
}
+struct mask_to_chan_data {
+ void * UserData;
+ rc_read_write_chan_fn Fn;
+};
+
+static void mask_to_chan_cb(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct mask_to_chan_data * d = data;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (GET_BIT(mask, chan))
+ d->Fn(d->UserData, inst, file, index, chan);
+ }
+}
+
+/**
+ * Calls a callback function for all sourced register channels.
+ *
+ * This is conservative, i.e. the callback may be called multiple times for the same channel,
+ * and the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+ struct mask_to_chan_data d;
+ d.UserData = userdata;
+ d.Fn = cb;
+ rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d);
+}
+
+/**
+ * Calls a callback function for all written register channels.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+ struct mask_to_chan_data d;
+ d.UserData = userdata;
+ d.Fn = cb;
+ rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d);
+}
+
static void remap_normal_instruction(struct rc_instruction * fullinst,
rc_remap_register_fn cb, void * userdata)
{
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
index 62cda20eea..60a6e192a9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -39,10 +39,15 @@ struct rc_swizzle_caps;
* Help analyze and modify the register accesses of instructions.
*/
/*@{*/
-typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst,
+typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int chan);
-void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
-void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+
+typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask);
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file * pfile, unsigned int * pindex);
@@ -60,4 +65,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
void rc_dataflow_swizzles(struct radeon_compiler * c);
/*@}*/
+void rc_optimize(struct radeon_compiler * c);
+
#endif /* RADEON_DATAFLOW_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
index d889612f4f..863654cf68 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
@@ -150,7 +150,7 @@ static void allocate_and_insert_proxies(struct emulate_branch_state * s,
sap.Proxies = proxies;
for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
- rc_for_all_writes(inst, scan_write, &sap);
+ rc_for_all_writes_mask(inst, scan_write, &sap);
rc_remap_registers(inst, remap_proxy_function, &sap);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
new file mode 100644
index 0000000000..21d7210888
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+
+static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
+{
+ struct rc_src_register combine;
+ combine.File = inner.File;
+ combine.Index = inner.Index;
+ combine.RelAddr = inner.RelAddr;
+ if (outer.Abs) {
+ combine.Abs = 1;
+ combine.Negate = outer.Negate;
+ } else {
+ combine.Abs = inner.Abs;
+ combine.Negate = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(outer.Swizzle, chan);
+ if (swz < 4)
+ combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
+ }
+ combine.Negate ^= outer.Negate;
+ }
+ combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
+ return combine;
+}
+
+struct peephole_state {
+ struct radeon_compiler * C;
+ struct rc_instruction * Mov;
+ unsigned int Conflict:1;
+
+ /** Whether Mov's source has been clobbered */
+ unsigned int SourceClobbered:1;
+
+ /** Which components of Mov's destination register are still from that Mov? */
+ unsigned int MovMask:4;
+
+ /** Which components of Mov's destination register are clearly *not* from that Mov */
+ unsigned int DefinedMask:4;
+
+ /** Which components of Mov's source register are sourced */
+ unsigned int SourcedMask:4;
+
+ /** Branch depth beyond Mov; negative value indicates we left the Mov's block */
+ int BranchDepth;
+};
+
+static void peephole_scan_read(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct peephole_state * s = data;
+
+ if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index)
+ return;
+
+ /* These instructions cannot read from the constants file.
+ * see radeonTransformTEX()
+ */
+ if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
+ s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT &&
+ (inst->U.I.Opcode == RC_OPCODE_TEX ||
+ inst->U.I.Opcode == RC_OPCODE_TXB ||
+ inst->U.I.Opcode == RC_OPCODE_TXP ||
+ inst->U.I.Opcode == RC_OPCODE_KIL)){
+ s->Conflict = 1;
+ return;
+ }
+ if ((mask & s->MovMask) == mask) {
+ if (s->SourceClobbered) {
+ s->Conflict = 1;
+ }
+ } else if ((mask & s->DefinedMask) == mask) {
+ /* read from something entirely written by other instruction: this is okay */
+ } else {
+ /* read from component combination that is not well-defined without
+ * the MOV: cannot remove it */
+ s->Conflict = 1;
+ }
+}
+
+static void peephole_scan_write(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct peephole_state * s = data;
+
+ if (s->BranchDepth < 0)
+ return;
+
+ if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
+ s->MovMask &= ~mask;
+ if (s->BranchDepth == 0)
+ s->DefinedMask |= mask;
+ else
+ s->DefinedMask &= ~mask;
+ }
+ if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
+ if (mask & s->SourcedMask)
+ s->SourceClobbered = 1;
+ } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
+ s->SourceClobbered = 1;
+ }
+}
+
+static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov)
+{
+ struct peephole_state s;
+
+ if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult)
+ return;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+ s.Mov = inst_mov;
+ s.MovMask = inst_mov->U.I.DstReg.WriteMask;
+ s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
+
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
+ s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
+ }
+
+ /* 1st pass: Check whether all subsequent readers can be changed */
+ for(struct rc_instruction * inst = inst_mov->Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_reads_mask(inst, peephole_scan_read, &s);
+ rc_for_all_writes_mask(inst, peephole_scan_write, &s);
+ if (s.Conflict)
+ return;
+
+ if (s.BranchDepth >= 0) {
+ if (inst->U.I.Opcode == RC_OPCODE_IF) {
+ s.BranchDepth++;
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ s.BranchDepth--;
+ if (s.BranchDepth < 0) {
+ s.DefinedMask &= ~s.MovMask;
+ s.MovMask = 0;
+ }
+ }
+ }
+ }
+
+ if (s.Conflict)
+ return;
+
+ /* 2nd pass: We can satisfy all readers, so switch them over all at once */
+ s.MovMask = inst_mov->U.I.DstReg.WriteMask;
+ s.BranchDepth = 0;
+
+ for(struct rc_instruction * inst = inst_mov->Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
+ unsigned int refmask = 0;
+
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+ refmask |= (1 << swz) & RC_MASK_XYZW;
+ }
+
+ if ((refmask & s.MovMask) == refmask)
+ inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
+ }
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
+ inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
+ s.MovMask &= ~inst->U.I.DstReg.WriteMask;
+ }
+ }
+
+ if (s.BranchDepth >= 0) {
+ if (inst->U.I.Opcode == RC_OPCODE_IF) {
+ s.BranchDepth++;
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ s.BranchDepth--;
+ if (s.BranchDepth < 0)
+ break; /* no more readers after this point */
+ }
+ }
+ }
+
+ /* Finally, remove the original MOV instruction */
+ rc_remove_instruction(inst_mov);
+}
+
+/**
+ * Check if a source register is actually always the same
+ * swizzle constant.
+ */
+static int is_src_uniform_constant(struct rc_src_register src,
+ rc_swizzle * pswz, unsigned int * pnegate)
+{
+ int have_used = 0;
+
+ if (src.File != RC_FILE_NONE) {
+ *pswz = 0;
+ return 0;
+ }
+
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(src.Swizzle, chan);
+ if (swz < 4) {
+ *pswz = 0;
+ return 0;
+ }
+ if (swz == RC_SWIZZLE_UNUSED)
+ continue;
+
+ if (!have_used) {
+ *pswz = swz;
+ *pnegate = GET_BIT(src.Negate, chan);
+ have_used = 1;
+ } else {
+ if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
+ *pswz = 0;
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
+
+
+static void constant_folding_mad(struct rc_instruction * inst)
+{
+ rc_swizzle swz;
+ unsigned int negate;
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MUL;
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_ADD;
+ if (negate)
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_ADD;
+ if (negate)
+ inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+ return;
+ }
+ }
+}
+
+static void constant_folding_mul(struct rc_instruction * inst)
+{
+ rc_swizzle swz;
+ unsigned int negate;
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+ if (negate)
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ if (negate)
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+ return;
+ }
+ }
+}
+
+static void constant_folding_add(struct rc_instruction * inst)
+{
+ rc_swizzle swz;
+ unsigned int negate;
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ return;
+ }
+ }
+}
+
+
+/**
+ * Replace 0.0, 1.0 and 0.5 immediate constants by their
+ * respective swizzles. Simplify instructions like ADD dst, src, 0;
+ */
+static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
+ inst->U.I.SrcReg[src].RelAddr ||
+ inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
+ continue;
+
+ struct rc_constant * constant =
+ &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
+
+ if (constant->Type != RC_CONSTANT_IMMEDIATE)
+ continue;
+
+ struct rc_src_register newsrc = inst->U.I.SrcReg[src];
+ int have_real_reference = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
+ if (swz >= 4)
+ continue;
+
+ unsigned int newswz;
+ float imm = constant->u.Immediate[swz];
+ float baseimm = imm;
+ if (imm < 0.0)
+ baseimm = -baseimm;
+
+ if (baseimm == 0.0) {
+ newswz = RC_SWIZZLE_ZERO;
+ } else if (baseimm == 1.0) {
+ newswz = RC_SWIZZLE_ONE;
+ } else if (baseimm == 0.5) {
+ newswz = RC_SWIZZLE_HALF;
+ } else {
+ have_real_reference = 1;
+ continue;
+ }
+
+ SET_SWZ(newsrc.Swizzle, chan, newswz);
+ if (imm < 0.0 && !newsrc.Abs)
+ newsrc.Negate ^= 1 << chan;
+ }
+
+ if (!have_real_reference) {
+ newsrc.File = RC_FILE_NONE;
+ newsrc.Index = 0;
+ }
+
+ /* don't make the swizzle worse */
+ if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
+ c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+ continue;
+
+ inst->U.I.SrcReg[src] = newsrc;
+ }
+
+ /* Simplify instructions based on constants */
+ if (inst->U.I.Opcode == RC_OPCODE_MAD)
+ constant_folding_mad(inst);
+
+ /* note: MAD can simplify to MUL or ADD */
+ if (inst->U.I.Opcode == RC_OPCODE_MUL)
+ constant_folding_mul(inst);
+ else if (inst->U.I.Opcode == RC_OPCODE_ADD)
+ constant_folding_add(inst);
+}
+
+void rc_optimize(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst = c->Program.Instructions.Next;
+ while(inst != &c->Program.Instructions) {
+ struct rc_instruction * cur = inst;
+ inst = inst->Next;
+
+ constant_folding(c, cur);
+
+ if (cur->U.I.Opcode == RC_OPCODE_MOV) {
+ peephole(c, cur);
+ /* cur may no longer be part of the program */
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index fdfee86701..8a912da461 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -159,7 +159,7 @@ static int try_add_live_intervals(struct regalloc_state * s,
}
static void scan_callback(void * data, struct rc_instruction * inst,
- rc_register_file file, unsigned int index, unsigned int chan)
+ rc_register_file file, unsigned int index, unsigned int mask)
{
struct regalloc_state * s = data;
struct register_info * reg;
@@ -191,8 +191,8 @@ static void compute_live_intervals(struct regalloc_state * s)
for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
inst != &s->C->Program.Instructions;
inst = inst->Next) {
- rc_for_all_reads(inst, scan_callback, s);
- rc_for_all_writes(inst, scan_callback, s);
+ rc_for_all_reads_mask(inst, scan_callback, s);
+ rc_for_all_writes_mask(inst, scan_callback, s);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index df67aafe02..a279549ff8 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -448,8 +448,8 @@ static void schedule_block(struct r300_fragment_program_compiler * c,
* counter-intuitive, to account for the case where an
* instruction writes to the same register as it reads
* from. */
- rc_for_all_writes(inst, &scan_write, &s);
- rc_for_all_reads(inst, &scan_read, &s);
+ rc_for_all_writes_chan(inst, &scan_write, &s);
+ rc_for_all_reads_chan(inst, &scan_read, &s);
DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 05b874ba7c..5ba2c29408 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -556,6 +556,29 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
+static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+ unsigned constant_swizzle;
+ int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
+ 0.0000000000000000001,
+ &constant_swizzle);
+
+ /* MOV dst, src */
+ emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg),
+ inst->U.I.SrcReg[0]);
+
+ /* MAX dst.y, src, 0.00...001 */
+ emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+ dstregtmpmask(tempreg, RC_MASK_Y),
+ srcreg(RC_FILE_TEMPORARY, tempreg),
+ srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+
+ inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg);
+}
+
/**
* For use with radeonLocalTransform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
@@ -572,6 +595,7 @@ int r300_transform_vertex_alu(
case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
+ case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
@@ -789,7 +813,7 @@ int radeonTransformDeriv(struct radeon_compiler* c,
if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
return 0;
- inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE);
+ inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111;
inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
return 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
index 842012def0..2ddf60b677 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
@@ -115,6 +115,7 @@ typedef enum {
#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z)
#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
index b4ba0b3f87..8336e58d55 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -97,7 +97,8 @@ int radeonTransformTEX(
/* ARB_shadow & EXT_shadow_funcs */
if (inst->U.I.Opcode != RC_OPCODE_KIL &&
- c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) {
+ ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
+ (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
@@ -113,52 +114,75 @@ int radeonTransformTEX(
return 1;
} else {
rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
- unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode;
- struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
- struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
- struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
- int pass, fail;
+ struct rc_instruction * inst_rcp = NULL;
+ struct rc_instruction * inst_mad;
+ struct rc_instruction * inst_cmp;
+ unsigned tmp_texsample = rc_find_free_temporary(c);
+ unsigned tmp_sum = rc_find_free_temporary(c);
+ unsigned tmp_recip_w = 0;
+ int pass, fail, tex;
- inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
- inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c);
- inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
- inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+ /* Save the output register. */
+ struct rc_dst_register output_reg = inst->U.I.DstReg;
- inst_cmp->U.I.DstReg = inst->U.I.DstReg;
+ /* Redirect TEX to a new temp. */
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst->U.I.DstReg.Index = tmp_texsample;
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
- inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+ if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+ tmp_recip_w = rc_find_free_temporary(c);
+
+ /* Compute 1/W. */
+ inst_rcp = rc_insert_new_instruction(c, inst);
+ inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+ inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rcp->U.I.DstReg.Index = tmp_recip_w;
+ inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+ }
+
+ /* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). */
+ inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
- inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mad->U.I.DstReg.Index = tmp_sum;
inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
- inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
- inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index;
- inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
- inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
- inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index;
- if (depthmode == 0) /* GL_LUMINANCE */
- inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z);
- else if (depthmode == 2) /* GL_ALPHA */
- inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW;
+ if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[1].Index = tmp_recip_w;
+ inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+ tex = 2;
+ } else {
+ inst_mad->U.I.Opcode = RC_OPCODE_ADD;
+ tex = 1;
+ }
+ inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[tex].Index = tmp_texsample;
+ inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle;
- /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
- * r < tex <=> -tex+r < 0
- * r >= tex <=> not (-tex+r < 0 */
+ /* Fake EQUAL/NOTEQUAL, it seems to pass some tests surprisingly. */
+ if (comparefunc == RC_COMPARE_FUNC_EQUAL) {
+ comparefunc = RC_COMPARE_FUNC_GEQUAL;
+ } else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
+ comparefunc = RC_COMPARE_FUNC_LESS;
+ }
+
+ /* Recall that SrcReg[0] is r, SrcReg[tex] is tex and:
+ * LESS: r < tex <=> -tex+r < 0
+ * GEQUAL: r >= tex <=> not (-tex+r < 0)
+ * GREATER: r > tex <=> tex-r < 0
+ * LEQUAL: r <= tex <=> not ( tex-r < 0)
+ *
+ * This negates either r or tex: */
if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
- inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW;
+ inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW;
else
inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
- inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
- /* DstReg has been filled out above */
- inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
- inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
-
+ /* This negates the whole expression: */
if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
pass = 1;
fail = 2;
@@ -167,6 +191,11 @@ int radeonTransformTEX(
fail = 1;
}
+ inst_cmp = rc_insert_new_instruction(c, inst_mad);
+ inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+ inst_cmp->U.I.DstReg = output_reg;
+ inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 2b7c93a957..e678a42ca2 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -46,13 +46,13 @@
#include "radeon_mesa_to_rc.h"
-static GLuint build_dtm(GLuint depthmode)
+static GLuint build_dts(GLuint depthmode)
{
switch(depthmode) {
default:
- case GL_LUMINANCE: return 0;
- case GL_INTENSITY: return 1;
- case GL_ALPHA: return 2;
+ case GL_LUMINANCE: return RC_SWIZZLE_XYZZ;
+ case GL_INTENSITY: return RC_SWIZZLE_XYZW;
+ case GL_ALPHA: return RC_SWIZZLE_WWWW;
}
}
@@ -78,7 +78,7 @@ static void build_state(
if (fp->Base.ShadowSamplers & (1 << unit)) {
struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
- state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
+ state->unit[unit].depth_texture_swizzle = build_dts(tex->DepthMode);
state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
}
}
diff --git a/src/mesa/drivers/fbdev/Makefile b/src/mesa/drivers/fbdev/Makefile
index ee73f29a46..5120e1ac9e 100644
--- a/src/mesa/drivers/fbdev/Makefile
+++ b/src/mesa/drivers/fbdev/Makefile
@@ -11,10 +11,11 @@ OBJECTS = $(SOURCES:.c=.o)
INCLUDE_DIRS = \
-I$(TOP)/include \
+ -I$(TOP)/src/mapi \
-I$(TOP)/src/mesa \
-I$(TOP)/src/mesa/main
-CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mesa/libglapi.a
+CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mapi/glapi/libglapi.a
.c.o:
diff --git a/src/mesa/drivers/glslcompiler/Makefile b/src/mesa/drivers/glslcompiler/Makefile
index 7dcf9a6541..6da9f93f59 100644
--- a/src/mesa/drivers/glslcompiler/Makefile
+++ b/src/mesa/drivers/glslcompiler/Makefile
@@ -11,11 +11,12 @@ OBJECTS = \
glslcompiler.o \
../common/driverfuncs.o \
../../libmesa.a \
- ../../libglapi.a
+ $(TOP)/src/mapi/glapi/libglapi.a
INCLUDES = \
-I$(TOP)/include \
-I$(TOP)/include/GL/internal \
+ -I$(TOP)/src/mapi \
-I$(TOP)/src/mesa \
-I$(TOP)/src/mesa/main \
-I$(TOP)/src/mesa/glapi \
diff --git a/src/mesa/drivers/osmesa/Makefile b/src/mesa/drivers/osmesa/Makefile
index 9010bbd130..ea49a89659 100644
--- a/src/mesa/drivers/osmesa/Makefile
+++ b/src/mesa/drivers/osmesa/Makefile
@@ -16,6 +16,7 @@ OBJECTS = $(SOURCES:.c=.o)
INCLUDE_DIRS = \
-I$(TOP)/include \
+ -I$(TOP)/src/mapi \
-I$(TOP)/src/mesa \
-I$(TOP)/src/mesa/main
@@ -23,7 +24,7 @@ INCLUDE_DIRS = \
ifeq ($(DRIVER_DIRS), osmesa)
CORE_MESA = \
$(TOP)/src/mesa/libmesa.a \
- $(TOP)/src/mesa/libglapi.a \
+ $(TOP)/src/mapi/glapi/libglapi.a \
$(TOP)/src/glsl/cl/libglslcl.a \
$(TOP)/src/glsl/pp/libglslpp.a
else
diff --git a/src/mesa/drivers/osmesa/osmesa.c b/src/mesa/drivers/osmesa/osmesa.c
index e20507ae92..ead4050397 100644
--- a/src/mesa/drivers/osmesa/osmesa.c
+++ b/src/mesa/drivers/osmesa/osmesa.c
@@ -1004,7 +1004,20 @@ new_osmesa_renderbuffer(GLcontext *ctx, GLenum format, GLenum type)
rb->AllocStorage = osmesa_renderbuffer_storage;
rb->InternalFormat = GL_RGBA;
- rb->Format = MESA_FORMAT_RGBA8888;
+ switch (type) {
+ case GL_UNSIGNED_BYTE:
+ rb->Format = MESA_FORMAT_RGBA8888;
+ break;
+ case GL_UNSIGNED_SHORT:
+ rb->Format = MESA_FORMAT_RGBA_16;
+ break;
+ case GL_FLOAT:
+ rb->Format = MESA_FORMAT_RGBA_FLOAT32;
+ break;
+ default:
+ assert(0 && "Unexpected type in new_osmesa_renderbuffer()");
+ rb->Format = MESA_FORMAT_RGBA8888;
+ }
rb->_BaseFormat = GL_RGBA;
rb->DataType = type;
}
@@ -1048,7 +1061,6 @@ OSMesaCreateContextExt( GLenum format, GLint depthBits, GLint stencilBits,
struct dd_function_table functions;
GLint rind, gind, bind, aind;
GLint redBits = 0, greenBits = 0, blueBits = 0, alphaBits =0;
- GLenum type = CHAN_TYPE;
rind = gind = bind = aind = 0;
if (format==OSMESA_RGBA) {
@@ -1167,11 +1179,9 @@ OSMesaCreateContextExt( GLenum format, GLint depthBits, GLint stencilBits,
return NULL;
}
- /* create front color buffer in user-provided memory (no back buffer) */
- osmesa->rb = new_osmesa_renderbuffer(&osmesa->mesa, format, type);
- _mesa_add_renderbuffer(osmesa->gl_buffer, BUFFER_FRONT_LEFT, osmesa->rb);
- assert(osmesa->rb->RefCount == 2);
-
+ /* Create depth/stencil/accum buffers. We'll create the color
+ * buffer later in OSMesaMakeCurrent().
+ */
_mesa_add_soft_renderbuffers(osmesa->gl_buffer,
GL_FALSE, /* color */
osmesa->gl_visual->haveDepthBuffer,
@@ -1308,11 +1318,23 @@ OSMesaMakeCurrent( OSMesaContext osmesa, void *buffer, GLenum type,
*/
_glapi_check_multithread();
+
+ /* Create a front/left color buffer which wraps the user-provided buffer.
+ * There is no back color buffer.
+ * If the user tries to use an 8, 16 or 32-bit/channel buffer that
+ * doesn't match what Mesa was compiled for (CHAN_BITS) the
+ * _mesa_add_renderbuffer() function will create a "wrapper" renderbuffer
+ * that converts rendering from CHAN_BITS to the user-requested channel
+ * size.
+ */
+ osmesa->rb = new_osmesa_renderbuffer(&osmesa->mesa, osmesa->format, type);
+ _mesa_add_renderbuffer(osmesa->gl_buffer, BUFFER_FRONT_LEFT, osmesa->rb);
+ assert(osmesa->rb->RefCount == 2);
+
/* Set renderbuffer fields. Set width/height = 0 to force
* osmesa_renderbuffer_storage() being called by _mesa_resize_framebuffer()
*/
osmesa->rb->Data = buffer;
- osmesa->rb->DataType = type;
osmesa->rb->Width = osmesa->rb->Height = 0;
/* Set the framebuffer's size. This causes the
diff --git a/src/mesa/drivers/x11/Makefile b/src/mesa/drivers/x11/Makefile
index 5e427d2d5c..b5b0c1f11a 100644
--- a/src/mesa/drivers/x11/Makefile
+++ b/src/mesa/drivers/x11/Makefile
@@ -40,11 +40,12 @@ OBJECTS = $(SOURCES:.c=.o)
INCLUDE_DIRS = \
-I$(TOP)/include \
+ -I$(TOP)/src/mapi \
-I$(TOP)/src/mesa \
-I$(TOP)/src/mesa/main \
$(X11_INCLUDES)
-CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mesa/libglapi.a
+CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mapi/glapi/libglapi.a
diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c
index a1723fa37b..dac1668cfe 100644
--- a/src/mesa/drivers/x11/xm_api.c
+++ b/src/mesa/drivers/x11/xm_api.c
@@ -210,7 +210,7 @@ gamma_adjust( GLfloat gamma, GLint value, GLint max )
}
else {
double x = (double) value / (double) max;
- return IROUND_POS((GLfloat) max * _mesa_pow(x, 1.0F/gamma));
+ return IROUND_POS((GLfloat) max * pow(x, 1.0F/gamma));
}
}
@@ -846,19 +846,19 @@ setup_8bit_hpcr(XMesaVisual v)
g = 1.0 / v->RedGamma;
for (i=0; i<256; i++) {
- GLint red = IROUND_POS(255.0 * _mesa_pow( hpcr_rgbTbl[0][i]/255.0, g ));
+ GLint red = IROUND_POS(255.0 * pow( hpcr_rgbTbl[0][i]/255.0, g ));
v->hpcr_rgbTbl[0][i] = CLAMP( red, 16, 239 );
}
g = 1.0 / v->GreenGamma;
for (i=0; i<256; i++) {
- GLint green = IROUND_POS(255.0 * _mesa_pow( hpcr_rgbTbl[1][i]/255.0, g ));
+ GLint green = IROUND_POS(255.0 * pow( hpcr_rgbTbl[1][i]/255.0, g ));
v->hpcr_rgbTbl[1][i] = CLAMP( green, 16, 239 );
}
g = 1.0 / v->BlueGamma;
for (i=0; i<256; i++) {
- GLint blue = IROUND_POS(255.0 * _mesa_pow( hpcr_rgbTbl[2][i]/255.0, g ));
+ GLint blue = IROUND_POS(255.0 * pow( hpcr_rgbTbl[2][i]/255.0, g ));
v->hpcr_rgbTbl[2][i] = CLAMP( blue, 32, 223 );
}
v->undithered_pf = PF_HPCR; /* can't really disable dithering for now */