summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h3
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h3
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c38
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.c6
-rw-r--r--src/gallium/auxiliary/draw/draw_vertex.h44
-rw-r--r--src/gallium/auxiliary/gallivm/soabuiltins.c1
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c18
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.h2
-rw-r--r--src/gallium/drivers/i915simple/i915_prim_emit.c4
-rw-r--r--src/gallium/drivers/i915simple/i915_state_derived.c4
-rw-r--r--src/gallium/drivers/softpipe/sp_setup.c12
-rw-r--r--src/mesa/main/api_exec.c4
-rw-r--r--src/mesa/main/mfeatures.h2
-rw-r--r--src/mesa/main/mtypes.h3
-rw-r--r--src/mesa/main/queryobj.c2
-rw-r--r--src/mesa/main/queryobj.h2
-rw-r--r--src/mesa/main/state.c42
-rw-r--r--src/mesa/main/state.h3
-rw-r--r--src/mesa/main/texenvprogram.c122
-rw-r--r--src/mesa/shader/prog_cache.c29
-rw-r--r--src/mesa/shader/prog_cache.h2
-rw-r--r--src/mesa/shader/prog_statevars.c27
-rw-r--r--src/mesa/shader/prog_statevars.h1
-rw-r--r--src/mesa/shader/program.c41
-rw-r--r--src/mesa/state_tracker/st_atom_rasterizer.c5
-rw-r--r--src/mesa/state_tracker/st_cb_texture.c4
-rw-r--r--src/mesa/state_tracker/st_draw.c2
-rw-r--r--src/mesa/vbo/vbo_exec_api.c44
-rw-r--r--src/mesa/vbo/vbo_exec_array.c41
-rw-r--r--src/mesa/vbo/vbo_exec_draw.c7
-rw-r--r--src/mesa/vbo/vbo_save_draw.c33
36 files changed, 455 insertions, 122 deletions
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 36751c2621..41a4cba1dd 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -272,6 +272,14 @@ draw_enable_point_sprites(struct draw_context *draw, boolean enable)
}
+void
+draw_set_force_passthrough( struct draw_context *draw, boolean enable )
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->force_passthrough = enable;
+}
+
+
/**
* Ask the draw module for the location/slot of the given vertex attribute in
* a post-transformed vertex.
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index 0ab3681b64..3eeb453531 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -160,6 +160,9 @@ void draw_set_render( struct draw_context *draw,
void draw_set_driver_clipping( struct draw_context *draw,
boolean bypass_clipping );
+void draw_set_force_passthrough( struct draw_context *draw,
+ boolean enable );
+
/*******************************************************************************
* Draw pipeline
*/
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index c0cf4269db..9825e116c3 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -231,9 +231,9 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim )
unsigned emit_sz = 0;
unsigned src_buffer = 0;
unsigned output_format;
- unsigned src_offset = (vbuf->vinfo->src_index[i] * 4 * sizeof(float) );
+ unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * sizeof(float) );
- switch (vbuf->vinfo->emit[i]) {
+ switch (vbuf->vinfo->attrib[i].emit) {
case EMIT_4F:
output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
emit_sz = 4 * sizeof(float);
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 40a72c9dcf..5d531146c5 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -163,12 +163,15 @@ struct draw_context
struct {
boolean bypass_clipping;
+ boolean bypass_vs;
} driver;
boolean flushing; /**< debugging/sanity */
boolean suspend_flushing; /**< internally set */
boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */
+ boolean force_passthrough; /**< never clip or shade */
+
/* pipe state that we need: */
const struct pipe_rasterizer_state *rasterizer;
struct pipe_viewport_state viewport;
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 669c11c993..87ec6ae20c 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -69,26 +69,26 @@ draw_pt_arrays(struct draw_context *draw,
return TRUE;
}
-
- if (!draw->render) {
- opt |= PT_PIPELINE;
- }
-
- if (draw_need_pipeline(draw,
- draw->rasterizer,
- prim)) {
- opt |= PT_PIPELINE;
- }
-
- if (!draw->bypass_clipping && !draw->pt.test_fse) {
- opt |= PT_CLIPTEST;
+ if (!draw->force_passthrough) {
+ if (!draw->render) {
+ opt |= PT_PIPELINE;
+ }
+
+ if (draw_need_pipeline(draw,
+ draw->rasterizer,
+ prim)) {
+ opt |= PT_PIPELINE;
+ }
+
+ if (!draw->bypass_clipping && !draw->pt.test_fse) {
+ opt |= PT_CLIPTEST;
+ }
+
+ if (!draw->rasterizer->bypass_vs) {
+ opt |= PT_SHADE;
+ }
}
-
- if (!draw->rasterizer->bypass_vs) {
- opt |= PT_SHADE;
- }
-
-
+
if (opt == 0)
middle = draw->pt.middle.fetch_emit;
else if (opt == PT_SHADE && !draw->pt.no_fse)
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index d4eca80588..d520b05869 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -84,11 +84,11 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
unsigned emit_sz = 0;
unsigned src_buffer = 0;
unsigned output_format;
- unsigned src_offset = (vinfo->src_index[i] * 4 * sizeof(float) );
+ unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) );
- switch (vinfo->emit[i]) {
+ switch (vinfo->attrib[i].emit) {
case EMIT_4F:
output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
emit_sz = 4 * sizeof(float);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 5a4db6cfe5..3966ad48ba 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -121,7 +121,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
memset(&key, 0, sizeof(key));
for (i = 0; i < vinfo->num_attribs; i++) {
- const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->src_index[i]];
+ const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->attrib[i].src_index];
unsigned emit_sz = 0;
unsigned input_format = src->src_format;
@@ -129,7 +129,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
unsigned input_offset = src->src_offset;
unsigned output_format;
- switch (vinfo->emit[i]) {
+ switch (vinfo->attrib[i].emit) {
case EMIT_4F:
output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
emit_sz = 4 * sizeof(float);
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index a0e08dd10a..f7e6a1a8ee 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -133,7 +133,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
for (i = 0; i < vinfo->num_attribs; i++) {
unsigned emit_sz = 0;
- switch (vinfo->emit[i]) {
+ switch (vinfo->attrib[i].emit) {
case EMIT_4F:
emit_sz = 4 * sizeof(float);
break;
@@ -161,8 +161,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
* numbers, not to positions in the hw vertex description --
* that's handled by the output_offset field.
*/
- fse->key.element[i].out.format = vinfo->emit[i];
- fse->key.element[i].out.vs_output = vinfo->src_index[i];
+ fse->key.element[i].out.format = vinfo->attrib[i].emit;
+ fse->key.element[i].out.vs_output = vinfo->attrib[i].src_index;
fse->key.element[i].out.offset = dst_offset;
dst_offset += emit_sz;
diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c
index 1446f785c5..3214213e44 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.c
+++ b/src/gallium/auxiliary/draw/draw_vertex.c
@@ -49,7 +49,7 @@ draw_compute_vertex_size(struct vertex_info *vinfo)
vinfo->size = 0;
for (i = 0; i < vinfo->num_attribs; i++) {
- switch (vinfo->emit[i]) {
+ switch (vinfo->attrib[i].emit) {
case EMIT_OMIT:
break;
case EMIT_4UB:
@@ -81,8 +81,8 @@ draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data)
unsigned i, j;
for (i = 0; i < vinfo->num_attribs; i++) {
- j = vinfo->src_index[i];
- switch (vinfo->emit[i]) {
+ j = vinfo->attrib[i].src_index;
+ switch (vinfo->attrib[i].emit) {
case EMIT_OMIT:
debug_printf("EMIT_OMIT:");
break;
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index 16c65c4317..a943607d7e 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -75,12 +75,41 @@ struct vertex_info
{
uint num_attribs;
uint hwfmt[4]; /**< hardware format info for this format */
- enum interp_mode interp_mode[PIPE_MAX_SHADER_INPUTS];
- enum attrib_emit emit[PIPE_MAX_SHADER_INPUTS]; /**< EMIT_x */
- uint src_index[PIPE_MAX_SHADER_INPUTS]; /**< map to post-xform attribs */
uint size; /**< total vertex size in dwords */
+
+ /* Keep this small and at the end of the struct to allow quick
+ * memcmp() comparisons.
+ */
+ struct {
+ ubyte interp_mode:4; /**< INTERP_x */
+ ubyte emit:4; /**< EMIT_x */
+ ubyte src_index; /**< map to post-xform attribs */
+ } attrib[PIPE_MAX_SHADER_INPUTS];
};
+static INLINE int
+draw_vinfo_size( const struct vertex_info *a )
+{
+ return ((const char *)&a->attrib[a->num_attribs] -
+ (const char *)a);
+}
+
+static INLINE int
+draw_vinfo_compare( const struct vertex_info *a,
+ const struct vertex_info *b )
+{
+ unsigned sizea = draw_vinfo_size( a );
+ return memcmp( a, b, sizea );
+}
+
+static INLINE void
+draw_vinfo_copy( struct vertex_info *dst,
+ const struct vertex_info *src )
+{
+ unsigned size = draw_vinfo_size( src );
+ memcpy( dst, src, size );
+}
+
/**
@@ -91,14 +120,15 @@ struct vertex_info
*/
static INLINE uint
draw_emit_vertex_attr(struct vertex_info *vinfo,
- enum attrib_emit emit, enum interp_mode interp,
+ enum attrib_emit emit,
+ enum interp_mode interp, /* only used by softpipe??? */
uint src_index)
{
const uint n = vinfo->num_attribs;
assert(n < PIPE_MAX_SHADER_INPUTS);
- vinfo->emit[n] = emit;
- vinfo->interp_mode[n] = interp;
- vinfo->src_index[n] = src_index;
+ vinfo->attrib[n].emit = emit;
+ vinfo->attrib[n].interp_mode = interp;
+ vinfo->attrib[n].src_index = src_index;
vinfo->num_attribs++;
return n;
}
diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c
index b20f3c4963..cb85e1734e 100644
--- a/src/gallium/auxiliary/gallivm/soabuiltins.c
+++ b/src/gallium/auxiliary/gallivm/soabuiltins.c
@@ -167,6 +167,7 @@ void min(float4 *res,
res[3] = minvec(tmp0w, tmp1w);
}
+
void max(float4 *res,
float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index a5abbcde49..99ee74cf14 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -371,7 +371,11 @@ void x86_jcc( struct x86_function *p,
DUMP_I(cc);
if (offset < 0) {
- assert(p->csr - p->store > -offset);
+ /*assert(p->csr - p->store > -offset);*/
+ if (p->csr - p->store <= -offset) {
+ /* probably out of memory (using the error_overflow buffer) */
+ return;
+ }
}
if (offset <= 127 && offset >= -128) {
@@ -699,6 +703,18 @@ void sse_prefetch1( struct x86_function *p, struct x86_reg ptr)
emit_modrm_noreg(p, 2, ptr);
}
+void sse_movntps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src)
+{
+ DUMP_RR( dst, src );
+
+ assert(dst.mod != mod_REG);
+ assert(src.mod == mod_REG);
+ emit_2ub(p, 0x0f, 0x2b);
+ emit_modrm(p, src, dst);
+}
+
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 86091e7f6b..1b5eaaca85 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -190,6 +190,8 @@ void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);
void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);
void sse_prefetch1( struct x86_function *p, struct x86_reg ptr);
+void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
+
void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c
index d194c2fb15..8f1f58b2dd 100644
--- a/src/gallium/drivers/i915simple/i915_prim_emit.c
+++ b/src/gallium/drivers/i915simple/i915_prim_emit.c
@@ -77,9 +77,9 @@ emit_hw_vertex( struct i915_context *i915,
assert(!i915->dirty);
for (i = 0; i < vinfo->num_attribs; i++) {
- const uint j = vinfo->src_index[i];
+ const uint j = vinfo->attrib[i].src_index;
const float *attrib = vertex->data[j];
- switch (vinfo->emit[i]) {
+ switch (vinfo->attrib[i].emit) {
case EMIT_1F:
OUT_BATCH( fui(attrib[0]) );
count++;
diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c
index 488615067c..178d4e8781 100644
--- a/src/gallium/drivers/i915simple/i915_state_derived.c
+++ b/src/gallium/drivers/i915simple/i915_state_derived.c
@@ -88,12 +88,12 @@ static void calculate_vertex_layout( struct i915_context *i915 )
if (needW) {
draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZW;
- vinfo.emit[0] = EMIT_4F;
+ vinfo.attrib[0].emit = EMIT_4F;
}
else {
draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src);
vinfo.hwfmt[0] |= S4_VFMT_XYZ;
- vinfo.emit[0] = EMIT_3F;
+ vinfo.attrib[0].emit = EMIT_3F;
}
/* hardware point size */
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index bc8263c33e..13d8017393 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -773,10 +773,10 @@ static void setup_tri_coefficients( struct setup_context *setup )
/* setup interpolation for all the remaining attributes:
*/
for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
- const uint vertSlot = vinfo->src_index[fragSlot];
+ const uint vertSlot = vinfo->attrib[fragSlot].src_index;
uint j;
- switch (vinfo->interp_mode[fragSlot]) {
+ switch (vinfo->attrib[fragSlot].interp_mode) {
case INTERP_CONSTANT:
for (j = 0; j < NUM_CHANNELS; j++)
const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
@@ -1084,10 +1084,10 @@ setup_line_coefficients(struct setup_context *setup,
/* setup interpolation for all the remaining attributes:
*/
for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
- const uint vertSlot = vinfo->src_index[fragSlot];
+ const uint vertSlot = vinfo->attrib[fragSlot].src_index;
uint j;
- switch (vinfo->interp_mode[fragSlot]) {
+ switch (vinfo->attrib[fragSlot].interp_mode) {
case INTERP_CONSTANT:
for (j = 0; j < NUM_CHANNELS; j++)
const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
@@ -1331,10 +1331,10 @@ setup_point( struct setup_context *setup,
const_coeff(setup, &setup->posCoef, 0, 3);
for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) {
- const uint vertSlot = vinfo->src_index[fragSlot];
+ const uint vertSlot = vinfo->attrib[fragSlot].src_index;
uint j;
- switch (vinfo->interp_mode[fragSlot]) {
+ switch (vinfo->attrib[fragSlot].interp_mode) {
case INTERP_CONSTANT:
/* fall-through */
case INTERP_LINEAR:
diff --git a/src/mesa/main/api_exec.c b/src/mesa/main/api_exec.c
index 0c3c9c4de4..bae3bf11cb 100644
--- a/src/mesa/main/api_exec.c
+++ b/src/mesa/main/api_exec.c
@@ -58,7 +58,7 @@
#include "colortab.h"
#endif
#include "context.h"
-#if FEATURE_convolution
+#if FEATURE_convolve
#include "convolve.h"
#endif
#include "depth.h"
@@ -402,7 +402,7 @@ _mesa_init_exec_table(struct _glapi_table *exec)
SET_GetColorTableParameteriv(exec, _mesa_GetColorTableParameteriv);
#endif
-#if FEATURE_convolution
+#if FEATURE_convolve
SET_ConvolutionFilter1D(exec, _mesa_ConvolutionFilter1D);
SET_ConvolutionFilter2D(exec, _mesa_ConvolutionFilter2D);
SET_ConvolutionParameterf(exec, _mesa_ConvolutionParameterf);
diff --git a/src/mesa/main/mfeatures.h b/src/mesa/main/mfeatures.h
index ed78f57edf..3819da3d68 100644
--- a/src/mesa/main/mfeatures.h
+++ b/src/mesa/main/mfeatures.h
@@ -39,7 +39,7 @@
#define FEATURE_accum _HAVE_FULL_GL
#define FEATURE_attrib_stack _HAVE_FULL_GL
#define FEATURE_colortable _HAVE_FULL_GL
-#define FEATURE_convolution _HAVE_FULL_GL
+#define FEATURE_convolve _HAVE_FULL_GL
#define FEATURE_dispatch _HAVE_FULL_GL
#define FEATURE_dlist _HAVE_FULL_GL
#define FEATURE_draw_read_buffer _HAVE_FULL_GL
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 8e4f6a2e66..8bf6858c1e 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2722,6 +2722,7 @@ struct gl_matrix_stack
#define _NEW_MULTISAMPLE 0x2000000 /**< __GLcontextRec::Multisample */
#define _NEW_TRACK_MATRIX 0x4000000 /**< __GLcontextRec::VertexProgram */
#define _NEW_PROGRAM 0x8000000 /**< __GLcontextRec::VertexProgram */
+#define _NEW_CURRENT_ATTRIB 0x10000000 /**< __GLcontextRec::Current */
#define _NEW_ALL ~0
/*@}*/
@@ -3049,6 +3050,8 @@ struct __GLcontextRec
GLenum RenderMode; /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */
GLbitfield NewState; /**< bitwise-or of _NEW_* flags */
+ GLbitfield varying_vp_inputs; /**< mask of VERT_BIT_* flags */
+
/** \name Derived state */
/*@{*/
/** Bitwise-or of DD_* flags. Note that this bitfield may be used before
diff --git a/src/mesa/main/queryobj.c b/src/mesa/main/queryobj.c
index a1e32e70ba..2d06030030 100644
--- a/src/mesa/main/queryobj.c
+++ b/src/mesa/main/queryobj.c
@@ -95,7 +95,7 @@ _mesa_wait_query(GLcontext *ctx, struct gl_query_object *q)
* XXX maybe add Delete() method to gl_query_object class and call that instead
*/
void
-_mesa_delete_query(struct gl_query_object *q)
+_mesa_delete_query(GLcontext *ctx, struct gl_query_object *q)
{
_mesa_free(q);
}
diff --git a/src/mesa/main/queryobj.h b/src/mesa/main/queryobj.h
index c05a1f3da8..9a9774641b 100644
--- a/src/mesa/main/queryobj.h
+++ b/src/mesa/main/queryobj.h
@@ -37,7 +37,7 @@ extern void
_mesa_free_query_data(GLcontext *ctx);
extern void
-_mesa_delete_query(struct gl_query_object *q);
+_mesa_delete_query(GLcontext *ctx, struct gl_query_object *q);
extern void
_mesa_begin_query(GLcontext *ctx, struct gl_query_object *q);
diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c
index 5913019bc1..7e44310d8d 100644
--- a/src/mesa/main/state.c
+++ b/src/mesa/main/state.c
@@ -447,6 +447,9 @@ _mesa_update_state_locked( GLcontext *ctx )
GLbitfield new_state = ctx->NewState;
GLbitfield prog_flags = _NEW_PROGRAM;
+ if (new_state == _NEW_CURRENT_ATTRIB)
+ goto out;
+
if (MESA_VERBOSE & VERBOSE_STATE)
_mesa_print_state("_mesa_update_state", new_state);
@@ -510,7 +513,8 @@ _mesa_update_state_locked( GLcontext *ctx )
_mesa_update_tnl_spaces( ctx, new_state );
if (ctx->FragmentProgram._MaintainTexEnvProgram) {
- prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
+ prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE_MATRIX | _NEW_LIGHT |
+ _NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
}
if (ctx->VertexProgram._MaintainTnlProgram) {
prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
@@ -532,6 +536,7 @@ _mesa_update_state_locked( GLcontext *ctx )
* Set ctx->NewState to zero to avoid recursion if
* Driver.UpdateState() has to call FLUSH_VERTICES(). (fixed?)
*/
+ out:
new_state = ctx->NewState;
ctx->NewState = 0;
ctx->Driver.UpdateState(ctx, new_state);
@@ -548,3 +553,38 @@ _mesa_update_state( GLcontext *ctx )
_mesa_update_state_locked(ctx);
_mesa_unlock_context_textures(ctx);
}
+
+
+
+
+/* Want to figure out which fragment program inputs are actually
+ * constant/current values from ctx->Current. These should be
+ * referenced as a tracked state variable rather than a fragment
+ * program input, to save the overhead of putting a constant value in
+ * every submitted vertex, transferring it to hardware, interpolating
+ * it across the triangle, etc...
+ *
+ * When there is a VP bound, just use vp->outputs. But when we're
+ * generating vp from fixed function state, basically want to
+ * calculate:
+ *
+ * vp_out_2_fp_in( vp_in_2_vp_out( varying_inputs ) |
+ * potential_vp_outputs )
+ *
+ * Where potential_vp_outputs is calculated by looking at enabled
+ * texgen, etc.
+ *
+ * The generated fragment program should then only declare inputs that
+ * may vary or otherwise differ from the ctx->Current values.
+ * Otherwise, the fp should track them as state values instead.
+ */
+void
+_mesa_set_varying_vp_inputs( GLcontext *ctx,
+ GLbitfield varying_inputs )
+{
+ if (ctx->varying_vp_inputs != varying_inputs) {
+ ctx->varying_vp_inputs = varying_inputs;
+ ctx->NewState |= _NEW_ARRAY;
+ //_mesa_printf("%s %x\n", __FUNCTION__, varying_inputs);
+ }
+}
diff --git a/src/mesa/main/state.h b/src/mesa/main/state.h
index bb7cb8f32a..79f2f6beb0 100644
--- a/src/mesa/main/state.h
+++ b/src/mesa/main/state.h
@@ -37,5 +37,8 @@ _mesa_update_state( GLcontext *ctx );
extern void
_mesa_update_state_locked( GLcontext *ctx );
+void
+_mesa_set_varying_vp_inputs( GLcontext *ctx,
+ GLbitfield varying_inputs );
#endif
diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c
index f6bbbcfaed..c23173014d 100644
--- a/src/mesa/main/texenvprogram.c
+++ b/src/mesa/main/texenvprogram.c
@@ -55,15 +55,17 @@ struct texenvprog_cache_item
#define DISASSEM (MESA_VERBOSE & VERBOSE_DISASSEM)
struct mode_opt {
- GLuint Source:4;
- GLuint Operand:3;
+ GLubyte Source:4;
+ GLubyte Operand:3;
};
struct state_key {
- GLbitfield enabled_units;
+ GLuint nr_enabled_units:8;
+ GLuint enabled_units:8;
GLuint separate_specular:1;
GLuint fog_enabled:1;
GLuint fog_mode:2;
+ GLuint inputs_available:12;
struct {
GLuint enabled:1;
@@ -74,10 +76,10 @@ struct state_key {
GLuint NumArgsRGB:2;
GLuint ModeRGB:4;
- struct mode_opt OptRGB[3];
-
GLuint NumArgsA:2;
GLuint ModeA:4;
+
+ struct mode_opt OptRGB[3];
struct mode_opt OptA[3];
} unit[8];
};
@@ -199,6 +201,66 @@ static GLuint translate_tex_src_bit( GLbitfield bit )
}
}
+#define VERT_BIT_TEX_ANY (0xff << VERT_ATTRIB_TEX0)
+#define VERT_RESULT_TEX_ANY (0xff << VERT_RESULT_TEX0)
+
+/**
+ * Identify all possible varying inputs. The fragment program will
+ * never reference non-varying inputs, but will track them via state
+ * constants instead.
+ *
+ * This function figures out all the inputs that the fragment program
+ * has access to. The bitmask is later reduced to just those which
+ * are actually referenced.
+ */
+static GLbitfield get_fp_input_mask( GLcontext *ctx )
+{
+ GLbitfield fp_inputs = 0x0;
+
+ if (!ctx->VertexProgram._Enabled ||
+ !ctx->VertexProgram._Current) {
+
+ /* Fixed function logic */
+ GLbitfield varying_inputs = ctx->varying_vp_inputs;
+
+ /* First look at what values may be computed by the generated
+ * vertex program:
+ */
+ if (ctx->Light.Enabled) {
+ fp_inputs |= FRAG_BIT_COL0;
+
+ if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
+ fp_inputs |= FRAG_BIT_COL1;
+ }
+
+ fp_inputs |= (ctx->Texture._TexGenEnabled |
+ ctx->Texture._TexMatEnabled) << FRAG_ATTRIB_TEX0;
+
+ /* Then look at what might be varying as a result of enabled
+ * arrays, etc:
+ */
+ if (varying_inputs & VERT_BIT_COLOR0) fp_inputs |= FRAG_BIT_COL0;
+ if (varying_inputs & VERT_BIT_COLOR1) fp_inputs |= FRAG_BIT_COL1;
+
+ fp_inputs |= (((varying_inputs & VERT_BIT_TEX_ANY) >> VERT_ATTRIB_TEX0)
+ << FRAG_ATTRIB_TEX0);
+
+ }
+ else {
+ /* calculate from vp->outputs */
+ GLbitfield vp_outputs = ctx->VertexProgram._Current->Base.OutputsWritten;
+
+ if (vp_outputs & (1 << VERT_RESULT_COL0)) fp_inputs |= FRAG_BIT_COL0;
+ if (vp_outputs & (1 << VERT_RESULT_COL1)) fp_inputs |= FRAG_BIT_COL1;
+
+ fp_inputs |= (((vp_outputs & VERT_RESULT_TEX_ANY) >> VERT_RESULT_TEX0)
+ << FRAG_ATTRIB_TEX0);
+ }
+
+ return fp_inputs;
+}
+
+
/**
* Examine current texture environment state and generate a unique
* key to identify it.
@@ -206,7 +268,9 @@ static GLuint translate_tex_src_bit( GLbitfield bit )
static void make_state_key( GLcontext *ctx, struct state_key *key )
{
GLuint i, j;
-
+ GLbitfield inputs_referenced = FRAG_BIT_COL0;
+ GLbitfield inputs_available = get_fp_input_mask( ctx );
+
memset(key, 0, sizeof(*key));
for (i=0;i<MAX_TEXTURE_UNITS;i++) {
@@ -217,6 +281,8 @@ static void make_state_key( GLcontext *ctx, struct state_key *key )
key->unit[i].enabled = 1;
key->enabled_units |= (1<<i);
+ key->nr_enabled_units = i+1;
+ inputs_referenced |= FRAG_BIT_TEX(i);
key->unit[i].source_index =
translate_tex_src_bit(texUnit->_ReallyEnabled);
@@ -245,16 +311,22 @@ static void make_state_key( GLcontext *ctx, struct state_key *key )
}
}
- if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
+ if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
key->separate_specular = 1;
+ inputs_referenced |= FRAG_BIT_COL1;
+ }
if (ctx->Fog.Enabled) {
key->fog_enabled = 1;
key->fog_mode = translate_fog_mode(ctx->Fog.Mode);
+ inputs_referenced |= FRAG_BIT_FOGC; /* maybe */
}
+
+ key->inputs_available = (inputs_available & inputs_referenced);
}
-/* Use uregs to represent registers internally, translate to Mesa's
+/**
+ * Use uregs to represent registers internally, translate to Mesa's
* expected formats on emit.
*
* NOTE: These are passed by value extensively in this file rather
@@ -287,16 +359,16 @@ static const struct ureg undef = {
};
-/* State used to build the fragment program:
+/** State used to build the fragment program:
*/
struct texenv_fragment_program {
struct gl_fragment_program *program;
GLcontext *ctx;
struct state_key *state;
- GLbitfield alu_temps; /* Track texture indirections, see spec. */
- GLbitfield temps_output; /* Track texture indirections, see spec. */
- GLbitfield temp_in_use; /* Tracks temporary regs which are in use. */
+ GLbitfield alu_temps; /**< Track texture indirections, see spec. */
+ GLbitfield temps_output; /**< Track texture indirections, see spec. */
+ GLbitfield temp_in_use; /**< Tracks temporary regs which are in use. */
GLboolean error;
struct ureg src_texture[MAX_TEXTURE_UNITS];
@@ -304,11 +376,11 @@ struct texenv_fragment_program {
* else undef.
*/
- struct ureg src_previous; /* Reg containing color from previous
+ struct ureg src_previous; /**< Reg containing color from previous
* stage. May need to be decl'd.
*/
- GLuint last_tex_stage; /* Number of last enabled texture unit */
+ GLuint last_tex_stage; /**< Number of last enabled texture unit */
struct ureg half;
struct ureg one;
@@ -457,11 +529,29 @@ static struct ureg register_param5( struct texenv_fragment_program *p,
#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0)
#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
+static GLuint frag_to_vert_attrib( GLuint attrib )
+{
+ switch (attrib) {
+ case FRAG_ATTRIB_COL0: return VERT_ATTRIB_COLOR0;
+ case FRAG_ATTRIB_COL1: return VERT_ATTRIB_COLOR1;
+ default:
+ assert(attrib >= FRAG_ATTRIB_TEX0);
+ assert(attrib <= FRAG_ATTRIB_TEX7);
+ return attrib - FRAG_ATTRIB_TEX0 + VERT_ATTRIB_TEX0;
+ }
+}
+
static struct ureg register_input( struct texenv_fragment_program *p, GLuint input )
{
- p->program->Base.InputsRead |= (1 << input);
- return make_ureg(PROGRAM_INPUT, input);
+ if (p->state->inputs_available & (1<<input)) {
+ p->program->Base.InputsRead |= (1 << input);
+ return make_ureg(PROGRAM_INPUT, input);
+ }
+ else {
+ GLuint idx = frag_to_vert_attrib( input );
+ return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, idx );
+ }
}
diff --git a/src/mesa/shader/prog_cache.c b/src/mesa/shader/prog_cache.c
index 36a25377c5..9437e59613 100644
--- a/src/mesa/shader/prog_cache.c
+++ b/src/mesa/shader/prog_cache.c
@@ -44,6 +44,7 @@ struct cache_item
struct gl_program_cache
{
struct cache_item **items;
+ struct cache_item *last;
GLuint size, n_items;
};
@@ -83,6 +84,8 @@ rehash(struct gl_program_cache *cache)
struct cache_item *c, *next;
GLuint size, i;
+ cache->last = NULL;
+
size = cache->size * 3;
items = (struct cache_item**) _mesa_malloc(size * sizeof(*items));
_mesa_memset(items, 0, size * sizeof(*items));
@@ -105,6 +108,8 @@ clear_cache(GLcontext *ctx, struct gl_program_cache *cache)
{
struct cache_item *c, *next;
GLuint i;
+
+ cache->last = NULL;
for (i = 0; i < cache->size; i++) {
for (c = cache->items[i]; c; c = next) {
@@ -149,18 +154,26 @@ _mesa_delete_program_cache(GLcontext *ctx, struct gl_program_cache *cache)
struct gl_program *
-_mesa_search_program_cache(const struct gl_program_cache *cache,
+_mesa_search_program_cache(struct gl_program_cache *cache,
const void *key, GLuint keysize)
{
- const GLuint hash = hash_key(key, keysize);
- struct cache_item *c;
-
- for (c = cache->items[hash % cache->size]; c; c = c->next) {
- if (c->hash == hash && memcmp(c->key, key, keysize) == 0)
- return c->program;
+ if (cache->last &&
+ memcmp(cache->last->key, key, keysize) == 0) {
+ return cache->last->program;
}
+ else {
+ const GLuint hash = hash_key(key, keysize);
+ struct cache_item *c;
+
+ for (c = cache->items[hash % cache->size]; c; c = c->next) {
+ if (c->hash == hash && memcmp(c->key, key, keysize) == 0) {
+ cache->last = c;
+ return c->program;
+ }
+ }
- return NULL;
+ return NULL;
+ }
}
diff --git a/src/mesa/shader/prog_cache.h b/src/mesa/shader/prog_cache.h
index a8c91fba01..4e1ccac03f 100644
--- a/src/mesa/shader/prog_cache.h
+++ b/src/mesa/shader/prog_cache.h
@@ -42,7 +42,7 @@ _mesa_delete_program_cache(GLcontext *ctx, struct gl_program_cache *pc);
extern struct gl_program *
-_mesa_search_program_cache(const struct gl_program_cache *cache,
+_mesa_search_program_cache(struct gl_program_cache *cache,
const void *key, GLuint keysize);
extern void
diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c
index d4e31207e8..34c4741350 100644
--- a/src/mesa/shader/prog_statevars.c
+++ b/src/mesa/shader/prog_statevars.c
@@ -395,6 +395,12 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[],
case STATE_INTERNAL:
switch (state[1]) {
+ case STATE_CURRENT_ATTRIB: {
+ const GLuint idx = (GLuint) state[2];
+ COPY_4V(value, ctx->Current.Attrib[idx]);
+ return;
+ }
+
case STATE_NORMAL_SCALE:
ASSIGN_4V(value,
ctx->_ModelViewInvScale,
@@ -501,6 +507,9 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[],
}
return;
+ /* XXX: make sure new tokens added here are also handled in the
+ * _mesa_program_state_flags() switch, below.
+ */
default:
/* unknown state indexes are silently ignored
* should be handled by the driver.
@@ -574,11 +583,29 @@ _mesa_program_state_flags(const gl_state_index state[STATE_LENGTH])
case STATE_INTERNAL:
switch (state[1]) {
+ case STATE_CURRENT_ATTRIB:
+ return _NEW_CURRENT_ATTRIB;
+
+ case STATE_NORMAL_SCALE:
+ return _NEW_MODELVIEW;
+
case STATE_TEXRECT_SCALE:
case STATE_SHADOW_AMBIENT:
return _NEW_TEXTURE;
case STATE_FOG_PARAMS_OPTIMIZED:
return _NEW_FOG;
+ case STATE_LIGHT_SPOT_DIR_NORMALIZED:
+ case STATE_LIGHT_POSITION:
+ case STATE_LIGHT_POSITION_NORMALIZED:
+ case STATE_LIGHT_HALF_VECTOR:
+ return _NEW_LIGHT;
+
+ case STATE_PT_SCALE:
+ case STATE_PT_BIAS:
+ case STATE_PCM_SCALE:
+ case STATE_PCM_BIAS:
+ return _NEW_PIXEL;
+
default:
/* unknown state indexes are silently ignored and
* no flag set, since it is handled by the driver.
diff --git a/src/mesa/shader/prog_statevars.h b/src/mesa/shader/prog_statevars.h
index 20643ca794..72e51f4031 100644
--- a/src/mesa/shader/prog_statevars.h
+++ b/src/mesa/shader/prog_statevars.h
@@ -104,6 +104,7 @@ typedef enum gl_state_index_ {
STATE_LOCAL,
STATE_INTERNAL, /* Mesa additions */
+ STATE_CURRENT_ATTRIB, /* ctx->Current vertex attrib value */
STATE_NORMAL_SCALE,
STATE_TEXRECT_SCALE,
STATE_FOG_PARAMS_OPTIMIZED, /* for faster fog calc */
diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c
index f120c20bdf..5d21cff672 100644
--- a/src/mesa/shader/program.c
+++ b/src/mesa/shader/program.c
@@ -686,17 +686,47 @@ _mesa_combine_programs(GLcontext *ctx,
if (newProg->Target == GL_FRAGMENT_PROGRAM_ARB) {
struct gl_fragment_program *fprogA, *fprogB, *newFprog;
+ GLbitfield progB_inputsRead = progB->InputsRead;
+ GLint progB_colorFile, progB_colorIndex;
+
fprogA = (struct gl_fragment_program *) progA;
fprogB = (struct gl_fragment_program *) progB;
newFprog = (struct gl_fragment_program *) newProg;
newFprog->UsesKill = fprogA->UsesKill || fprogB->UsesKill;
+ /* We'll do a search and replace for instances
+ * of progB_colorFile/progB_colorIndex below...
+ */
+ progB_colorFile = PROGRAM_INPUT;
+ progB_colorIndex = FRAG_ATTRIB_COL0;
+
+ /*
+ * The fragment program may get color from a state var rather than
+ * a fragment input (vertex output) if it's constant.
+ * See the texenvprogram.c code.
+ * So, search the program's parameter list now to see if the program
+ * gets color from a state var instead of a conventional fragment
+ * input register.
+ */
+ for (i = 0; i < progB->Parameters->NumParameters; i++) {
+ struct gl_program_parameter *p = &progB->Parameters->Parameters[i];
+ if (p->Type == PROGRAM_STATE_VAR &&
+ p->StateIndexes[0] == STATE_INTERNAL &&
+ p->StateIndexes[1] == STATE_CURRENT_ATTRIB &&
+ p->StateIndexes[2] == VERT_ATTRIB_COLOR0) {
+ progB_inputsRead |= FRAG_BIT_COL0;
+ progB_colorFile = PROGRAM_STATE_VAR;
+ progB_colorIndex = i;
+ break;
+ }
+ }
+
/* Connect color outputs of fprogA to color inputs of fprogB, via a
* new temporary register.
*/
if ((progA->OutputsWritten & (1 << FRAG_RESULT_COLR)) &&
- (progB->InputsRead & (1 << FRAG_ATTRIB_COL0))) {
+ (progB_inputsRead & FRAG_BIT_COL0)) {
GLint tempReg = _mesa_find_free_register(newProg, PROGRAM_TEMPORARY);
if (tempReg < 0) {
_mesa_problem(ctx, "No free temp regs found in "
@@ -707,13 +737,14 @@ _mesa_combine_programs(GLcontext *ctx,
replace_registers(newInst, lenA,
PROGRAM_OUTPUT, FRAG_RESULT_COLR,
PROGRAM_TEMPORARY, tempReg);
- /* replace reads from input.color[0] with tempReg */
+ /* replace reads from the input color with tempReg */
replace_registers(newInst + lenA, lenB,
- PROGRAM_INPUT, FRAG_ATTRIB_COL0,
- PROGRAM_TEMPORARY, tempReg);
+ progB_colorFile, progB_colorIndex, /* search for */
+ PROGRAM_TEMPORARY, tempReg /* replace with */ );
}
- inputsB = progB->InputsRead;
+ /* compute combined program's InputsRead */
+ inputsB = progB_inputsRead;
if (progA->OutputsWritten & (1 << FRAG_RESULT_COLR)) {
inputsB &= ~(1 << FRAG_ATTRIB_COL0);
}
diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c
index fc47896c24..5eef4ebe92 100644
--- a/src/mesa/state_tracker/st_atom_rasterizer.c
+++ b/src/mesa/state_tracker/st_atom_rasterizer.c
@@ -215,6 +215,9 @@ static void update_raster_state( struct st_context *st )
raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_NONE;
}
}
+
+ /* ST_NEW_VERTEX_PROGRAM
+ */
if (vertProg) {
if (vertProg->Base.Id == 0) {
if (vertProg->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
@@ -277,7 +280,7 @@ const struct st_tracked_state st_update_rasterizer = {
_NEW_POLYGON |
_NEW_PROGRAM |
_NEW_SCISSOR), /* mesa state dependencies*/
- 0, /* state tracker dependencies */
+ ST_NEW_VERTEX_PROGRAM, /* state tracker dependencies */
},
update_raster_state /* update function */
};
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index e545e00eb2..acaf1de882 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -26,7 +26,7 @@
**************************************************************************/
#include "main/imports.h"
-#if FEATURE_convolution
+#if FEATURE_convolve
#include "main/convolve.h"
#endif
#include "main/enums.h"
@@ -409,7 +409,7 @@ st_TexImage(GLcontext * ctx,
stImage->face = _mesa_tex_target_to_face(target);
stImage->level = level;
-#if FEATURE_convolution
+#if FEATURE_convolve
if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) {
_mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth,
&postConvHeight);
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index f9016923dc..61949a9388 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -526,7 +526,7 @@ st_draw_vbo(GLcontext *ctx,
num_vbuffers = 1;
num_velements = vp->num_inputs;
if (num_velements == 0)
- num_vbuffers = 0;
+ num_vbuffers = 0;
}
else {
/*printf("Draw non-interleaved\n");*/
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index fdb0c5a9a4..a6ce26ffed 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -143,29 +143,37 @@ static void vbo_exec_copy_to_current( struct vbo_exec_context *exec )
for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) {
if (exec->vtx.attrsz[i]) {
- GLfloat *current = (GLfloat *)vbo->currval[i].Ptr;
-
/* Note: the exec->vtx.current[i] pointers point into the
* ctx->Current.Attrib and ctx->Light.Material.Attrib arrays.
*/
- COPY_CLEAN_4V(current,
- exec->vtx.attrsz[i],
- exec->vtx.attrptr[i]);
+ GLfloat *current = (GLfloat *)vbo->currval[i].Ptr;
+ GLfloat tmp[4];
+
+ COPY_CLEAN_4V(tmp,
+ exec->vtx.attrsz[i],
+ exec->vtx.attrptr[i]);
+
+ if (memcmp(current, tmp, sizeof(tmp)) != 0)
+ {
+ memcpy(current, tmp, sizeof(tmp));
- /* Given that we explicitly state size here, there is no need
- * for the COPY_CLEAN above, could just copy 16 bytes and be
- * done. The only problem is when Mesa accesses ctx->Current
- * directly.
- */
- vbo->currval[i].Size = exec->vtx.attrsz[i];
-
- /* This triggers rather too much recalculation of Mesa state
- * that doesn't get used (eg light positions).
- */
- if (i >= VBO_ATTRIB_MAT_FRONT_AMBIENT &&
- i <= VBO_ATTRIB_MAT_BACK_INDEXES)
- ctx->NewState |= _NEW_LIGHT;
+ /* Given that we explicitly state size here, there is no need
+ * for the COPY_CLEAN above, could just copy 16 bytes and be
+ * done. The only problem is when Mesa accesses ctx->Current
+ * directly.
+ */
+ vbo->currval[i].Size = exec->vtx.attrsz[i];
+
+ /* This triggers rather too much recalculation of Mesa state
+ * that doesn't get used (eg light positions).
+ */
+ if (i >= VBO_ATTRIB_MAT_FRONT_AMBIENT &&
+ i <= VBO_ATTRIB_MAT_BACK_INDEXES)
+ ctx->NewState |= _NEW_LIGHT;
+
+ ctx->NewState |= _NEW_CURRENT_ATTRIB;
+ }
}
}
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 0f9d8da356..8871e10cf6 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -127,6 +127,7 @@ static void recalculate_input_bindings( GLcontext *ctx )
struct vbo_context *vbo = vbo_context(ctx);
struct vbo_exec_context *exec = &vbo->exec;
const struct gl_client_array **inputs = &exec->array.inputs[0];
+ GLbitfield const_inputs = 0x0;
GLuint i;
exec->array.program_mode = get_program_mode(ctx);
@@ -141,19 +142,24 @@ static void recalculate_input_bindings( GLcontext *ctx )
for (i = 0; i <= VERT_ATTRIB_TEX7; i++) {
if (exec->array.legacy_array[i]->Enabled)
inputs[i] = exec->array.legacy_array[i];
- else
+ else {
inputs[i] = &vbo->legacy_currval[i];
+ const_inputs |= 1 << i;
+ }
}
for (i = 0; i < MAT_ATTRIB_MAX; i++) {
inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->mat_currval[i];
+ const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
}
/* Could use just about anything, just to fill in the empty
* slots:
*/
- for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++)
+ for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) {
inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
+ const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
+ }
break;
case VP_NV:
@@ -166,15 +172,19 @@ static void recalculate_input_bindings( GLcontext *ctx )
inputs[i] = exec->array.generic_array[i];
else if (exec->array.legacy_array[i]->Enabled)
inputs[i] = exec->array.legacy_array[i];
- else
+ else {
inputs[i] = &vbo->legacy_currval[i];
+ const_inputs |= 1 << i;
+ }
}
/* Could use just about anything, just to fill in the empty
* slots:
*/
- for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++)
+ for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0];
+ const_inputs |= 1 << i;
+ }
break;
case VP_ARB:
@@ -189,25 +199,34 @@ static void recalculate_input_bindings( GLcontext *ctx )
inputs[0] = exec->array.generic_array[0];
else if (exec->array.legacy_array[0]->Enabled)
inputs[0] = exec->array.legacy_array[0];
- else
+ else {
inputs[0] = &vbo->legacy_currval[0];
+ const_inputs |= 1 << 0;
+ }
for (i = 1; i <= VERT_ATTRIB_TEX7; i++) {
if (exec->array.legacy_array[i]->Enabled)
inputs[i] = exec->array.legacy_array[i];
- else
+ else {
inputs[i] = &vbo->legacy_currval[i];
+ const_inputs |= 1 << i;
+ }
}
for (i = 0; i < 16; i++) {
if (exec->array.generic_array[i]->Enabled)
inputs[VERT_ATTRIB_GENERIC0 + i] = exec->array.generic_array[i];
- else
+ else {
inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
+ const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
+ }
+
}
break;
}
+
+ _mesa_set_varying_vp_inputs( ctx, ~const_inputs );
}
static void bind_arrays( GLcontext *ctx )
@@ -257,6 +276,11 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count)
bind_arrays( ctx );
+ /* Again...
+ */
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
prim[0].begin = 1;
prim[0].end = 1;
prim[0].weak = 0;
@@ -297,6 +321,9 @@ vbo_exec_DrawRangeElements(GLenum mode,
bind_arrays( ctx );
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
ib.count = count;
ib.type = type;
ib.obj = ctx->Array.ElementArrayBufferObj;
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index 92356ba977..5bf3d836db 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -150,6 +150,7 @@ static void vbo_exec_bind_arrays( GLcontext *ctx )
GLubyte *data = exec->vtx.buffer_map;
const GLuint *map;
GLuint attr;
+ GLbitfield varying_inputs = 0x0;
/* Install the default (ie Current) attributes first, then overlay
* all active ones.
@@ -211,8 +212,11 @@ static void vbo_exec_bind_arrays( GLcontext *ctx )
arrays[attr]._MaxElement = count; /* ??? */
data += exec->vtx.attrsz[src] * sizeof(GLfloat);
+ varying_inputs |= 1<<attr;
}
}
+
+ _mesa_set_varying_vp_inputs( ctx, varying_inputs );
}
@@ -242,6 +246,9 @@ void vbo_exec_vtx_flush( struct vbo_exec_context *exec )
*/
vbo_exec_bind_arrays( ctx );
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
/* if using a real VBO, unmap it before drawing */
if (exec->vtx.bufferobj->Name) {
ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj);
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c
index ed82f09958..0488c5d718 100644
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -64,18 +64,26 @@ static void _playback_copy_to_current( GLcontext *ctx,
for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) {
if (node->attrsz[i]) {
GLfloat *current = (GLfloat *)vbo->currval[i].Ptr;
+ GLfloat tmp[4];
- COPY_CLEAN_4V(current,
- node->attrsz[i],
- data);
+ COPY_CLEAN_4V(tmp,
+ node->attrsz[i],
+ data);
+
+ if (memcmp(current, tmp, 4 * sizeof(GLfloat)) != 0)
+ {
+ memcpy(current, tmp, 4 * sizeof(GLfloat));
- vbo->currval[i].Size = node->attrsz[i];
+ vbo->currval[i].Size = node->attrsz[i];
- data += node->attrsz[i];
+ if (i >= VBO_ATTRIB_FIRST_MATERIAL &&
+ i <= VBO_ATTRIB_LAST_MATERIAL)
+ ctx->NewState |= _NEW_LIGHT;
+
+ ctx->NewState |= _NEW_CURRENT_ATTRIB;
+ }
- if (i >= VBO_ATTRIB_FIRST_MATERIAL &&
- i <= VBO_ATTRIB_LAST_MATERIAL)
- ctx->NewState |= _NEW_LIGHT;
+ data += node->attrsz[i];
}
}
@@ -110,6 +118,7 @@ static void vbo_bind_vertex_list( GLcontext *ctx,
GLuint data = node->buffer_offset;
const GLuint *map;
GLuint attr;
+ GLbitfield varying_inputs = 0x0;
/* Install the default (ie Current) attributes first, then overlay
* all active ones.
@@ -159,8 +168,11 @@ static void vbo_bind_vertex_list( GLcontext *ctx,
assert(arrays[attr].BufferObj->Name);
data += node->attrsz[src] * sizeof(GLfloat);
+ varying_inputs |= 1<<attr;
}
}
+
+ _mesa_set_varying_vp_inputs( ctx, varying_inputs );
}
static void vbo_save_loopback_vertex_list( GLcontext *ctx,
@@ -229,6 +241,11 @@ void vbo_save_playback_vertex_list( GLcontext *ctx, void *data )
vbo_bind_vertex_list( ctx, node );
+ /* Again...
+ */
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
vbo_context(ctx)->draw_prims( ctx,
save->inputs,
node->prim,