summary | refs | log | tree | commit | diff
path: root/src/mesa/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- src/mesa/drivers/common/meta.c | 30
-rw-r--r-- src/mesa/drivers/dri/common/xmlpool/Makefile | 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_context.h | 42
-rw-r--r-- src/mesa/drivers/dri/i965/brw_curbe.c | 6
-rw-r--r-- src/mesa/drivers/dri/i965/brw_defines.h | 7
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp | 56
-rw-r--r-- src/mesa/drivers/dri/i965/brw_vs_emit.c | 84
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_pass0.c | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_state.c | 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 3
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_cc.c | 2
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_clip_state.c | 7
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_sf_state.c | 13
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_vs_state.c | 60
-rw-r--r-- src/mesa/drivers/dri/i965/gen6_wm_state.c | 3
-rw-r--r-- src/mesa/drivers/dri/intel/intel_extensions.c | 2
-rw-r--r-- src/mesa/drivers/dri/r300/compiler/radeon_optimize.c | 293
-rw-r--r-- src/mesa/drivers/dri/r600/evergreen_render.c | 4
-rw-r--r-- src/mesa/drivers/dri/radeon/radeon_swtcl.c | 2
-rw-r--r-- src/mesa/drivers/dri/radeon/radeon_tcl.c | 2
20 files changed, 389 insertions, 234 deletions
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 9946bf1990..1bfd76a665 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -143,7 +143,10 @@ struct save_state
struct gl_vertex_program *VertexProgram;
GLboolean FragmentProgramEnabled;
struct gl_fragment_program *FragmentProgram;
- GLuint Shader;
+ GLuint VertexShader;
+ GLuint GeometryShader;
+ GLuint FragmentShader;
+ GLuint ActiveShader;
/** META_STENCIL_TEST */
struct gl_stencil_attrib Stencil;
@@ -433,8 +436,15 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
}
if (ctx->Extensions.ARB_shader_objects) {
- save->Shader = ctx->Shader.CurrentProgram ?
- ctx->Shader.CurrentProgram->Name : 0;
+ save->VertexShader = ctx->Shader.CurrentVertexProgram ?
+ ctx->Shader.CurrentVertexProgram->Name : 0;
+ save->GeometryShader = ctx->Shader.CurrentGeometryProgram ?
+ ctx->Shader.CurrentGeometryProgram->Name : 0;
+ save->FragmentShader = ctx->Shader.CurrentFragmentProgram ?
+ ctx->Shader.CurrentFragmentProgram->Name : 0;
+ save->ActiveShader = ctx->Shader.ActiveProgram ?
+ ctx->Shader.ActiveProgram->Name : 0;
+
_mesa_UseProgramObjectARB(0);
}
}
@@ -664,9 +674,17 @@ _mesa_meta_end(struct gl_context *ctx)
_mesa_reference_fragprog(ctx, &save->FragmentProgram, NULL);
}
- if (ctx->Extensions.ARB_shader_objects) {
- _mesa_UseProgramObjectARB(save->Shader);
- }
+ if (ctx->Extensions.ARB_vertex_shader)
+ _mesa_UseShaderProgramEXT(GL_VERTEX_SHADER, save->VertexShader);
+
+ if (ctx->Extensions.ARB_geometry_shader4)
+ _mesa_UseShaderProgramEXT(GL_GEOMETRY_SHADER_ARB,
+ save->GeometryShader);
+
+ if (ctx->Extensions.ARB_fragment_shader)
+ _mesa_UseShaderProgramEXT(GL_FRAGMENT_SHADER, save->FragmentShader);
+
+ _mesa_ActiveProgramEXT(save->ActiveShader);
}
if (state & META_STENCIL_TEST) {
diff --git a/src/mesa/drivers/dri/common/xmlpool/Makefile b/src/mesa/drivers/dri/common/xmlpool/Makefile
index 62ec919ea6..b71629e9f1 100644
--- a/src/mesa/drivers/dri/common/xmlpool/Makefile
+++ b/src/mesa/drivers/dri/common/xmlpool/Makefile
@@ -62,7 +62,7 @@ clean:
# Default target options.h
options.h: t_options.h mo
- python gen_xmlpool.py $(LANGS) > options.h
+ $(PYTHON2) $(PYTHON_FLAGS) gen_xmlpool.py $(LANGS) > options.h
# Update .mo files from the corresponding .po files.
mo:
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 4a0709b446..335339515a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -188,6 +188,13 @@ struct brw_shader_program {
struct gl_shader_program base;
};
+enum param_conversion {
+ PARAM_NO_CONVERT,
+ PARAM_CONVERT_F2I,
+ PARAM_CONVERT_F2U,
+ PARAM_CONVERT_F2B,
+};
+
/* Data about a particular attempt to compile a program. Note that
* there can be many of these, each in a different GL state
* corresponding to a different brw_wm_prog_key struct, with different
@@ -208,8 +215,10 @@ struct brw_wm_prog_data {
/* Pointer to tracked values (only valid once
* _mesa_load_state_parameters has been called at runtime).
*/
- const GLfloat *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
- const GLfloat *pull_param[MAX_UNIFORMS * 4];
+ const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
+ enum param_conversion param_convert[MAX_UNIFORMS * 4];
+ const float *pull_param[MAX_UNIFORMS * 4];
+ enum param_conversion pull_param_convert[MAX_UNIFORMS * 4];
};
struct brw_sf_prog_data {
@@ -800,6 +809,35 @@ brw_fragment_program_const(const struct gl_fragment_program *p)
return (const struct brw_fragment_program *) p;
}
+static inline
+float convert_param(enum param_conversion conversion, float param)
+{
+ union {
+ float f;
+ uint32_t u;
+ int32_t i;
+ } fi;
+
+ switch (conversion) {
+ case PARAM_NO_CONVERT:
+ return param;
+ case PARAM_CONVERT_F2I:
+ fi.i = param;
+ return fi.f;
+ case PARAM_CONVERT_F2U:
+ fi.u = param;
+ return fi.f;
+ case PARAM_CONVERT_F2B:
+ if (param != 0.0)
+ fi.i = 1;
+ else
+ fi.i = 0;
+ return fi.f;
+ default:
+ return param;
+ }
+}
+
GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 9ce0d8decd..7b823eb201 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -199,8 +199,10 @@ static void prepare_constant_buffer(struct brw_context *brw)
GLuint offset = brw->curbe.wm_start * 16;
/* copy float constants */
- for (i = 0; i < brw->wm.prog_data->nr_params; i++)
- buf[offset + i] = *brw->wm.prog_data->param[i];
+ for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
+ buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i],
+ *brw->wm.prog_data->param[i]);
+ }
}
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 6c3db61035..239586a036 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -930,6 +930,11 @@
#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */
/* DW1 */
# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10)
+/**
+ * Just does cheap culling based on the clip distance. Bits must be
+ * disjoint with USER_CLIP_CLIP_DISTANCE bits.
+ */
+# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0
/* DW2 */
# define GEN6_CLIP_ENABLE (1 << 31)
# define GEN6_CLIP_API_OGL (0 << 30)
@@ -937,6 +942,8 @@
# define GEN6_CLIP_XY_TEST (1 << 28)
# define GEN6_CLIP_Z_TEST (1 << 27)
# define GEN6_CLIP_GB_TEST (1 << 26)
+/** 8-bit field of which user clip distances to clip against. */
+# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16
# define GEN6_CLIP_MODE_NORMAL (0 << 13)
# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13)
# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 4919394bc8..2ed59d3f5d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -228,6 +228,7 @@ brw_type_for_base_type(const struct glsl_type *type)
return BRW_REGISTER_TYPE_UD;
case GLSL_TYPE_ARRAY:
case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_SAMPLER:
/* These should be overridden with the type of the member when
* dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
* way to trip up if we don't.
@@ -286,8 +287,26 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
case GLSL_TYPE_BOOL:
vec_values = fp->Base.Parameters->ParameterValues[loc];
for (unsigned int i = 0; i < type->vector_elements; i++) {
- assert(c->prog_data.nr_params < ARRAY_SIZE(c->prog_data.param));
- c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
+ unsigned int param = c->prog_data.nr_params++;
+
+ assert(param < ARRAY_SIZE(c->prog_data.param));
+
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+ break;
+ case GLSL_TYPE_UINT:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
+ break;
+ case GLSL_TYPE_INT:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
+ break;
+ case GLSL_TYPE_BOOL:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
+ break;
+ }
+
+ c->prog_data.param[param] = &vec_values[i];
}
return 1;
@@ -371,6 +390,8 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
break;
last_swiz = swiz;
+ c->prog_data.param_convert[c->prog_data.nr_params] =
+ PARAM_NO_CONVERT;
c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz];
}
}
@@ -625,6 +646,7 @@ fs_visitor::visit(ir_variable *ir)
}
reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
+ reg->type = brw_type_for_base_type(ir->type);
}
if (!reg)
@@ -901,12 +923,21 @@ fs_visitor::visit(ir_expression *ir)
break;
case ir_unop_bit_not:
- case ir_unop_u2f:
- case ir_binop_lshift:
- case ir_binop_rshift:
+ inst = emit(fs_inst(BRW_OPCODE_NOT, this->result, op[0]));
+ break;
case ir_binop_bit_and:
+ inst = emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
+ break;
case ir_binop_bit_xor:
+ inst = emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
+ break;
case ir_binop_bit_or:
+ inst = emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
+ break;
+
+ case ir_unop_u2f:
+ case ir_binop_lshift:
+ case ir_binop_rshift:
assert(!"GLSL 1.30 features unsupported");
break;
}
@@ -1186,7 +1217,7 @@ fs_visitor::visit(ir_texture *ir)
assert(!ir->projector);
sampler = _mesa_get_sampler_uniform_value(ir->sampler,
- ctx->Shader.CurrentProgram,
+ ctx->Shader.CurrentFragmentProgram,
&brw->fragment_program->Base);
sampler = c->fp->program.Base.SamplerUnits[sampler];
@@ -1204,6 +1235,11 @@ fs_visitor::visit(ir_texture *ir)
0
};
+ c->prog_data.param_convert[c->prog_data.nr_params] =
+ PARAM_NO_CONVERT;
+ c->prog_data.param_convert[c->prog_data.nr_params + 1] =
+ PARAM_NO_CONVERT;
+
fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
GLuint index = _mesa_add_state_reference(params,
@@ -2359,7 +2395,7 @@ fs_visitor::assign_curb_setup()
constant_nr % 8);
inst->src[i].file = FIXED_HW_REG;
- inst->src[i].fixed_hw_reg = brw_reg;
+ inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type);
}
}
}
@@ -2566,6 +2602,8 @@ fs_visitor::setup_pull_constants()
for (int i = 0; i < pull_uniform_count; i++) {
c->prog_data.pull_param[i] = c->prog_data.param[pull_uniform_base + i];
+ c->prog_data.pull_param_convert[i] =
+ c->prog_data.param_convert[pull_uniform_base + i];
}
c->prog_data.nr_params -= pull_uniform_count;
c->prog_data.nr_pull_params = pull_uniform_count;
@@ -3093,7 +3131,7 @@ fs_visitor::generate_code()
if (INTEL_DEBUG & DEBUG_WM) {
printf("Native code for fragment shader %d:\n",
- ctx->Shader.CurrentProgram->Name);
+ ctx->Shader.CurrentFragmentProgram->Name);
}
if_depth_in_loop[loop_stack_depth] = 0;
@@ -3320,7 +3358,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
- struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
+ struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram;
if (!prog)
return GL_FALSE;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index ce33479996..e560cdac1f 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -165,13 +165,20 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
/* User clip planes from curbe:
*/
if (c->key.nr_userclip) {
- for (i = 0; i < c->key.nr_userclip; i++) {
- c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
- }
+ if (intel->gen >= 6) {
+ for (i = 0; i < c->key.nr_userclip; i++) {
+ c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
+ (i % 2) * 4), 0, 4, 1);
+ }
+ reg += ALIGN(c->key.nr_userclip, 2) / 2;
+ } else {
+ for (i = 0; i < c->key.nr_userclip; i++) {
+ c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
+ (i % 2) * 4), 0, 4, 1);
+ }
+ reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
+ }
- /* Deal with curbe alignment:
- */
- reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;
}
/* Vertex program parameters from curbe:
@@ -253,9 +260,11 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
c->first_output = reg;
c->first_overflow_output = 0;
- if (intel->gen >= 6)
- mrf = 3; /* no more pos store in attribute */
- else if (intel->gen == 5)
+ if (intel->gen >= 6) {
+ mrf = 3;
+ if (c->key.nr_userclip)
+ mrf += 2;
+ } else if (intel->gen == 5)
mrf = 8;
else
mrf = 4;
@@ -372,9 +381,13 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
/* See emit_vertex_write() for where the VUE's overhead on top of the
* attributes comes from.
*/
- if (intel->gen >= 6)
- c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 7) / 8;
- else if (intel->gen == 5)
+ if (intel->gen >= 6) {
+ int header_regs = 2;
+ if (c->key.nr_userclip)
+ header_regs += 2;
+
+ c->prog_data.urb_entry_size = (attributes_in_vue + header_regs + 7) / 8;
+ } else if (intel->gen == 5)
c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
else
c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
@@ -1392,9 +1405,33 @@ static void emit_vertex_write( struct brw_vs_compile *c)
/* Update the header for point size, user clipping flags, and -ve rhw
* workaround.
*/
- if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
- c->key.nr_userclip || brw->has_negative_rhw_bug)
- {
+ if (intel->gen >= 6) {
+ struct brw_reg m1 = brw_message_reg(1);
+
+ /* On gen6, m1 has each value in a separate dword, so we never
+ * need to mess with a temporary for computing the m1 value.
+ */
+ brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
+ if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+ brw_MOV(p, brw_writemask(m1, WRITEMASK_W),
+ brw_swizzle1(c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ], 0));
+ }
+
+ /* Set the user clip distances in dwords 8-15 (m3-m4). */
+ if (c->key.nr_userclip) {
+ for (i = 0; i < c->key.nr_userclip; i++) {
+ struct brw_reg m;
+ if (i < 4)
+ m = brw_message_reg(3);
+ else
+ m = brw_message_reg(4);
+
+ brw_DP4(p, brw_writemask(m, (1 << (i & 7))),pos, c->userplane[i]);
+ }
+ }
+ } else if ((c->prog_data.outputs_written &
+ BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
+ c->key.nr_userclip || brw->has_negative_rhw_bug) {
struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
GLuint i;
@@ -1404,11 +1441,10 @@ static void emit_vertex_write( struct brw_vs_compile *c)
if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ];
- if (intel->gen < 6) {
- brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
- brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
- } else
- brw_MOV(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0));
+ brw_MUL(p, brw_writemask(header1, WRITEMASK_W),
+ brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
+ brw_AND(p, brw_writemask(header1, WRITEMASK_W),
+ header1, brw_imm_ud(0x7ff<<8));
}
for (i = 0; i < c->key.nr_userclip; i++) {
@@ -1461,12 +1497,14 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* dword 0-3 (m1) of the header is indices, point width, clip flags.
* dword 4-7 (m2) is the 4D space position
* dword 8-15 (m3,m4) of the vertex header is the user clip distance if
- * enabled. We don't use it, so skip it.
- * m3 is the first vertex element data we fill, which is the vertex
- * position.
+ * enabled.
+ * m3 or m5 is the first vertex element data we fill, which is
+ * the vertex position.
*/
brw_MOV(p, brw_message_reg(2), pos);
len_vertex_header = 1;
+ if (c->key.nr_userclip > 0)
+ len_vertex_header += 2;
} else if (intel->gen == 5) {
/* There are 20 DWs (D0-D19) in VUE header on Ironlake:
* dword 0-3 (m1) of the header is indices, point width, clip flags.
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index 8fc960b445..d6aa9f957a 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -113,6 +113,7 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
struct brw_wm_ref *ref = get_ref(c);
c->prog_data.param[i] = param_ptr;
+ c->prog_data.param_convert[i] = PARAM_NO_CONVERT;
c->nr_creg = (i+16)/16;
/* Push the offsets into hw_reg. These will be added to the
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 433ccc66f0..9a27b93710 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -137,9 +137,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
/* If using the fragment shader backend, the program is always
* 8-wide.
*/
- if (ctx->Shader.CurrentProgram) {
+ if (ctx->Shader.CurrentFragmentProgram) {
struct brw_shader *shader = (struct brw_shader *)
- ctx->Shader.CurrentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT];
+ ctx->Shader.CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT];
if (shader != NULL && shader->ir != NULL) {
key->is_glsl = GL_TRUE;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index dd5ddea920..76fc94df1f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -343,7 +343,8 @@ prepare_wm_constants(struct brw_context *brw)
drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
constants = brw->wm.const_bo->virtual;
for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
- constants[i] = *brw->wm.prog_data->pull_param[i];
+ constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i],
+ *brw->wm.prog_data->pull_param[i]);
}
drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
index 0d6e923f73..800a255521 100644
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -254,7 +254,7 @@ prepare_color_calc_state(struct brw_context *brw)
const struct brw_tracked_state gen6_color_calc_state = {
.dirty = {
- .mesa = _NEW_COLOR,
+ .mesa = _NEW_COLOR | _NEW_STENCIL,
.brw = 0,
.cache = 0,
},
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index cd2ac9d92f..c65b41e2b6 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -28,6 +28,7 @@
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
+#include "brw_util.h"
#include "intel_batchbuffer.h"
static void
@@ -36,7 +37,7 @@ upload_clip_state(struct brw_context *brw)
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
uint32_t depth_clamp = 0;
- uint32_t provoking;
+ uint32_t provoking, userclip;
if (!ctx->Transform.DepthClamp)
depth_clamp = GEN6_CLIP_Z_TEST;
@@ -50,6 +51,9 @@ upload_clip_state(struct brw_context *brw)
(1 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
}
+ /* _NEW_TRANSFORM */
+ userclip = (1 << brw_count_bits(ctx->Transform.ClipPlanesEnabled)) - 1;
+
BEGIN_BATCH(4);
OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2));
OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE);
@@ -57,6 +61,7 @@ upload_clip_state(struct brw_context *brw)
GEN6_CLIP_API_OGL |
GEN6_CLIP_MODE_NORMAL |
GEN6_CLIP_XY_TEST |
+ userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
depth_clamp |
provoking);
OUT_BATCH(GEN6_CLIP_FORCE_ZERO_RTAINDEX);
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index 55a70bea62..471067e8f0 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -73,12 +73,19 @@ upload_sf_state(struct brw_context *brw)
/* _NEW_BUFFER */
GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
int attr = 0;
+ int urb_start;
+
+ /* _NEW_TRANSFORM */
+ if (ctx->Transform.ClipPlanesEnabled)
+ urb_start = 2;
+ else
+ urb_start = 1;
dw1 =
GEN6_SF_SWIZZLE_ENABLE |
num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
(num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
- 1 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+ urb_start << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE |
GEN6_SF_STATISTICS_ENABLE;
dw3 = 0;
@@ -195,7 +202,9 @@ const struct brw_tracked_state gen6_sf_state = {
_NEW_POLYGON |
_NEW_LINE |
_NEW_SCISSOR |
- _NEW_BUFFERS),
+ _NEW_BUFFERS |
+ _NEW_POINT |
+ _NEW_TRANSFORM),
.brw = BRW_NEW_CONTEXT,
.cache = CACHE_NEW_VS_PROG
},
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index 304eaddf40..d8da216d15 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -40,11 +40,11 @@ upload_vs_state(struct brw_context *brw)
struct gl_context *ctx = &intel->ctx;
const struct brw_vertex_program *vp =
brw_vertex_program_const(brw->vertex_program);
- unsigned int nr_params = vp->program.Base.Parameters->NumParameters;
+ unsigned int nr_params = brw->vs.prog_data->nr_params / 4;
drm_intel_bo *constant_bo;
int i;
- if (vp->use_const_buffer || nr_params == 0) {
+ if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2));
@@ -54,6 +54,9 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
+ int params_uploaded = 0;
+ float *param;
+
if (brw->vertex_program->IsNVProgram)
_mesa_load_tracked_matrices(ctx);
@@ -63,14 +66,55 @@ upload_vs_state(struct brw_context *brw)
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo",
- nr_params * 4 * sizeof(float),
+ (MAX_CLIP_PLANES + nr_params) *
+ 4 * sizeof(float),
4096);
drm_intel_gem_bo_map_gtt(constant_bo);
- for (i = 0; i < nr_params; i++) {
- memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
- vp->program.Base.Parameters->ParameterValues[i],
- 4 * sizeof(float));
+ param = constant_bo->virtual;
+
+ /* This should be loaded like any other param, but it's ad-hoc
+ * until we redo the VS backend.
+ */
+ for (i = 0; i < MAX_CLIP_PLANES; i++) {
+ if (ctx->Transform.ClipPlanesEnabled & (1 << i)) {
+ memcpy(param, ctx->Transform._ClipUserPlane[i], 4 * sizeof(float));
+ param += 4;
+ params_uploaded++;
+ }
}
+ /* Align to a reg for convenience for brw_vs_emit.c */
+ if (params_uploaded & 1) {
+ param += 4;
+ params_uploaded++;
+ }
+
+ if (vp->use_const_buffer) {
+ for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+ if (brw->vs.constant_map[i] != -1) {
+ memcpy(param + brw->vs.constant_map[i] * 4,
+ vp->program.Base.Parameters->ParameterValues[i],
+ 4 * sizeof(float));
+ params_uploaded++;
+ }
+ }
+ } else {
+ for (i = 0; i < nr_params; i++) {
+ memcpy(param, vp->program.Base.Parameters->ParameterValues[i],
+ 4 * sizeof(float));
+ param += 4;
+ params_uploaded++;
+ }
+ }
+
+ if (0) {
+ printf("VS constant buffer:\n");
+ for (i = 0; i < params_uploaded; i++) {
+ float *buf = (float *)constant_bo->virtual + i * 4;
+ printf("%d: %f %f %f %f\n",
+ i, buf[0], buf[1], buf[2], buf[3]);
+ }
+ }
+
drm_intel_gem_bo_unmap_gtt(constant_bo);
BEGIN_BATCH(5);
@@ -79,7 +123,7 @@ upload_vs_state(struct brw_context *brw)
(5 - 2));
OUT_RELOC(constant_bo,
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
- ALIGN(nr_params, 2) / 2 - 1);
+ ALIGN(params_uploaded, 2) / 2 - 1);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 82d127720d..36d4ab93ba 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -63,7 +63,8 @@ prepare_wm_constants(struct brw_context *brw)
drm_intel_gem_bo_map_gtt(brw->wm.push_const_bo);
constants = brw->wm.push_const_bo->virtual;
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
- constants[i] = *brw->wm.prog_data->param[i];
+ constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
+ *brw->wm.prog_data->param[i]);
}
drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo);
}
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
index 974045730b..556a4195bd 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -55,6 +55,7 @@
#define need_GL_EXT_point_parameters
#define need_GL_EXT_provoking_vertex
#define need_GL_EXT_secondary_color
+#define need_GL_EXT_separate_shader_objects
#define need_GL_EXT_stencil_two_side
#define need_GL_EXT_timer_query
#define need_GL_APPLE_vertex_array_object
@@ -114,6 +115,7 @@ static const struct dri_extension card_extensions[] = {
{ "GL_EXT_packed_depth_stencil", NULL },
{ "GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions },
{ "GL_EXT_secondary_color", GL_EXT_secondary_color_functions },
+ { "GL_EXT_separate_shader_objects", GL_EXT_separate_shader_objects_functions },
{ "GL_EXT_stencil_wrap", NULL },
{ "GL_EXT_texture_edge_clamp", NULL },
{ "GL_EXT_texture_env_combine", NULL },
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index 4d9120ffd0..5556927357 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -32,15 +32,12 @@
#include "radeon_compiler_util.h"
#include "radeon_swizzle.h"
-struct peephole_state {
- struct rc_instruction * Inst;
- /** Stores a bitmask of the components that are still "alive" (i.e.
- * they have not been written to since Inst was executed.)
- */
- unsigned int WriteMask;
+struct src_clobbered_data {
+ unsigned int NumSrcRegs;
+ unsigned int SrcMasks[3];
};
-typedef void (*rc_presub_replace_fn)(struct peephole_state *,
+typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
struct rc_instruction *,
unsigned int);
@@ -67,27 +64,6 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct
return combine;
}
-struct copy_propagate_state {
- struct radeon_compiler * C;
- struct rc_instruction * Mov;
- unsigned int Conflict:1;
-
- /** Whether Mov's source has been clobbered */
- unsigned int SourceClobbered:1;
-
- /** Which components of Mov's destination register are still from that Mov? */
- unsigned int MovMask:4;
-
- /** Which components of Mov's destination register are clearly *not* from that Mov */
- unsigned int DefinedMask:4;
-
- /** Which components of Mov's source register are sourced */
- unsigned int SourcedMask:4;
-
- /** Branch depth beyond Mov; negative value indicates we left the Mov's block */
- int BranchDepth;
-};
-
static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
struct rc_src_register * src)
{
@@ -123,24 +99,36 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
}
}
-static void copy_propagate_scan_write(void * data, struct rc_instruction * inst,
- rc_register_file file, unsigned int index, unsigned int mask)
+static void is_src_clobbered_scan_write(
+ void * data,
+ struct rc_instruction * inst,
+ rc_register_file file,
+ unsigned int index,
+ unsigned int mask)
{
+ unsigned int i;
struct rc_reader_data * reader_data = data;
- struct copy_propagate_state * s = reader_data->CbData;
+ struct src_clobbered_data * d = reader_data->CbData;
+ for (i = 0; i < d->NumSrcRegs; i++) {
+ if (file == reader_data->Writer->U.I.SrcReg[i].File
+ && index == reader_data->Writer->U.I.SrcReg[i].Index
+ && (mask & d->SrcMasks[i])){
- if (file == reader_data->Writer->U.I.SrcReg[0].File && index == reader_data->Writer->U.I.SrcReg[0].Index) {
- if (mask & s->SourcedMask)
reader_data->AbortOnRead = 1;
- } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
- reader_data->AbortOnRead = 1;
+ return;
+ }
+ if (reader_data->Writer->U.I.SrcReg[i].RelAddr &&
+ file == RC_FILE_ADDRESS) {
+ reader_data->AbortOnRead = 1;
+ return;
+ }
}
}
static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
{
- struct copy_propagate_state s;
struct rc_reader_data reader_data;
+ struct src_clobbered_data sc_data;
unsigned int i;
if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
@@ -149,22 +137,15 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
inst_mov->U.I.SaturateMode)
return;
- memset(&s, 0, sizeof(s));
- s.C = c;
- s.Mov = inst_mov;
- s.MovMask = inst_mov->U.I.DstReg.WriteMask;
- s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
+ sc_data.NumSrcRegs = 1;
+ sc_data.SrcMasks[0] = rc_swizzle_to_writemask(
+ inst_mov->U.I.SrcReg[0].Swizzle);
- reader_data.CbData = &s;
-
- for(unsigned int chan = 0; chan < 4; ++chan) {
- unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
- s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
- }
+ reader_data.CbData = &sc_data;
/* Get a list of all the readers of this MOV instruction. */
rc_get_readers_normal(c, inst_mov, &reader_data,
- copy_propagate_scan_read, copy_propagate_scan_write);
+ copy_propagate_scan_read, is_src_clobbered_scan_write);
if (reader_data.Abort || reader_data.ReaderCount == 0)
return;
@@ -172,10 +153,10 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
/* Propagate the MOV instruction. */
for (i = 0; i < reader_data.ReaderCount; i++) {
struct rc_instruction * inst = reader_data.Readers[i].Inst;
- *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, s.Mov->U.I.SrcReg[0]);
+ *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, inst_mov->U.I.SrcReg[0]);
- if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
- inst->U.I.PreSub = s.Mov->U.I.PreSub;
+ if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
+ inst->U.I.PreSub = inst_mov->U.I.PreSub;
}
/* Finally, remove the original MOV instruction */
@@ -431,129 +412,99 @@ static int src_has_const_swz(struct rc_src_register src) {
return 0;
}
-static void peephole_scan_write(void * data, struct rc_instruction * inst,
- rc_register_file file, unsigned int index, unsigned int mask)
+static void presub_scan_read(
+ void * data,
+ struct rc_instruction * inst,
+ struct rc_src_register * src)
{
- struct peephole_state * s = data;
- if(s->Inst->U.I.DstReg.File == file
- && s->Inst->U.I.DstReg.Index == index) {
- unsigned int common_mask = s->WriteMask & mask;
- s->WriteMask &= ~common_mask;
+ struct rc_reader_data * reader_data = data;
+ const struct rc_opcode_info * info =
+ rc_get_opcode_info(inst->U.I.Opcode);
+ /* XXX: There are some situations where instructions
+ * with more than 2 src registers can use the
+ * presubtract select, but to keep things simple we
+ * will disable presubtract on these instructions for
+ * now. */
+ if (info->NumSrcRegs > 2 || info->HasTexture) {
+ reader_data->Abort = 1;
+ return;
+ }
+
+ /* We can't use more than one presubtract value in an
+ * instruction, unless the two presubtract operations
+ * are the same and read from the same registers.
+ * XXX For now we will limit instructions to only one presubtract
+ * value.*/
+ if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+ reader_data->Abort = 1;
+ return;
}
}
static int presub_helper(
struct radeon_compiler * c,
- struct peephole_state * s,
+ struct rc_instruction * inst_add,
rc_presubtract_op presub_opcode,
rc_presub_replace_fn presub_replace)
{
- struct rc_instruction * inst;
- unsigned int can_remove = 0;
- unsigned int cant_sub = 0;
-
- for(inst = s->Inst->Next; inst != &c->Program.Instructions;
- inst = inst->Next) {
- unsigned int i;
- unsigned char can_use_presub = 1;
- const struct rc_opcode_info * info =
- rc_get_opcode_info(inst->U.I.Opcode);
- /* XXX: There are some situations where instructions
- * with more than 2 src registers can use the
- * presubtract select, but to keep things simple we
- * will disable presubtract on these instructions for
- * now. */
- if (info->NumSrcRegs > 2 || info->HasTexture) {
- can_use_presub = 0;
- }
+ struct rc_reader_data reader_data;
+ struct src_clobbered_data sc_data;
+ unsigned int i;
- /* We can't use more than one presubtract value in an
- * instruction, unless the two prsubtract operations
- * are the same and read from the same registers. */
- if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
- if (inst->U.I.PreSub.Opcode != presub_opcode
- || inst->U.I.PreSub.SrcReg[0].File !=
- s->Inst->U.I.SrcReg[1].File
- || inst->U.I.PreSub.SrcReg[0].Index !=
- s->Inst->U.I.SrcReg[1].Index) {
- can_use_presub = 0;
- }
- }
+ sc_data.NumSrcRegs = 2;
+ sc_data.SrcMasks[0] = rc_swizzle_to_writemask(
+ inst_add->U.I.SrcReg[0].Swizzle);
+ sc_data.SrcMasks[1] = rc_swizzle_to_writemask(
+ inst_add->U.I.SrcReg[1].Swizzle);
+ reader_data.CbData = &sc_data;
+ rc_get_readers_normal(c, inst_add, &reader_data, presub_scan_read,
+ is_src_clobbered_scan_write);
- /* Even if the instruction can't use a presubtract operation
- * we still need to check if the instruction reads from
- * s->Inst->U.I.DstReg, because if it does we must not
- * remove s->Inst. */
- for(i = 0; i < info->NumSrcRegs; i++) {
- unsigned int mask = src_reads_dst_mask(
- inst->U.I.SrcReg[i], s->Inst->U.I.DstReg);
- /* XXX We could be more aggressive here using
- * presubtract. It is okay if SrcReg[i] only reads
- * from some of the mask components. */
- if(s->Inst->U.I.DstReg.WriteMask != mask) {
- if (s->Inst->U.I.DstReg.WriteMask & mask) {
- can_remove = 0;
- break;
- } else {
- continue;
- }
- }
- if (cant_sub || !can_use_presub) {
- can_remove = 0;
- break;
- }
- presub_replace(s, inst, i);
- can_remove = 1;
- }
- if(!can_remove)
- break;
- rc_for_all_writes_mask(inst, peephole_scan_write, s);
- /* If all components of inst_add's destination register have
- * been written to by subsequent instructions, the original
- * value of the destination register is no longer valid and
- * we can't keep doing substitutions. */
- if (!s->WriteMask){
- break;
- }
- /* Make this instruction doesn't write to the presubtract source. */
- if (inst->U.I.DstReg.WriteMask &
- src_reads_dst_mask(s->Inst->U.I.SrcReg[1],
- inst->U.I.DstReg)
- || src_reads_dst_mask(s->Inst->U.I.SrcReg[0],
- inst->U.I.DstReg)
- || info->IsFlowControl) {
- cant_sub = 1;
+ if (reader_data.Abort || reader_data.ReaderCount == 0)
+ return 0;
+
+ for(i = 0; i < reader_data.ReaderCount; i++) {
+ unsigned int src_index;
+ struct rc_reader reader = reader_data.Readers[i];
+ const struct rc_opcode_info * info =
+ rc_get_opcode_info(reader.Inst->U.I.Opcode);
+
+ for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
+ if (&reader.Inst->U.I.SrcReg[src_index] == reader.Src)
+ presub_replace(inst_add, reader.Inst, src_index);
}
}
- return can_remove;
+ return 1;
}
-/* This function assumes that s->Inst->U.I.SrcReg[0] and
- * s->Inst->U.I.SrcReg[1] aren't both negative. */
-static void presub_replace_add(struct peephole_state *s,
- struct rc_instruction * inst,
- unsigned int src_index)
+/* This function assumes that inst_add->U.I.SrcReg[0] and
+ * inst_add->U.I.SrcReg[1] aren't both negative. */
+static void presub_replace_add(
+ struct rc_instruction * inst_add,
+ struct rc_instruction * inst_reader,
+ unsigned int src_index)
{
rc_presubtract_op presub_opcode;
- if (s->Inst->U.I.SrcReg[1].Negate || s->Inst->U.I.SrcReg[0].Negate)
+ if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
presub_opcode = RC_PRESUB_SUB;
else
presub_opcode = RC_PRESUB_ADD;
- if (s->Inst->U.I.SrcReg[1].Negate) {
- inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
- inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[0];
+ if (inst_add->U.I.SrcReg[1].Negate) {
+ inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+ inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
} else {
- inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0];
- inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1];
+ inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
+ inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
}
- inst->U.I.PreSub.SrcReg[0].Negate = 0;
- inst->U.I.PreSub.SrcReg[1].Negate = 0;
- inst->U.I.PreSub.Opcode = presub_opcode;
- inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
- inst->U.I.PreSub.SrcReg[0]);
- inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
- inst->U.I.SrcReg[src_index].Index = presub_opcode;
+ inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
+ inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
+ inst_reader->U.I.PreSub.Opcode = presub_opcode;
+ inst_reader->U.I.SrcReg[src_index] =
+ chain_srcregs(inst_reader->U.I.SrcReg[src_index],
+ inst_reader->U.I.PreSub.SrcReg[0]);
+ inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+ inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
}
static int is_presub_candidate(struct rc_instruction * inst)
@@ -578,7 +529,6 @@ static int peephole_add_presub_add(
struct rc_src_register * src0 = NULL;
struct rc_src_register * src1 = NULL;
unsigned int i;
- struct peephole_state s;
if (!is_presub_candidate(inst_add))
return 0;
@@ -604,30 +554,28 @@ static int peephole_add_presub_add(
if (!src1)
return 0;
- s.Inst = inst_add;
- s.WriteMask = inst_add->U.I.DstReg.WriteMask;
- if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) {
+ if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
rc_remove_instruction(inst_add);
return 1;
}
return 0;
}
-static void presub_replace_inv(struct peephole_state * s,
- struct rc_instruction * inst,
- unsigned int src_index)
+static void presub_replace_inv(
+ struct rc_instruction * inst_add,
+ struct rc_instruction * inst_reader,
+ unsigned int src_index)
{
- /* We must be careful not to modify s->Inst, since it
- * is possible it will remain part of the program.
- * XXX Maybe pass a struct instead of a pointer for s->Inst.*/
- inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
- inst->U.I.PreSub.SrcReg[0].Negate = 0;
- inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
- inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
- inst->U.I.PreSub.SrcReg[0]);
-
- inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
- inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
+ /* We must be careful not to modify inst_add, since it
+ * is possible it will remain part of the program.*/
+ inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+ inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
+ inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
+ inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
+ inst_reader->U.I.PreSub.SrcReg[0]);
+
+ inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+ inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
}
/**
@@ -645,7 +593,6 @@ static int peephole_add_presub_inv(
struct rc_instruction * inst_add)
{
unsigned int i, swz, mask;
- struct peephole_state s;
if (!is_presub_candidate(inst_add))
return 0;
@@ -674,11 +621,7 @@ static int peephole_add_presub_inv(
return 0;
}
- /* Setup the peephole_state information. */
- s.Inst = inst_add;
- s.WriteMask = inst_add->U.I.DstReg.WriteMask;
-
- if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) {
+ if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
rc_remove_instruction(inst_add);
return 1;
}
diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c
index 0c0eeca1fc..6e51832c87 100644
--- a/src/mesa/drivers/dri/r600/evergreen_render.c
+++ b/src/mesa/drivers/dri/r600/evergreen_render.c
@@ -909,6 +909,10 @@ static void evergreenDrawPrims(struct gl_context *ctx,
{
GLboolean retval = GL_FALSE;
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ radeonContextPtr radeon = &context->radeon;
+ radeon_prepare_render(radeon);
+
/* This check should get folded into just the places that
* min/max index are really needed.
*/
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index f5b0df6ef5..ff3506b225 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -412,6 +412,8 @@ static GLboolean radeon_run_render( struct gl_context *ctx,
return GL_TRUE;
radeon_prepare_render(&rmesa->radeon);
+ if (rmesa->radeon.NewGLState)
+ radeonValidateState( ctx );
tnl->Driver.Render.Start( ctx );
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
index c59b413012..5d2e8f4870 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@@ -253,6 +253,8 @@ void radeonTclPrimitive( struct gl_context *ctx,
GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
radeon_prepare_render(&rmesa->radeon);
+ if (rmesa->radeon.NewGLState)
+ radeonValidateState( ctx );
if (newprim != rmesa->tcl.hw_primitive ||
!discrete_prim[hw_prim&0xf]) {