diff options
Diffstat (limited to 'src/mesa/drivers')
20 files changed, 389 insertions, 234 deletions
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 9946bf1990..1bfd76a665 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -143,7 +143,10 @@ struct save_state struct gl_vertex_program *VertexProgram; GLboolean FragmentProgramEnabled; struct gl_fragment_program *FragmentProgram; - GLuint Shader; + GLuint VertexShader; + GLuint GeometryShader; + GLuint FragmentShader; + GLuint ActiveShader; /** META_STENCIL_TEST */ struct gl_stencil_attrib Stencil; @@ -433,8 +436,15 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) } if (ctx->Extensions.ARB_shader_objects) { - save->Shader = ctx->Shader.CurrentProgram ? - ctx->Shader.CurrentProgram->Name : 0; + save->VertexShader = ctx->Shader.CurrentVertexProgram ? + ctx->Shader.CurrentVertexProgram->Name : 0; + save->GeometryShader = ctx->Shader.CurrentGeometryProgram ? + ctx->Shader.CurrentGeometryProgram->Name : 0; + save->FragmentShader = ctx->Shader.CurrentFragmentProgram ? + ctx->Shader.CurrentFragmentProgram->Name : 0; + save->ActiveShader = ctx->Shader.ActiveProgram ? + ctx->Shader.ActiveProgram->Name : 0; + _mesa_UseProgramObjectARB(0); } } @@ -664,9 +674,17 @@ _mesa_meta_end(struct gl_context *ctx) _mesa_reference_fragprog(ctx, &save->FragmentProgram, NULL); } - if (ctx->Extensions.ARB_shader_objects) { - _mesa_UseProgramObjectARB(save->Shader); - } + if (ctx->Extensions.ARB_vertex_shader) + _mesa_UseShaderProgramEXT(GL_VERTEX_SHADER, save->VertexShader); + + if (ctx->Extensions.ARB_geometry_shader4) + _mesa_UseShaderProgramEXT(GL_GEOMETRY_SHADER_ARB, + save->GeometryShader); + + if (ctx->Extensions.ARB_fragment_shader) + _mesa_UseShaderProgramEXT(GL_FRAGMENT_SHADER, save->FragmentShader); + + _mesa_ActiveProgramEXT(save->ActiveShader); } if (state & META_STENCIL_TEST) { diff --git a/src/mesa/drivers/dri/common/xmlpool/Makefile b/src/mesa/drivers/dri/common/xmlpool/Makefile index 62ec919ea6..b71629e9f1 100644 --- a/src/mesa/drivers/dri/common/xmlpool/Makefile +++ b/src/mesa/drivers/dri/common/xmlpool/Makefile @@ -62,7 +62,7 @@ clean: # Default target options.h options.h: t_options.h mo - python gen_xmlpool.py $(LANGS) > options.h + $(PYTHON2) $(PYTHON_FLAGS) gen_xmlpool.py $(LANGS) > options.h # Update .mo files from the corresponding .po files. mo: diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 4a0709b446..335339515a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -188,6 +188,13 @@ struct brw_shader_program { struct gl_shader_program base; }; +enum param_conversion { + PARAM_NO_CONVERT, + PARAM_CONVERT_F2I, + PARAM_CONVERT_F2U, + PARAM_CONVERT_F2B, +}; + /* Data about a particular attempt to compile a program. Note that * there can be many of these, each in a different GL state * corresponding to a different brw_wm_prog_key struct, with different @@ -208,8 +215,10 @@ struct brw_wm_prog_data { /* Pointer to tracked values (only valid once * _mesa_load_state_parameters has been called at runtime). */ - const GLfloat *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */ - const GLfloat *pull_param[MAX_UNIFORMS * 4]; + const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */ + enum param_conversion param_convert[MAX_UNIFORMS * 4]; + const float *pull_param[MAX_UNIFORMS * 4]; + enum param_conversion pull_param_convert[MAX_UNIFORMS * 4]; }; struct brw_sf_prog_data { @@ -800,6 +809,35 @@ brw_fragment_program_const(const struct gl_fragment_program *p) return (const struct brw_fragment_program *) p; } +static inline +float convert_param(enum param_conversion conversion, float param) +{ + union { + float f; + uint32_t u; + int32_t i; + } fi; + + switch (conversion) { + case PARAM_NO_CONVERT: + return param; + case PARAM_CONVERT_F2I: + fi.i = param; + return fi.f; + case PARAM_CONVERT_F2U: + fi.u = param; + return fi.f; + case PARAM_CONVERT_F2B: + if (param != 0.0) + fi.i = 1; + else + fi.i = 0; + return fi.f; + default: + return param; + } +} + GLboolean brw_do_cubemap_normalize(struct exec_list *instructions); #endif diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 9ce0d8decd..7b823eb201 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -199,8 +199,10 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.wm_start * 16; /* copy float constants */ - for (i = 0; i < brw->wm.prog_data->nr_params; i++) - buf[offset + i] = *brw->wm.prog_data->param[i]; + for (i = 0; i < brw->wm.prog_data->nr_params; i++) { + buf[offset + i] = convert_param(brw->wm.prog_data->param_convert[i], + *brw->wm.prog_data->param[i]); + } } diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6c3db61035..239586a036 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -930,6 +930,11 @@ #define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */ /* DW1 */ # define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) +/** + * Just does cheap culling based on the clip distance. Bits must be + * disjoint with USER_CLIP_CLIP_DISTANCE bits. + */ +# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0 /* DW2 */ # define GEN6_CLIP_ENABLE (1 << 31) # define GEN6_CLIP_API_OGL (0 << 30) @@ -937,6 +942,8 @@ # define GEN6_CLIP_XY_TEST (1 << 28) # define GEN6_CLIP_Z_TEST (1 << 27) # define GEN6_CLIP_GB_TEST (1 << 26) +/** 8-bit field of which user clip distances to clip aganist. */ +# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16 # define GEN6_CLIP_MODE_NORMAL (0 << 13) # define GEN6_CLIP_MODE_REJECT_ALL (3 << 13) # define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 4919394bc8..2ed59d3f5d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -228,6 +228,7 @@ brw_type_for_base_type(const struct glsl_type *type) return BRW_REGISTER_TYPE_UD; case GLSL_TYPE_ARRAY: case GLSL_TYPE_STRUCT: + case GLSL_TYPE_SAMPLER: /* These should be overridden with the type of the member when * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely * way to trip up if we don't. @@ -286,8 +287,26 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type) case GLSL_TYPE_BOOL: vec_values = fp->Base.Parameters->ParameterValues[loc]; for (unsigned int i = 0; i < type->vector_elements; i++) { - assert(c->prog_data.nr_params < ARRAY_SIZE(c->prog_data.param)); - c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i]; + unsigned int param = c->prog_data.nr_params++; + + assert(param < ARRAY_SIZE(c->prog_data.param)); + + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + c->prog_data.param_convert[param] = PARAM_NO_CONVERT; + break; + case GLSL_TYPE_UINT: + c->prog_data.param_convert[param] = PARAM_CONVERT_F2U; + break; + case GLSL_TYPE_INT: + c->prog_data.param_convert[param] = PARAM_CONVERT_F2I; + break; + case GLSL_TYPE_BOOL: + c->prog_data.param_convert[param] = PARAM_CONVERT_F2B; + break; + } + + c->prog_data.param[param] = &vec_values[i]; } return 1; @@ -371,6 +390,8 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir) break; last_swiz = swiz; + c->prog_data.param_convert[c->prog_data.nr_params] = + PARAM_NO_CONVERT; c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz]; } } @@ -625,6 +646,7 @@ fs_visitor::visit(ir_variable *ir) } reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index); + reg->type = brw_type_for_base_type(ir->type); } if (!reg) @@ -901,12 +923,21 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_bit_not: - case ir_unop_u2f: - case ir_binop_lshift: - case ir_binop_rshift: + inst = emit(fs_inst(BRW_OPCODE_NOT, this->result, op[0])); + break; case ir_binop_bit_and: + inst = emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1])); + break; case ir_binop_bit_xor: + inst = emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1])); + break; case ir_binop_bit_or: + inst = emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1])); + break; + + case ir_unop_u2f: + case ir_binop_lshift: + case ir_binop_rshift: assert(!"GLSL 1.30 features unsupported"); break; } @@ -1186,7 +1217,7 @@ fs_visitor::visit(ir_texture *ir) assert(!ir->projector); sampler = _mesa_get_sampler_uniform_value(ir->sampler, - ctx->Shader.CurrentProgram, + ctx->Shader.CurrentFragmentProgram, &brw->fragment_program->Base); sampler = c->fp->program.Base.SamplerUnits[sampler]; @@ -1204,6 +1235,11 @@ fs_visitor::visit(ir_texture *ir) 0 }; + c->prog_data.param_convert[c->prog_data.nr_params] = + PARAM_NO_CONVERT; + c->prog_data.param_convert[c->prog_data.nr_params + 1] = + PARAM_NO_CONVERT; + fs_reg scale_x = fs_reg(UNIFORM, c->prog_data.nr_params); fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); GLuint index = _mesa_add_state_reference(params, @@ -2359,7 +2395,7 @@ fs_visitor::assign_curb_setup() constant_nr % 8); inst->src[i].file = FIXED_HW_REG; - inst->src[i].fixed_hw_reg = brw_reg; + inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type); } } } @@ -2566,6 +2602,8 @@ fs_visitor::setup_pull_constants() for (int i = 0; i < pull_uniform_count; i++) { c->prog_data.pull_param[i] = c->prog_data.param[pull_uniform_base + i]; + c->prog_data.pull_param_convert[i] = + c->prog_data.param_convert[pull_uniform_base + i]; } c->prog_data.nr_params -= pull_uniform_count; c->prog_data.nr_pull_params = pull_uniform_count; @@ -3093,7 +3131,7 @@ fs_visitor::generate_code() if (INTEL_DEBUG & DEBUG_WM) { printf("Native code for fragment shader %d:\n", - ctx->Shader.CurrentProgram->Name); + ctx->Shader.CurrentFragmentProgram->Name); } if_depth_in_loop[loop_stack_depth] = 0; @@ -3320,7 +3358,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; - struct gl_shader_program *prog = ctx->Shader.CurrentProgram; + struct gl_shader_program *prog = ctx->Shader.CurrentFragmentProgram; if (!prog) return GL_FALSE; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index ce33479996..e560cdac1f 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -165,13 +165,20 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* User clip planes from curbe: */ if (c->key.nr_userclip) { - for (i = 0; i < c->key.nr_userclip; i++) { - c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1); - } + if (intel->gen >= 6) { + for (i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += ALIGN(c->key.nr_userclip, 2) / 2; + } else { + for (i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2; + } - /* Deal with curbe alignment: - */ - reg += ((6 + c->key.nr_userclip + 3) / 4) * 2; } /* Vertex program parameters from curbe: @@ -253,9 +260,11 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->first_output = reg; c->first_overflow_output = 0; - if (intel->gen >= 6) - mrf = 3; /* no more pos store in attribute */ - else if (intel->gen == 5) + if (intel->gen >= 6) { + mrf = 3; + if (c->key.nr_userclip) + mrf += 2; + } else if (intel->gen == 5) mrf = 8; else mrf = 4; @@ -372,9 +381,13 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* See emit_vertex_write() for where the VUE's overhead on top of the * attributes comes from. */ - if (intel->gen >= 6) - c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 7) / 8; - else if (intel->gen == 5) + if (intel->gen >= 6) { + int header_regs = 2; + if (c->key.nr_userclip) + header_regs += 2; + + c->prog_data.urb_entry_size = (attributes_in_vue + header_regs + 7) / 8; + } else if (intel->gen == 5) c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; @@ -1392,9 +1405,33 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Update the header for point size, user clipping flags, and -ve rhw * workaround. */ - if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || - c->key.nr_userclip || brw->has_negative_rhw_bug) - { + if (intel->gen >= 6) { + struct brw_reg m1 = brw_message_reg(1); + + /* On gen6, m1 has each value in a separate dword, so we never + * need to mess with a temporary for computing the m1 value. + */ + brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + brw_MOV(p, brw_writemask(m1, WRITEMASK_W), + brw_swizzle1(c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ], 0)); + } + + /* Set the user clip distances in dword 8-15. (m3-4)*/ + if (c->key.nr_userclip) { + for (i = 0; i < c->key.nr_userclip; i++) { + struct brw_reg m; + if (i < 4) + m = brw_message_reg(3); + else + m = brw_message_reg(4); + + brw_DP4(p, brw_writemask(m, (1 << (i & 7))),pos, c->userplane[i]); + } + } + } else if ((c->prog_data.outputs_written & + BITFIELD64_BIT(VERT_RESULT_PSIZ)) || + c->key.nr_userclip || brw->has_negative_rhw_bug) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1404,11 +1441,10 @@ static void emit_vertex_write( struct brw_vs_compile *c) if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ]; - if (intel->gen < 6) { - brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); - brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); - } else - brw_MOV(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0)); + brw_MUL(p, brw_writemask(header1, WRITEMASK_W), + brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); + brw_AND(p, brw_writemask(header1, WRITEMASK_W), + header1, brw_imm_ud(0x7ff<<8)); } for (i = 0; i < c->key.nr_userclip; i++) { @@ -1461,12 +1497,14 @@ static void emit_vertex_write( struct brw_vs_compile *c) * dword 0-3 (m1) of the header is indices, point width, clip flags. * dword 4-7 (m2) is the 4D space position * dword 8-15 (m3,m4) of the vertex header is the user clip distance if - * enabled. We don't use it, so skip it. - * m3 is the first vertex element data we fill, which is the vertex - * position. + * enabled. + * m3 or 5 is the first vertex element data we fill, which is + * the vertex position. */ brw_MOV(p, brw_message_reg(2), pos); len_vertex_header = 1; + if (c->key.nr_userclip > 0) + len_vertex_header += 2; } else if (intel->gen == 5) { /* There are 20 DWs (D0-D19) in VUE header on Ironlake: * dword 0-3 (m1) of the header is indices, point width, clip flags. diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 8fc960b445..d6aa9f957a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -113,6 +113,7 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, struct brw_wm_ref *ref = get_ref(c); c->prog_data.param[i] = param_ptr; + c->prog_data.param_convert[i] = PARAM_NO_CONVERT; c->nr_creg = (i+16)/16; /* Push the offsets into hw_reg. These will be added to the diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 433ccc66f0..9a27b93710 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -137,9 +137,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* If using the fragment shader backend, the program is always * 8-wide. */ - if (ctx->Shader.CurrentProgram) { + if (ctx->Shader.CurrentFragmentProgram) { struct brw_shader *shader = (struct brw_shader *) - ctx->Shader.CurrentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]; + ctx->Shader.CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]; if (shader != NULL && shader->ir != NULL) { key->is_glsl = GL_TRUE; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index dd5ddea920..76fc94df1f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -343,7 +343,8 @@ prepare_wm_constants(struct brw_context *brw) drm_intel_gem_bo_map_gtt(brw->wm.const_bo); constants = brw->wm.const_bo->virtual; for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) { - constants[i] = *brw->wm.prog_data->pull_param[i]; + constants[i] = convert_param(brw->wm.prog_data->pull_param_convert[i], + *brw->wm.prog_data->pull_param[i]); } drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo); diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 0d6e923f73..800a255521 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -254,7 +254,7 @@ prepare_color_calc_state(struct brw_context *brw) const struct brw_tracked_state gen6_color_calc_state = { .dirty = { - .mesa = _NEW_COLOR, + .mesa = _NEW_COLOR | _NEW_STENCIL, .brw = 0, .cache = 0, }, diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index cd2ac9d92f..c65b41e2b6 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -28,6 +28,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_util.h" #include "intel_batchbuffer.h" static void @@ -36,7 +37,7 @@ upload_clip_state(struct brw_context *brw) struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; uint32_t depth_clamp = 0; - uint32_t provoking; + uint32_t provoking, userclip; if (!ctx->Transform.DepthClamp) depth_clamp = GEN6_CLIP_Z_TEST; @@ -50,6 +51,9 @@ upload_clip_state(struct brw_context *brw) (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT); } + /* _NEW_TRANSFORM */ + userclip = (1 << brw_count_bits(ctx->Transform.ClipPlanesEnabled)) - 1; + BEGIN_BATCH(4); OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2)); OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE); @@ -57,6 +61,7 @@ upload_clip_state(struct brw_context *brw) GEN6_CLIP_API_OGL | GEN6_CLIP_MODE_NORMAL | GEN6_CLIP_XY_TEST | + userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT | depth_clamp | provoking); OUT_BATCH(GEN6_CLIP_FORCE_ZERO_RTAINDEX); diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 55a70bea62..471067e8f0 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -73,12 +73,19 @@ upload_sf_state(struct brw_context *brw) /* _NEW_BUFFER */ GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; int attr = 0; + int urb_start; + + /* _NEW_TRANSFORM */ + if (ctx->Transform.ClipPlanesEnabled) + urb_start = 2; + else + urb_start = 1; dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT | (num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | - 1 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; + urb_start << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE | GEN6_SF_STATISTICS_ENABLE; dw3 = 0; @@ -195,7 +202,9 @@ const struct brw_tracked_state gen6_sf_state = { _NEW_POLYGON | _NEW_LINE | _NEW_SCISSOR | - _NEW_BUFFERS), + _NEW_BUFFERS | + _NEW_POINT | + _NEW_TRANSFORM), .brw = BRW_NEW_CONTEXT, .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 304eaddf40..d8da216d15 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -40,11 +40,11 @@ upload_vs_state(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); - unsigned int nr_params = vp->program.Base.Parameters->NumParameters; + unsigned int nr_params = brw->vs.prog_data->nr_params / 4; drm_intel_bo *constant_bo; int i; - if (vp->use_const_buffer || nr_params == 0) { + if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2)); @@ -54,6 +54,9 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); } else { + int params_uploaded = 0; + float *param; + if (brw->vertex_program->IsNVProgram) _mesa_load_tracked_matrices(ctx); @@ -63,14 +66,55 @@ upload_vs_state(struct brw_context *brw) _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo", - nr_params * 4 * sizeof(float), + (MAX_CLIP_PLANES + nr_params) * + 4 * sizeof(float), 4096); drm_intel_gem_bo_map_gtt(constant_bo); - for (i = 0; i < nr_params; i++) { - memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float), - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); + param = constant_bo->virtual; + + /* This should be loaded like any other param, but it's ad-hoc + * until we redo the VS backend. + */ + for (i = 0; i < MAX_CLIP_PLANES; i++) { + if (ctx->Transform.ClipPlanesEnabled & (1 << i)) { + memcpy(param, ctx->Transform._ClipUserPlane[i], 4 * sizeof(float)); + param += 4; + params_uploaded++; + } } + /* Align to a reg for convenience for brw_vs_emit.c */ + if (params_uploaded & 1) { + param += 4; + params_uploaded++; + } + + if (vp->use_const_buffer) { + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + memcpy(param + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + params_uploaded++; + } + } + } else { + for (i = 0; i < nr_params; i++) { + memcpy(param, vp->program.Base.Parameters->ParameterValues[i], + 4 * sizeof(float)); + param += 4; + params_uploaded++; + } + } + + if (0) { + printf("VS constant buffer:\n"); + for (i = 0; i < params_uploaded; i++) { + float *buf = (float *)constant_bo->virtual + i * 4; + printf("%d: %f %f %f %f\n", + i, buf[0], buf[1], buf[2], buf[3]); + } + } + drm_intel_gem_bo_unmap_gtt(constant_bo); BEGIN_BATCH(5); @@ -79,7 +123,7 @@ upload_vs_state(struct brw_context *brw) (5 - 2)); OUT_RELOC(constant_bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - ALIGN(nr_params, 2) / 2 - 1); + ALIGN(params_uploaded, 2) / 2 - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index 82d127720d..36d4ab93ba 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -63,7 +63,8 @@ prepare_wm_constants(struct brw_context *brw) drm_intel_gem_bo_map_gtt(brw->wm.push_const_bo); constants = brw->wm.push_const_bo->virtual; for (i = 0; i < brw->wm.prog_data->nr_params; i++) { - constants[i] = *brw->wm.prog_data->param[i]; + constants[i] = convert_param(brw->wm.prog_data->param_convert[i], + *brw->wm.prog_data->param[i]); } drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo); } diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 974045730b..556a4195bd 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -55,6 +55,7 @@ #define need_GL_EXT_point_parameters #define need_GL_EXT_provoking_vertex #define need_GL_EXT_secondary_color +#define need_GL_EXT_separate_shader_objects #define need_GL_EXT_stencil_two_side #define need_GL_EXT_timer_query #define need_GL_APPLE_vertex_array_object @@ -114,6 +115,7 @@ static const struct dri_extension card_extensions[] = { { "GL_EXT_packed_depth_stencil", NULL }, { "GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions }, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, + { "GL_EXT_separate_shader_objects", GL_EXT_separate_shader_objects_functions }, { "GL_EXT_stencil_wrap", NULL }, { "GL_EXT_texture_edge_clamp", NULL }, { "GL_EXT_texture_env_combine", NULL }, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 4d9120ffd0..5556927357 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -32,15 +32,12 @@ #include "radeon_compiler_util.h" #include "radeon_swizzle.h" -struct peephole_state { - struct rc_instruction * Inst; - /** Stores a bitmask of the components that are still "alive" (i.e. - * they have not been written to since Inst was executed.) - */ - unsigned int WriteMask; +struct src_clobbered_data { + unsigned int NumSrcRegs; + unsigned int SrcMasks[3]; }; -typedef void (*rc_presub_replace_fn)(struct peephole_state *, +typedef void (*rc_presub_replace_fn)(struct rc_instruction *, struct rc_instruction *, unsigned int); @@ -67,27 +64,6 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct return combine; } -struct copy_propagate_state { - struct radeon_compiler * C; - struct rc_instruction * Mov; - unsigned int Conflict:1; - - /** Whether Mov's source has been clobbered */ - unsigned int SourceClobbered:1; - - /** Which components of Mov's destination register are still from that Mov? */ - unsigned int MovMask:4; - - /** Which components of Mov's destination register are clearly *not* from that Mov */ - unsigned int DefinedMask:4; - - /** Which components of Mov's source register are sourced */ - unsigned int SourcedMask:4; - - /** Branch depth beyond Mov; negative value indicates we left the Mov's block */ - int BranchDepth; -}; - static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, struct rc_src_register * src) { @@ -123,24 +99,36 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, } } -static void copy_propagate_scan_write(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) +static void is_src_clobbered_scan_write( + void * data, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) { + unsigned int i; struct rc_reader_data * reader_data = data; - struct copy_propagate_state * s = reader_data->CbData; + struct src_clobbered_data * d = reader_data->CbData; + for (i = 0; i < d->NumSrcRegs; i++) { + if (file == reader_data->Writer->U.I.SrcReg[i].File + && index == reader_data->Writer->U.I.SrcReg[i].Index + && (mask & d->SrcMasks[i])){ - if (file == reader_data->Writer->U.I.SrcReg[0].File && index == reader_data->Writer->U.I.SrcReg[0].Index) { - if (mask & s->SourcedMask) reader_data->AbortOnRead = 1; - } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) { - reader_data->AbortOnRead = 1; + return; + } + if (reader_data->Writer->U.I.SrcReg[i].RelAddr && + file == RC_FILE_ADDRESS) { + reader_data->AbortOnRead = 1; + return; + } } } static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) { - struct copy_propagate_state s; struct rc_reader_data reader_data; + struct src_clobbered_data sc_data; unsigned int i; if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || @@ -149,22 +137,15 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i inst_mov->U.I.SaturateMode) return; - memset(&s, 0, sizeof(s)); - s.C = c; - s.Mov = inst_mov; - s.MovMask = inst_mov->U.I.DstReg.WriteMask; - s.DefinedMask = RC_MASK_XYZW & ~s.MovMask; + sc_data.NumSrcRegs = 1; + sc_data.SrcMasks[0] = rc_swizzle_to_writemask( + inst_mov->U.I.SrcReg[0].Swizzle); - reader_data.CbData = &s; - - for(unsigned int chan = 0; chan < 4; ++chan) { - unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan); - s.SourcedMask |= (1 << swz) & RC_MASK_XYZW; - } + reader_data.CbData = &sc_data; /* Get a list of all the readers of this MOV instruction. */ rc_get_readers_normal(c, inst_mov, &reader_data, - copy_propagate_scan_read, copy_propagate_scan_write); + copy_propagate_scan_read, is_src_clobbered_scan_write); if (reader_data.Abort || reader_data.ReaderCount == 0) return; @@ -172,10 +153,10 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i /* Propagate the MOV instruction. */ for (i = 0; i < reader_data.ReaderCount; i++) { struct rc_instruction * inst = reader_data.Readers[i].Inst; - *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, s.Mov->U.I.SrcReg[0]); + *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, inst_mov->U.I.SrcReg[0]); - if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) - inst->U.I.PreSub = s.Mov->U.I.PreSub; + if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) + inst->U.I.PreSub = inst_mov->U.I.PreSub; } /* Finally, remove the original MOV instruction */ @@ -431,129 +412,99 @@ static int src_has_const_swz(struct rc_src_register src) { return 0; } -static void peephole_scan_write(void * data, struct rc_instruction * inst, - rc_register_file file, unsigned int index, unsigned int mask) +static void presub_scan_read( + void * data, + struct rc_instruction * inst, + struct rc_src_register * src) { - struct peephole_state * s = data; - if(s->Inst->U.I.DstReg.File == file - && s->Inst->U.I.DstReg.Index == index) { - unsigned int common_mask = s->WriteMask & mask; - s->WriteMask &= ~common_mask; + struct rc_reader_data * reader_data = data; + const struct rc_opcode_info * info = + rc_get_opcode_info(inst->U.I.Opcode); + /* XXX: There are some situations where instructions + * with more than 2 src registers can use the + * presubtract select, but to keep things simple we + * will disable presubtract on these instructions for + * now. */ + if (info->NumSrcRegs > 2 || info->HasTexture) { + reader_data->Abort = 1; + return; + } + + /* We can't use more than one presubtract value in an + * instruction, unless the two prsubtract operations + * are the same and read from the same registers. + * XXX For now we will limit instructions to only one presubtract + * value.*/ + if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { + reader_data->Abort = 1; + return; } } static int presub_helper( struct radeon_compiler * c, - struct peephole_state * s, + struct rc_instruction * inst_add, rc_presubtract_op presub_opcode, rc_presub_replace_fn presub_replace) { - struct rc_instruction * inst; - unsigned int can_remove = 0; - unsigned int cant_sub = 0; - - for(inst = s->Inst->Next; inst != &c->Program.Instructions; - inst = inst->Next) { - unsigned int i; - unsigned char can_use_presub = 1; - const struct rc_opcode_info * info = - rc_get_opcode_info(inst->U.I.Opcode); - /* XXX: There are some situations where instructions - * with more than 2 src registers can use the - * presubtract select, but to keep things simple we - * will disable presubtract on these instructions for - * now. */ - if (info->NumSrcRegs > 2 || info->HasTexture) { - can_use_presub = 0; - } + struct rc_reader_data reader_data; + struct src_clobbered_data sc_data; + unsigned int i; - /* We can't use more than one presubtract value in an - * instruction, unless the two prsubtract operations - * are the same and read from the same registers. */ - if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) { - if (inst->U.I.PreSub.Opcode != presub_opcode - || inst->U.I.PreSub.SrcReg[0].File != - s->Inst->U.I.SrcReg[1].File - || inst->U.I.PreSub.SrcReg[0].Index != - s->Inst->U.I.SrcReg[1].Index) { - can_use_presub = 0; - } - } + sc_data.NumSrcRegs = 2; + sc_data.SrcMasks[0] = rc_swizzle_to_writemask( + inst_add->U.I.SrcReg[0].Swizzle); + sc_data.SrcMasks[1] = rc_swizzle_to_writemask( + inst_add->U.I.SrcReg[1].Swizzle); + reader_data.CbData = &sc_data; + rc_get_readers_normal(c, inst_add, &reader_data, presub_scan_read, + is_src_clobbered_scan_write); - /* Even if the instruction can't use a presubtract operation - * we still need to check if the instruction reads from - * s->Inst->U.I.DstReg, because if it does we must not - * remove s->Inst. */ - for(i = 0; i < info->NumSrcRegs; i++) { - unsigned int mask = src_reads_dst_mask( - inst->U.I.SrcReg[i], s->Inst->U.I.DstReg); - /* XXX We could be more aggressive here using - * presubtract. It is okay if SrcReg[i] only reads - * from some of the mask components. */ - if(s->Inst->U.I.DstReg.WriteMask != mask) { - if (s->Inst->U.I.DstReg.WriteMask & mask) { - can_remove = 0; - break; - } else { - continue; - } - } - if (cant_sub || !can_use_presub) { - can_remove = 0; - break; - } - presub_replace(s, inst, i); - can_remove = 1; - } - if(!can_remove) - break; - rc_for_all_writes_mask(inst, peephole_scan_write, s); - /* If all components of inst_add's destination register have - * been written to by subsequent instructions, the original - * value of the destination register is no longer valid and - * we can't keep doing substitutions. */ - if (!s->WriteMask){ - break; - } - /* Make this instruction doesn't write to the presubtract source. */ - if (inst->U.I.DstReg.WriteMask & - src_reads_dst_mask(s->Inst->U.I.SrcReg[1], - inst->U.I.DstReg) - || src_reads_dst_mask(s->Inst->U.I.SrcReg[0], - inst->U.I.DstReg) - || info->IsFlowControl) { - cant_sub = 1; + if (reader_data.Abort || reader_data.ReaderCount == 0) + return 0; + + for(i = 0; i < reader_data.ReaderCount; i++) { + unsigned int src_index; + struct rc_reader reader = reader_data.Readers[i]; + const struct rc_opcode_info * info = + rc_get_opcode_info(reader.Inst->U.I.Opcode); + + for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { + if (&reader.Inst->U.I.SrcReg[src_index] == reader.Src) + presub_replace(inst_add, reader.Inst, src_index); } } - return can_remove; + return 1; } -/* This function assumes that s->Inst->U.I.SrcReg[0] and - * s->Inst->U.I.SrcReg[1] aren't both negative. */ -static void presub_replace_add(struct peephole_state *s, - struct rc_instruction * inst, - unsigned int src_index) +/* This function assumes that inst_add->U.I.SrcReg[0] and + * inst_add->U.I.SrcReg[1] aren't both negative. */ +static void presub_replace_add( + struct rc_instruction * inst_add, + struct rc_instruction * inst_reader, + unsigned int src_index) { rc_presubtract_op presub_opcode; - if (s->Inst->U.I.SrcReg[1].Negate || s->Inst->U.I.SrcReg[0].Negate) + if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) presub_opcode = RC_PRESUB_SUB; else presub_opcode = RC_PRESUB_ADD; - if (s->Inst->U.I.SrcReg[1].Negate) { - inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1]; - inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[0]; + if (inst_add->U.I.SrcReg[1].Negate) { + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; } else { - inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0]; - inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; + inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; } - inst->U.I.PreSub.SrcReg[0].Negate = 0; - inst->U.I.PreSub.SrcReg[1].Negate = 0; - inst->U.I.PreSub.Opcode = presub_opcode; - inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], - inst->U.I.PreSub.SrcReg[0]); - inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; - inst->U.I.SrcReg[src_index].Index = presub_opcode; + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; + inst_reader->U.I.PreSub.Opcode = presub_opcode; + inst_reader->U.I.SrcReg[src_index] = + chain_srcregs(inst_reader->U.I.SrcReg[src_index], + inst_reader->U.I.PreSub.SrcReg[0]); + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; } static int is_presub_candidate(struct rc_instruction * inst) @@ -578,7 +529,6 @@ static int peephole_add_presub_add( struct rc_src_register * src0 = NULL; struct rc_src_register * src1 = NULL; unsigned int i; - struct peephole_state s; if (!is_presub_candidate(inst_add)) return 0; @@ -604,30 +554,28 @@ static int peephole_add_presub_add( if (!src1) return 0; - s.Inst = inst_add; - s.WriteMask = inst_add->U.I.DstReg.WriteMask; - if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) { + if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { rc_remove_instruction(inst_add); return 1; } return 0; } -static void presub_replace_inv(struct peephole_state * s, - struct rc_instruction * inst, - unsigned int src_index) +static void presub_replace_inv( + struct rc_instruction * inst_add, + struct rc_instruction * inst_reader, + unsigned int src_index) { - /* We must be careful not to modify s->Inst, since it - * is possible it will remain part of the program. - * XXX Maybe pass a struct instead of a pointer for s->Inst.*/ - inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1]; - inst->U.I.PreSub.SrcReg[0].Negate = 0; - inst->U.I.PreSub.Opcode = RC_PRESUB_INV; - inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], - inst->U.I.PreSub.SrcReg[0]); - - inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; - inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; + /* We must be careful not to modify inst_add, since it + * is possible it will remain part of the program.*/ + inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; + inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; + inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; + inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], + inst_reader->U.I.PreSub.SrcReg[0]); + + inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; + inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; } /** @@ -645,7 +593,6 @@ static int peephole_add_presub_inv( struct rc_instruction * inst_add) { unsigned int i, swz, mask; - struct peephole_state s; if (!is_presub_candidate(inst_add)) return 0; @@ -674,11 +621,7 @@ static int peephole_add_presub_inv( return 0; } - /* Setup the peephole_state information. */ - s.Inst = inst_add; - s.WriteMask = inst_add->U.I.DstReg.WriteMask; - - if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) { + if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { rc_remove_instruction(inst_add); return 1; } diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c index 0c0eeca1fc..6e51832c87 100644 --- a/src/mesa/drivers/dri/r600/evergreen_render.c +++ b/src/mesa/drivers/dri/r600/evergreen_render.c @@ -909,6 +909,10 @@ static void evergreenDrawPrims(struct gl_context *ctx, { GLboolean retval = GL_FALSE; + context_t *context = EVERGREEN_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + radeon_prepare_render(radeon); + /* This check should get folded into just the places that * min/max index are really needed. */ diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index f5b0df6ef5..ff3506b225 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -412,6 +412,8 @@ static GLboolean radeon_run_render( struct gl_context *ctx, return GL_TRUE; radeon_prepare_render(&rmesa->radeon); + if (rmesa->radeon.NewGLState) + radeonValidateState( ctx ); tnl->Driver.Render.Start( ctx ); diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c index c59b413012..5d2e8f4870 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c @@ -253,6 +253,8 @@ void radeonTclPrimitive( struct gl_context *ctx, GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE; radeon_prepare_render(&rmesa->radeon); + if (rmesa->radeon.NewGLState) + radeonValidateState( ctx ); if (newprim != rmesa->tcl.hw_primitive || !discrete_prim[hw_prim&0xf]) { |