From e30a3e7aa09c373c0a02df555d090693718f0fe8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 27 Oct 2010 11:00:14 -0700 Subject: i965: Add user clip planes support to gen6. Fixes piglit user-clip, and compiz desktop switching when dragging a window and using just 2 desktops. Bug #30446. --- src/mesa/drivers/dri/i965/brw_defines.h | 7 +++ src/mesa/drivers/dri/i965/brw_vs_emit.c | 84 +++++++++++++++++++++-------- src/mesa/drivers/dri/i965/gen6_clip_state.c | 7 ++- src/mesa/drivers/dri/i965/gen6_sf_state.c | 12 ++++- src/mesa/drivers/dri/i965/gen6_vs_state.c | 44 +++++++++++++-- 5 files changed, 123 insertions(+), 31 deletions(-) (limited to 'src/mesa/drivers/dri') diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6c3db61035..239586a036 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -930,6 +930,11 @@ #define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */ /* DW1 */ # define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) +/** + * Just does cheap culling based on the clip distance. Bits must be + * disjoint with USER_CLIP_CLIP_DISTANCE bits. + */ +# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0 /* DW2 */ # define GEN6_CLIP_ENABLE (1 << 31) # define GEN6_CLIP_API_OGL (0 << 30) @@ -937,6 +942,8 @@ # define GEN6_CLIP_XY_TEST (1 << 28) # define GEN6_CLIP_Z_TEST (1 << 27) # define GEN6_CLIP_GB_TEST (1 << 26) +/** 8-bit field of which user clip distances to clip against. 
*/ +# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16 # define GEN6_CLIP_MODE_NORMAL (0 << 13) # define GEN6_CLIP_MODE_REJECT_ALL (3 << 13) # define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13) diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index ce33479996..e560cdac1f 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -165,13 +165,20 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* User clip planes from curbe: */ if (c->key.nr_userclip) { - for (i = 0; i < c->key.nr_userclip; i++) { - c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1); - } + if (intel->gen >= 6) { + for (i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += ALIGN(c->key.nr_userclip, 2) / 2; + } else { + for (i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, + (i % 2) * 4), 0, 4, 1); + } + reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2; + } - /* Deal with curbe alignment: - */ - reg += ((6 + c->key.nr_userclip + 3) / 4) * 2; } /* Vertex program parameters from curbe: @@ -253,9 +260,11 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->first_output = reg; c->first_overflow_output = 0; - if (intel->gen >= 6) - mrf = 3; /* no more pos store in attribute */ - else if (intel->gen == 5) + if (intel->gen >= 6) { + mrf = 3; + if (c->key.nr_userclip) + mrf += 2; + } else if (intel->gen == 5) mrf = 8; else mrf = 4; @@ -372,9 +381,13 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* See emit_vertex_write() for where the VUE's overhead on top of the * attributes comes from. 
*/ - if (intel->gen >= 6) - c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 7) / 8; - else if (intel->gen == 5) + if (intel->gen >= 6) { + int header_regs = 2; + if (c->key.nr_userclip) + header_regs += 2; + + c->prog_data.urb_entry_size = (attributes_in_vue + header_regs + 7) / 8; + } else if (intel->gen == 5) c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; @@ -1392,9 +1405,33 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Update the header for point size, user clipping flags, and -ve rhw * workaround. */ - if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || - c->key.nr_userclip || brw->has_negative_rhw_bug) - { + if (intel->gen >= 6) { + struct brw_reg m1 = brw_message_reg(1); + + /* On gen6, m1 has each value in a separate dword, so we never + * need to mess with a temporary for computing the m1 value. + */ + brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { + brw_MOV(p, brw_writemask(m1, WRITEMASK_W), + brw_swizzle1(c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ], 0)); + } + + /* Set the user clip distances in dword 8-15. 
(m3-4)*/ + if (c->key.nr_userclip) { + for (i = 0; i < c->key.nr_userclip; i++) { + struct brw_reg m; + if (i < 4) + m = brw_message_reg(3); + else + m = brw_message_reg(4); + + brw_DP4(p, brw_writemask(m, (1 << (i & 7))),pos, c->userplane[i]); + } + } + } else if ((c->prog_data.outputs_written & + BITFIELD64_BIT(VERT_RESULT_PSIZ)) || + c->key.nr_userclip || brw->has_negative_rhw_bug) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1404,11 +1441,10 @@ static void emit_vertex_write( struct brw_vs_compile *c) if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ]; - if (intel->gen < 6) { - brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); - brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); - } else - brw_MOV(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0)); + brw_MUL(p, brw_writemask(header1, WRITEMASK_W), + brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); + brw_AND(p, brw_writemask(header1, WRITEMASK_W), + header1, brw_imm_ud(0x7ff<<8)); } for (i = 0; i < c->key.nr_userclip; i++) { @@ -1461,12 +1497,14 @@ static void emit_vertex_write( struct brw_vs_compile *c) * dword 0-3 (m1) of the header is indices, point width, clip flags. * dword 4-7 (m2) is the 4D space position * dword 8-15 (m3,m4) of the vertex header is the user clip distance if - * enabled. We don't use it, so skip it. - * m3 is the first vertex element data we fill, which is the vertex - * position. + * enabled. + * m3 or 5 is the first vertex element data we fill, which is + * the vertex position. */ brw_MOV(p, brw_message_reg(2), pos); len_vertex_header = 1; + if (c->key.nr_userclip > 0) + len_vertex_header += 2; } else if (intel->gen == 5) { /* There are 20 DWs (D0-D19) in VUE header on Ironlake: * dword 0-3 (m1) of the header is indices, point width, clip flags. 
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index cd2ac9d92f..c65b41e2b6 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -28,6 +28,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_util.h" #include "intel_batchbuffer.h" static void @@ -36,7 +37,7 @@ upload_clip_state(struct brw_context *brw) struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; uint32_t depth_clamp = 0; - uint32_t provoking; + uint32_t provoking, userclip; if (!ctx->Transform.DepthClamp) depth_clamp = GEN6_CLIP_Z_TEST; @@ -50,6 +51,9 @@ upload_clip_state(struct brw_context *brw) (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT); } + /* _NEW_TRANSFORM */ + userclip = (1 << brw_count_bits(ctx->Transform.ClipPlanesEnabled)) - 1; + BEGIN_BATCH(4); OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2)); OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE); @@ -57,6 +61,7 @@ upload_clip_state(struct brw_context *brw) GEN6_CLIP_API_OGL | GEN6_CLIP_MODE_NORMAL | GEN6_CLIP_XY_TEST | + userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT | depth_clamp | provoking); OUT_BATCH(GEN6_CLIP_FORCE_ZERO_RTAINDEX); diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 55a70bea62..565fde457b 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -73,12 +73,19 @@ upload_sf_state(struct brw_context *brw) /* _NEW_BUFFER */ GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; int attr = 0; + int urb_start; + + /* _NEW_TRANSFORM */ + if (ctx->Transform.ClipPlanesEnabled) + urb_start = 2; + else + urb_start = 1; dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT | (num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | - 1 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; + urb_start << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT; dw2 = 
GEN6_SF_VIEWPORT_TRANSFORM_ENABLE | GEN6_SF_STATISTICS_ENABLE; dw3 = 0; @@ -195,7 +202,8 @@ const struct brw_tracked_state gen6_sf_state = { _NEW_POLYGON | _NEW_LINE | _NEW_SCISSOR | - _NEW_BUFFERS), + _NEW_BUFFERS | + _NEW_TRANSFORM), .brw = BRW_NEW_CONTEXT, .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 304eaddf40..1d5c5701b3 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -44,7 +44,8 @@ upload_vs_state(struct brw_context *brw) drm_intel_bo *constant_bo; int i; - if (vp->use_const_buffer || nr_params == 0) { + if (vp->use_const_buffer || (nr_params == 0 && + !ctx->Transform.ClipPlanesEnabled)) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2)); @@ -54,6 +55,9 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); } else { + int params_uploaded = 0; + float *param; + if (brw->vertex_program->IsNVProgram) _mesa_load_tracked_matrices(ctx); @@ -63,14 +67,44 @@ upload_vs_state(struct brw_context *brw) _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo", - nr_params * 4 * sizeof(float), + (MAX_CLIP_PLANES + nr_params) * + 4 * sizeof(float), 4096); drm_intel_gem_bo_map_gtt(constant_bo); + param = constant_bo->virtual; + + /* This should be loaded like any other param, but it's ad-hoc + * until we redo the VS backend. 
+ */ + for (i = 0; i < MAX_CLIP_PLANES; i++) { + if (ctx->Transform.ClipPlanesEnabled & (1 << i)) { + memcpy(param, ctx->Transform._ClipUserPlane[i], 4 * sizeof(float)); + param += 4; + params_uploaded++; + } + } + /* Align to a reg for convenience for brw_vs_emit.c */ + if (params_uploaded & 1) { + param += 4; + params_uploaded++; + } + for (i = 0; i < nr_params; i++) { - memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float), - vp->program.Base.Parameters->ParameterValues[i], + memcpy(param, vp->program.Base.Parameters->ParameterValues[i], 4 * sizeof(float)); + param += 4; + params_uploaded++; + } + + if (0) { + printf("VS constant buffer:\n"); + for (i = 0; i < params_uploaded; i++) { + float *buf = (float *)constant_bo->virtual + i * 4; + printf("%d: %f %f %f %f\n", + i, buf[0], buf[1], buf[2], buf[3]); + } } + drm_intel_gem_bo_unmap_gtt(constant_bo); BEGIN_BATCH(5); @@ -79,7 +113,7 @@ upload_vs_state(struct brw_context *brw) (5 - 2)); OUT_RELOC(constant_bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - ALIGN(nr_params, 2) / 2 - 1); + ALIGN(params_uploaded, 2) / 2 - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); -- cgit v1.2.3