diff options
Diffstat (limited to 'src/gallium/drivers/nv40')
-rw-r--r-- | src/gallium/drivers/nv40/nv40_context.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/nv40/nv40_state.c | 35 | ||||
-rw-r--r-- | src/gallium/drivers/nv40/nv40_state.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/nv40/nv40_state_emit.c | 20 | ||||
-rw-r--r-- | src/gallium/drivers/nv40/nv40_vbo.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/nv40/nv40_vertprog.c | 101 |
6 files changed, 141 insertions, 24 deletions
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index b50f6f8fef..525eef8d63 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -133,7 +133,7 @@ struct nv40_context { unsigned fallback_swrast; /* Context state */ - unsigned dirty; + unsigned dirty, draw_dirty; struct pipe_scissor_state scissor; unsigned stipple[32]; struct pipe_clip_state clip; @@ -153,8 +153,10 @@ struct nv40_context { unsigned nr_samplers; unsigned nr_textures; unsigned dirty_samplers; - struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; + unsigned vtxbuf_nr; struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; + unsigned vtxelt_nr; }; static INLINE struct nv40_context * diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c index 1417c95e75..5dc2991212 100644 --- a/src/gallium/drivers/nv40/nv40_state.c +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -423,10 +423,9 @@ nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) struct nv40_context *nv40 = nv40_context(pipe); struct nv40_rasterizer_state *rsso = hwcso; - draw_set_rasterizer_state(nv40->draw, &rsso->pipe); - nv40->rasterizer = hwcso; nv40->dirty |= NV40_NEW_RAST; + nv40->draw_dirty |= NV40_NEW_RAST; } static void @@ -445,19 +444,20 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, struct nv40_context *nv40 = nv40_context(pipe); struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *curie = nv40->screen->curie; - so_method(so, nv40->screen->curie, NV40TCL_DEPTH_FUNC, 3); + so_method(so, curie, NV40TCL_DEPTH_FUNC, 3); so_data (so, nvgl_comparison_op(cso->depth.func)); so_data (so, cso->depth.writemask ? 1 : 0); so_data (so, cso->depth.enabled ? 1 : 0); - so_method(so, nv40->screen->curie, NV40TCL_ALPHA_TEST_ENABLE, 3); + so_method(so, curie, NV40TCL_ALPHA_TEST_ENABLE, 3); so_data (so, cso->alpha.enabled ? 1 : 0); so_data (so, nvgl_comparison_op(cso->alpha.func)); so_data (so, float_to_ubyte(cso->alpha.ref)); if (cso->stencil[0].enabled) { - so_method(so, nv40->screen->curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); + so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); so_data (so, cso->stencil[0].enabled ? 1 : 0); so_data (so, cso->stencil[0].write_mask); so_data (so, nvgl_comparison_op(cso->stencil[0].func)); @@ -467,12 +467,12 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); } else { - so_method(so, nv40->screen->curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); + so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); so_data (so, 0); } if (cso->stencil[1].enabled) { - so_method(so, nv40->screen->curie, NV40TCL_STENCIL_BACK_ENABLE, 8); + so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 8); so_data (so, cso->stencil[1].enabled ? 1 : 0); so_data (so, cso->stencil[1].write_mask); so_data (so, nvgl_comparison_op(cso->stencil[1].func)); @@ -482,7 +482,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); } else { - so_method(so, nv40->screen->curie, NV40TCL_STENCIL_BACK_ENABLE, 1); + so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 1); so_data (so, 0); } @@ -529,10 +529,9 @@ nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) struct nv40_context *nv40 = nv40_context(pipe); struct nv40_vertex_program *vp = hwcso; - draw_bind_vertex_shader(nv40->draw, vp ? vp->draw : NULL); - nv40->vertprog = hwcso; nv40->dirty |= NV40_NEW_VERTPROG; + nv40->draw_dirty |= NV40_NEW_VERTPROG; } static void @@ -595,10 +594,9 @@ nv40_set_clip_state(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); - draw_set_clip_state(nv40->draw, clip); - nv40->clip = *clip; nv40->dirty |= NV40_NEW_UCP; + nv40->draw_dirty |= NV40_NEW_UCP; } static void @@ -653,10 +651,9 @@ nv40_set_viewport_state(struct pipe_context *pipe, { struct nv40_context *nv40 = nv40_context(pipe); - draw_set_viewport_state(nv40->draw, vpt); - nv40->viewport = *vpt; nv40->dirty |= NV40_NEW_VIEWPORT; + nv40->draw_dirty |= NV40_NEW_VIEWPORT; } static void @@ -665,10 +662,11 @@ nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count, { struct nv40_context *nv40 = nv40_context(pipe); - draw_set_vertex_buffers(nv40->draw, count, vb); - memcpy(nv40->vtxbuf, vb, sizeof(*vb) * count); + nv40->vtxbuf_nr = count; + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->draw_dirty |= NV40_NEW_ARRAYS; } static void @@ -677,10 +675,11 @@ nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count, { struct nv40_context *nv40 = nv40_context(pipe); - draw_set_vertex_elements(nv40->draw, count, ve); - memcpy(nv40->vtxelt, ve, sizeof(*ve) * count); + nv40->vtxelt_nr = count; + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->draw_dirty |= NV40_NEW_ARRAYS; } void diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h index ab2866eb7a..e018464c9f 100644 --- a/src/gallium/drivers/nv40/nv40_state.h +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -42,6 +42,7 @@ struct nv40_vertex_program { uint32_t ir; uint32_t or; + uint32_t clip_ctrl; struct nouveau_stateobj *so; }; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 74feb6d4bf..722b9f31e6 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -144,6 +144,8 @@ nv40_state_validate(struct nv40_context *nv40) boolean nv40_state_validate_swtnl(struct nv40_context *nv40) { + struct draw_context *draw = nv40->draw; + /* Setup for swtnl */ if (nv40->render_mode == HW) { NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl); @@ -155,12 +157,30 @@ nv40_state_validate_swtnl(struct nv40_context *nv40) nv40->render_mode = SWTNL; } + if (nv40->draw_dirty & NV40_NEW_VERTPROG) + draw_bind_vertex_shader(draw, nv40->vertprog->draw); + + if (nv40->draw_dirty & NV40_NEW_RAST) + draw_set_rasterizer_state(draw, &nv40->rasterizer->pipe); + + if (nv40->draw_dirty & NV40_NEW_UCP) + draw_set_clip_state(draw, &nv40->clip); + + if (nv40->draw_dirty & NV40_NEW_VIEWPORT) + draw_set_viewport_state(draw, &nv40->viewport); + + if (nv40->draw_dirty & NV40_NEW_ARRAYS) { + draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf); + draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt); + } + nv40_state_do_validate(nv40, swtnl_states); if (nv40->fallback_swrast) { NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast); return FALSE; } + nv40->draw_dirty = 0; return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index b66bf26afb..bc53924a67 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -32,7 +32,7 @@ nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) case PIPE_FORMAT_R16G16_SSCALED: case PIPE_FORMAT_R16G16B16_SSCALED: case PIPE_FORMAT_R16G16B16A16_SSCALED: - *fmt = 5; + *fmt = NV40TCL_VTXFMT_TYPE_USHORT; break; default: pf_sprint_name(fs, pipe); diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 40ef7174a4..08d3f387e0 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -37,6 +37,8 @@ #define neg(s) nv40_sr_neg((s)) #define abs(s) nv40_sr_abs((s)) +#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) + struct nv40_vpc { struct nv40_vertex_program *vp; @@ -200,6 +202,36 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + case NV40_VP_INST_DEST_CLIP(0): + vp->or |= (1 << 6); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0; + dst.index = NV40_VP_INST_DEST_FOGC; + break; + case NV40_VP_INST_DEST_CLIP(1): + vp->or |= (1 << 7); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1; + dst.index = NV40_VP_INST_DEST_FOGC; + break; + case NV40_VP_INST_DEST_CLIP(2): + vp->or |= (1 << 8); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2; + dst.index = NV40_VP_INST_DEST_FOGC; + break; + case NV40_VP_INST_DEST_CLIP(3): + vp->or |= (1 << 9); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3; + dst.index = NV40_VP_INST_DEST_PSZ; + break; + case NV40_VP_INST_DEST_CLIP(4): + vp->or |= (1 << 10); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4; + dst.index = NV40_VP_INST_DEST_PSZ; + break; + case NV40_VP_INST_DEST_CLIP(5): + vp->or |= (1 << 11); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5; + dst.index = NV40_VP_INST_DEST_PSZ; + break; default: break; } @@ -391,6 +423,11 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, int ai = -1, ci = -1, ii = -1; int i; + struct { + struct nv40_sreg dst; + unsigned m; + } clip; + if (finst->Instruction.Opcode == TGSI_OPCODE_END) return TRUE; @@ -464,6 +501,47 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + /* If writing to clip distance regs, need to modify instruction to + * change which component is written to. On NV40 the clip regs + * are the unused components (yzw) of FOGC/PSZ. + */ + clip.dst = none; + if (dst.type == NV40SR_OUTPUT && + dst.index >= NV40_VP_INST_DEST_CLIP(0) && + dst.index <= NV40_VP_INST_DEST_CLIP(5)) { + unsigned n = dst.index - NV40_VP_INST_DEST_CLIP(0); + unsigned m[] = + { MASK_Y, MASK_Z, MASK_W, MASK_Y, MASK_Z, MASK_W }; + + /* Some instructions we can get away with swizzling and/or + * changing the writemask. Others, we'll use a temp reg. + */ + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_DST: + case TGSI_OPCODE_EXP: + case TGSI_OPCODE_LIT: + case TGSI_OPCODE_LOG: + case TGSI_OPCODE_XPD: + clip.dst = dst; + clip.m = m[n]; + dst = temp(vpc); + break; + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + case TGSI_OPCODE_POW: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + mask = m[n]; + break; + default: + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) + src[i] = swz(src[i], X, X, X, X); + mask = m[n]; + break; + } + } + switch (finst->Instruction.Opcode) { case TGSI_OPCODE_ABS: arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); @@ -561,6 +639,11 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, return FALSE; } + if (clip.dst.type != NV40SR_NONE) { + arith(vpc, 0, OP_MOV, clip.dst, clip.m, + swz(dst, X, X, X, X), none, none); + } + release_temps(vpc); return TRUE; } @@ -612,6 +695,15 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, return FALSE; } break; +#if 0 + case TGSI_SEMANTIC_CLIP: + if (fdec->Semantic.SemanticIndex >= 6) { + NOUVEAU_ERR("bad clip distance index\n"); + return FALSE; + } + hw = NV40_VP_INST_DEST_CLIP(fdec->Semantic.SemanticIndex); + break; +#endif default: NOUVEAU_ERR("bad output semantic\n"); return FALSE; @@ -782,6 +874,7 @@ nv40_vertprog_validate(struct nv40_context *nv40) { struct nouveau_winsys *nvws = nv40->nvws; struct pipe_winsys *ws = nv40->pipe.winsys; + struct nouveau_grobj *curie = nv40->screen->curie; struct nv40_vertex_program *vp; struct pipe_buffer *constbuf; boolean upload_code = FALSE, upload_data = FALSE; @@ -825,12 +918,14 @@ check_gpu_resources: assert(0); } - so = so_new(5, 0); - so_method(so, nv40->screen->curie, NV40TCL_VP_START_FROM_ID, 1); + so = so_new(7, 0); + so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1); so_data (so, vp->exec->start); - so_method(so, nv40->screen->curie, NV40TCL_VP_ATTRIB_EN, 2); + so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2); so_data (so, vp->ir); so_data (so, vp->or); + so_method(so, curie, NV40TCL_CLIP_PLANE_ENABLE, 1); + so_data (so, vp->clip_ctrl); so_ref(so, &vp->so); upload_code = TRUE; |