summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/nv40
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nv40')
-rw-r--r--src/gallium/drivers/nv40/nv40_context.h6
-rw-r--r--src/gallium/drivers/nv40/nv40_state.c35
-rw-r--r--src/gallium/drivers/nv40/nv40_state.h1
-rw-r--r--src/gallium/drivers/nv40/nv40_state_emit.c20
-rw-r--r--src/gallium/drivers/nv40/nv40_vbo.c2
-rw-r--r--src/gallium/drivers/nv40/nv40_vertprog.c101
6 files changed, 141 insertions, 24 deletions
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
index b50f6f8fef..525eef8d63 100644
--- a/src/gallium/drivers/nv40/nv40_context.h
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -133,7 +133,7 @@ struct nv40_context {
unsigned fallback_swrast;
/* Context state */
- unsigned dirty;
+ unsigned dirty, draw_dirty;
struct pipe_scissor_state scissor;
unsigned stipple[32];
struct pipe_clip_state clip;
@@ -153,8 +153,10 @@ struct nv40_context {
unsigned nr_samplers;
unsigned nr_textures;
unsigned dirty_samplers;
- struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX];
+ struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX];
+ unsigned vtxbuf_nr;
struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX];
+ unsigned vtxelt_nr;
};
static INLINE struct nv40_context *
diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c
index 1417c95e75..5dc2991212 100644
--- a/src/gallium/drivers/nv40/nv40_state.c
+++ b/src/gallium/drivers/nv40/nv40_state.c
@@ -423,10 +423,9 @@ nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_rasterizer_state *rsso = hwcso;
- draw_set_rasterizer_state(nv40->draw, &rsso->pipe);
-
nv40->rasterizer = hwcso;
nv40->dirty |= NV40_NEW_RAST;
+ nv40->draw_dirty |= NV40_NEW_RAST;
}
static void
@@ -445,19 +444,20 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe,
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
struct nouveau_stateobj *so = so_new(32, 0);
+ struct nouveau_grobj *curie = nv40->screen->curie;
- so_method(so, nv40->screen->curie, NV40TCL_DEPTH_FUNC, 3);
+ so_method(so, curie, NV40TCL_DEPTH_FUNC, 3);
so_data (so, nvgl_comparison_op(cso->depth.func));
so_data (so, cso->depth.writemask ? 1 : 0);
so_data (so, cso->depth.enabled ? 1 : 0);
- so_method(so, nv40->screen->curie, NV40TCL_ALPHA_TEST_ENABLE, 3);
+ so_method(so, curie, NV40TCL_ALPHA_TEST_ENABLE, 3);
so_data (so, cso->alpha.enabled ? 1 : 0);
so_data (so, nvgl_comparison_op(cso->alpha.func));
so_data (so, float_to_ubyte(cso->alpha.ref));
if (cso->stencil[0].enabled) {
- so_method(so, nv40->screen->curie, NV40TCL_STENCIL_FRONT_ENABLE, 8);
+ so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 8);
so_data (so, cso->stencil[0].enabled ? 1 : 0);
so_data (so, cso->stencil[0].write_mask);
so_data (so, nvgl_comparison_op(cso->stencil[0].func));
@@ -467,12 +467,12 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe,
so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
} else {
- so_method(so, nv40->screen->curie, NV40TCL_STENCIL_FRONT_ENABLE, 1);
+ so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 1);
so_data (so, 0);
}
if (cso->stencil[1].enabled) {
- so_method(so, nv40->screen->curie, NV40TCL_STENCIL_BACK_ENABLE, 8);
+ so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 8);
so_data (so, cso->stencil[1].enabled ? 1 : 0);
so_data (so, cso->stencil[1].write_mask);
so_data (so, nvgl_comparison_op(cso->stencil[1].func));
@@ -482,7 +482,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe,
so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
} else {
- so_method(so, nv40->screen->curie, NV40TCL_STENCIL_BACK_ENABLE, 1);
+ so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 1);
so_data (so, 0);
}
@@ -529,10 +529,9 @@ nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso)
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_vertex_program *vp = hwcso;
- draw_bind_vertex_shader(nv40->draw, vp ? vp->draw : NULL);
-
nv40->vertprog = hwcso;
nv40->dirty |= NV40_NEW_VERTPROG;
+ nv40->draw_dirty |= NV40_NEW_VERTPROG;
}
static void
@@ -595,10 +594,9 @@ nv40_set_clip_state(struct pipe_context *pipe,
{
struct nv40_context *nv40 = nv40_context(pipe);
- draw_set_clip_state(nv40->draw, clip);
-
nv40->clip = *clip;
nv40->dirty |= NV40_NEW_UCP;
+ nv40->draw_dirty |= NV40_NEW_UCP;
}
static void
@@ -653,10 +651,9 @@ nv40_set_viewport_state(struct pipe_context *pipe,
{
struct nv40_context *nv40 = nv40_context(pipe);
- draw_set_viewport_state(nv40->draw, vpt);
-
nv40->viewport = *vpt;
nv40->dirty |= NV40_NEW_VIEWPORT;
+ nv40->draw_dirty |= NV40_NEW_VIEWPORT;
}
static void
@@ -665,10 +662,11 @@ nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
{
struct nv40_context *nv40 = nv40_context(pipe);
- draw_set_vertex_buffers(nv40->draw, count, vb);
-
memcpy(nv40->vtxbuf, vb, sizeof(*vb) * count);
+ nv40->vtxbuf_nr = count;
+
nv40->dirty |= NV40_NEW_ARRAYS;
+ nv40->draw_dirty |= NV40_NEW_ARRAYS;
}
static void
@@ -677,10 +675,11 @@ nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count,
{
struct nv40_context *nv40 = nv40_context(pipe);
- draw_set_vertex_elements(nv40->draw, count, ve);
-
memcpy(nv40->vtxelt, ve, sizeof(*ve) * count);
+ nv40->vtxelt_nr = count;
+
nv40->dirty |= NV40_NEW_ARRAYS;
+ nv40->draw_dirty |= NV40_NEW_ARRAYS;
}
void
diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h
index ab2866eb7a..e018464c9f 100644
--- a/src/gallium/drivers/nv40/nv40_state.h
+++ b/src/gallium/drivers/nv40/nv40_state.h
@@ -42,6 +42,7 @@ struct nv40_vertex_program {
uint32_t ir;
uint32_t or;
+ uint32_t clip_ctrl;
struct nouveau_stateobj *so;
};
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
index 74feb6d4bf..722b9f31e6 100644
--- a/src/gallium/drivers/nv40/nv40_state_emit.c
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -144,6 +144,8 @@ nv40_state_validate(struct nv40_context *nv40)
boolean
nv40_state_validate_swtnl(struct nv40_context *nv40)
{
+ struct draw_context *draw = nv40->draw;
+
/* Setup for swtnl */
if (nv40->render_mode == HW) {
NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl);
@@ -155,12 +157,30 @@ nv40_state_validate_swtnl(struct nv40_context *nv40)
nv40->render_mode = SWTNL;
}
+ if (nv40->draw_dirty & NV40_NEW_VERTPROG)
+ draw_bind_vertex_shader(draw, nv40->vertprog->draw);
+
+ if (nv40->draw_dirty & NV40_NEW_RAST)
+ draw_set_rasterizer_state(draw, &nv40->rasterizer->pipe);
+
+ if (nv40->draw_dirty & NV40_NEW_UCP)
+ draw_set_clip_state(draw, &nv40->clip);
+
+ if (nv40->draw_dirty & NV40_NEW_VIEWPORT)
+ draw_set_viewport_state(draw, &nv40->viewport);
+
+ if (nv40->draw_dirty & NV40_NEW_ARRAYS) {
+ draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf);
+ draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt);
+ }
+
nv40_state_do_validate(nv40, swtnl_states);
if (nv40->fallback_swrast) {
NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast);
return FALSE;
}
+ nv40->draw_dirty = 0;
return TRUE;
}
diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c
index b66bf26afb..bc53924a67 100644
--- a/src/gallium/drivers/nv40/nv40_vbo.c
+++ b/src/gallium/drivers/nv40/nv40_vbo.c
@@ -32,7 +32,7 @@ nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
case PIPE_FORMAT_R16G16_SSCALED:
case PIPE_FORMAT_R16G16B16_SSCALED:
case PIPE_FORMAT_R16G16B16A16_SSCALED:
- *fmt = 5;
+ *fmt = NV40TCL_VTXFMT_TYPE_USHORT;
break;
default:
pf_sprint_name(fs, pipe);
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index 40ef7174a4..08d3f387e0 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -37,6 +37,8 @@
#define neg(s) nv40_sr_neg((s))
#define abs(s) nv40_sr_abs((s))
+#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n))
+
struct nv40_vpc {
struct nv40_vertex_program *vp;
@@ -200,6 +202,36 @@ emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break;
case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break;
case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break;
+ case NV40_VP_INST_DEST_CLIP(0):
+ vp->or |= (1 << 6);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(1):
+ vp->or |= (1 << 7);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(2):
+ vp->or |= (1 << 8);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2;
+ dst.index = NV40_VP_INST_DEST_FOGC;
+ break;
+ case NV40_VP_INST_DEST_CLIP(3):
+ vp->or |= (1 << 9);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ case NV40_VP_INST_DEST_CLIP(4):
+ vp->or |= (1 << 10);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
+ case NV40_VP_INST_DEST_CLIP(5):
+ vp->or |= (1 << 11);
+ vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5;
+ dst.index = NV40_VP_INST_DEST_PSZ;
+ break;
default:
break;
}
@@ -391,6 +423,11 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
int ai = -1, ci = -1, ii = -1;
int i;
+ struct {
+ struct nv40_sreg dst;
+ unsigned m;
+ } clip;
+
if (finst->Instruction.Opcode == TGSI_OPCODE_END)
return TRUE;
@@ -464,6 +501,47 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+ /* If writing to clip distance regs, need to modify instruction to
+ * change which component is written to. On NV40 the clip regs
+ * are the unused components (yzw) of FOGC/PSZ.
+ */
+ clip.dst = none;
+ if (dst.type == NV40SR_OUTPUT &&
+ dst.index >= NV40_VP_INST_DEST_CLIP(0) &&
+ dst.index <= NV40_VP_INST_DEST_CLIP(5)) {
+ unsigned n = dst.index - NV40_VP_INST_DEST_CLIP(0);
+ unsigned m[] =
+ { MASK_Y, MASK_Z, MASK_W, MASK_Y, MASK_Z, MASK_W };
+
+ /* Some instructions we can get away with swizzling and/or
+ * changing the writemask. Others, we'll use a temp reg.
+ */
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_DST:
+ case TGSI_OPCODE_EXP:
+ case TGSI_OPCODE_LIT:
+ case TGSI_OPCODE_LOG:
+ case TGSI_OPCODE_XPD:
+ clip.dst = dst;
+ clip.m = m[n];
+ dst = temp(vpc);
+ break;
+ case TGSI_OPCODE_DP3:
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DPH:
+ case TGSI_OPCODE_POW:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ mask = m[n];
+ break;
+ default:
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++)
+ src[i] = swz(src[i], X, X, X, X);
+ mask = m[n];
+ break;
+ }
+ }
+
switch (finst->Instruction.Opcode) {
case TGSI_OPCODE_ABS:
arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
@@ -561,6 +639,11 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
return FALSE;
}
+ if (clip.dst.type != NV40SR_NONE) {
+ arith(vpc, 0, OP_MOV, clip.dst, clip.m,
+ swz(dst, X, X, X, X), none, none);
+ }
+
release_temps(vpc);
return TRUE;
}
@@ -612,6 +695,15 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
return FALSE;
}
break;
+#if 0
+ case TGSI_SEMANTIC_CLIP:
+ if (fdec->Semantic.SemanticIndex >= 6) {
+ NOUVEAU_ERR("bad clip distance index\n");
+ return FALSE;
+ }
+ hw = NV40_VP_INST_DEST_CLIP(fdec->Semantic.SemanticIndex);
+ break;
+#endif
default:
NOUVEAU_ERR("bad output semantic\n");
return FALSE;
@@ -782,6 +874,7 @@ nv40_vertprog_validate(struct nv40_context *nv40)
{
struct nouveau_winsys *nvws = nv40->nvws;
struct pipe_winsys *ws = nv40->pipe.winsys;
+ struct nouveau_grobj *curie = nv40->screen->curie;
struct nv40_vertex_program *vp;
struct pipe_buffer *constbuf;
boolean upload_code = FALSE, upload_data = FALSE;
@@ -825,12 +918,14 @@ check_gpu_resources:
assert(0);
}
- so = so_new(5, 0);
- so_method(so, nv40->screen->curie, NV40TCL_VP_START_FROM_ID, 1);
+ so = so_new(7, 0);
+ so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1);
so_data (so, vp->exec->start);
- so_method(so, nv40->screen->curie, NV40TCL_VP_ATTRIB_EN, 2);
+ so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2);
so_data (so, vp->ir);
so_data (so, vp->or);
+ so_method(so, curie, NV40TCL_CLIP_PLANE_ENABLE, 1);
+ so_data (so, vp->clip_ctrl);
so_ref(so, &vp->so);
upload_code = TRUE;