diff options
Diffstat (limited to 'src/gallium/drivers/nv40')
26 files changed, 6391 insertions, 0 deletions
diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile new file mode 100644 index 0000000000..0ecae2b491 --- /dev/null +++ b/src/gallium/drivers/nv40/Makefile @@ -0,0 +1,29 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv40 + +C_SOURCES = \ +	nv40_clear.c \ +	nv40_context.c \ +	nv40_draw.c \ +	nv40_fragprog.c \ +	nv40_fragtex.c \ +	nv40_miptree.c \ +	nv40_query.c \ +	nv40_screen.c \ +	nv40_state.c \ +	nv40_state_blend.c \ +	nv40_state_emit.c \ +	nv40_state_fb.c \ +	nv40_state_rasterizer.c \ +	nv40_state_scissor.c \ +	nv40_state_stipple.c \ +	nv40_state_viewport.c \ +	nv40_state_zsa.c \ +	nv40_surface.c \ +	nv40_transfer.c \ +	nv40_vbo.c \ +	nv40_vertprog.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/nv40/nv40_clear.c b/src/gallium/drivers/nv40/nv40_clear.c new file mode 100644 index 0000000000..59efd620e3 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_clear.c @@ -0,0 +1,13 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv40_context.h" + +void +nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, +	   unsigned clearValue) +{ +	pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +	ps->status = PIPE_SURFACE_STATUS_CLEAR; +} diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c new file mode 100644 index 0000000000..5d325f5067 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -0,0 +1,72 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv40_context.h" +#include "nv40_screen.h" + +static void +nv40_flush(struct pipe_context *pipe, unsigned flags, +	   struct pipe_fence_handle **fence) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	 +	if (flags & PIPE_FLUSH_TEXTURE_CACHE) { +		BEGIN_RING(curie, 0x1fd8, 1); +		OUT_RING  (2); +		BEGIN_RING(curie, 0x1fd8, 1); +		OUT_RING  (1); +	} + +	
FIRE_RING(fence); +} + +static void +nv40_destroy(struct pipe_context *pipe) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	if (nv40->draw) +		draw_destroy(nv40->draw); +	FREE(nv40); +} + +struct pipe_context * +nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ +	struct nv40_screen *screen = nv40_screen(pscreen); +	struct pipe_winsys *ws = pscreen->winsys; +	struct nv40_context *nv40; +	struct nouveau_winsys *nvws = screen->nvws; + +	nv40 = CALLOC(1, sizeof(struct nv40_context)); +	if (!nv40) +		return NULL; +	nv40->screen = screen; +	nv40->pctx_id = pctx_id; + +	nv40->nvws = nvws; + +	nv40->pipe.winsys = ws; +	nv40->pipe.screen = pscreen; +	nv40->pipe.destroy = nv40_destroy; +	nv40->pipe.draw_arrays = nv40_draw_arrays; +	nv40->pipe.draw_elements = nv40_draw_elements; +	nv40->pipe.clear = nv40_clear; +	nv40->pipe.flush = nv40_flush; + +	nv40_init_query_functions(nv40); +	nv40_init_surface_functions(nv40); +	nv40_init_state_functions(nv40); + +	/* Create, configure, and install fallback swtnl path */ +	nv40->draw = draw_create(); +	draw_wide_point_threshold(nv40->draw, 9999999.0); +	draw_wide_line_threshold(nv40->draw, 9999999.0); +	draw_enable_line_stipple(nv40->draw, FALSE); +	draw_enable_point_sprites(nv40->draw, FALSE); +	draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40)); + +	return &nv40->pipe; +} +	 diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h new file mode 100644 index 0000000000..adcfbdd85a --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -0,0 +1,233 @@ +#ifndef __NV40_CONTEXT_H__ +#define __NV40_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx)                                      
        \ +	struct nv40_screen *ctx = nv40->screen +#include "nouveau/nouveau_push.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nv40_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ +	fprintf(stderr, "%s:%d -  "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ +	fprintf(stderr, "nouveau: "fmt, ##args); + +enum nv40_state_index { +	NV40_STATE_FB = 0, +	NV40_STATE_VIEWPORT = 1, +	NV40_STATE_BLEND = 2, +	NV40_STATE_RAST = 3, +	NV40_STATE_ZSA = 4, +	NV40_STATE_BCOL = 5, +	NV40_STATE_CLIP = 6, +	NV40_STATE_SCISSOR = 7, +	NV40_STATE_STIPPLE = 8, +	NV40_STATE_FRAGPROG = 9, +	NV40_STATE_VERTPROG = 10, +	NV40_STATE_FRAGTEX0 = 11, +	NV40_STATE_FRAGTEX1 = 12, +	NV40_STATE_FRAGTEX2 = 13, +	NV40_STATE_FRAGTEX3 = 14, +	NV40_STATE_FRAGTEX4 = 15, +	NV40_STATE_FRAGTEX5 = 16, +	NV40_STATE_FRAGTEX6 = 17, +	NV40_STATE_FRAGTEX7 = 18, +	NV40_STATE_FRAGTEX8 = 19, +	NV40_STATE_FRAGTEX9 = 20, +	NV40_STATE_FRAGTEX10 = 21, +	NV40_STATE_FRAGTEX11 = 22, +	NV40_STATE_FRAGTEX12 = 23, +	NV40_STATE_FRAGTEX13 = 24, +	NV40_STATE_FRAGTEX14 = 25, +	NV40_STATE_FRAGTEX15 = 26, +	NV40_STATE_VERTTEX0 = 27, +	NV40_STATE_VERTTEX1 = 28, +	NV40_STATE_VERTTEX2 = 29, +	NV40_STATE_VERTTEX3 = 30, +	NV40_STATE_VTXBUF = 31, +	NV40_STATE_VTXFMT = 32, +	NV40_STATE_VTXATTR = 33, +	NV40_STATE_MAX = 34 +}; + +#include "nv40_screen.h" + +#define NV40_NEW_BLEND		(1 <<  0) +#define NV40_NEW_RAST		(1 <<  1) +#define NV40_NEW_ZSA		(1 <<  2) +#define NV40_NEW_SAMPLER	(1 <<  3) +#define NV40_NEW_FB		(1 <<  4) +#define NV40_NEW_STIPPLE	(1 <<  5) +#define NV40_NEW_SCISSOR	(1 <<  6) +#define NV40_NEW_VIEWPORT	(1 <<  7) +#define NV40_NEW_BCOL		(1 <<  8) +#define NV40_NEW_VERTPROG	(1 <<  9) +#define NV40_NEW_FRAGPROG	(1 << 10) +#define NV40_NEW_ARRAYS		(1 << 11) +#define NV40_NEW_UCP		(1 << 12) + +struct nv40_rasterizer_state { +	struct pipe_rasterizer_state pipe; +	struct nouveau_stateobj *so; +}; + +struct nv40_zsa_state { +	struct pipe_depth_stencil_alpha_state pipe; +	struct nouveau_stateobj *so; 
+}; + +struct nv40_blend_state { +	struct pipe_blend_state pipe; +	struct nouveau_stateobj *so; +}; + + +struct nv40_state { +	unsigned scissor_enabled; +	unsigned stipple_enabled; +	unsigned viewport_bypass; +	unsigned fp_samplers; + +	uint64_t dirty; +	struct nouveau_stateobj *hw[NV40_STATE_MAX]; +}; + +struct nv40_context { +	struct pipe_context pipe; + +	struct nouveau_winsys *nvws; +	struct nv40_screen *screen; +	unsigned pctx_id; + +	struct draw_context *draw; + +	/* HW state derived from pipe states */ +	struct nv40_state state; +	struct { +		struct nv40_vertex_program *vertprog; + +		unsigned nr_attribs; +		unsigned hw[PIPE_MAX_SHADER_INPUTS]; +		unsigned draw[PIPE_MAX_SHADER_INPUTS]; +		unsigned emit[PIPE_MAX_SHADER_INPUTS]; +	} swtnl; + +	enum { +		HW, SWTNL, SWRAST +	} render_mode; +	unsigned fallback_swtnl; +	unsigned fallback_swrast; + +	/* Context state */ +	unsigned dirty, draw_dirty; +	struct pipe_scissor_state scissor; +	unsigned stipple[32]; +	struct pipe_clip_state clip; +	struct nv40_vertex_program *vertprog; +	struct nv40_fragment_program *fragprog; +	struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; +	unsigned constbuf_nr[PIPE_SHADER_TYPES]; +	struct nv40_rasterizer_state *rasterizer; +	struct nv40_zsa_state *zsa; +	struct nv40_blend_state *blend; +	struct pipe_blend_color blend_colour; +	struct pipe_viewport_state viewport; +	struct pipe_framebuffer_state framebuffer; +	struct pipe_buffer *idxbuf; +	unsigned idxbuf_format; +	struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; +	struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; +	unsigned nr_samplers; +	unsigned nr_textures; +	unsigned dirty_samplers; +	struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; +	unsigned vtxbuf_nr; +	struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; +	unsigned vtxelt_nr; +	const unsigned *edgeflags; +}; + +static INLINE struct nv40_context * +nv40_context(struct pipe_context *pipe) +{ +	return (struct nv40_context *)pipe; +} + +struct nv40_state_entry { 
+	boolean (*validate)(struct nv40_context *nv40); +	struct { +		unsigned pipe; +		unsigned hw; +	} dirty; +}; + +extern void nv40_init_state_functions(struct nv40_context *nv40); +extern void nv40_init_surface_functions(struct nv40_context *nv40); +extern void nv40_init_query_functions(struct nv40_context *nv40); + +extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv40_draw.c */ +extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); +extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe, +					struct pipe_buffer *idxbuf, +					unsigned ib_size, unsigned mode, +					unsigned start, unsigned count); + +/* nv40_vertprog.c */ +extern void nv40_vertprog_destroy(struct nv40_context *, +				  struct nv40_vertex_program *); + +/* nv40_fragprog.c */ +extern void nv40_fragprog_destroy(struct nv40_context *, +				  struct nv40_fragment_program *); + +/* nv40_fragtex.c */ +extern void nv40_fragtex_bind(struct nv40_context *); + +/* nv40_state.c and friends */ +extern boolean nv40_state_validate(struct nv40_context *nv40); +extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40); +extern void nv40_state_emit(struct nv40_context *nv40); +extern struct nv40_state_entry nv40_state_rasterizer; +extern struct nv40_state_entry nv40_state_scissor; +extern struct nv40_state_entry nv40_state_stipple; +extern struct nv40_state_entry nv40_state_fragprog; +extern struct nv40_state_entry nv40_state_vertprog; +extern struct nv40_state_entry nv40_state_blend; +extern struct nv40_state_entry nv40_state_blend_colour; +extern struct nv40_state_entry nv40_state_zsa; +extern struct nv40_state_entry nv40_state_viewport; +extern struct nv40_state_entry nv40_state_framebuffer; +extern struct nv40_state_entry nv40_state_fragtex; +extern struct nv40_state_entry nv40_state_vbo; +extern struct nv40_state_entry nv40_state_vtxfmt; + +/* nv40_vbo.c */ +extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, +				
unsigned start, unsigned count); +extern boolean nv40_draw_elements(struct pipe_context *pipe, +				  struct pipe_buffer *indexBuffer, +				  unsigned indexSize, +				  unsigned mode, unsigned start, +				  unsigned count); + +/* nv40_clear.c */ +extern void nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, +		       unsigned clearValue); + +#endif diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c new file mode 100644 index 0000000000..c83ff91d7e --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -0,0 +1,349 @@ +#include "pipe/p_shader_tokens.h" + +#include "util/u_pack_color.h" + +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pipe.h" + +#include "nv40_context.h" +#define NV40_SHADER_NO_FUCKEDNESS +#include "nv40_shader.h" + +/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very + * often at all.  Uses "quadro style" vertex submission + a fixed vertex + * layout to avoid the need to generate a vertex program or vtxfmt. 
+ */ + +struct nv40_render_stage { +	struct draw_stage stage; +	struct nv40_context *nv40; +	unsigned prim; +}; + +static INLINE struct nv40_render_stage * +nv40_render_stage(struct draw_stage *stage) +{ +	return (struct nv40_render_stage *)stage; +} + +static INLINE void +nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) +{ +	unsigned i; + +	for (i = 0; i < nv40->swtnl.nr_attribs; i++) { +		unsigned idx = nv40->swtnl.draw[i]; +		unsigned hw = nv40->swtnl.hw[i]; + +		switch (nv40->swtnl.emit[i]) { +		case EMIT_OMIT: +			break; +		case EMIT_1F: +			BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1); +			OUT_RING  (fui(v->data[idx][0])); +			break; +		case EMIT_2F: +			BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); +			OUT_RING  (fui(v->data[idx][0])); +			OUT_RING  (fui(v->data[idx][1])); +			break; +		case EMIT_3F: +			BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); +			OUT_RING  (fui(v->data[idx][0])); +			OUT_RING  (fui(v->data[idx][1])); +			OUT_RING  (fui(v->data[idx][2])); +			break; +		case EMIT_4F: +			BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); +			OUT_RING  (fui(v->data[idx][0])); +			OUT_RING  (fui(v->data[idx][1])); +			OUT_RING  (fui(v->data[idx][2])); +			OUT_RING  (fui(v->data[idx][3])); +			break; +		case EMIT_4UB: +			BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1); +			OUT_RING  (pack_ub4(float_to_ubyte(v->data[idx][0]), +					    float_to_ubyte(v->data[idx][1]), +					    float_to_ubyte(v->data[idx][2]), +					    float_to_ubyte(v->data[idx][3]))); +			break; +		default: +			assert(0); +			break; +		} +	} +} + +static INLINE void +nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, +	       unsigned mode, unsigned count) +{ +	struct nv40_render_stage *rs = nv40_render_stage(stage); +	struct nv40_context *nv40 = rs->nv40; +	struct nouveau_pushbuf *pb = nv40->nvws->channel->pushbuf; +	unsigned i; + +	/* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ +	if (pb->remaining < ((count * 20) + 
6)) { +		if (rs->prim != NV40TCL_BEGIN_END_STOP) { +			NOUVEAU_ERR("AIII, missed flush\n"); +			assert(0); +		} +		FIRE_RING(NULL); +		nv40_state_emit(nv40); +	} + +	/* Switch primitive modes if necessary */ +	if (rs->prim != mode) { +		if (rs->prim != NV40TCL_BEGIN_END_STOP) { +			BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +			OUT_RING  (NV40TCL_BEGIN_END_STOP);	 +		} + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (mode); +		rs->prim = mode; +	} + +	/* Emit vertex data */ +	for (i = 0; i < count; i++) +		nv40_render_vertex(nv40, prim->v[i]); + +	/* If it's likely we'll need to empty the push buffer soon, finish +	 * off the primitive now. +	 */ +	if (pb->remaining < ((count * 20) + 6)) { +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (NV40TCL_BEGIN_END_STOP); +		rs->prim = NV40TCL_BEGIN_END_STOP; +	} +} + +static void +nv40_render_point(struct draw_stage *draw, struct prim_header *prim) +{ +	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1); +} + +static void +nv40_render_line(struct draw_stage *draw, struct prim_header *prim) +{ +	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2); +} + +static void +nv40_render_tri(struct draw_stage *draw, struct prim_header *prim) +{ +	nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3); +} + +static void +nv40_render_flush(struct draw_stage *draw, unsigned flags) +{ +	struct nv40_render_stage *rs = nv40_render_stage(draw); +	struct nv40_context *nv40 = rs->nv40; + +	if (rs->prim != NV40TCL_BEGIN_END_STOP) { +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (NV40TCL_BEGIN_END_STOP); +		rs->prim = NV40TCL_BEGIN_END_STOP; +	} +} + +static void +nv40_render_reset_stipple_counter(struct draw_stage *draw) +{ +} + +static void +nv40_render_destroy(struct draw_stage *draw) +{ +	FREE(draw); +} + +static INLINE void +emit_mov(struct nv40_vertex_program *vp, +	 unsigned dst, unsigned src, unsigned vor, unsigned mask) +{ +	struct nv40_vertex_program_exec *inst; + +	vp->insns = 
realloc(vp->insns, +			    sizeof(struct nv40_vertex_program_exec) * +			    ++vp->nr_insns); +	inst = &vp->insns[vp->nr_insns - 1]; + +	inst->data[0] = 0x401f9c6c; +	inst->data[1] = 0x0040000d | (src << 8); +	inst->data[2] = 0x8106c083; +	inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13); +	inst->const_index = -1; +	inst->has_branch_offset = FALSE; + +	vp->ir |= (1 << src); +	if (vor != ~0) +		vp->or |= (1 << vor); +} + +static struct nv40_vertex_program * +create_drawvp(struct nv40_context *nv40) +{ +	struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program); +	unsigned i; + +	emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf); +	emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf); +	emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf); +	emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf); +	emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf); +	emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8); +	for (i = 0; i < 8; i++) +		emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf); + +	vp->insns[vp->nr_insns - 1].data[3] |= 1; +	vp->translated = TRUE; +	return vp; +} + +struct draw_stage * +nv40_draw_render_stage(struct nv40_context *nv40) +{ +	struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage); + +	if (!nv40->swtnl.vertprog) +		nv40->swtnl.vertprog = create_drawvp(nv40); + +	render->nv40 = nv40; +	render->stage.draw = nv40->draw; +	render->stage.point = nv40_render_point; +	render->stage.line = nv40_render_line; +	render->stage.tri = nv40_render_tri; +	render->stage.flush = nv40_render_flush; +	render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter; +	render->stage.destroy = nv40_render_destroy; + +	return &render->stage; +} + +boolean +nv40_draw_elements_swtnl(struct pipe_context *pipe, +			 struct pipe_buffer *idxbuf, unsigned idxbuf_size, +			 unsigned mode, unsigned start, unsigned count) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct pipe_winsys *ws = pipe->winsys; +	unsigned i; +	void *map; + +	if 
(!nv40_state_validate_swtnl(nv40)) +		return FALSE; +	nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); +	nv40_state_emit(nv40); + +	for (i = 0; i < nv40->vtxbuf_nr; i++) { +		map = ws->buffer_map(ws, nv40->vtxbuf[i].buffer, +                                      PIPE_BUFFER_USAGE_CPU_READ); +		draw_set_mapped_vertex_buffer(nv40->draw, i, map); +	} + +	if (idxbuf) { +		map = ws->buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ); +		draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map); +	} else { +		draw_set_mapped_element_buffer(nv40->draw, 0, NULL); +	} + +	if (nv40->constbuf[PIPE_SHADER_VERTEX]) { +		const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX]; + +		map = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX], +				     PIPE_BUFFER_USAGE_CPU_READ); +		draw_set_mapped_constant_buffer(nv40->draw, map, nr); +	} + +	draw_arrays(nv40->draw, mode, start, count); + +	for (i = 0; i < nv40->vtxbuf_nr; i++) +		ws->buffer_unmap(ws, nv40->vtxbuf[i].buffer); + +	if (idxbuf) +		ws->buffer_unmap(ws, idxbuf); + +	if (nv40->constbuf[PIPE_SHADER_VERTEX]) +		ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]); + +	draw_flush(nv40->draw); +	pipe->flush(pipe, 0, NULL); + +	return TRUE; +} + +static INLINE void +emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit, +	    unsigned semantic, unsigned index) +{ +	unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index); +	unsigned a = nv40->swtnl.nr_attribs++; + +	nv40->swtnl.hw[a] = hw; +	nv40->swtnl.emit[a] = emit; +	nv40->swtnl.draw[a] = draw_out; +} + +static boolean +nv40_state_vtxfmt_validate(struct nv40_context *nv40) +{ +	struct nv40_fragment_program *fp = nv40->fragprog; +	unsigned colour = 0, texcoords = 0, fog = 0, i; + +	/* Determine needed fragprog inputs */ +	for (i = 0; i < fp->info.num_inputs; i++) { +		switch (fp->info.input_semantic_name[i]) { +		case TGSI_SEMANTIC_POSITION: +			break; +		case TGSI_SEMANTIC_COLOR: +			colour |= (1 << fp->info.input_semantic_index[i]); 
+			break; +		case TGSI_SEMANTIC_GENERIC: +			texcoords |= (1 << fp->info.input_semantic_index[i]); +			break; +		case TGSI_SEMANTIC_FOG: +			fog = 1; +			break; +		default: +			assert(0); +		} +	} + +	nv40->swtnl.nr_attribs = 0; + +	/* Map draw vtxprog output to hw attribute IDs */ +	for (i = 0; i < 2; i++) { +		if (!(colour & (1 << i))) +			continue; +		emit_attrib(nv40, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i); +	} + +	for (i = 0; i < 8; i++) { +		if (!(texcoords & (1 << i))) +			continue; +		emit_attrib(nv40, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); +	} + +	if (fog) { +		emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); +	} + +	emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0); + +	return FALSE; +} + +struct nv40_state_entry nv40_state_vtxfmt = { +	.validate = nv40_state_vtxfmt_validate, +	.dirty = { +		.pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG, +		.hw = 0 +	} +}; + diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c new file mode 100644 index 0000000000..91dcbebda0 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -0,0 +1,991 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv40_context.h" + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 1 +#define MASK_Y 2 +#define MASK_Z 4 +#define MASK_W 8 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X +#define DEF_CTEST NV40_FP_OP_COND_TR +#include "nv40_shader.h" + +#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv40_sr_neg((s)) +#define abs(s) nv40_sr_abs((s)) +#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v) + +#define MAX_CONSTS 128 +#define MAX_IMM 32 +struct nv40_fpc { +	struct nv40_fragment_program *fp; + +	uint attrib_map[PIPE_MAX_SHADER_INPUTS]; + +	unsigned 
r_temps; +	unsigned r_temps_discard; +	struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; +	struct nv40_sreg *r_temp; + +	int num_regs; + +	unsigned inst_offset; +	unsigned have_const; + +	struct { +		int pipe; +		float vals[4]; +	} consts[MAX_CONSTS]; +	int nr_consts; + +	struct nv40_sreg imm[MAX_IMM]; +	unsigned nr_imm; +}; + +static INLINE struct nv40_sreg +temp(struct nv40_fpc *fpc) +{ +	int idx = ffs(~fpc->r_temps) - 1; + +	if (idx < 0) { +		NOUVEAU_ERR("out of temps!!\n"); +		assert(0); +		return nv40_sr(NV40SR_TEMP, 0); +	} + +	fpc->r_temps |= (1 << idx); +	fpc->r_temps_discard |= (1 << idx); +	return nv40_sr(NV40SR_TEMP, idx); +} + +static INLINE void +release_temps(struct nv40_fpc *fpc) +{ +	fpc->r_temps &= ~fpc->r_temps_discard; +	fpc->r_temps_discard = 0; +} + +static INLINE struct nv40_sreg +constant(struct nv40_fpc *fpc, int pipe, float vals[4]) +{ +	int idx; + +	if (fpc->nr_consts == MAX_CONSTS) +		assert(0); +	idx = fpc->nr_consts++; + +	fpc->consts[idx].pipe = pipe; +	if (pipe == -1) +		memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); +	return nv40_sr(NV40SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ +	nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \ +			(d), (m), (s0), (s1), (s2)) +#define tex(cc,s,o,u,d,m,s0,s1,s2) \ +	nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \ +		    (d), (m), (s0), none, none) + +static void +grow_insns(struct nv40_fpc *fpc, int size) +{ +	struct nv40_fragment_program *fp = fpc->fp; + +	fp->insn_len += size; +	fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); +} + +static void +emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) +{ +	struct nv40_fragment_program *fp = fpc->fp; +	uint32_t *hw = &fp->insn[fpc->inst_offset]; +	uint32_t sr = 0; + +	switch (src.type) { +	case NV40SR_INPUT: +		sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); +		hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT); +		break; +	case NV40SR_OUTPUT: +		sr |= NV40_FP_REG_SRC_HALF; +		/* 
fall-through */ +	case NV40SR_TEMP: +		sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT); +		sr |= (src.index << NV40_FP_REG_SRC_SHIFT); +		break; +	case NV40SR_CONST: +		if (!fpc->have_const) { +			grow_insns(fpc, 4); +			fpc->have_const = 1; +		} + +		hw = &fp->insn[fpc->inst_offset]; +		if (fpc->consts[src.index].pipe >= 0) { +			struct nv40_fragment_program_data *fpd; + +			fp->consts = realloc(fp->consts, ++fp->nr_consts * +					     sizeof(*fpd)); +			fpd = &fp->consts[fp->nr_consts - 1]; +			fpd->offset = fpc->inst_offset + 4; +			fpd->index = fpc->consts[src.index].pipe; +			memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); +		} else { +			memcpy(&fp->insn[fpc->inst_offset + 4], +				fpc->consts[src.index].vals, +				sizeof(uint32_t) * 4); +		} + +		sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);	 +		break; +	case NV40SR_NONE: +		sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); +		break; +	default: +		assert(0); +	} + +	if (src.negate) +		sr |= NV40_FP_REG_NEGATE; + +	if (src.abs) +		hw[1] |= (1 << (29 + pos)); + +	sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) | +	       (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) | +	       (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) | +	       (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT)); + +	hw[pos + 1] |= sr; +} + +static void +emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst) +{ +	struct nv40_fragment_program *fp = fpc->fp; +	uint32_t *hw = &fp->insn[fpc->inst_offset]; + +	switch (dst.type) { +	case NV40SR_TEMP: +		if (fpc->num_regs < (dst.index + 1)) +			fpc->num_regs = dst.index + 1; +		break; +	case NV40SR_OUTPUT: +		if (dst.index == 1) { +			fp->fp_control |= 0xe; +		} else { +			hw[0] |= NV40_FP_OP_OUT_REG_HALF; +		} +		break; +	case NV40SR_NONE: +		hw[0] |= (1 << 30); +		break; +	default: +		assert(0); +	} + +	hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT); +} + +static void +nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, +	      struct nv40_sreg dst, int mask, +	      struct 
nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +{ +	struct nv40_fragment_program *fp = fpc->fp; +	uint32_t *hw; + +	fpc->inst_offset = fp->insn_len; +	fpc->have_const = 0; +	grow_insns(fpc, 4); +	hw = &fp->insn[fpc->inst_offset]; +	memset(hw, 0, sizeof(uint32_t) * 4); + +	if (op == NV40_FP_OP_OPCODE_KIL) +		fp->fp_control |= NV40TCL_FP_CONTROL_KIL; +	hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT); +	hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT); +	hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT); + +	if (sat) +		hw[0] |= NV40_FP_OP_OUT_SAT; + +	if (dst.cc_update) +		hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE; +	hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT); +	hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) | +		  (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) | +		  (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) | +		  (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT)); + +	emit_dst(fpc, dst); +	emit_src(fpc, 0, s0); +	emit_src(fpc, 1, s1); +	emit_src(fpc, 2, s2); +} + +static void +nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit, +	    struct nv40_sreg dst, int mask, +	    struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +{ +	struct nv40_fragment_program *fp = fpc->fp; + +	nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + +	fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT); +	fp->samplers |= (1 << unit); +} + +static INLINE struct nv40_sreg +tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) +{ +	struct nv40_sreg src; + +	switch (fsrc->SrcRegister.File) { +	case TGSI_FILE_INPUT: +		src = nv40_sr(NV40SR_INPUT, +			      fpc->attrib_map[fsrc->SrcRegister.Index]); +		break; +	case TGSI_FILE_CONSTANT: +		src = constant(fpc, fsrc->SrcRegister.Index, NULL); +		break; +	case TGSI_FILE_IMMEDIATE: +		assert(fsrc->SrcRegister.Index < fpc->nr_imm); +		src = fpc->imm[fsrc->SrcRegister.Index]; +		break; +	case TGSI_FILE_TEMPORARY: +		src = fpc->r_temp[fsrc->SrcRegister.Index]; +		break; +	/* NV40 
fragprog result regs are just temps, so this is simple */ +	case TGSI_FILE_OUTPUT: +		src = fpc->r_result[fsrc->SrcRegister.Index]; +		break; +	default: +		NOUVEAU_ERR("bad src file\n"); +		break; +	} + +	src.abs = fsrc->SrcRegisterExtMod.Absolute; +	src.negate = fsrc->SrcRegister.Negate; +	src.swz[0] = fsrc->SrcRegister.SwizzleX; +	src.swz[1] = fsrc->SrcRegister.SwizzleY; +	src.swz[2] = fsrc->SrcRegister.SwizzleZ; +	src.swz[3] = fsrc->SrcRegister.SwizzleW; +	return src; +} + +static INLINE struct nv40_sreg +tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { +	switch (fdst->DstRegister.File) { +	case TGSI_FILE_OUTPUT: +		return fpc->r_result[fdst->DstRegister.Index]; +	case TGSI_FILE_TEMPORARY: +		return fpc->r_temp[fdst->DstRegister.Index]; +	case TGSI_FILE_NULL: +		return nv40_sr(NV40SR_NONE, 0); +	default: +		NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); +		return nv40_sr(NV40SR_NONE, 0); +	} +} + +static INLINE int +tgsi_mask(uint tgsi) +{ +	int mask = 0; + +	if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; +	if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; +	if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; +	if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; +	return mask; +} + +static boolean +src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, +	       struct nv40_sreg *src) +{ +	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); +	struct nv40_sreg tgsi = tgsi_src(fpc, fsrc); +	uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; +	uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, +			fsrc->SrcRegisterExtSwz.NegateY, +			fsrc->SrcRegisterExtSwz.NegateZ, +			fsrc->SrcRegisterExtSwz.NegateW }; +	uint c; + +	for (c = 0; c < 4; c++) { +		switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { +		case TGSI_EXTSWIZZLE_X: +		case TGSI_EXTSWIZZLE_Y: +		case TGSI_EXTSWIZZLE_Z: +		case TGSI_EXTSWIZZLE_W: +			mask |= (1 << c); +			break; +		case TGSI_EXTSWIZZLE_ZERO: +			zero_mask |= (1 << c); +			
tgsi.swz[c] = SWZ_X; +			break; +		case TGSI_EXTSWIZZLE_ONE: +			one_mask |= (1 << c); +			tgsi.swz[c] = SWZ_X; +			break; +		default: +			assert(0); +		} + +		if (!tgsi.negate && neg[c]) +			neg_mask |= (1 << c); +	} + +	if (mask == MASK_ALL && !neg_mask) +		return TRUE; + +	*src = temp(fpc); + +	if (mask) +		arith(fpc, 0, MOV, *src, mask, tgsi, none, none); + +	if (zero_mask) +		arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); + +	if (one_mask) +		arith(fpc, 0, STR, *src, one_mask, *src, none, none); + +	if (neg_mask) { +		struct nv40_sreg one = temp(fpc); +		arith(fpc, 0, STR, one, neg_mask, one, none, none); +		arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); +	} + +	return FALSE; +} + +static boolean +nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, +				const struct tgsi_full_instruction *finst) +{ +	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); +	struct nv40_sreg src[3], dst, tmp; +	int mask, sat, unit; +	int ai = -1, ci = -1, ii = -1; +	int i; + +	if (finst->Instruction.Opcode == TGSI_OPCODE_END) +		return TRUE; + +	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fsrc; + +		fsrc = &finst->FullSrcRegisters[i]; +		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { +			src[i] = tgsi_src(fpc, fsrc); +		} +	} + +	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fsrc; + +		fsrc = &finst->FullSrcRegisters[i]; + +		switch (fsrc->SrcRegister.File) { +		case TGSI_FILE_INPUT: +		case TGSI_FILE_CONSTANT: +		case TGSI_FILE_TEMPORARY: +			if (!src_native_swz(fpc, fsrc, &src[i])) +				continue; +			break; +		default: +			break; +		} + +		switch (fsrc->SrcRegister.File) { +		case TGSI_FILE_INPUT: +			if (ai == -1 || ai == fsrc->SrcRegister.Index) { +				ai = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(fpc, fsrc); +			} else { +				src[i] = temp(fpc); +				arith(fpc, 0, MOV, src[i], MASK_ALL, +				      tgsi_src(fpc, fsrc), none, none); +			} +			
break; +		case TGSI_FILE_CONSTANT: +			if ((ci == -1 && ii == -1) || +			    ci == fsrc->SrcRegister.Index) { +				ci = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(fpc, fsrc); +			} else { +				src[i] = temp(fpc); +				arith(fpc, 0, MOV, src[i], MASK_ALL, +				      tgsi_src(fpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_IMMEDIATE: +			if ((ci == -1 && ii == -1) || +			    ii == fsrc->SrcRegister.Index) { +				ii = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(fpc, fsrc); +			} else { +				src[i] = temp(fpc); +				arith(fpc, 0, MOV, src[i], MASK_ALL, +				      tgsi_src(fpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_TEMPORARY: +			/* handled above */ +			break; +		case TGSI_FILE_SAMPLER: +			unit = fsrc->SrcRegister.Index; +			break; +		case TGSI_FILE_OUTPUT: +			break; +		default: +			NOUVEAU_ERR("bad src file\n"); +			return FALSE; +		} +	} + +	dst  = tgsi_dst(fpc, &finst->FullDstRegisters[0]); +	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); +	sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + +	switch (finst->Instruction.Opcode) { +	case TGSI_OPCODE_ABS: +		arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); +		break; +	case TGSI_OPCODE_ADD: +		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_CMP: +		tmp = temp(fpc); +		arith(fpc, sat, MOV, dst, mask, src[2], none, none); +		tmp.cc_update = 1; +		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); +		dst.cc_test = NV40_VP_INST_COND_LT; +		arith(fpc, sat, MOV, dst, mask, src[1], none, none); +		break; +	case TGSI_OPCODE_COS: +		arith(fpc, sat, COS, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_DDX: +		if (mask & (MASK_Z | MASK_W)) { +			tmp = temp(fpc); +			arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, +			      swz(src[0], Z, W, Z, W), none, none); +			arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, +			      swz(tmp, X, Y, X, Y), none, none); +			arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0], +			     
 none, none); +			arith(fpc, 0, MOV, dst, mask, tmp, none, none); +		} else { +			arith(fpc, sat, DDX, dst, mask, src[0], none, none); +		} +		break; +	case TGSI_OPCODE_DDY: +		if (mask & (MASK_Z | MASK_W)) { +			tmp = temp(fpc); +			arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, +			      swz(src[0], Z, W, Z, W), none, none); +			arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, +			      swz(tmp, X, Y, X, Y), none, none); +			arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0], +			      none, none); +			arith(fpc, 0, MOV, dst, mask, tmp, none, none); +		} else { +			arith(fpc, sat, DDY, dst, mask, src[0], none, none); +		} +		break; +	case TGSI_OPCODE_DP3: +		arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DP4: +		arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DPH: +		tmp = temp(fpc); +		arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); +		arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), +		      swz(src[1], W, W, W, W), none); +		break; +	case TGSI_OPCODE_DST: +		arith(fpc, sat, DST, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_EX2: +		arith(fpc, sat, EX2, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_FLR: +		arith(fpc, sat, FLR, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_FRC: +		arith(fpc, sat, FRC, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_KILP: +		arith(fpc, 0, KIL, none, 0, none, none, none); +		break; +	case TGSI_OPCODE_KIL: +		dst = nv40_sr(NV40SR_NONE, 0); +		dst.cc_update = 1; +		arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); +		dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT; +		arith(fpc, 0, KIL, dst, 0, none, none, none); +		break; +	case TGSI_OPCODE_LG2: +		arith(fpc, sat, LG2, dst, mask, src[0], none, none); +		break; +//	case TGSI_OPCODE_LIT: +	case TGSI_OPCODE_LRP: +		tmp = temp(fpc); +		arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); +		arith(fpc, sat, MAD, dst, mask, src[0], src[1], 
tmp); +		break; +	case TGSI_OPCODE_MAD: +		arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); +		break; +	case TGSI_OPCODE_MAX: +		arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_MIN: +		arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_MOV: +		arith(fpc, sat, MOV, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_MUL: +		arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_NOISE1: +	case TGSI_OPCODE_NOISE2: +	case TGSI_OPCODE_NOISE3: +	case TGSI_OPCODE_NOISE4: +		arith(fpc, sat, SFL, dst, mask, none, none, none); +		break; +	case TGSI_OPCODE_POW: +		tmp = temp(fpc); +		arith(fpc, 0, LG2, tmp, MASK_X, +		      swz(src[0], X, X, X, X), none, none); +		arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), +		      swz(src[1], X, X, X, X), none); +		arith(fpc, sat, EX2, dst, mask, +		      swz(tmp, X, X, X, X), none, none); +		break; +	case TGSI_OPCODE_RCP: +		arith(fpc, sat, RCP, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_RET: +		assert(0); +		break; +	case TGSI_OPCODE_RFL: +		tmp = temp(fpc); +		arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); +		arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); +		arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, +		      swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); +		arith(fpc, sat, MAD, dst, mask, +		      swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); +		break; +	case TGSI_OPCODE_RSQ: +		tmp = temp(fpc); +		arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, +		      abs(swz(src[0], X, X, X, X)), none, none); +		arith(fpc, sat, EX2, dst, mask, +		      neg(swz(tmp, X, X, X, X)), none, none); +		break; +	case TGSI_OPCODE_SCS: +		if (mask & MASK_X) { +			arith(fpc, sat, COS, dst, MASK_X, +			      swz(src[0], X, X, X, X), none, none); +		} +		if (mask & MASK_Y) { +			arith(fpc, sat, SIN, dst, MASK_Y, +			      swz(src[0], X, X, X, X), none, none); +		} +		break; +	case TGSI_OPCODE_SEQ: +		
arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SFL: +		arith(fpc, sat, SFL, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SGE: +		arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SGT: +		arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SIN: +		arith(fpc, sat, SIN, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_SLE: +		arith(fpc, sat, SLE, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SLT: +		arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SNE: +		arith(fpc, sat, SNE, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_STR: +		arith(fpc, sat, STR, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SUB: +		arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); +		break; +	case TGSI_OPCODE_TEX: +		tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_TXB: +		tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_TXP: +		tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_XPD: +		tmp = temp(fpc); +		arith(fpc, 0, MUL, tmp, mask, +		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); +		arith(fpc, sat, MAD, dst, (mask & ~MASK_W), +		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), +		      neg(tmp)); +		break; +	default: +		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); +		return FALSE; +	} + +	release_temps(fpc); +	return TRUE; +} + +static boolean +nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, +				const struct tgsi_full_declaration *fdec) +{ +	int hw; + +	switch (fdec->Semantic.SemanticName) { +	case TGSI_SEMANTIC_POSITION: +		hw = NV40_FP_OP_INPUT_SRC_POSITION; +		break; +	case TGSI_SEMANTIC_COLOR: +		if (fdec->Semantic.SemanticIndex == 0) { +			hw = NV40_FP_OP_INPUT_SRC_COL0; +		} else +		if 
(fdec->Semantic.SemanticIndex == 1) { +			hw = NV40_FP_OP_INPUT_SRC_COL1; +		} else { +			NOUVEAU_ERR("bad colour semantic index\n"); +			return FALSE; +		} +		break; +	case TGSI_SEMANTIC_FOG: +		hw = NV40_FP_OP_INPUT_SRC_FOGC; +		break; +	case TGSI_SEMANTIC_GENERIC: +		if (fdec->Semantic.SemanticIndex <= 7) { +			hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. +						     SemanticIndex); +		} else { +			NOUVEAU_ERR("bad generic semantic index\n"); +			return FALSE; +		} +		break; +	default: +		NOUVEAU_ERR("bad input semantic\n"); +		return FALSE; +	} + +	fpc->attrib_map[fdec->DeclarationRange.First] = hw; +	return TRUE; +} + +static boolean +nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, +				const struct tgsi_full_declaration *fdec) +{ +	unsigned idx = fdec->DeclarationRange.First; +	unsigned hw; + +	switch (fdec->Semantic.SemanticName) { +	case TGSI_SEMANTIC_POSITION: +		hw = 1; +		break; +	case TGSI_SEMANTIC_COLOR: +		switch (fdec->Semantic.SemanticIndex) { +		case 0: hw = 0; break; +		case 1: hw = 2; break; +		case 2: hw = 3; break; +		case 3: hw = 4; break; +		default: +			NOUVEAU_ERR("bad rcol index\n"); +			return FALSE; +		} +		break; +	default: +		NOUVEAU_ERR("bad output semantic\n"); +		return FALSE; +	} + +	fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); +	fpc->r_temps |= (1 << hw); +	return TRUE; +} + +static boolean +nv40_fragprog_prepare(struct nv40_fpc *fpc) +{ +	struct tgsi_parse_context p; +	int high_temp = -1, i; + +	tgsi_parse_init(&p, fpc->fp->pipe.tokens); +	while (!tgsi_parse_end_of_tokens(&p)) { +		const union tgsi_full_token *tok = &p.FullToken; + +		tgsi_parse_token(&p); +		switch(tok->Token.Type) { +		case TGSI_TOKEN_TYPE_DECLARATION: +		{ +			const struct tgsi_full_declaration *fdec; +			fdec = &p.FullToken.FullDeclaration; +			switch (fdec->Declaration.File) { +			case TGSI_FILE_INPUT: +				if (!nv40_fragprog_parse_decl_attrib(fpc, fdec)) +					goto out_err; +				break; +			case TGSI_FILE_OUTPUT: +				if 
(!nv40_fragprog_parse_decl_output(fpc, fdec)) +					goto out_err; +				break; +			case TGSI_FILE_TEMPORARY: +				if (fdec->DeclarationRange.Last > high_temp) { +					high_temp = +						fdec->DeclarationRange.Last; +				} +				break; +			default: +				break; +			} +		} +			break; +		case TGSI_TOKEN_TYPE_IMMEDIATE: +		{ +			struct tgsi_full_immediate *imm; +			float vals[4]; +			 +			imm = &p.FullToken.FullImmediate; +			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); +			assert(fpc->nr_imm < MAX_IMM); + +			vals[0] = imm->u.ImmediateFloat32[0].Float; +			vals[1] = imm->u.ImmediateFloat32[1].Float; +			vals[2] = imm->u.ImmediateFloat32[2].Float; +			vals[3] = imm->u.ImmediateFloat32[3].Float; +			fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); +		} +			break; +		default: +			break; +		} +	} +	tgsi_parse_free(&p); + +	if (++high_temp) { +		fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); +		for (i = 0; i < high_temp; i++) +			fpc->r_temp[i] = temp(fpc); +		fpc->r_temps_discard = 0; +	} + +	return TRUE; + +out_err: +	if (fpc->r_temp) +		FREE(fpc->r_temp); +	tgsi_parse_free(&p); +	return FALSE; +} + +static void +nv40_fragprog_translate(struct nv40_context *nv40, +			struct nv40_fragment_program *fp) +{ +	struct tgsi_parse_context parse; +	struct nv40_fpc *fpc = NULL; + +	fpc = CALLOC(1, sizeof(struct nv40_fpc)); +	if (!fpc) +		return; +	fpc->fp = fp; +	fpc->num_regs = 2; + +	if (!nv40_fragprog_prepare(fpc)) { +		FREE(fpc); +		return; +	} + +	tgsi_parse_init(&parse, fp->pipe.tokens); + +	while (!tgsi_parse_end_of_tokens(&parse)) { +		tgsi_parse_token(&parse); + +		switch (parse.FullToken.Token.Type) { +		case TGSI_TOKEN_TYPE_INSTRUCTION: +		{ +			const struct tgsi_full_instruction *finst; + +			finst = &parse.FullToken.FullInstruction; +			if (!nv40_fragprog_parse_instruction(fpc, finst)) +				goto out_err; +		} +			break; +		default: +			break; +		} +	} + +	fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; + +	/* Terminate final 
instruction */ +	fp->insn[fpc->inst_offset] |= 0x00000001; + +	/* Append NOP + END instruction, may or may not be necessary. */ +	fpc->inst_offset = fp->insn_len; +	grow_insns(fpc, 4); +	fp->insn[fpc->inst_offset + 0] = 0x00000001; +	fp->insn[fpc->inst_offset + 1] = 0x00000000; +	fp->insn[fpc->inst_offset + 2] = 0x00000000; +	fp->insn[fpc->inst_offset + 3] = 0x00000000; +	 +	fp->translated = TRUE; +out_err: +	tgsi_parse_free(&parse); +	if (fpc->r_temp) +		FREE(fpc->r_temp); +	FREE(fpc); +} + +static void +nv40_fragprog_upload(struct nv40_context *nv40, +		     struct nv40_fragment_program *fp) +{ +	struct pipe_winsys *ws = nv40->pipe.winsys; +	const uint32_t le = 1; +	uint32_t *map; +	int i; + +	map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + +#if 0 +	for (i = 0; i < fp->insn_len; i++) { +		fflush(stdout); fflush(stderr); +		NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); +		fflush(stdout); fflush(stderr); +	} +#endif + +	if ((*(const uint8_t *)&le)) { +		for (i = 0; i < fp->insn_len; i++) { +			map[i] = fp->insn[i]; +		} +	} else { +		/* Weird swapping for big-endian chips */ +		for (i = 0; i < fp->insn_len; i++) { +			map[i] = ((fp->insn[i] & 0xffff) << 16) | +				  ((fp->insn[i] >> 16) & 0xffff); +		} +	} + +	ws->buffer_unmap(ws, fp->buffer); +} + +static boolean +nv40_fragprog_validate(struct nv40_context *nv40) +{ +	struct nv40_fragment_program *fp = nv40->fragprog; +	struct pipe_buffer *constbuf = +		nv40->constbuf[PIPE_SHADER_FRAGMENT]; +	struct pipe_winsys *ws = nv40->pipe.winsys; +	struct nouveau_stateobj *so; +	boolean new_consts = FALSE; +	int i; + +	if (fp->translated) +		goto update_constants; + +	nv40->fallback_swrast &= ~NV40_NEW_FRAGPROG; +	nv40_fragprog_translate(nv40, fp); +	if (!fp->translated) { +		nv40->fallback_swrast |= NV40_NEW_FRAGPROG; +		return FALSE; +	} + +	fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); +	nv40_fragprog_upload(nv40, fp); + +	so = so_new(4, 1); +	so_method(so, nv40->screen->curie, 
NV40TCL_FP_ADDRESS, 1); +	so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | +		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, +		  NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1); +	so_method(so, nv40->screen->curie, NV40TCL_FP_CONTROL, 1); +	so_data  (so, fp->fp_control); +	so_ref(so, &fp->so); + +update_constants: +	if (fp->nr_consts) { +		float *map; +		 +		map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); +		for (i = 0; i < fp->nr_consts; i++) { +			struct nv40_fragment_program_data *fpd = &fp->consts[i]; +			uint32_t *p = &fp->insn[fpd->offset]; +			uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; + +			if (!memcmp(p, cb, 4 * sizeof(float))) +				continue; +			memcpy(p, cb, 4 * sizeof(float)); +			new_consts = TRUE; +		} +		ws->buffer_unmap(ws, constbuf); + +		if (new_consts) +			nv40_fragprog_upload(nv40, fp); +	} + +	if (new_consts || fp->so != nv40->state.hw[NV40_STATE_FRAGPROG]) { +		so_ref(fp->so, &nv40->state.hw[NV40_STATE_FRAGPROG]); +		return TRUE; +	} + +	return FALSE; +} + +void +nv40_fragprog_destroy(struct nv40_context *nv40, +		      struct nv40_fragment_program *fp) +{ +	if (fp->insn_len) +		FREE(fp->insn); +} + +struct nv40_state_entry nv40_state_fragprog = { +	.validate = nv40_fragprog_validate, +	.dirty = { +		.pipe = NV40_NEW_FRAGPROG, +		.hw = NV40_STATE_FRAGPROG +	} +}; + diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c new file mode 100644 index 0000000000..0227d22620 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -0,0 +1,168 @@ +#include "nv40_context.h" + +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw)            \ +{                                                                              \ +  TRUE,                                                                        \ +  PIPE_FORMAT_##m,                                                             \ +  NV40TCL_TEX_FORMAT_FORMAT_##tf,                                  
            \ +  (NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y |         \ +   NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w |         \ +   NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y |         \ +   NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w),         \ +  ((NV40TCL_TEX_FILTER_SIGNED_RED*sx) | (NV40TCL_TEX_FILTER_SIGNED_GREEN*sy) |       \ +   (NV40TCL_TEX_FILTER_SIGNED_BLUE*sz) | (NV40TCL_TEX_FILTER_SIGNED_ALPHA*sw))       \ +} + +struct nv40_texture_format { +	boolean defined; +	uint	pipe; +	int     format; +	int     swizzle; +	int     sign; +}; + +static struct nv40_texture_format +nv40_texture_formats[] = { +	_(A8R8G8B8_UNORM, A8R8G8B8,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0), +	_(A1R5G5B5_UNORM, A1R5G5B5,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0), +	_(A4R4G4B4_UNORM, A4R4G4B4,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0), +	_(R5G6B5_UNORM  , R5G6B5  ,   S1,   S1,   S1,  ONE, X, Y, Z, W, 0, 0, 0, 0), +	_(L8_UNORM      , L8      ,   S1,   S1,   S1,  ONE, X, X, X, X, 0, 0, 0, 0), +	_(A8_UNORM      , L8      , ZERO, ZERO, ZERO,   S1, X, X, X, X, 0, 0, 0, 0), +	_(R16_SNORM     , A16     , ZERO, ZERO,   S1,  ONE, X, X, X, Y, 1, 1, 1, 1), +	_(I8_UNORM      , L8      ,   S1,   S1,   S1,   S1, X, X, X, X, 0, 0, 0, 0), +	_(A8L8_UNORM    , A8L8    ,   S1,   S1,   S1,   S1, X, X, X, Y, 0, 0, 0, 0), +	_(Z16_UNORM     , Z16     ,   S1,   S1,   S1,  ONE, X, X, X, X, 0, 0, 0, 0), +	_(Z24S8_UNORM   , Z24     ,   S1,   S1,   S1,  ONE, X, X, X, X, 0, 0, 0, 0), +	_(DXT1_RGB      , DXT1    ,   S1,   S1,   S1,  ONE, X, Y, Z, W, 0, 0, 0, 0), +	_(DXT1_RGBA     , DXT1    ,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0), +	_(DXT3_RGBA     , DXT3    ,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0), +	_(DXT5_RGBA     , DXT5    ,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0), +	{}, +}; + +static struct nv40_texture_format * +nv40_fragtex_format(uint pipe_format) +{ +	struct 
nv40_texture_format *tf = nv40_texture_formats; + +	while (tf->defined) { +		if (tf->pipe == pipe_format) +			return tf; +		tf++; +	} + +	NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); +	return NULL; +} + + +static struct nouveau_stateobj * +nv40_fragtex_build(struct nv40_context *nv40, int unit) +{ +	struct nv40_sampler_state *ps = nv40->tex_sampler[unit]; +	struct nv40_miptree *nv40mt = nv40->tex_miptree[unit]; +	struct pipe_texture *pt = &nv40mt->base; +	struct nv40_texture_format *tf; +	struct nouveau_stateobj *so; +	uint32_t txf, txs, txp; +	unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + +	tf = nv40_fragtex_format(pt->format); +	if (!tf) +		assert(0); + +	txf  = ps->fmt; +	txf |= tf->format | 0x8000; +	txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT); + +	if (1) /* XXX */ +		txf |= NV40TCL_TEX_FORMAT_NO_BORDER; + +	switch (pt->target) { +	case PIPE_TEXTURE_CUBE: +		txf |= NV40TCL_TEX_FORMAT_CUBIC; +		/* fall-through */ +	case PIPE_TEXTURE_2D: +		txf |= NV40TCL_TEX_FORMAT_DIMS_2D; +		break; +	case PIPE_TEXTURE_3D: +		txf |= NV40TCL_TEX_FORMAT_DIMS_3D; +		break; +	case PIPE_TEXTURE_1D: +		txf |= NV40TCL_TEX_FORMAT_DIMS_1D; +		break; +	default: +		NOUVEAU_ERR("Unknown target %d\n", pt->target); +		return NULL; +	} + +	if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { +		txp = 0; +	} else { +		txp  = nv40mt->level[0].pitch; +		txf |= NV40TCL_TEX_FORMAT_LINEAR; +	} + +	txs = tf->swizzle; + +	so = so_new(16, 2); +	so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8); +	so_reloc (so, nv40mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); +	so_reloc (so, nv40mt->buffer, txf, tex_flags | NOUVEAU_BO_OR, +		  NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1); +	so_data  (so, ps->wrap); +	so_data  (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); +	so_data  (so, txs); +	so_data  (so, ps->filt | tf->sign | 0x2000 /*voodoo*/); +	so_data  (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) | +		       
pt->height[0]); +	so_data  (so, ps->bcol); +	so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1); +	so_data  (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); + +	return so; +} + +static boolean +nv40_fragtex_validate(struct nv40_context *nv40) +{ +	struct nv40_fragment_program *fp = nv40->fragprog; +	struct nv40_state *state = &nv40->state; +	struct nouveau_stateobj *so; +	unsigned samplers, unit; + +	samplers = state->fp_samplers & ~fp->samplers; +	while (samplers) { +		unit = ffs(samplers) - 1; +		samplers &= ~(1 << unit); + +		so = so_new(2, 0); +		so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1); +		so_data  (so, 0); +		so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); +		state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); +	} + +	samplers = nv40->dirty_samplers & fp->samplers; +	while (samplers) { +		unit = ffs(samplers) - 1; +		samplers &= ~(1 << unit); + +		so = nv40_fragtex_build(nv40, unit); +		so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); +		state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); +	} + +	nv40->state.fp_samplers = fp->samplers; +	return FALSE; +} + +struct nv40_state_entry nv40_state_fragtex = { +	.validate = nv40_fragtex_validate, +	.dirty = { +		.pipe = NV40_NEW_SAMPLER | NV40_NEW_FRAGPROG, +		.hw = 0 +	} +}; + diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c new file mode 100644 index 0000000000..638d279aa5 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -0,0 +1,227 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv40_context.h" + +static void +nv40_miptree_layout(struct nv40_miptree *mt) +{ +	struct pipe_texture *pt = &mt->base; +	uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; +	uint offset = 0; +	int nr_faces, l, f; +	uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | +		                           
PIPE_TEXTURE_USAGE_DEPTH_STENCIL | +		                           PIPE_TEXTURE_USAGE_RENDER_TARGET | +		                           PIPE_TEXTURE_USAGE_DISPLAY_TARGET | +		                           PIPE_TEXTURE_USAGE_PRIMARY); + +	if (pt->target == PIPE_TEXTURE_CUBE) { +		nr_faces = 6; +	} else +	if (pt->target == PIPE_TEXTURE_3D) { +		nr_faces = pt->depth[0]; +	} else { +		nr_faces = 1; +	} + +	for (l = 0; l <= pt->last_level; l++) { +		pt->width[l] = width; +		pt->height[l] = height; +		pt->depth[l] = depth; +		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); +		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + +		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) +			mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64); +		else +			mt->level[l].pitch = pt->width[l] * pt->block.size; + +		mt->level[l].image_offset = +			CALLOC(nr_faces, sizeof(unsigned)); + +		width  = MAX2(1, width  >> 1); +		height = MAX2(1, height >> 1); +		depth  = MAX2(1, depth  >> 1); +	} + +	for (f = 0; f < nr_faces; f++) { +		for (l = 0; l < pt->last_level; l++) { +			mt->level[l].image_offset[f] = offset; + +			if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && +			    pt->width[l + 1] > 1 && pt->height[l + 1] > 1) +				offset += align(mt->level[l].pitch * pt->height[l], 64); +			else +				offset += mt->level[l].pitch * pt->height[l]; +		} + +		mt->level[l].image_offset[f] = offset; +		offset += mt->level[l].pitch * pt->height[l]; +	} + +	mt->total_size = offset; +} + +static struct pipe_texture * +nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) +{ +	struct pipe_winsys *ws = pscreen->winsys; +	struct nv40_miptree *mt; +	unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | +	                     NOUVEAU_BUFFER_USAGE_TEXTURE; + +	mt = MALLOC(sizeof(struct nv40_miptree)); +	if (!mt) +		return NULL; +	mt->base = *pt; +	mt->base.refcount = 1; +	mt->base.screen = pscreen; + +	/* Swizzled textures must be POT */ +	if (pt->width[0] 
& (pt->width[0] - 1) || +	    pt->height[0] & (pt->height[0] - 1)) +		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +	else +	if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | +	                     PIPE_TEXTURE_USAGE_DISPLAY_TARGET | +	                     PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) +		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +	else +	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) +		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +	else { +		switch (pt->format) { +		/* TODO: Figure out which formats can be swizzled */ +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_X8R8G8B8_UNORM: +		case PIPE_FORMAT_R16_SNORM: +		{ +			if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE)) +				mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +			break; +		} +		default: +			mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; +		} +	} + +	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) +		buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + +	nv40_miptree_layout(mt); + +	mt->buffer = ws->buffer_create(ws, 256, buf_usage, mt->total_size); +	if (!mt->buffer) { +		FREE(mt); +		return NULL; +	} + +	return &mt->base; +} + +static struct pipe_texture * +nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, +		     const unsigned *stride, struct pipe_buffer *pb) +{ +	struct nv40_miptree *mt; + +	/* Only supports 2D, non-mipmapped textures for the moment */ +	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || +	    pt->depth[0] != 1) +		return NULL; + +	mt = CALLOC_STRUCT(nv40_miptree); +	if (!mt) +		return NULL; + +	mt->base = *pt; +	mt->base.refcount = 1; +	mt->base.screen = pscreen; +	mt->level[0].pitch = stride[0]; +	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + +	pipe_buffer_reference(pscreen, &mt->buffer, pb); +	return &mt->base; +} + +static void +nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) +{ +	struct pipe_texture *pt = *ppt; +	struct nv40_miptree *mt = (struct 
nv40_miptree *)pt; +	int l; + +	*ppt = NULL; +	if (--pt->refcount) +		return; + +	pipe_buffer_reference(pscreen, &mt->buffer, NULL); +	for (l = 0; l <= pt->last_level; l++) { +		if (mt->level[l].image_offset) +			FREE(mt->level[l].image_offset); +	} + +	FREE(mt); +} + +static struct pipe_surface * +nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, +			 unsigned face, unsigned level, unsigned zslice, +			 unsigned flags) +{ +	struct nv40_miptree *mt = (struct nv40_miptree *)pt; +	struct nv04_surface *ns; + +	ns = CALLOC_STRUCT(nv04_surface); +	if (!ns) +		return NULL; +	pipe_texture_reference(&ns->base.texture, pt); +	ns->base.format = pt->format; +	ns->base.width = pt->width[level]; +	ns->base.height = pt->height[level]; +	ns->base.usage = flags; +	ns->base.status = PIPE_SURFACE_STATUS_DEFINED; +	ns->base.refcount = 1; +	ns->base.face = face; +	ns->base.level = level; +	ns->base.zslice = zslice; +	ns->pitch = mt->level[level].pitch; + +	if (pt->target == PIPE_TEXTURE_CUBE) { +		ns->base.offset = mt->level[level].image_offset[face]; +	} else +	if (pt->target == PIPE_TEXTURE_3D) { +		ns->base.offset = mt->level[level].image_offset[zslice]; +	} else { +		ns->base.offset = mt->level[level].image_offset[0]; +	} + +	return &ns->base; +} + +static void +nv40_miptree_surface_del(struct pipe_screen *pscreen, +			 struct pipe_surface **psurface) +{ +	struct pipe_surface *ps = *psurface; + +	*psurface = NULL; +	if (--ps->refcount > 0) +		return; + +	pipe_texture_reference(&ps->texture, NULL); +	FREE(ps); +} + +void +nv40_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ +	pscreen->texture_create = nv40_miptree_create; +	pscreen->texture_blanket = nv40_miptree_blanket; +	pscreen->texture_release = nv40_miptree_release; +	pscreen->get_tex_surface = nv40_miptree_surface_new; +	pscreen->tex_surface_release = nv40_miptree_surface_del; +} + diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c new file 
mode 100644 index 0000000000..9b9a43f49d --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -0,0 +1,122 @@ +#include "pipe/p_context.h" + +#include "nv40_context.h" + +struct nv40_query { +	struct nouveau_resource *object; +	unsigned type; +	boolean ready; +	uint64_t result; +}; + +static INLINE struct nv40_query * +nv40_query(struct pipe_query *pipe) +{ +	return (struct nv40_query *)pipe; +} + +static struct pipe_query * +nv40_query_create(struct pipe_context *pipe, unsigned query_type) +{ +	struct nv40_query *q; + +	q = CALLOC(1, sizeof(struct nv40_query)); +	q->type = query_type; + +	return (struct pipe_query *)q; +} + +static void +nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_query *q = nv40_query(pq); + +	if (q->object) +		nv40->nvws->res_free(&q->object); +	FREE(q); +} + +static void +nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_query *q = nv40_query(pq); + +	assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + +	/* Happens when end_query() is called, then another begin_query() +	 * without querying the result in-between.  For now we'll wait for +	 * the existing query to notify completion, but it could be better. 
+	 */ +	if (q->object) { +		uint64_t tmp; +		pipe->get_query_result(pipe, pq, 1, &tmp); +	} + +	if (nv40->nvws->res_alloc(nv40->screen->query_heap, 1, NULL, &q->object)) +		assert(0); +	nv40->nvws->notifier_reset(nv40->screen->query, q->object->start); + +	BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); +	OUT_RING  (1); +	BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1); +	OUT_RING  (1); + +	q->ready = FALSE; +} + +static void +nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_query *q = nv40_query(pq); + +	BEGIN_RING(curie, NV40TCL_QUERY_GET, 1); +	OUT_RING  ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | +		   ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); +	FIRE_RING(NULL); +} + +static boolean +nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq, +		  boolean wait, uint64_t *result) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_query *q = nv40_query(pq); +	struct nouveau_winsys *nvws = nv40->nvws; + +	assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); + +	if (!q->ready) { +		unsigned status; + +		status = nvws->notifier_status(nv40->screen->query, +					       q->object->start); +		if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { +			if (wait == FALSE) +				return FALSE; +			nvws->notifier_wait(nv40->screen->query, q->object->start, +					    NV_NOTIFY_STATE_STATUS_COMPLETED, +					    0); +		} + +		q->result = nvws->notifier_retval(nv40->screen->query, +						  q->object->start); +		q->ready = TRUE; +		nvws->res_free(&q->object); +	} + +	*result = q->result; +	return TRUE; +} + +void +nv40_init_query_functions(struct nv40_context *nv40) +{ +	nv40->pipe.create_query = nv40_query_create; +	nv40->pipe.destroy_query = nv40_query_destroy; +	nv40->pipe.begin_query = nv40_query_begin; +	nv40->pipe.end_query = nv40_query_end; +	nv40->pipe.get_query_result = nv40_query_result; +} diff --git a/src/gallium/drivers/nv40/nv40_screen.c 
b/src/gallium/drivers/nv40/nv40_screen.c new file mode 100644 index 0000000000..0d4baefaea --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -0,0 +1,304 @@ +#include "pipe/p_screen.h" +#include "util/u_simple_screen.h" + +#include "nv40_context.h" +#include "nv40_screen.h" + +#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf +#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 +#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 + +static const char * +nv40_screen_get_name(struct pipe_screen *pscreen) +{ +	struct nv40_screen *screen = nv40_screen(pscreen); +	struct nouveau_device *dev = screen->nvws->channel->device; +	static char buffer[128]; + +	snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); +	return buffer; +} + +static const char * +nv40_screen_get_vendor(struct pipe_screen *pscreen) +{ +	return "nouveau"; +} + +static int +nv40_screen_get_param(struct pipe_screen *pscreen, int param) +{ +	struct nv40_screen *screen = nv40_screen(pscreen); + +	switch (param) { +	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: +		return 16; +	case PIPE_CAP_NPOT_TEXTURES: +		return 1; +	case PIPE_CAP_TWO_SIDED_STENCIL: +		return 1; +	case PIPE_CAP_GLSL: +		return 0; +	case PIPE_CAP_S3TC: +		return 1; +	case PIPE_CAP_ANISOTROPIC_FILTER: +		return 1; +	case PIPE_CAP_POINT_SPRITE: +		return 1; +	case PIPE_CAP_MAX_RENDER_TARGETS: +		return 4; +	case PIPE_CAP_OCCLUSION_QUERY: +		return 1; +	case PIPE_CAP_TEXTURE_SHADOW_MAP: +		return 1; +	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: +		return 13; +	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: +		return 10; +	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: +		return 13; +	case PIPE_CAP_TEXTURE_MIRROR_CLAMP: +	case PIPE_CAP_TEXTURE_MIRROR_REPEAT: +		return 1; +	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: +		return 0; /* We have 4 - but unsupported currently */ +	case NOUVEAU_CAP_HW_VTXBUF: +		return 1; +	case NOUVEAU_CAP_HW_IDXBUF: +		if (screen->curie->grclass == NV40TCL) +			return 1; +		return 0; +	default: +		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); +		return 0; 
+	} +} + +static float +nv40_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ +	switch (param) { +	case PIPE_CAP_MAX_LINE_WIDTH: +	case PIPE_CAP_MAX_LINE_WIDTH_AA: +		return 10.0; +	case PIPE_CAP_MAX_POINT_WIDTH: +	case PIPE_CAP_MAX_POINT_WIDTH_AA: +		return 64.0; +	case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: +		return 16.0; +	case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: +		return 16.0; +	default: +		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); +		return 0.0; +	} +} + +static boolean +nv40_screen_surface_format_supported(struct pipe_screen *pscreen, +				     enum pipe_format format, +				     enum pipe_texture_target target, +				     unsigned tex_usage, unsigned geom_flags) +{ +	if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { +		switch (format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_R5G6B5_UNORM:  +		case PIPE_FORMAT_Z24S8_UNORM: +		case PIPE_FORMAT_Z16_UNORM: +			return TRUE; +		default: +			break; +		} +	} else { +		switch (format) { +		case PIPE_FORMAT_A8R8G8B8_UNORM: +		case PIPE_FORMAT_A1R5G5B5_UNORM: +		case PIPE_FORMAT_A4R4G4B4_UNORM: +		case PIPE_FORMAT_R5G6B5_UNORM: +		case PIPE_FORMAT_R16_SNORM: +		case PIPE_FORMAT_L8_UNORM: +		case PIPE_FORMAT_A8_UNORM: +		case PIPE_FORMAT_I8_UNORM: +		case PIPE_FORMAT_A8L8_UNORM: +		case PIPE_FORMAT_Z16_UNORM: +		case PIPE_FORMAT_Z24S8_UNORM: +		case PIPE_FORMAT_DXT1_RGB: +		case PIPE_FORMAT_DXT1_RGBA: +		case PIPE_FORMAT_DXT3_RGBA: +		case PIPE_FORMAT_DXT5_RGBA: +			return TRUE; +		default: +			break; +		} +	} + +	return FALSE; +} + +static struct pipe_buffer * +nv40_surface_buffer(struct pipe_surface *surf) +{ +	struct nv40_miptree *mt = (struct nv40_miptree *)surf->texture; + +	return mt->buffer; +} + +static void +nv40_screen_destroy(struct pipe_screen *pscreen) +{ +	struct nv40_screen *screen = nv40_screen(pscreen); +	struct nouveau_winsys *nvws = screen->nvws; + +	nvws->res_free(&screen->vp_exec_heap); +	nvws->res_free(&screen->vp_data_heap); +	nvws->res_free(&screen->query_heap); +	
nvws->notifier_free(&screen->query);
+	nvws->notifier_free(&screen->sync);
+	nvws->grobj_free(&screen->curie);
+
+	FREE(pscreen);
+}
+
+/* Instantiate a pipe_screen for an NV4x GPU.
+ *
+ * Allocates the screen wrapper, binds the nv04 2D blit engine, creates
+ * the 3D ("curie") graphics object whose class is chosen from the
+ * chipset id, allocates the sync and query notifiers plus the query and
+ * vertex-program resource heaps, and emits the one-time static curie
+ * state.  Returns the embedded pipe_screen on success, NULL on failure.
+ */
+struct pipe_screen *
+nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws)
+{
+	struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen);
+	struct nouveau_stateobj *so;
+	unsigned curie_class = 0;
+	unsigned chipset = nvws->channel->device->chipset;
+	int ret;
+
+	if (!screen)
+		return NULL;
+	screen->nvws = nvws;
+
+	/* 2D engine setup */
+	screen->eng2d = nv04_surface_2d_init(nvws);
+	screen->eng2d->buf = nv40_surface_buffer;
+
+	/* 3D object: pick the object class from the per-chipset bitmasks */
+	switch (chipset & 0xf0) {
+	case 0x40:
+		if (NV4X_GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f)))
+			curie_class = NV40TCL;
+		else
+		if (NV4X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f)))
+			curie_class = NV44TCL;
+		break;
+	case 0x60:
+		if (NV6X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f)))
+			curie_class = NV44TCL;
+		break;
+	}
+
+	if (!curie_class) {
+		NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", chipset);
+		/* NOTE(review): screen and screen->eng2d leak on this path;
+		 * nv40_screen_destroy() cannot be used yet since the
+		 * notifiers/heaps it frees are not allocated -- confirm a
+		 * partial-teardown path is wanted here. */
+		return NULL;
+	}
+
+	ret = nvws->grobj_alloc(nvws, curie_class, &screen->curie);
+	if (ret) {
+		NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+		/* Was "return FALSE": this function returns a pointer, so
+		 * the failure value must be NULL, not a boolean constant. */
+		return NULL;
+	}
+
+	/* Notifier for sync purposes */
+	ret = nvws->notifier_alloc(nvws, 1, &screen->sync);
+	if (ret) {
+		NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+		nv40_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	/* Query objects */
+	ret = nvws->notifier_alloc(nvws, 32, &screen->query);
+	if (ret) {
+		NOUVEAU_ERR("Error initialising query objects: %d\n", ret);
+		nv40_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	ret = nvws->res_init(&screen->query_heap, 0, 32);
+	if (ret) {
+		NOUVEAU_ERR("Error initialising query object heap: %d\n", ret);
+		nv40_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	/* Vtxprog resources: 512 exec slots, 256 data (const) slots */
+	if (nvws->res_init(&screen->vp_exec_heap, 0, 512) ||
+	    nvws->res_init(&screen->vp_data_heap, 0, 256)) {
+		nv40_screen_destroy(&screen->pipe);
+		return NULL;
+	}
+
+	/* Static curie initialisation: bind DMA objects for notifiers,
+	 * textures, render targets, vertex buffers and fences. */
+	so = so_new(128, 0);
+	so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1);
+	so_data  (so, screen->sync->handle);
+	so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2);
+	so_data  (so, nvws->channel->vram->handle);
+	so_data  (so, nvws->channel->gart->handle);
+	so_method(so, screen->curie, NV40TCL_DMA_COLOR1, 1);
+	so_data  (so, nvws->channel->vram->handle);
+	so_method(so, screen->curie, NV40TCL_DMA_COLOR0, 2);
+	so_data  (so, nvws->channel->vram->handle);
+	so_data  (so, nvws->channel->vram->handle);
+	so_method(so, screen->curie, NV40TCL_DMA_VTXBUF0, 2);
+	so_data  (so, nvws->channel->vram->handle);
+	so_data  (so, nvws->channel->gart->handle);
+	so_method(so, screen->curie, NV40TCL_DMA_FENCE, 2);
+	so_data  (so, 0);
+	so_data  (so, screen->query->handle);
+	so_method(so, screen->curie, NV40TCL_DMA_UNK01AC, 2);
+	so_data  (so, nvws->channel->vram->handle);
+	so_data  (so, nvws->channel->vram->handle);
+	so_method(so, screen->curie, NV40TCL_DMA_COLOR2, 2);
+	so_data  (so, nvws->channel->vram->handle);
+	so_data  (so, nvws->channel->vram->handle);
+
+	/* Magic state -- meaning unknown, values taken from hw traces. */
+	so_method(so, screen->curie, 0x1ea4, 3);
+	so_data  (so, 0x00000010);
+	so_data  (so, 0x01000100);
+	so_data  (so, 0xff800006);
+
+	/* vtxprog output routing */
+	so_method(so, screen->curie, 0x1fc4, 1);
+	so_data  (so, 0x06144321);
+	so_method(so, screen->curie, 0x1fc8, 2);
+	so_data  (so, 0xedcba987);
+	so_data  (so, 0x00000021);
+	so_method(so, screen->curie, 0x1fd0, 1);
+	so_data  (so, 0x00171615);
+	so_method(so, screen->curie, 0x1fd4, 1);
+	so_data  (so, 0x001b1a19);
+
+	so_method(so, screen->curie, 0x1ef8, 1);
+	so_data  (so, 0x0020ffff);
+	so_method(so, screen->curie, 0x1d64, 1);
+	so_data  (so, 0x00d30000);
+	so_method(so, screen->curie, 0x1e94, 1);
+	so_data  (so, 0x00000001);
+
+	so_emit(nvws, so);
+	so_ref(NULL, &so);
+	nvws->push_flush(nvws, 0, NULL);
+
+	screen->pipe.winsys = ws;
+	screen->pipe.destroy = 
nv40_screen_destroy; + +	screen->pipe.get_name = nv40_screen_get_name; +	screen->pipe.get_vendor = nv40_screen_get_vendor; +	screen->pipe.get_param = nv40_screen_get_param; +	screen->pipe.get_paramf = nv40_screen_get_paramf; + +	screen->pipe.is_format_supported = nv40_screen_surface_format_supported; + +	nv40_screen_init_miptree_functions(&screen->pipe); +	nv40_screen_init_transfer_functions(&screen->pipe); +	u_simple_screen_init(&screen->pipe); + +	return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h new file mode 100644 index 0000000000..7b503bd207 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_screen.h @@ -0,0 +1,40 @@ +#ifndef __NV40_SCREEN_H__ +#define __NV40_SCREEN_H__ + +#include "pipe/p_screen.h" +#include "nv04/nv04_surface_2d.h" + +struct nv40_screen { +	struct pipe_screen pipe; + +	struct nouveau_winsys *nvws; + +	unsigned cur_pctx; + +	/* HW graphics objects */ +	struct nv04_surface_2d *eng2d; +	struct nouveau_grobj *curie; +	struct nouveau_notifier *sync; + +	/* Query object resources */ +	struct nouveau_notifier *query; +	struct nouveau_resource *query_heap; + +	/* Vtxprog resources */ +	struct nouveau_resource *vp_exec_heap; +	struct nouveau_resource *vp_data_heap; + +	/* Current 3D state of channel */ +	struct nouveau_stateobj *state[NV40_STATE_MAX]; +}; + +static INLINE struct nv40_screen * +nv40_screen(struct pipe_screen *screen) +{ +	return (struct nv40_screen *)screen; +} + +void +nv40_screen_init_transfer_functions(struct pipe_screen *pscreen); + +#endif diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h new file mode 100644 index 0000000000..854dccf548 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_shader.h @@ -0,0 +1,556 @@ +#ifndef __NV40_SHADER_H__ +#define __NV40_SHADER_H__ + +/* Vertex programs instruction set + * + * The NV40 instruction set is very similar to NV30.  
Most fields are in + * a slightly different position in the instruction however. + * + * Merged instructions + *     In some cases it is possible to put two instructions into one opcode + *     slot.  The rules for when this is OK is not entirely clear to me yet. + * + *     There are separate writemasks and dest temp register fields for each + *     grouping of instructions.  There is however only one field with the + *     ID of a result register.  Writing to temp/result regs is selected by + *     setting VEC_RESULT/SCA_RESULT. + * + * Temporary registers + *     The source/dest temp register fields have been extended by 1 bit, to + *     give a total of 32 temporary registers. + * + * Relative Addressing + *     NV40 can use an address register to index into vertex attribute regs. + *     This is done by putting the offset value into INPUT_SRC and setting + *     the INDEX_INPUT flag. + * + * Conditional execution (see NV_vertex_program{2,3} for details) + *     There is a second condition code register on NV40, it's use is enabled + *     by setting the COND_REG_SELECT_1 flag. + * + * Texture lookup + *     TODO + */ + +/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ +#define NV40_VP_INST_VEC_RESULT                                        (1 << 30) +/* uncertain.. 
*/ +#define NV40_VP_INST_COND_UPDATE_ENABLE                        ((1 << 14)|1<<29) +/* use address reg as index into attribs */ +#define NV40_VP_INST_INDEX_INPUT                                       (1 << 27) +#define NV40_VP_INST_COND_REG_SELECT_1                                 (1 << 25) +#define NV40_VP_INST_ADDR_REG_SELECT_1                                 (1 << 24) +#define NV40_VP_INST_SRC2_ABS                                          (1 << 23) +#define NV40_VP_INST_SRC1_ABS                                          (1 << 22) +#define NV40_VP_INST_SRC0_ABS                                          (1 << 21) +#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT                                      15 +#define NV40_VP_INST_VEC_DEST_TEMP_MASK                             (0x1F << 15) +#define NV40_VP_INST_COND_TEST_ENABLE                                  (1 << 13) +#define NV40_VP_INST_COND_SHIFT                                               10 +#define NV40_VP_INST_COND_MASK                                       (0x7 << 10) +#    define NV40_VP_INST_COND_FL                                               0 +#    define NV40_VP_INST_COND_LT                                               1 +#    define NV40_VP_INST_COND_EQ                                               2 +#    define NV40_VP_INST_COND_LE                                               3 +#    define NV40_VP_INST_COND_GT                                               4 +#    define NV40_VP_INST_COND_NE                                               5 +#    define NV40_VP_INST_COND_GE                                               6 +#    define NV40_VP_INST_COND_TR                                               7 +#define NV40_VP_INST_COND_SWZ_X_SHIFT                                          8 +#define NV40_VP_INST_COND_SWZ_X_MASK                                    (3 << 8) +#define NV40_VP_INST_COND_SWZ_Y_SHIFT                                          6 +#define NV40_VP_INST_COND_SWZ_Y_MASK                             
       (3 << 6) +#define NV40_VP_INST_COND_SWZ_Z_SHIFT                                          4 +#define NV40_VP_INST_COND_SWZ_Z_MASK                                    (3 << 4) +#define NV40_VP_INST_COND_SWZ_W_SHIFT                                          2 +#define NV40_VP_INST_COND_SWZ_W_MASK                                    (3 << 2) +#define NV40_VP_INST_COND_SWZ_ALL_SHIFT                                        2 +#define NV40_VP_INST_COND_SWZ_ALL_MASK                               (0xFF << 2) +#define NV40_VP_INST_ADDR_SWZ_SHIFT                                            0 +#define NV40_VP_INST_ADDR_SWZ_MASK                                   (0x03 << 0) +#define NV40_VP_INST0_KNOWN ( \ +                NV40_VP_INST_INDEX_INPUT | \ +                NV40_VP_INST_COND_REG_SELECT_1 | \ +                NV40_VP_INST_ADDR_REG_SELECT_1 | \ +                NV40_VP_INST_SRC2_ABS | \ +                NV40_VP_INST_SRC1_ABS | \ +                NV40_VP_INST_SRC0_ABS | \ +                NV40_VP_INST_VEC_DEST_TEMP_MASK | \ +                NV40_VP_INST_COND_TEST_ENABLE | \ +                NV40_VP_INST_COND_MASK | \ +                NV40_VP_INST_COND_SWZ_ALL_MASK | \ +                NV40_VP_INST_ADDR_SWZ_MASK) + +/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ +#define NV40_VP_INST_VEC_OPCODE_SHIFT                                         22 +#define NV40_VP_INST_VEC_OPCODE_MASK                                (0x1F << 22) +#    define NV40_VP_INST_OP_NOP                                             0x00 +#    define NV40_VP_INST_OP_MOV                                             0x01 +#    define NV40_VP_INST_OP_MUL                                             0x02 +#    define NV40_VP_INST_OP_ADD                                             0x03 +#    define NV40_VP_INST_OP_MAD                                             0x04 +#    define NV40_VP_INST_OP_DP3                                             0x05 +#    define NV40_VP_INST_OP_DPH                                
             0x06 +#    define NV40_VP_INST_OP_DP4                                             0x07 +#    define NV40_VP_INST_OP_DST                                             0x08 +#    define NV40_VP_INST_OP_MIN                                             0x09 +#    define NV40_VP_INST_OP_MAX                                             0x0A +#    define NV40_VP_INST_OP_SLT                                             0x0B +#    define NV40_VP_INST_OP_SGE                                             0x0C +#    define NV40_VP_INST_OP_ARL                                             0x0D +#    define NV40_VP_INST_OP_FRC                                             0x0E +#    define NV40_VP_INST_OP_FLR                                             0x0F +#    define NV40_VP_INST_OP_SEQ                                             0x10 +#    define NV40_VP_INST_OP_SFL                                             0x11 +#    define NV40_VP_INST_OP_SGT                                             0x12 +#    define NV40_VP_INST_OP_SLE                                             0x13 +#    define NV40_VP_INST_OP_SNE                                             0x14 +#    define NV40_VP_INST_OP_STR                                             0x15 +#    define NV40_VP_INST_OP_SSG                                             0x16 +#    define NV40_VP_INST_OP_ARR                                             0x17 +#    define NV40_VP_INST_OP_ARA                                             0x18 +#    define NV40_VP_INST_OP_TXL                                             0x19 +#define NV40_VP_INST_SCA_OPCODE_SHIFT                                         27 +#define NV40_VP_INST_SCA_OPCODE_MASK                                (0x1F << 27) +#    define NV40_VP_INST_OP_NOP                                             0x00 +#    define NV40_VP_INST_OP_MOV                                             0x01 +#    define NV40_VP_INST_OP_RCP                                             0x02 +#    define 
NV40_VP_INST_OP_RCC                                             0x03 +#    define NV40_VP_INST_OP_RSQ                                             0x04 +#    define NV40_VP_INST_OP_EXP                                             0x05 +#    define NV40_VP_INST_OP_LOG                                             0x06 +#    define NV40_VP_INST_OP_LIT                                             0x07 +#    define NV40_VP_INST_OP_BRA                                             0x09 +#    define NV40_VP_INST_OP_CAL                                             0x0B +#    define NV40_VP_INST_OP_RET                                             0x0C +#    define NV40_VP_INST_OP_LG2                                             0x0D +#    define NV40_VP_INST_OP_EX2                                             0x0E +#    define NV40_VP_INST_OP_SIN                                             0x0F +#    define NV40_VP_INST_OP_COS                                             0x10 +#    define NV40_VP_INST_OP_PUSHA                                           0x13 +#    define NV40_VP_INST_OP_POPA                                            0x14 +#define NV40_VP_INST_CONST_SRC_SHIFT                                          12 +#define NV40_VP_INST_CONST_SRC_MASK                                 (0xFF << 12) +#define NV40_VP_INST_INPUT_SRC_SHIFT                                           8 +#define NV40_VP_INST_INPUT_SRC_MASK                                  (0x0F << 8) +#    define NV40_VP_INST_IN_POS                                                0 +#    define NV40_VP_INST_IN_WEIGHT                                             1 +#    define NV40_VP_INST_IN_NORMAL                                             2 +#    define NV40_VP_INST_IN_COL0                                               3 +#    define NV40_VP_INST_IN_COL1                                               4 +#    define NV40_VP_INST_IN_FOGC                                               5 +#    define NV40_VP_INST_IN_TC0             
                                   8 +#    define NV40_VP_INST_IN_TC(n)                                          (8+n) +#define NV40_VP_INST_SRC0H_SHIFT                                               0 +#define NV40_VP_INST_SRC0H_MASK                                      (0xFF << 0) +#define NV40_VP_INST1_KNOWN ( \ +                NV40_VP_INST_VEC_OPCODE_MASK | \ +                NV40_VP_INST_SCA_OPCODE_MASK | \ +                NV40_VP_INST_CONST_SRC_MASK  | \ +                NV40_VP_INST_INPUT_SRC_MASK  | \ +                NV40_VP_INST_SRC0H_MASK \ +                ) + +/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ +#define NV40_VP_INST_SRC0L_SHIFT                                              23 +#define NV40_VP_INST_SRC0L_MASK                                    (0x1FF << 23) +#define NV40_VP_INST_SRC1_SHIFT                                                6 +#define NV40_VP_INST_SRC1_MASK                                    (0x1FFFF << 6) +#define NV40_VP_INST_SRC2H_SHIFT                                               0 +#define NV40_VP_INST_SRC2H_MASK                                      (0x3F << 0) +#define NV40_VP_INST_IADDRH_SHIFT                                              0 +#define NV40_VP_INST_IADDRH_MASK                                     (0x1F << 0) + +/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ +#define NV40_VP_INST_IADDRL_SHIFT                                             29 +#define NV40_VP_INST_IADDRL_MASK                                       (7 << 29) +#define NV40_VP_INST_SRC2L_SHIFT                                              21 +#define NV40_VP_INST_SRC2L_MASK                                    (0x7FF << 21) +#define NV40_VP_INST_SCA_WRITEMASK_SHIFT                                      17 +#define NV40_VP_INST_SCA_WRITEMASK_MASK                              (0xF << 17) +#    define NV40_VP_INST_SCA_WRITEMASK_X                               (1 << 20) +#    define NV40_VP_INST_SCA_WRITEMASK_Y                               (1 << 19) +#    
define NV40_VP_INST_SCA_WRITEMASK_Z                               (1 << 18) +#    define NV40_VP_INST_SCA_WRITEMASK_W                               (1 << 17) +#define NV40_VP_INST_VEC_WRITEMASK_SHIFT                                      13 +#define NV40_VP_INST_VEC_WRITEMASK_MASK                              (0xF << 13) +#    define NV40_VP_INST_VEC_WRITEMASK_X                               (1 << 16) +#    define NV40_VP_INST_VEC_WRITEMASK_Y                               (1 << 15) +#    define NV40_VP_INST_VEC_WRITEMASK_Z                               (1 << 14) +#    define NV40_VP_INST_VEC_WRITEMASK_W                               (1 << 13) +#define NV40_VP_INST_SCA_RESULT                                        (1 << 12) +#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT                                       7 +#define NV40_VP_INST_SCA_DEST_TEMP_MASK                              (0x1F << 7) +#define NV40_VP_INST_DEST_SHIFT                                                2 +#define NV40_VP_INST_DEST_MASK                                         (31 << 2) +#    define NV40_VP_INST_DEST_POS                                              0 +#    define NV40_VP_INST_DEST_COL0                                             1 +#    define NV40_VP_INST_DEST_COL1                                             2 +#    define NV40_VP_INST_DEST_BFC0                                             3 +#    define NV40_VP_INST_DEST_BFC1                                             4 +#    define NV40_VP_INST_DEST_FOGC                                             5 +#    define NV40_VP_INST_DEST_PSZ                                              6 +#    define NV40_VP_INST_DEST_TC0                                              7 +#    define NV40_VP_INST_DEST_TC(n)                                        (7+n) +#    define NV40_VP_INST_DEST_TEMP                                          0x1F +#define NV40_VP_INST_INDEX_CONST                                        (1 << 1) +#define NV40_VP_INST_LAST            
                                   (1 << 0) +#define NV40_VP_INST3_KNOWN ( \ +                NV40_VP_INST_SRC2L_MASK |\ +                NV40_VP_INST_SCA_WRITEMASK_MASK |\ +                NV40_VP_INST_VEC_WRITEMASK_MASK |\ +                NV40_VP_INST_SCA_DEST_TEMP_MASK |\ +                NV40_VP_INST_DEST_MASK |\ +                NV40_VP_INST_INDEX_CONST) + +/* Useful to split the source selection regs into their pieces */ +#define NV40_VP_SRC0_HIGH_SHIFT                                                9 +#define NV40_VP_SRC0_HIGH_MASK                                        0x0001FE00 +#define NV40_VP_SRC0_LOW_MASK                                         0x000001FF +#define NV40_VP_SRC2_HIGH_SHIFT                                               11 +#define NV40_VP_SRC2_HIGH_MASK                                        0x0001F800 +#define NV40_VP_SRC2_LOW_MASK                                         0x000007FF + +/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ +#define NV40_VP_SRC_NEGATE                                             (1 << 16) +#define NV40_VP_SRC_SWZ_X_SHIFT                                               14 +#define NV40_VP_SRC_SWZ_X_MASK                                         (3 << 14) +#define NV40_VP_SRC_SWZ_Y_SHIFT                                               12 +#define NV40_VP_SRC_SWZ_Y_MASK                                         (3 << 12) +#define NV40_VP_SRC_SWZ_Z_SHIFT                                               10 +#define NV40_VP_SRC_SWZ_Z_MASK                                         (3 << 10) +#define NV40_VP_SRC_SWZ_W_SHIFT                                                8 +#define NV40_VP_SRC_SWZ_W_MASK                                          (3 << 8) +#define NV40_VP_SRC_SWZ_ALL_SHIFT                                              8 +#define NV40_VP_SRC_SWZ_ALL_MASK                                     (0xFF << 8) +#define NV40_VP_SRC_TEMP_SRC_SHIFT                                             2 +#define 
NV40_VP_SRC_TEMP_SRC_MASK                                    (0x1F << 2) +#define NV40_VP_SRC_REG_TYPE_SHIFT                                             0 +#define NV40_VP_SRC_REG_TYPE_MASK                                       (3 << 0) +#    define NV40_VP_SRC_REG_TYPE_UNK0                                          0 +#    define NV40_VP_SRC_REG_TYPE_TEMP                                          1 +#    define NV40_VP_SRC_REG_TYPE_INPUT                                         2 +#    define NV40_VP_SRC_REG_TYPE_CONST                                         3 + + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + *         0 - Opcode, output reg/mask, ATTRIB source + *         1 - Source 0 + *         2 - Source 1 + *         3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + *                 result.color == R0.xyzw + *                 result.depth == R1.z + * When the fragprog contains instructions to write depth, + * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + *  + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords.  As such instructions such as: + *  + *                 ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and + * SWIZZLE_ONE. 
+ * + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as + * SWIZZLE_ZERO is implemented simply by not writing to the relevant components + * of the destination. + * + * Looping + *   Loops appear to be fairly expensive on NV40 at least, the proprietary + *   driver goes to a lot of effort to avoid using the native looping + *   instructions.  If the total number of *executed* instructions between + *   REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. + *   The maximum loop count is 255. + * + * Conditional execution + *   TODO + *  + * Non-native instructions: + *         LIT + *         LRP - MAD+MAD + *         SUB - ADD, negate second source + *         RSQ - LG2 + EX2 + *         POW - LG2 + MUL + EX2 + *         SCS - COS + SIN + *         XPD + *         DP2 - MUL + ADD + *         NRM + */ + +//== Opcode / Destination selection == +#define NV40_FP_OP_PROGRAM_END                                          (1 << 0) +#define NV40_FP_OP_OUT_REG_SHIFT                                               1 +#define NV40_FP_OP_OUT_REG_MASK                                        (63 << 1) +/* Needs to be set when writing outputs to get expected result.. */ +#define NV40_FP_OP_OUT_REG_HALF                                         (1 << 7) +#define NV40_FP_OP_COND_WRITE_ENABLE                                    (1 << 8) +#define NV40_FP_OP_OUTMASK_SHIFT                                               9 +#define NV40_FP_OP_OUTMASK_MASK                                       (0xF << 9) +#    define NV40_FP_OP_OUT_X                                            (1 << 9) +#    define NV40_FP_OP_OUT_Y                                            (1 <<10) +#    define NV40_FP_OP_OUT_Z                                            (1 <<11) +#    define NV40_FP_OP_OUT_W                                            (1 <<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. 
+ */ +#define NV40_FP_OP_INPUT_SRC_SHIFT                                            13 +#define NV40_FP_OP_INPUT_SRC_MASK                                     (15 << 13) +#    define NV40_FP_OP_INPUT_SRC_POSITION                                    0x0 +#    define NV40_FP_OP_INPUT_SRC_COL0                                        0x1 +#    define NV40_FP_OP_INPUT_SRC_COL1                                        0x2 +#    define NV40_FP_OP_INPUT_SRC_FOGC                                        0x3 +#    define NV40_FP_OP_INPUT_SRC_TC0                                         0x4 +#    define NV40_FP_OP_INPUT_SRC_TC(n)                                 (0x4 + n) +#    define NV40_FP_OP_INPUT_SRC_FACING                                      0xE +#define NV40_FP_OP_TEX_UNIT_SHIFT                                             17 +#define NV40_FP_OP_TEX_UNIT_MASK                                     (0xF << 17) +#define NV40_FP_OP_PRECISION_SHIFT                                            22 +#define NV40_FP_OP_PRECISION_MASK                                      (3 << 22) +#   define NV40_FP_PRECISION_FP32                                              0 +#   define NV40_FP_PRECISION_FP16                                              1 +#   define NV40_FP_PRECISION_FX12                                              2 +#define NV40_FP_OP_OPCODE_SHIFT                                               24 +#define NV40_FP_OP_OPCODE_MASK                                      (0x3F << 24) +#        define NV40_FP_OP_OPCODE_NOP                                       0x00 +#        define NV40_FP_OP_OPCODE_MOV                                       0x01 +#        define NV40_FP_OP_OPCODE_MUL                                       0x02 +#        define NV40_FP_OP_OPCODE_ADD                                       0x03 +#        define NV40_FP_OP_OPCODE_MAD                                       0x04 +#        define NV40_FP_OP_OPCODE_DP3                                       0x05 +#        define 
NV40_FP_OP_OPCODE_DP4                                       0x06 +#        define NV40_FP_OP_OPCODE_DST                                       0x07 +#        define NV40_FP_OP_OPCODE_MIN                                       0x08 +#        define NV40_FP_OP_OPCODE_MAX                                       0x09 +#        define NV40_FP_OP_OPCODE_SLT                                       0x0A +#        define NV40_FP_OP_OPCODE_SGE                                       0x0B +#        define NV40_FP_OP_OPCODE_SLE                                       0x0C +#        define NV40_FP_OP_OPCODE_SGT                                       0x0D +#        define NV40_FP_OP_OPCODE_SNE                                       0x0E +#        define NV40_FP_OP_OPCODE_SEQ                                       0x0F +#        define NV40_FP_OP_OPCODE_FRC                                       0x10 +#        define NV40_FP_OP_OPCODE_FLR                                       0x11 +#        define NV40_FP_OP_OPCODE_KIL                                       0x12 +#        define NV40_FP_OP_OPCODE_PK4B                                      0x13 +#        define NV40_FP_OP_OPCODE_UP4B                                      0x14 +/* DDX/DDY can only write to XY */ +#        define NV40_FP_OP_OPCODE_DDX                                       0x15 +#        define NV40_FP_OP_OPCODE_DDY                                       0x16 +#        define NV40_FP_OP_OPCODE_TEX                                       0x17 +#        define NV40_FP_OP_OPCODE_TXP                                       0x18 +#        define NV40_FP_OP_OPCODE_TXD                                       0x19 +#        define NV40_FP_OP_OPCODE_RCP                                       0x1A +#        define NV40_FP_OP_OPCODE_EX2                                       0x1C +#        define NV40_FP_OP_OPCODE_LG2                                       0x1D +#        define NV40_FP_OP_OPCODE_STR                                       0x20 +#        
define NV40_FP_OP_OPCODE_SFL                                       0x21 +#        define NV40_FP_OP_OPCODE_COS                                       0x22 +#        define NV40_FP_OP_OPCODE_SIN                                       0x23 +#        define NV40_FP_OP_OPCODE_PK2H                                      0x24 +#        define NV40_FP_OP_OPCODE_UP2H                                      0x25 +#        define NV40_FP_OP_OPCODE_PK4UB                                     0x27 +#        define NV40_FP_OP_OPCODE_UP4UB                                     0x28 +#        define NV40_FP_OP_OPCODE_PK2US                                     0x29 +#        define NV40_FP_OP_OPCODE_UP2US                                     0x2A +#        define NV40_FP_OP_OPCODE_DP2A                                      0x2E +#        define NV40_FP_OP_OPCODE_TXL                                       0x2F +#        define NV40_FP_OP_OPCODE_TXB                                       0x31 +#        define NV40_FP_OP_OPCODE_DIV                                       0x3A +#        define NV40_FP_OP_OPCODE_UNK_LIT                                   0x3C +/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ +#        define NV40_FP_OP_BRA_OPCODE_BRK                                    0x0 +#        define NV40_FP_OP_BRA_OPCODE_CAL                                    0x1 +#        define NV40_FP_OP_BRA_OPCODE_IF                                     0x2 +#        define NV40_FP_OP_BRA_OPCODE_LOOP                                   0x3 +#        define NV40_FP_OP_BRA_OPCODE_REP                                    0x4 +#        define NV40_FP_OP_BRA_OPCODE_RET                                    0x5 +#define NV40_FP_OP_OUT_SAT                                             (1 << 31) + +/* high order bits of SRC0 */ +#define NV40_FP_OP_OUT_ABS                                             (1 << 29) +#define NV40_FP_OP_COND_SWZ_W_SHIFT                                           27 +#define 
NV40_FP_OP_COND_SWZ_W_MASK                                     (3 << 27) +#define NV40_FP_OP_COND_SWZ_Z_SHIFT                                           25 +#define NV40_FP_OP_COND_SWZ_Z_MASK                                     (3 << 25) +#define NV40_FP_OP_COND_SWZ_Y_SHIFT                                           23 +#define NV40_FP_OP_COND_SWZ_Y_MASK                                     (3 << 23) +#define NV40_FP_OP_COND_SWZ_X_SHIFT                                           21 +#define NV40_FP_OP_COND_SWZ_X_MASK                                     (3 << 21) +#define NV40_FP_OP_COND_SWZ_ALL_SHIFT                                         21 +#define NV40_FP_OP_COND_SWZ_ALL_MASK                                (0xFF << 21) +#define NV40_FP_OP_COND_SHIFT                                                 18 +#define NV40_FP_OP_COND_MASK                                        (0x07 << 18) +#        define NV40_FP_OP_COND_FL                                             0 +#        define NV40_FP_OP_COND_LT                                             1 +#        define NV40_FP_OP_COND_EQ                                             2 +#        define NV40_FP_OP_COND_LE                                             3 +#        define NV40_FP_OP_COND_GT                                             4 +#        define NV40_FP_OP_COND_NE                                             5 +#        define NV40_FP_OP_COND_GE                                             6 +#        define NV40_FP_OP_COND_TR                                             7 + +/* high order bits of SRC1 */ +#define NV40_FP_OP_OPCODE_IS_BRANCH                                      (1<<31) +#define NV40_FP_OP_DST_SCALE_SHIFT                                            28 +#define NV40_FP_OP_DST_SCALE_MASK                                      (3 << 28) +#define NV40_FP_OP_DST_SCALE_1X                                                0 +#define NV40_FP_OP_DST_SCALE_2X                                                1 
+#define NV40_FP_OP_DST_SCALE_4X                                                2 +#define NV40_FP_OP_DST_SCALE_8X                                                3 +#define NV40_FP_OP_DST_SCALE_INV_2X                                            5 +#define NV40_FP_OP_DST_SCALE_INV_4X                                            6 +#define NV40_FP_OP_DST_SCALE_INV_8X                                            7 + +/* SRC1 LOOP */ +#define NV40_FP_OP_LOOP_INCR_SHIFT                                            19 +#define NV40_FP_OP_LOOP_INCR_MASK                                   (0xFF << 19) +#define NV40_FP_OP_LOOP_INDEX_SHIFT                                           10 +#define NV40_FP_OP_LOOP_INDEX_MASK                                  (0xFF << 10) +#define NV40_FP_OP_LOOP_COUNT_SHIFT                                            2 +#define NV40_FP_OP_LOOP_COUNT_MASK                                   (0xFF << 2) + +/* SRC1 IF */ +#define NV40_FP_OP_ELSE_ID_SHIFT                                               2 +#define NV40_FP_OP_ELSE_ID_MASK                                      (0xFF << 2) + +/* SRC1 CAL */ +#define NV40_FP_OP_IADDR_SHIFT                                                 2 +#define NV40_FP_OP_IADDR_MASK                                        (0xFF << 2) + +/* SRC1 REP + *   I have no idea why there are 3 count values here..  but they + *   have always been filled with the same value in my tests so + *   far.. 
+ */ +#define NV40_FP_OP_REP_COUNT1_SHIFT                                            2 +#define NV40_FP_OP_REP_COUNT1_MASK                                   (0xFF << 2) +#define NV40_FP_OP_REP_COUNT2_SHIFT                                           10 +#define NV40_FP_OP_REP_COUNT2_MASK                                  (0xFF << 10) +#define NV40_FP_OP_REP_COUNT3_SHIFT                                           19 +#define NV40_FP_OP_REP_COUNT3_MASK                                  (0xFF << 19) + +/* SRC2 REP/IF */ +#define NV40_FP_OP_END_ID_SHIFT                                                2 +#define NV40_FP_OP_END_ID_MASK                                       (0xFF << 2) + +// SRC2 high-order +#define NV40_FP_OP_INDEX_INPUT                                         (1 << 30) +#define NV40_FP_OP_ADDR_INDEX_SHIFT                                           19 +#define NV40_FP_OP_ADDR_INDEX_MASK                                   (0xF << 19) + +//== Register selection == +#define NV40_FP_REG_TYPE_SHIFT                                                 0 +#define NV40_FP_REG_TYPE_MASK                                           (3 << 0) +#        define NV40_FP_REG_TYPE_TEMP                                          0 +#        define NV40_FP_REG_TYPE_INPUT                                         1 +#        define NV40_FP_REG_TYPE_CONST                                         2 +#define NV40_FP_REG_SRC_SHIFT                                                  2 +#define NV40_FP_REG_SRC_MASK                                           (63 << 2) +#define NV40_FP_REG_SRC_HALF                                            (1 << 8) +#define NV40_FP_REG_SWZ_ALL_SHIFT                                              9 +#define NV40_FP_REG_SWZ_ALL_MASK                                      (255 << 9) +#define NV40_FP_REG_SWZ_X_SHIFT                                                9 +#define NV40_FP_REG_SWZ_X_MASK                                          (3 << 9) +#define NV40_FP_REG_SWZ_Y_SHIFT    
                                           11 +#define NV40_FP_REG_SWZ_Y_MASK                                         (3 << 11) +#define NV40_FP_REG_SWZ_Z_SHIFT                                               13 +#define NV40_FP_REG_SWZ_Z_MASK                                         (3 << 13) +#define NV40_FP_REG_SWZ_W_SHIFT                                               15 +#define NV40_FP_REG_SWZ_W_MASK                                         (3 << 15) +#        define NV40_FP_SWIZZLE_X                                              0 +#        define NV40_FP_SWIZZLE_Y                                              1 +#        define NV40_FP_SWIZZLE_Z                                              2 +#        define NV40_FP_SWIZZLE_W                                              3 +#define NV40_FP_REG_NEGATE                                             (1 << 17) + +#ifndef NV40_SHADER_NO_FUCKEDNESS +#define NV40SR_NONE	0 +#define NV40SR_OUTPUT	1 +#define NV40SR_INPUT	2 +#define NV40SR_TEMP	3 +#define NV40SR_CONST	4 + +struct nv40_sreg { +	int type; +	int index; + +	int dst_scale; + +	int negate; +	int abs; +	int swz[4]; + +	int cc_update; +	int cc_update_reg; +	int cc_test; +	int cc_test_reg; +	int cc_swz[4]; +}; + +static INLINE struct nv40_sreg +nv40_sr(int type, int index) +{ +	struct nv40_sreg temp = { +		.type = type, +		.index = index, +		.dst_scale = DEF_SCALE, +		.abs = 0, +		.negate = 0, +		.swz = { 0, 1, 2, 3 }, +		.cc_update = 0, +		.cc_update_reg = 0, +		.cc_test = DEF_CTEST, +		.cc_test_reg = 0, +		.cc_swz = { 0, 1, 2, 3 }, +	}; +	return temp; +} + +static INLINE struct nv40_sreg +nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w) +{ +	struct nv40_sreg dst = src; + +	dst.swz[SWZ_X] = src.swz[x]; +	dst.swz[SWZ_Y] = src.swz[y]; +	dst.swz[SWZ_Z] = src.swz[z]; +	dst.swz[SWZ_W] = src.swz[w]; +	return dst; +} + +static INLINE struct nv40_sreg +nv40_sr_neg(struct nv40_sreg src) +{ +	src.negate = !src.negate; +	return src; +} + +static INLINE struct 
nv40_sreg +nv40_sr_abs(struct nv40_sreg src) +{ +	src.abs = 1; +	return src; +} + +static INLINE struct nv40_sreg +nv40_sr_scale(struct nv40_sreg src, int scale) +{ +	src.dst_scale = scale; +	return src; +} +#endif + +#endif diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c new file mode 100644 index 0000000000..2eff25aa83 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -0,0 +1,740 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "draw/draw_context.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +static void * +nv40_blend_state_create(struct pipe_context *pipe, +			const struct pipe_blend_state *cso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nouveau_grobj *curie = nv40->screen->curie; +	struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); +	struct nouveau_stateobj *so = so_new(16, 0); + +	if (cso->blend_enable) { +		so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); +		so_data  (so, 1); +		so_data  (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) | +			       nvgl_blend_func(cso->rgb_src_factor)); +		so_data  (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 | +			      nvgl_blend_func(cso->rgb_dst_factor)); +		so_method(so, curie, NV40TCL_BLEND_EQUATION, 1); +		so_data  (so, nvgl_blend_eqn(cso->alpha_func) << 16 | +			      nvgl_blend_eqn(cso->rgb_func)); +	} else { +		so_method(so, curie, NV40TCL_BLEND_ENABLE, 1); +		so_data  (so, 0); +	} + +	so_method(so, curie, NV40TCL_COLOR_MASK, 1); +	so_data  (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | +		       ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | +		       ((cso->colormask & PIPE_MASK_G) ? (0x01 <<  8) : 0) | +		       ((cso->colormask & PIPE_MASK_B) ? 
(0x01 <<  0) : 0))); + +	if (cso->logicop_enable) { +		so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2); +		so_data  (so, 1); +		so_data  (so, nvgl_logicop_func(cso->logicop_func)); +	} else { +		so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 1); +		so_data  (so, 0); +	} + +	so_method(so, curie, NV40TCL_DITHER_ENABLE, 1); +	so_data  (so, cso->dither ? 1 : 0); + +	so_ref(so, &bso->so); +	bso->pipe = *cso; +	return (void *)bso; +} + +static void +nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->blend = hwcso; +	nv40->dirty |= NV40_NEW_BLEND; +} + +static void +nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_blend_state *bso = hwcso; + +	so_ref(NULL, &bso->so); +	FREE(bso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { +	unsigned ret; + +	switch (wrap) { +	case PIPE_TEX_WRAP_REPEAT: +		ret = NV40TCL_TEX_WRAP_S_REPEAT; +		break; +	case PIPE_TEX_WRAP_MIRROR_REPEAT: +		ret = NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT; +		break; +	case PIPE_TEX_WRAP_CLAMP_TO_EDGE: +		ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE; +		break; +	case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +		ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER; +		break; +	case PIPE_TEX_WRAP_CLAMP: +		ret = NV40TCL_TEX_WRAP_S_CLAMP; +		break; +	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: +		ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; +		break; +	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: +		ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; +		break; +	case PIPE_TEX_WRAP_MIRROR_CLAMP: +		ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; +		break; +	default: +		NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); +		ret = NV40TCL_TEX_WRAP_S_REPEAT; +		break; +	} + +	return ret >> NV40TCL_TEX_WRAP_S_SHIFT; +} + +static void * +nv40_sampler_state_create(struct pipe_context *pipe, +			  const struct pipe_sampler_state *cso) +{ +	struct nv40_sampler_state *ps; +	uint32_t filter = 0; + +	ps = MALLOC(sizeof(struct nv40_sampler_state)); + +	
ps->fmt = 0; +	if (!cso->normalized_coords) +		ps->fmt |= NV40TCL_TEX_FORMAT_RECT; + +	ps->wrap = ((wrap_mode(cso->wrap_s) << NV40TCL_TEX_WRAP_S_SHIFT) | +		    (wrap_mode(cso->wrap_t) << NV40TCL_TEX_WRAP_T_SHIFT) | +		    (wrap_mode(cso->wrap_r) << NV40TCL_TEX_WRAP_R_SHIFT)); + +	ps->en = 0; +	if (cso->max_anisotropy >= 2.0) { +		/* no idea, binary driver sets it, works without it.. meh.. */ +		ps->wrap |= (1 << 5); + +		if (cso->max_anisotropy >= 16.0) { +			ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; +		} else +		if (cso->max_anisotropy >= 12.0) { +			ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; +		} else +		if (cso->max_anisotropy >= 10.0) { +			ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; +		} else +		if (cso->max_anisotropy >= 8.0) { +			ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; +		} else +		if (cso->max_anisotropy >= 6.0) { +			ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; +		} else +		if (cso->max_anisotropy >= 4.0) { +			ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; +		} else { +			ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; +		} +	} + +	switch (cso->mag_img_filter) { +	case PIPE_TEX_FILTER_LINEAR: +		filter |= NV40TCL_TEX_FILTER_MAG_LINEAR; +		break; +	case PIPE_TEX_FILTER_NEAREST: +	default: +		filter |= NV40TCL_TEX_FILTER_MAG_NEAREST; +		break; +	} + +	switch (cso->min_img_filter) { +	case PIPE_TEX_FILTER_LINEAR: +		switch (cso->min_mip_filter) { +		case PIPE_TEX_MIPFILTER_NEAREST: +			filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST; +			break; +		case PIPE_TEX_MIPFILTER_LINEAR: +			filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR; +			break; +		case PIPE_TEX_MIPFILTER_NONE: +		default: +			filter |= NV40TCL_TEX_FILTER_MIN_LINEAR; +			break; +		} +		break; +	case PIPE_TEX_FILTER_NEAREST: +	default: +		switch (cso->min_mip_filter) { +		case PIPE_TEX_MIPFILTER_NEAREST: +			filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST; +		break; +		case PIPE_TEX_MIPFILTER_LINEAR: +			filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR; +			break; +		case PIPE_TEX_MIPFILTER_NONE: +		
default: +			filter |= NV40TCL_TEX_FILTER_MIN_NEAREST; +			break; +		} +		break; +	} + +	ps->filt = filter; + +	{ +		float limit; + +		limit = CLAMP(cso->lod_bias, -16.0, 15.0); +		ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + +		limit = CLAMP(cso->max_lod, 0.0, 15.0); +		ps->en |= (int)(limit * 256.0) << 7; + +		limit = CLAMP(cso->min_lod, 0.0, 15.0); +		ps->en |= (int)(limit * 256.0) << 19; +	} + + +	if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { +		switch (cso->compare_func) { +		case PIPE_FUNC_NEVER: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NEVER; +			break; +		case PIPE_FUNC_GREATER: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GREATER; +			break; +		case PIPE_FUNC_EQUAL: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_EQUAL; +			break; +		case PIPE_FUNC_GEQUAL: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GEQUAL; +			break; +		case PIPE_FUNC_LESS: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LESS; +			break; +		case PIPE_FUNC_NOTEQUAL: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL; +			break; +		case PIPE_FUNC_LEQUAL: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LEQUAL; +			break; +		case PIPE_FUNC_ALWAYS: +			ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_ALWAYS; +			break; +		default: +			break; +		} +	} + +	ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | +		    (float_to_ubyte(cso->border_color[0]) << 16) | +		    (float_to_ubyte(cso->border_color[1]) <<  8) | +		    (float_to_ubyte(cso->border_color[2]) <<  0)); + +	return (void *)ps; +} + +static void +nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	unsigned unit; + +	for (unit = 0; unit < nr; unit++) { +		nv40->tex_sampler[unit] = sampler[unit]; +		nv40->dirty_samplers |= (1 << unit); +	} + +	for (unit = nr; unit < nv40->nr_samplers; unit++) { +		nv40->tex_sampler[unit] = NULL; +		nv40->dirty_samplers |= (1 << unit); +	} + +	nv40->nr_samplers = nr; +	nv40->dirty |= NV40_NEW_SAMPLER; +} + +static void +nv40_sampler_state_delete(struct 
pipe_context *pipe, void *hwcso) +{ +	FREE(hwcso); +} + +static void +nv40_set_sampler_texture(struct pipe_context *pipe, unsigned nr, +			 struct pipe_texture **miptree) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	unsigned unit; + +	for (unit = 0; unit < nr; unit++) { +		pipe_texture_reference((struct pipe_texture **) +				       &nv40->tex_miptree[unit], miptree[unit]); +		nv40->dirty_samplers |= (1 << unit); +	} + +	for (unit = nr; unit < nv40->nr_textures; unit++) { +		pipe_texture_reference((struct pipe_texture **) +				       &nv40->tex_miptree[unit], NULL); +		nv40->dirty_samplers |= (1 << unit); +	} + +	nv40->nr_textures = nr; +	nv40->dirty |= NV40_NEW_SAMPLER; +} + +static void * +nv40_rasterizer_state_create(struct pipe_context *pipe, +			     const struct pipe_rasterizer_state *cso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); +	struct nouveau_stateobj *so = so_new(32, 0); +	struct nouveau_grobj *curie = nv40->screen->curie; + +	/*XXX: ignored: +	 * 	light_twoside +	 * 	point_smooth -nohw +	 * 	multisample +	 */ + +	so_method(so, curie, NV40TCL_SHADE_MODEL, 1); +	so_data  (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT : +				       NV40TCL_SHADE_MODEL_SMOOTH); + +	so_method(so, curie, NV40TCL_LINE_WIDTH, 2); +	so_data  (so, (unsigned char)(cso->line_width * 8.0) & 0xff); +	so_data  (so, cso->line_smooth ? 1 : 0); +	so_method(so, curie, NV40TCL_LINE_STIPPLE_ENABLE, 2); +	so_data  (so, cso->line_stipple_enable ? 
1 : 0); +	so_data  (so, (cso->line_stipple_pattern << 16) | +		       cso->line_stipple_factor); + +	so_method(so, curie, NV40TCL_POINT_SIZE, 1); +	so_data  (so, fui(cso->point_size)); + +	so_method(so, curie, NV40TCL_POLYGON_MODE_FRONT, 6); +	if (cso->front_winding == PIPE_WINDING_CCW) { +		so_data(so, nvgl_polygon_mode(cso->fill_ccw)); +		so_data(so, nvgl_polygon_mode(cso->fill_cw)); +		switch (cso->cull_mode) { +		case PIPE_WINDING_CCW: +			so_data(so, NV40TCL_CULL_FACE_FRONT); +			break; +		case PIPE_WINDING_CW: +			so_data(so, NV40TCL_CULL_FACE_BACK); +			break; +		case PIPE_WINDING_BOTH: +			so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); +			break; +		default: +			so_data(so, NV40TCL_CULL_FACE_BACK); +			break; +		} +		so_data(so, NV40TCL_FRONT_FACE_CCW); +	} else { +		so_data(so, nvgl_polygon_mode(cso->fill_cw)); +		so_data(so, nvgl_polygon_mode(cso->fill_ccw)); +		switch (cso->cull_mode) { +		case PIPE_WINDING_CCW: +			so_data(so, NV40TCL_CULL_FACE_BACK); +			break; +		case PIPE_WINDING_CW: +			so_data(so, NV40TCL_CULL_FACE_FRONT); +			break; +		case PIPE_WINDING_BOTH: +			so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); +			break; +		default: +			so_data(so, NV40TCL_CULL_FACE_BACK); +			break; +		} +		so_data(so, NV40TCL_FRONT_FACE_CW); +	} +	so_data(so, cso->poly_smooth ? 1 : 0); +	so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + +	so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); +	so_data  (so, cso->poly_stipple_enable ? 
1 : 0); + +	so_method(so, curie, NV40TCL_POLYGON_OFFSET_POINT_ENABLE, 3); +	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || +	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) +		so_data(so, 1); +	else +		so_data(so, 0); +	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || +	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) +		so_data(so, 1); +	else +		so_data(so, 0); +	if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || +	    (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) +		so_data(so, 1); +	else +		so_data(so, 0); +	if (cso->offset_cw || cso->offset_ccw) { +		so_method(so, curie, NV40TCL_POLYGON_OFFSET_FACTOR, 2); +		so_data  (so, fui(cso->offset_scale)); +		so_data  (so, fui(cso->offset_units * 2)); +	} + +	so_method(so, curie, NV40TCL_POINT_SPRITE, 1); +	if (cso->point_sprite) { +		unsigned psctl = (1 << 0), i; + +		for (i = 0; i < 8; i++) { +			if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) +				psctl |= (1 << (8 + i)); +		} + +		so_data(so, psctl); +	} else { +		so_data(so, 0); +	} + +	so_ref(so, &rsso->so); +	rsso->pipe = *cso; +	return (void *)rsso; +} + +static void +nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->rasterizer = hwcso; +	nv40->dirty |= NV40_NEW_RAST; +	nv40->draw_dirty |= NV40_NEW_RAST; +} + +static void +nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_rasterizer_state *rsso = hwcso; + +	so_ref(NULL, &rsso->so); +	FREE(rsso); +} + +static void * +nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, +			const struct pipe_depth_stencil_alpha_state *cso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); +	struct nouveau_stateobj *so = so_new(32, 0); +	struct nouveau_grobj *curie = nv40->screen->curie; + +	so_method(so, curie, NV40TCL_DEPTH_FUNC, 
3); +	so_data  (so, nvgl_comparison_op(cso->depth.func)); +	so_data  (so, cso->depth.writemask ? 1 : 0); +	so_data  (so, cso->depth.enabled ? 1 : 0); + +	so_method(so, curie, NV40TCL_ALPHA_TEST_ENABLE, 3); +	so_data  (so, cso->alpha.enabled ? 1 : 0); +	so_data  (so, nvgl_comparison_op(cso->alpha.func)); +	so_data  (so, float_to_ubyte(cso->alpha.ref_value)); + +	if (cso->stencil[0].enabled) { +		so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); +		so_data  (so, cso->stencil[0].enabled ? 1 : 0); +		so_data  (so, cso->stencil[0].writemask); +		so_data  (so, nvgl_comparison_op(cso->stencil[0].func)); +		so_data  (so, cso->stencil[0].ref_value); +		so_data  (so, cso->stencil[0].valuemask); +		so_data  (so, nvgl_stencil_op(cso->stencil[0].fail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); +	} else { +		so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); +		so_data  (so, 0); +	} + +	if (cso->stencil[1].enabled) { +		so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 8); +		so_data  (so, cso->stencil[1].enabled ? 
1 : 0); +		so_data  (so, cso->stencil[1].writemask); +		so_data  (so, nvgl_comparison_op(cso->stencil[1].func)); +		so_data  (so, cso->stencil[1].ref_value); +		so_data  (so, cso->stencil[1].valuemask); +		so_data  (so, nvgl_stencil_op(cso->stencil[1].fail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); +		so_data  (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); +	} else { +		so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 1); +		so_data  (so, 0); +	} + +	so_ref(so, &zsaso->so); +	zsaso->pipe = *cso; +	return (void *)zsaso; +} + +static void +nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->zsa = hwcso; +	nv40->dirty |= NV40_NEW_ZSA; +} + +static void +nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_zsa_state *zsaso = hwcso; + +	so_ref(NULL, &zsaso->so); +	FREE(zsaso); +} + +static void * +nv40_vp_state_create(struct pipe_context *pipe, +		     const struct pipe_shader_state *cso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_vertex_program *vp; + +	vp = CALLOC(1, sizeof(struct nv40_vertex_program)); +	vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); +	vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe); + +	return (void *)vp; +} + +static void +nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->vertprog = hwcso; +	nv40->dirty |= NV40_NEW_VERTPROG; +	nv40->draw_dirty |= NV40_NEW_VERTPROG; +} + +static void +nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_vertex_program *vp = hwcso; + +	draw_delete_vertex_shader(nv40->draw, vp->draw); +	nv40_vertprog_destroy(nv40, vp); +	FREE((void*)vp->pipe.tokens); +	FREE(vp); +} + +static void * +nv40_fp_state_create(struct pipe_context *pipe, +		     const struct pipe_shader_state *cso) +{ +	
struct nv40_fragment_program *fp; + +	fp = CALLOC(1, sizeof(struct nv40_fragment_program)); +	fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + +	tgsi_scan_shader(fp->pipe.tokens, &fp->info); + +	return (void *)fp; +} + +static void +nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->fragprog = hwcso; +	nv40->dirty |= NV40_NEW_FRAGPROG; +} + +static void +nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv40_fragment_program *fp = hwcso; + +	nv40_fragprog_destroy(nv40, fp); +	FREE((void*)fp->pipe.tokens); +	FREE(fp); +} + +static void +nv40_set_blend_color(struct pipe_context *pipe, +		     const struct pipe_blend_color *bcol) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->blend_colour = *bcol; +	nv40->dirty |= NV40_NEW_BCOL; +} + +static void +nv40_set_clip_state(struct pipe_context *pipe, +		    const struct pipe_clip_state *clip) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->clip = *clip; +	nv40->dirty |= NV40_NEW_UCP; +	nv40->draw_dirty |= NV40_NEW_UCP; +} + +static void +nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, +			 const struct pipe_constant_buffer *buf ) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->constbuf[shader] = buf->buffer; +	nv40->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); + +	if (shader == PIPE_SHADER_VERTEX) { +		nv40->dirty |= NV40_NEW_VERTPROG; +	} else +	if (shader == PIPE_SHADER_FRAGMENT) { +		nv40->dirty |= NV40_NEW_FRAGPROG; +	} +} + +static void +nv40_set_framebuffer_state(struct pipe_context *pipe, +			   const struct pipe_framebuffer_state *fb) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->framebuffer = *fb; +	nv40->dirty |= NV40_NEW_FB; +} + +static void +nv40_set_polygon_stipple(struct pipe_context *pipe, +			 const struct pipe_poly_stipple *stipple) +{ +	struct nv40_context 
*nv40 = nv40_context(pipe); + +	memcpy(nv40->stipple, stipple->stipple, 4 * 32); +	nv40->dirty |= NV40_NEW_STIPPLE; +} + +static void +nv40_set_scissor_state(struct pipe_context *pipe, +		       const struct pipe_scissor_state *s) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->scissor = *s; +	nv40->dirty |= NV40_NEW_SCISSOR; +} + +static void +nv40_set_viewport_state(struct pipe_context *pipe, +			const struct pipe_viewport_state *vpt) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->viewport = *vpt; +	nv40->dirty |= NV40_NEW_VIEWPORT; +	nv40->draw_dirty |= NV40_NEW_VIEWPORT; +} + +static void +nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count, +			const struct pipe_vertex_buffer *vb) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	memcpy(nv40->vtxbuf, vb, sizeof(*vb) * count); +	nv40->vtxbuf_nr = count; + +	nv40->dirty |= NV40_NEW_ARRAYS; +	nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +static void +nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count, +			 const struct pipe_vertex_element *ve) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	memcpy(nv40->vtxelt, ve, sizeof(*ve) * count); +	nv40->vtxelt_nr = count; + +	nv40->dirty |= NV40_NEW_ARRAYS; +	nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +static void +nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +	struct nv40_context *nv40 = nv40_context(pipe); + +	nv40->edgeflags = bitfield; +	nv40->dirty |= NV40_NEW_ARRAYS; +	nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +void +nv40_init_state_functions(struct nv40_context *nv40) +{ +	nv40->pipe.create_blend_state = nv40_blend_state_create; +	nv40->pipe.bind_blend_state = nv40_blend_state_bind; +	nv40->pipe.delete_blend_state = nv40_blend_state_delete; + +	nv40->pipe.create_sampler_state = nv40_sampler_state_create; +	nv40->pipe.bind_sampler_states = nv40_sampler_state_bind; +	nv40->pipe.delete_sampler_state = nv40_sampler_state_delete; +	nv40->pipe.set_sampler_textures = 
nv40_set_sampler_texture; + +	nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create; +	nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; +	nv40->pipe.delete_rasterizer_state = nv40_rasterizer_state_delete; + +	nv40->pipe.create_depth_stencil_alpha_state = +		nv40_depth_stencil_alpha_state_create; +	nv40->pipe.bind_depth_stencil_alpha_state = +		nv40_depth_stencil_alpha_state_bind; +	nv40->pipe.delete_depth_stencil_alpha_state = +		nv40_depth_stencil_alpha_state_delete; + +	nv40->pipe.create_vs_state = nv40_vp_state_create; +	nv40->pipe.bind_vs_state = nv40_vp_state_bind; +	nv40->pipe.delete_vs_state = nv40_vp_state_delete; + +	nv40->pipe.create_fs_state = nv40_fp_state_create; +	nv40->pipe.bind_fs_state = nv40_fp_state_bind; +	nv40->pipe.delete_fs_state = nv40_fp_state_delete; + +	nv40->pipe.set_blend_color = nv40_set_blend_color; +	nv40->pipe.set_clip_state = nv40_set_clip_state; +	nv40->pipe.set_constant_buffer = nv40_set_constant_buffer; +	nv40->pipe.set_framebuffer_state = nv40_set_framebuffer_state; +	nv40->pipe.set_polygon_stipple = nv40_set_polygon_stipple; +	nv40->pipe.set_scissor_state = nv40_set_scissor_state; +	nv40->pipe.set_viewport_state = nv40_set_viewport_state; + +	nv40->pipe.set_edgeflags = nv40_set_edgeflags; +	nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers; +	nv40->pipe.set_vertex_elements = nv40_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h new file mode 100644 index 0000000000..8a9d8c8fdf --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -0,0 +1,88 @@ +#ifndef __NV40_STATE_H__ +#define __NV40_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv40_sampler_state { +	uint32_t fmt; +	uint32_t wrap; +	uint32_t en; +	uint32_t filt; +	uint32_t bcol; +}; + +struct nv40_vertex_program_exec { +	uint32_t data[4]; +	boolean has_branch_offset; +	int const_index; +}; + +struct nv40_vertex_program_data { +	int index; 
/* immediates == -1 */ +	float value[4]; +}; + +struct nv40_vertex_program { +	struct pipe_shader_state pipe; + +	struct draw_vertex_shader *draw; + +	boolean translated; + +	struct pipe_clip_state ucp; + +	struct nv40_vertex_program_exec *insns; +	unsigned nr_insns; +	struct nv40_vertex_program_data *consts; +	unsigned nr_consts; + +	struct nouveau_resource *exec; +	unsigned exec_start; +	struct nouveau_resource *data; +	unsigned data_start; +	unsigned data_start_min; + +	uint32_t ir; +	uint32_t or; +	uint32_t clip_ctrl; +	struct nouveau_stateobj *so; +}; + +struct nv40_fragment_program_data { +	unsigned offset; +	unsigned index; +}; + +struct nv40_fragment_program { +	struct pipe_shader_state pipe; +	struct tgsi_shader_info info; + +	boolean translated; +	unsigned samplers; + +	uint32_t *insn; +	int       insn_len; + +	struct nv40_fragment_program_data *consts; +	unsigned nr_consts; + +	struct pipe_buffer *buffer; + +	uint32_t fp_control; +	struct nouveau_stateobj *so; +}; + +struct nv40_miptree { +	struct pipe_texture base; + +	struct pipe_buffer *buffer; +	uint total_size; + +	struct { +		uint pitch; +		uint *image_offset; +	} level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c new file mode 100644 index 0000000000..95e6d7394f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -0,0 +1,40 @@ +#include "nv40_context.h" + +static boolean +nv40_state_blend_validate(struct nv40_context *nv40) +{ +	so_ref(nv40->blend->so, &nv40->state.hw[NV40_STATE_BLEND]); +	return TRUE; +} + +struct nv40_state_entry nv40_state_blend = { +	.validate = nv40_state_blend_validate, +	.dirty = { +		.pipe = NV40_NEW_BLEND, +		.hw = NV40_STATE_BLEND +	} +}; + +static boolean +nv40_state_blend_colour_validate(struct nv40_context *nv40) +{ +	struct nouveau_stateobj *so = so_new(2, 0); +	struct pipe_blend_color *bcol = &nv40->blend_colour; + +	so_method(so, nv40->screen->curie, 
NV40TCL_BLEND_COLOR, 1); +	so_data  (so, ((float_to_ubyte(bcol->color[3]) << 24) | +		       (float_to_ubyte(bcol->color[0]) << 16) | +		       (float_to_ubyte(bcol->color[1]) <<  8) | +		       (float_to_ubyte(bcol->color[2]) <<  0))); + +	so_ref(so, &nv40->state.hw[NV40_STATE_BCOL]); +	return TRUE; +} + +struct nv40_state_entry nv40_state_blend_colour = { +	.validate = nv40_state_blend_colour_validate, +	.dirty = { +		.pipe = NV40_NEW_BCOL, +		.hw = NV40_STATE_BCOL +	} +}; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c new file mode 100644 index 0000000000..ce859def10 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -0,0 +1,184 @@ +#include "nv40_context.h" +#include "nv40_state.h" +#include "draw/draw_context.h" + +static struct nv40_state_entry *render_states[] = { +	&nv40_state_framebuffer, +	&nv40_state_rasterizer, +	&nv40_state_scissor, +	&nv40_state_stipple, +	&nv40_state_fragprog, +	&nv40_state_fragtex, +	&nv40_state_vertprog, +	&nv40_state_blend, +	&nv40_state_blend_colour, +	&nv40_state_zsa, +	&nv40_state_viewport, +	&nv40_state_vbo, +	NULL +}; + +static struct nv40_state_entry *swtnl_states[] = { +	&nv40_state_framebuffer, +	&nv40_state_rasterizer, +	&nv40_state_scissor, +	&nv40_state_stipple, +	&nv40_state_fragprog, +	&nv40_state_fragtex, +	&nv40_state_vertprog, +	&nv40_state_blend, +	&nv40_state_blend_colour, +	&nv40_state_zsa, +	&nv40_state_viewport, +	&nv40_state_vtxfmt, +	NULL +}; + +static void +nv40_state_do_validate(struct nv40_context *nv40, +		       struct nv40_state_entry **states) +{ +	const struct pipe_framebuffer_state *fb = &nv40->framebuffer; +	unsigned i; + +	for (i = 0; i < fb->nr_cbufs; i++) +		fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; +	if (fb->zsbuf) +		fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + +	while (*states) { +		struct nv40_state_entry *e = *states; + +		if (nv40->dirty & e->dirty.pipe) { +			if (e->validate(nv40)) +				nv40->state.dirty 
|= (1ULL << e->dirty.hw); +		} + +		states++; +	} +	nv40->dirty = 0; +} + +void +nv40_state_emit(struct nv40_context *nv40) +{ +	struct nv40_state *state = &nv40->state; +	struct nv40_screen *screen = nv40->screen; +	unsigned i, samplers; +	uint64_t states; + +	if (nv40->pctx_id != screen->cur_pctx) { +		for (i = 0; i < NV40_STATE_MAX; i++) { +			if (state->hw[i] && screen->state[i] != state->hw[i]) +				state->dirty |= (1ULL << i); +		} + +		screen->cur_pctx = nv40->pctx_id; +	} + +	for (i = 0, states = state->dirty; states; i++) { +		if (!(states & (1ULL << i))) +			continue; +		so_ref (state->hw[i], &nv40->screen->state[i]); +		if (state->hw[i]) +			so_emit(nv40->nvws, nv40->screen->state[i]); +		states &= ~(1ULL << i); +	} + +	if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) | +			    (1ULL << NV40_STATE_FRAGTEX0))) { +		BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); +		OUT_RING  (2); +		BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); +		OUT_RING  (1); +	} + +	state->dirty = 0; + +	so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]); +	for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { +		if (!(samplers & (1 << i))) +			continue; +		so_emit_reloc_markers(nv40->nvws, +				      state->hw[NV40_STATE_FRAGTEX0+i]); +		samplers &= ~(1ULL << i); +	} +	so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGPROG]); +	if (state->hw[NV40_STATE_VTXBUF] && nv40->render_mode == HW) +		so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]); +} + +boolean +nv40_state_validate(struct nv40_context *nv40) +{ +	boolean was_sw = nv40->fallback_swtnl ? TRUE : FALSE; + +	if (nv40->render_mode != HW) { +		/* Don't even bother trying to go back to hw if none +		 * of the states that caused swtnl previously have changed. 
+		 */ +		if ((nv40->fallback_swtnl & nv40->dirty) +				!= nv40->fallback_swtnl) +			return FALSE; + +		/* Attempt to go to hwtnl again */ +		nv40->pipe.flush(&nv40->pipe, 0, NULL); +		nv40->dirty |= (NV40_NEW_VIEWPORT | +				NV40_NEW_VERTPROG | +				NV40_NEW_ARRAYS); +		nv40->render_mode = HW; +	} + +	nv40_state_do_validate(nv40, render_states); +	if (nv40->fallback_swtnl || nv40->fallback_swrast) +		return FALSE; +	 +	if (was_sw) +		NOUVEAU_ERR("swtnl->hw\n"); + +	return TRUE; +} + +boolean +nv40_state_validate_swtnl(struct nv40_context *nv40) +{ +	struct draw_context *draw = nv40->draw; + +	/* Setup for swtnl */ +	if (nv40->render_mode == HW) { +		NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl); +		nv40->pipe.flush(&nv40->pipe, 0, NULL); +		nv40->dirty |= (NV40_NEW_VIEWPORT | +				NV40_NEW_VERTPROG | +				NV40_NEW_ARRAYS); +		nv40->render_mode = SWTNL; +	} + +	if (nv40->draw_dirty & NV40_NEW_VERTPROG) +		draw_bind_vertex_shader(draw, nv40->vertprog->draw); + +	if (nv40->draw_dirty & NV40_NEW_RAST) +		draw_set_rasterizer_state(draw, &nv40->rasterizer->pipe); + +	if (nv40->draw_dirty & NV40_NEW_UCP) +		draw_set_clip_state(draw, &nv40->clip); + +	if (nv40->draw_dirty & NV40_NEW_VIEWPORT) +		draw_set_viewport_state(draw, &nv40->viewport); + +	if (nv40->draw_dirty & NV40_NEW_ARRAYS) { +		draw_set_edgeflags(draw, nv40->edgeflags); +		draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf); +		draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt);	 +	} + +	nv40_state_do_validate(nv40, swtnl_states); +	if (nv40->fallback_swrast) { +		NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast); +		return FALSE; +	} + +	nv40->draw_dirty = 0; +	return TRUE; +} + diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c new file mode 100644 index 0000000000..5ebd3a1a56 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -0,0 +1,162 @@ +#include "nv40_context.h" +#include "nouveau/nouveau_util.h" + 
+static struct pipe_buffer * +nv40_surface_buffer(struct pipe_surface *surface) +{ +	struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; +	return mt->buffer; +} + +static boolean +nv40_state_framebuffer_validate(struct nv40_context *nv40) +{ +	struct pipe_framebuffer_state *fb = &nv40->framebuffer; +	struct nv04_surface *rt[4], *zeta; +	uint32_t rt_enable, rt_format; +	int i, colour_format = 0, zeta_format = 0; +	struct nouveau_stateobj *so = so_new(64, 10); +	unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; +	unsigned w = fb->width; +	unsigned h = fb->height; + +	rt_enable = 0; +	for (i = 0; i < fb->nr_cbufs; i++) { +		if (colour_format) { +			assert(colour_format == fb->cbufs[i]->format); +		} else { +			colour_format = fb->cbufs[i]->format; +			rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i); +			rt[i] = (struct nv04_surface *)fb->cbufs[i]; +		} +	} + +	if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | +			 NV40TCL_RT_ENABLE_COLOR3)) +		rt_enable |= NV40TCL_RT_ENABLE_MRT; + +	if (fb->zsbuf) { +		zeta_format = fb->zsbuf->format; +		zeta = (struct nv04_surface *)fb->zsbuf; +	} + +	if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { +		assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); +		for (i = 1; i < fb->nr_cbufs; i++) +			assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); + +		rt_format = NV40TCL_RT_FORMAT_TYPE_SWIZZLED | +		            log2i(fb->width) << NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT | +		            log2i(fb->height) << NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT; +	} +	else +		rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR; + +	switch (colour_format) { +	case PIPE_FORMAT_A8R8G8B8_UNORM: +	case 0: +		rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8; +		break; +	case PIPE_FORMAT_R5G6B5_UNORM: +		rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5; +		break; +	default: +		assert(0); +	} + +	switch (zeta_format) { +	case PIPE_FORMAT_Z16_UNORM: +		rt_format |= 
NV40TCL_RT_FORMAT_ZETA_Z16; +		break; +	case PIPE_FORMAT_Z24S8_UNORM: +	case 0: +		rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8; +		break; +	default: +		assert(0); +	} + +	if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { +		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR0, 1); +		so_reloc (so, nv40_surface_buffer(&rt[0]->base), 0, rt_flags | NOUVEAU_BO_OR, +			  nv40->nvws->channel->vram->handle, +			  nv40->nvws->channel->gart->handle); +		so_method(so, nv40->screen->curie, NV40TCL_COLOR0_PITCH, 2); +		so_data  (so, rt[0]->pitch); +		so_reloc (so, nv40_surface_buffer(&rt[0]->base), rt[0]->base.offset, rt_flags | +			  NOUVEAU_BO_LOW, 0, 0); +	} + +	if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { +		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR1, 1); +		so_reloc (so, nv40_surface_buffer(&rt[1]->base), 0, rt_flags | NOUVEAU_BO_OR, +			  nv40->nvws->channel->vram->handle, +			  nv40->nvws->channel->gart->handle); +		so_method(so, nv40->screen->curie, NV40TCL_COLOR1_OFFSET, 2); +		so_reloc (so, nv40_surface_buffer(&rt[1]->base), rt[1]->base.offset, rt_flags | +			  NOUVEAU_BO_LOW, 0, 0); +		so_data  (so, rt[1]->pitch); +	} + +	if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { +		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR2, 1); +		so_reloc (so, nv40_surface_buffer(&rt[2]->base), 0, rt_flags | NOUVEAU_BO_OR, +			  nv40->nvws->channel->vram->handle, +			  nv40->nvws->channel->gart->handle); +		so_method(so, nv40->screen->curie, NV40TCL_COLOR2_OFFSET, 1); +		so_reloc (so, nv40_surface_buffer(&rt[2]->base), rt[2]->base.offset, rt_flags | +			  NOUVEAU_BO_LOW, 0, 0); +		so_method(so, nv40->screen->curie, NV40TCL_COLOR2_PITCH, 1); +		so_data  (so, rt[2]->pitch); +	} + +	if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { +		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR3, 1); +		so_reloc (so, nv40_surface_buffer(&rt[3]->base), 0, rt_flags | NOUVEAU_BO_OR, +			  nv40->nvws->channel->vram->handle, +			  nv40->nvws->channel->gart->handle); +		so_method(so, 
nv40->screen->curie, NV40TCL_COLOR3_OFFSET, 1); +		so_reloc (so, nv40_surface_buffer(&rt[3]->base), rt[3]->base.offset, rt_flags | +			  NOUVEAU_BO_LOW, 0, 0); +		so_method(so, nv40->screen->curie, NV40TCL_COLOR3_PITCH, 1); +		so_data  (so, rt[3]->pitch); +	} + +	if (zeta_format) { +		so_method(so, nv40->screen->curie, NV40TCL_DMA_ZETA, 1); +		so_reloc (so, nv40_surface_buffer(&zeta->base), 0, rt_flags | NOUVEAU_BO_OR, +			  nv40->nvws->channel->vram->handle, +			  nv40->nvws->channel->gart->handle); +		so_method(so, nv40->screen->curie, NV40TCL_ZETA_OFFSET, 1); +		so_reloc (so, nv40_surface_buffer(&zeta->base), zeta->base.offset, rt_flags | +			  NOUVEAU_BO_LOW, 0, 0); +		so_method(so, nv40->screen->curie, NV40TCL_ZETA_PITCH, 1); +		so_data  (so, zeta->pitch); +	} + +	so_method(so, nv40->screen->curie, NV40TCL_RT_ENABLE, 1); +	so_data  (so, rt_enable); +	so_method(so, nv40->screen->curie, NV40TCL_RT_HORIZ, 3); +	so_data  (so, (w << 16) | 0); +	so_data  (so, (h << 16) | 0); +	so_data  (so, rt_format); +	so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_HORIZ, 2); +	so_data  (so, (w << 16) | 0); +	so_data  (so, (h << 16) | 0); +	so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); +	so_data  (so, ((w - 1) << 16) | 0); +	so_data  (so, ((h - 1) << 16) | 0); +	so_method(so, nv40->screen->curie, 0x1d88, 1); +	so_data  (so, (1 << 12) | h); + +	so_ref(so, &nv40->state.hw[NV40_STATE_FB]); +	return TRUE; +} + +struct nv40_state_entry nv40_state_framebuffer = { +	.validate = nv40_state_framebuffer_validate, +	.dirty = { +		.pipe = NV40_NEW_FB, +		.hw = NV40_STATE_FB +	} +}; diff --git a/src/gallium/drivers/nv40/nv40_state_rasterizer.c b/src/gallium/drivers/nv40/nv40_state_rasterizer.c new file mode 100644 index 0000000000..9ecda5990f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_rasterizer.c @@ -0,0 +1,17 @@ +#include "nv40_context.h" + +static boolean +nv40_state_rasterizer_validate(struct nv40_context *nv40) +{ +	so_ref(nv40->rasterizer->so, +	
       &nv40->state.hw[NV40_STATE_RAST]); +	return TRUE; +} + +struct nv40_state_entry nv40_state_rasterizer = { +	.validate = nv40_state_rasterizer_validate, +	.dirty = { +		.pipe = NV40_NEW_RAST, +		.hw = NV40_STATE_RAST +	} +}; diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c new file mode 100644 index 0000000000..285239ef41 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_scissor.c @@ -0,0 +1,35 @@ +#include "nv40_context.h" + +static boolean +nv40_state_scissor_validate(struct nv40_context *nv40) +{ +	struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; +	struct pipe_scissor_state *s = &nv40->scissor; +	struct nouveau_stateobj *so; + +	if (nv40->state.hw[NV40_STATE_SCISSOR] && +	    (rast->scissor == 0 && nv40->state.scissor_enabled == 0)) +		return FALSE; +	nv40->state.scissor_enabled = rast->scissor; + +	so = so_new(3, 0); +	so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2); +	if (nv40->state.scissor_enabled) { +		so_data  (so, ((s->maxx - s->minx) << 16) | s->minx); +		so_data  (so, ((s->maxy - s->miny) << 16) | s->miny); +	} else { +		so_data  (so, 4096 << 16); +		so_data  (so, 4096 << 16); +	} + +	so_ref(so, &nv40->state.hw[NV40_STATE_SCISSOR]); +	return TRUE; +} + +struct nv40_state_entry nv40_state_scissor = { +	.validate = nv40_state_scissor_validate, +	.dirty = { +		.pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST, +		.hw = NV40_STATE_SCISSOR +	} +}; diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c new file mode 100644 index 0000000000..b51024ad9b --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_stipple.c @@ -0,0 +1,39 @@ +#include "nv40_context.h" + +static boolean +nv40_state_stipple_validate(struct nv40_context *nv40) +{ +	struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe; +	struct nouveau_grobj *curie = nv40->screen->curie; +	struct nouveau_stateobj *so; + +	if (nv40->state.hw[NV40_STATE_STIPPLE] 
&& +	   (rast->poly_stipple_enable == 0 && nv40->state.stipple_enabled == 0)) +		return FALSE; + +	if (rast->poly_stipple_enable) { +		unsigned i; + +		so = so_new(35, 0); +		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); +		so_data  (so, 1); +		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); +		for (i = 0; i < 32; i++) +			so_data(so, nv40->stipple[i]); +	} else { +		so = so_new(2, 0); +		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); +		so_data  (so, 0); +	} + +	so_ref(so, &nv40->state.hw[NV40_STATE_STIPPLE]); +	return TRUE; +} + +struct nv40_state_entry nv40_state_stipple = { +	.validate = nv40_state_stipple_validate, +	.dirty = { +		.pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST, +		.hw = NV40_STATE_STIPPLE, +	} +}; diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c new file mode 100644 index 0000000000..869a55b405 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_viewport.c @@ -0,0 +1,67 @@ +#include "nv40_context.h" + +static boolean +nv40_state_viewport_validate(struct nv40_context *nv40) +{ +	struct pipe_viewport_state *vpt = &nv40->viewport; +	struct nouveau_stateobj *so; +	unsigned bypass; + +	if (nv40->render_mode == HW && !nv40->rasterizer->pipe.bypass_clipping) +		bypass = 0; +	else +		bypass = 1; + +	if (nv40->state.hw[NV40_STATE_VIEWPORT] && +	    (bypass || !(nv40->dirty & NV40_NEW_VIEWPORT)) && +	    nv40->state.viewport_bypass == bypass) +		return FALSE; +	nv40->state.viewport_bypass = bypass; + +	so = so_new(11, 0); +	if (!bypass) { +		so_method(so, nv40->screen->curie, +			  NV40TCL_VIEWPORT_TRANSLATE_X, 8); +		so_data  (so, fui(vpt->translate[0])); +		so_data  (so, fui(vpt->translate[1])); +		so_data  (so, fui(vpt->translate[2])); +		so_data  (so, fui(vpt->translate[3])); +		so_data  (so, fui(vpt->scale[0])); +		so_data  (so, fui(vpt->scale[1])); +		so_data  (so, fui(vpt->scale[2])); +		so_data  (so, fui(vpt->scale[3])); +		so_method(so, 
 nv40->screen->curie, 0x1d78, 1);
+		so_data  (so, 1);
+	} else {
+		so_method(so, nv40->screen->curie,
+			  NV40TCL_VIEWPORT_TRANSLATE_X, 8);
+		so_data  (so, fui(0.0));
+		so_data  (so, fui(0.0));
+		so_data  (so, fui(0.0));
+		so_data  (so, fui(0.0));
+		so_data  (so, fui(1.0));
+		so_data  (so, fui(1.0));
+		so_data  (so, fui(1.0));
+		so_data  (so, fui(0.0));
+		/* Not entirely certain what this is yet.  The DDX uses this
+		 * value also as it fixes rendering when you pass
+		 * pre-transformed vertices to the GPU.  My best guess is that
+		 * this bypasses some culling/clipping stage.  Might be worth
+		 * noting that points/lines are unaffected by whatever this
+		 * value fixes, only filled polygons are affected.
+		 */
+		so_method(so, nv40->screen->curie, 0x1d78, 1);
+		so_data  (so, 0x110);
+	}
+
+	so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]);
+	return TRUE;
+}
+
+struct nv40_state_entry nv40_state_viewport = {
+	.validate = nv40_state_viewport_validate,
+	.dirty = {
+		.pipe = NV40_NEW_VIEWPORT | NV40_NEW_RAST,
+		.hw = NV40_STATE_VIEWPORT
+	}
+}; diff --git a/src/gallium/drivers/nv40/nv40_state_zsa.c b/src/gallium/drivers/nv40/nv40_state_zsa.c new file mode 100644 index 0000000000..fb760677c8 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_zsa.c @@ -0,0 +1,17 @@ +#include "nv40_context.h"
+
+static boolean
+nv40_state_zsa_validate(struct nv40_context *nv40)
+{
+	so_ref(nv40->zsa->so,
+	       &nv40->state.hw[NV40_STATE_ZSA]);
+	return TRUE;
+}
+
+struct nv40_state_entry nv40_state_zsa = {
+	.validate = nv40_state_zsa_validate,
+	.dirty = {
+		.pipe = NV40_NEW_ZSA,
+		.hw = NV40_STATE_ZSA
+	}
+}; diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c new file mode 100644 index 0000000000..c4a5fb20d9 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_surface.c @@ -0,0 +1,72 @@ + +/************************************************************************** + *  
 * Copyright 2003 Tungsten Graphics, Inc., 
Cedar Park, Texas. + * All Rights Reserved. + *  + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + *  + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + *  + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *  + **************************************************************************/ + +#include "nv40_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +static void +nv40_surface_copy(struct pipe_context *pipe, boolean do_flip, +		  struct pipe_surface *dest, unsigned destx, unsigned desty, +		  struct pipe_surface *src, unsigned srcx, unsigned srcy, +		  unsigned width, unsigned height) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv04_surface_2d *eng2d = nv40->screen->eng2d; + +	if (do_flip) { +		desty += height; +		while (height--) { +			eng2d->copy(eng2d, dest, destx, desty--, src, +				    srcx, srcy++, width, 1); +		} +		return; +	} + +	eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); +} + +static void +nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, +		  unsigned destx, unsigned desty, unsigned width, +		  unsigned height, unsigned value) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nv04_surface_2d *eng2d = nv40->screen->eng2d; + +	eng2d->fill(eng2d, dest, destx, desty, width, height, value); +} + +void +nv40_init_surface_functions(struct nv40_context *nv40) +{ +	nv40->pipe.surface_copy = nv40_surface_copy; +	nv40->pipe.surface_fill = nv40_surface_fill; +} diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c new file mode 100644 index 0000000000..f762f32f0c --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_transfer.c @@ -0,0 +1,201 @@ +#include <pipe/p_state.h> +#include <pipe/p_defines.h> +#include <pipe/p_inlines.h> +#include <util/u_memory.h> +#include <nouveau/nouveau_winsys.h> +#include "nv40_context.h" +#include "nv40_screen.h" +#include "nv40_state.h" + +struct nv40_transfer { +	struct pipe_transfer base; +	struct pipe_surface *surface; +	bool direct; +}; + +static unsigned nv40_usage_tx_to_buf(unsigned tx_usage) +{ +	switch (tx_usage) { +		
case PIPE_TRANSFER_READ: +			return PIPE_BUFFER_USAGE_CPU_READ; +		case PIPE_TRANSFER_WRITE: +			return PIPE_BUFFER_USAGE_CPU_WRITE; +		case PIPE_TRANSFER_READ_WRITE: +			return PIPE_BUFFER_USAGE_CPU_READ_WRITE; +		default: +			assert(0); +	} + +	return -1; +} + +static void +nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level, +                             struct pipe_texture *template) +{ +	memset(template, 0, sizeof(struct pipe_texture)); +	template->target = pt->target; +	template->format = pt->format; +	template->width[0] = pt->width[level]; +	template->height[0] = pt->height[level]; +	template->depth[0] = 1; +	template->block = pt->block; +	template->nblocksx[0] = pt->nblocksx[level]; +	template->nblocksy[0] = pt->nblocksx[level]; +	template->last_level = 0; +	template->compressed = pt->compressed; +	template->nr_samples = pt->nr_samples; + +	template->tex_usage = PIPE_TEXTURE_USAGE_DYNAMIC | +	                      NOUVEAU_TEXTURE_USAGE_LINEAR; +} + +static struct pipe_transfer * +nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, +		  unsigned face, unsigned level, unsigned zslice, +		  enum pipe_transfer_usage usage, +		  unsigned x, unsigned y, unsigned w, unsigned h) +{ +	struct nv40_miptree *mt = (struct nv40_miptree *)pt; +	struct nv40_transfer *tx; +	struct pipe_texture tx_tex_template, *tx_tex; + +	tx = CALLOC_STRUCT(nv40_transfer); +	if (!tx) +		return NULL; + +	tx->base.refcount = 1; +	pipe_texture_reference(&tx->base.texture, pt); +	tx->base.format = pt->format; +	tx->base.x = x; +	tx->base.y = y; +	tx->base.width = w; +	tx->base.height = h; +	tx->base.block = pt->block; +	tx->base.nblocksx = pt->nblocksx[level]; +	tx->base.nblocksy = pt->nblocksy[level]; +	tx->base.stride = mt->level[level].pitch; +	tx->base.usage = usage; +	tx->base.face = face; +	tx->base.level = level; +	tx->base.zslice = zslice; + +	/* Direct access to texture */ +	if ((pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC || +	     
debug_get_bool_option("NOUVEAU_NO_TRANSFER", TRUE/*XXX:FALSE*/)) && +	    pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) +	{ +		tx->direct = true; +		tx->surface = pscreen->get_tex_surface(pscreen, pt, +	                                               face, level, zslice, +	                                               nv40_usage_tx_to_buf(usage)); +		return &tx->base; +	} + +	tx->direct = false; + +	nv40_compatible_transfer_tex(pt, level, &tx_tex_template); + +	tx_tex = pscreen->texture_create(pscreen, &tx_tex_template); +	if (!tx_tex) +	{ +		FREE(tx); +		return NULL; +	} + +	tx->surface = pscreen->get_tex_surface(pscreen, tx_tex, +	                                       0, 0, 0, +	                                       nv40_usage_tx_to_buf(usage)); + +	pipe_texture_reference(&tx_tex, NULL); + +	if (!tx->surface) +	{ +		pipe_surface_reference(&tx->surface, NULL); +		FREE(tx); +		return NULL; +	} + +	if (usage != PIPE_TRANSFER_WRITE) { +		struct nv40_screen *nvscreen = nv40_screen(pscreen); +		struct pipe_surface *src; + +		src = pscreen->get_tex_surface(pscreen, pt, +	                                       face, level, zslice, +	                                       PIPE_BUFFER_USAGE_GPU_READ); + +		/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ +		/* TODO: Check if SIFM can un-swizzle */ +		nvscreen->eng2d->copy(nvscreen->eng2d, +		                      tx->surface, 0, 0, +		                      src, 0, 0, +		                      src->width, src->height); + +		pipe_surface_reference(&src, NULL); +	} + +	return &tx->base; +} + +static void +nv40_transfer_del(struct pipe_screen *pscreen, struct pipe_transfer **pptx) +{ +	struct pipe_transfer *ptx = *pptx; +	struct nv40_transfer *tx = (struct nv40_transfer *)ptx; + +	if (!tx->direct && ptx->usage != PIPE_TRANSFER_READ) { +		struct nv40_screen *nvscreen = nv40_screen(pscreen); +		struct pipe_surface *dst; + +		dst = pscreen->get_tex_surface(pscreen, ptx->texture, +	                                   
    ptx->face, ptx->level, ptx->zslice, +	                                       PIPE_BUFFER_USAGE_GPU_WRITE); + +		/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */ +		nvscreen->eng2d->copy(nvscreen->eng2d, +		                      dst, 0, 0, +		                      tx->surface, 0, 0, +		                      dst->width, dst->height); + +		pipe_surface_reference(&dst, NULL); +	} + +	*pptx = NULL; +	if (--ptx->refcount) +		return; + +	pipe_surface_reference(&tx->surface, NULL); +	pipe_texture_reference(&ptx->texture, NULL); +	FREE(ptx); +} + +static void * +nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx) +{ +	struct nv40_transfer *tx = (struct nv40_transfer *)ptx; +	struct nv04_surface *ns = (struct nv04_surface *)tx->surface; +	struct nv40_miptree *mt = (struct nv40_miptree *)tx->surface->texture; +	void *map = pipe_buffer_map(pscreen, mt->buffer, +	                            nv40_usage_tx_to_buf(ptx->usage)); + +	return map + ns->base.offset + +	       ptx->y * ns->pitch + ptx->x * ptx->block.size; +} + +static void +nv40_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx) +{ +	struct nv40_transfer *tx = (struct nv40_transfer *)ptx; +	struct nv40_miptree *mt = (struct nv40_miptree *)tx->surface->texture; + +	pipe_buffer_unmap(pscreen, mt->buffer); +} + +void +nv40_screen_init_transfer_functions(struct pipe_screen *pscreen) +{ +	pscreen->get_tex_transfer = nv40_transfer_new; +	pscreen->tex_transfer_release = nv40_transfer_del; +	pscreen->transfer_map = nv40_transfer_map; +	pscreen->transfer_unmap = nv40_transfer_unmap; +} diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c new file mode 100644 index 0000000000..8f1834628f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -0,0 +1,555 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +#include "nouveau/nouveau_channel.h" +#include 
"nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_util.h" + +#define FORCE_SWTNL 0 + +static INLINE int +nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +{ +	switch (pipe) { +	case PIPE_FORMAT_R32_FLOAT: +	case PIPE_FORMAT_R32G32_FLOAT: +	case PIPE_FORMAT_R32G32B32_FLOAT: +	case PIPE_FORMAT_R32G32B32A32_FLOAT: +		*fmt = NV40TCL_VTXFMT_TYPE_FLOAT; +		break; +	case PIPE_FORMAT_R8_UNORM: +	case PIPE_FORMAT_R8G8_UNORM: +	case PIPE_FORMAT_R8G8B8_UNORM: +	case PIPE_FORMAT_R8G8B8A8_UNORM: +		*fmt = NV40TCL_VTXFMT_TYPE_UBYTE; +		break; +	case PIPE_FORMAT_R16_SSCALED: +	case PIPE_FORMAT_R16G16_SSCALED: +	case PIPE_FORMAT_R16G16B16_SSCALED: +	case PIPE_FORMAT_R16G16B16A16_SSCALED: +		*fmt = NV40TCL_VTXFMT_TYPE_USHORT; +		break; +	default: +		NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); +		return 1; +	} + +	switch (pipe) { +	case PIPE_FORMAT_R8_UNORM: +	case PIPE_FORMAT_R32_FLOAT: +	case PIPE_FORMAT_R16_SSCALED: +		*ncomp = 1; +		break; +	case PIPE_FORMAT_R8G8_UNORM: +	case PIPE_FORMAT_R32G32_FLOAT: +	case PIPE_FORMAT_R16G16_SSCALED: +		*ncomp = 2; +		break; +	case PIPE_FORMAT_R8G8B8_UNORM: +	case PIPE_FORMAT_R32G32B32_FLOAT: +	case PIPE_FORMAT_R16G16B16_SSCALED: +		*ncomp = 3; +		break; +	case PIPE_FORMAT_R8G8B8A8_UNORM: +	case PIPE_FORMAT_R32G32B32A32_FLOAT: +	case PIPE_FORMAT_R16G16B16A16_SSCALED: +		*ncomp = 4; +		break; +	default: +		NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); +		return 1; +	} + +	return 0; +} + +static boolean +nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, +		    unsigned ib_size) +{ +	struct pipe_screen *pscreen = &nv40->screen->pipe; +	unsigned type; + +	if (!ib) { +		nv40->idxbuf = NULL; +		nv40->idxbuf_format = 0xdeadbeef; +		return FALSE; +	} + +	if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) +		return FALSE; + +	switch (ib_size) { +	case 2: +		type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; +		break; +	case 4: +		type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; +		
break; +	default: +		return FALSE; +	} + +	if (ib != nv40->idxbuf || +	    type != nv40->idxbuf_format) { +		nv40->dirty |= NV40_NEW_ARRAYS; +		nv40->idxbuf = ib; +		nv40->idxbuf_format = type; +	} + +	return TRUE; +} + +static boolean +nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, +		       int attrib, struct pipe_vertex_element *ve, +		       struct pipe_vertex_buffer *vb) +{ +	struct pipe_winsys *ws = nv40->pipe.winsys; +	struct nouveau_grobj *curie = nv40->screen->curie; +	unsigned type, ncomp; +	void *map; + +	if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) +		return FALSE; + +	map  = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); +	map += vb->buffer_offset + ve->src_offset; + +	switch (type) { +	case NV40TCL_VTXFMT_TYPE_FLOAT: +	{ +		float *v = map; + +		switch (ncomp) { +		case 4: +			so_method(so, curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4); +			so_data  (so, fui(v[0])); +			so_data  (so, fui(v[1])); +			so_data  (so, fui(v[2])); +			so_data  (so, fui(v[3])); +			break; +		case 3: +			so_method(so, curie, NV40TCL_VTX_ATTR_3F_X(attrib), 3); +			so_data  (so, fui(v[0])); +			so_data  (so, fui(v[1])); +			so_data  (so, fui(v[2])); +			break; +		case 2: +			so_method(so, curie, NV40TCL_VTX_ATTR_2F_X(attrib), 2); +			so_data  (so, fui(v[0])); +			so_data  (so, fui(v[1])); +			break; +		case 1: +			so_method(so, curie, NV40TCL_VTX_ATTR_1F(attrib), 1); +			so_data  (so, fui(v[0])); +			break; +		default: +			ws->buffer_unmap(ws, vb->buffer); +			return FALSE; +		} +	} +		break; +	default: +		ws->buffer_unmap(ws, vb->buffer); +		return FALSE; +	} + +	ws->buffer_unmap(ws, vb->buffer); + +	return TRUE; +} + +boolean +nv40_draw_arrays(struct pipe_context *pipe, +		 unsigned mode, unsigned start, unsigned count) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nouveau_channel *chan = nv40->nvws->channel; +	unsigned restart; + +	nv40_vbo_set_idxbuf(nv40, NULL, 0); +	if (FORCE_SWTNL || 
!nv40_state_validate(nv40)) { +		return nv40_draw_elements_swtnl(pipe, NULL, 0, +						mode, start, count); +	} + +	while (count) { +		unsigned vc, nr; + +		nv40_state_emit(nv40); + +		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, +					mode, start, count, &restart); +		if (!vc) { +			FIRE_RING(NULL); +			continue; +		} + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (nvgl_primitive(mode)); + +		nr = (vc & 0xff); +		if (nr) { +			BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); +			OUT_RING  (((nr - 1) << 24) | start); +			start += nr; +		} + +		nr = vc >> 8; +		while (nr) { +			unsigned push = nr > 2047 ? 2047 : nr; + +			nr -= push; + +			BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push); +			while (push--) { +				OUT_RING(((0x100 - 1) << 24) | start); +				start += 0x100; +			} +		} + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (0); + +		count -= vc; +		start = restart; +	} + +	pipe->flush(pipe, 0, NULL); +	return TRUE; +} + +static INLINE void +nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, +		       unsigned mode, unsigned start, unsigned count) +{ +	struct nouveau_channel *chan = nv40->nvws->channel; + +	while (count) { +		uint8_t *elts = (uint8_t *)ib + start; +		unsigned vc, push, restart; + +		nv40_state_emit(nv40); + +		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, +					mode, start, count, &restart); +		if (vc == 0) { +			FIRE_RING(NULL); +			continue; +		} +		count -= vc; + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (nvgl_primitive(mode)); + +		if (vc & 1) { +			BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); +			OUT_RING  (elts[0]); +			elts++; vc--; +		} + +		while (vc) { +			unsigned i; + +			push = MIN2(vc, 2047 * 2); + +			BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); +			for (i = 0; i < push; i+=2) +				OUT_RING((elts[i+1] << 16) | elts[i]); + +			vc -= push; +			elts += push; +		} + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (0); + +		start = restart; 
+	} +} + +static INLINE void +nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, +		       unsigned mode, unsigned start, unsigned count) +{ +	struct nouveau_channel *chan = nv40->nvws->channel; + +	while (count) { +		uint16_t *elts = (uint16_t *)ib + start; +		unsigned vc, push, restart; + +		nv40_state_emit(nv40); + +		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, +					mode, start, count, &restart); +		if (vc == 0) { +			FIRE_RING(NULL); +			continue; +		} +		count -= vc; + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (nvgl_primitive(mode)); + +		if (vc & 1) { +			BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); +			OUT_RING  (elts[0]); +			elts++; vc--; +		} + +		while (vc) { +			unsigned i; + +			push = MIN2(vc, 2047 * 2); + +			BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); +			for (i = 0; i < push; i+=2) +				OUT_RING((elts[i+1] << 16) | elts[i]); + +			vc -= push; +			elts += push; +		} + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (0); + +		start = restart; +	} +} + +static INLINE void +nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, +		       unsigned mode, unsigned start, unsigned count) +{ +	struct nouveau_channel *chan = nv40->nvws->channel; + +	while (count) { +		uint32_t *elts = (uint32_t *)ib + start; +		unsigned vc, push, restart; + +		nv40_state_emit(nv40); + +		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, +					mode, start, count, &restart); +		if (vc == 0) { +			FIRE_RING(NULL); +			continue; +		} +		count -= vc; + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (nvgl_primitive(mode)); + +		while (vc) { +			push = MIN2(vc, 2047); + +			BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); +			OUT_RINGp    (elts, push); + +			vc -= push; +			elts += push; +		} + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (0); + +		start = restart; +	} +} + +static boolean +nv40_draw_elements_inline(struct pipe_context *pipe, +			  struct pipe_buffer *ib, unsigned 
ib_size, +			  unsigned mode, unsigned start, unsigned count) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct pipe_winsys *ws = pipe->winsys; +	void *map; + +	map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); +	if (!ib) { +		NOUVEAU_ERR("failed mapping ib\n"); +		return FALSE; +	} + +	switch (ib_size) { +	case 1: +		nv40_draw_elements_u08(nv40, map, mode, start, count); +		break; +	case 2: +		nv40_draw_elements_u16(nv40, map, mode, start, count); +		break; +	case 4: +		nv40_draw_elements_u32(nv40, map, mode, start, count); +		break; +	default: +		NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); +		break; +	} + +	ws->buffer_unmap(ws, ib); +	return TRUE; +} + +static boolean +nv40_draw_elements_vbo(struct pipe_context *pipe, +		       unsigned mode, unsigned start, unsigned count) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	struct nouveau_channel *chan = nv40->nvws->channel; +	unsigned restart; + +	while (count) { +		unsigned nr, vc; + +		nv40_state_emit(nv40); + +		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, +					mode, start, count, &restart); +		if (!vc) { +			FIRE_RING(NULL); +			continue; +		} +		 +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (nvgl_primitive(mode)); + +		nr = (vc & 0xff); +		if (nr) { +			BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1); +			OUT_RING  (((nr - 1) << 24) | start); +			start += nr; +		} + +		nr = vc >> 8; +		while (nr) { +			unsigned push = nr > 2047 ? 
2047 : nr; + +			nr -= push; + +			BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push); +			while (push--) { +				OUT_RING(((0x100 - 1) << 24) | start); +				start += 0x100; +			} +		} + +		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); +		OUT_RING  (0); + +		count -= vc; +		start = restart; +	} + +	return TRUE; +} + +boolean +nv40_draw_elements(struct pipe_context *pipe, +		   struct pipe_buffer *indexBuffer, unsigned indexSize, +		   unsigned mode, unsigned start, unsigned count) +{ +	struct nv40_context *nv40 = nv40_context(pipe); +	boolean idxbuf; + +	idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize); +	if (FORCE_SWTNL || !nv40_state_validate(nv40)) { +		return nv40_draw_elements_swtnl(pipe, NULL, 0, +						mode, start, count); +	} + +	if (idxbuf) { +		nv40_draw_elements_vbo(pipe, mode, start, count); +	} else { +		nv40_draw_elements_inline(pipe, indexBuffer, indexSize, +					  mode, start, count); +	} + +	pipe->flush(pipe, 0, NULL); +	return TRUE; +} + +static boolean +nv40_vbo_validate(struct nv40_context *nv40) +{ +	struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; +	struct nouveau_grobj *curie = nv40->screen->curie; +	struct pipe_buffer *ib = nv40->idxbuf; +	unsigned ib_format = nv40->idxbuf_format; +	unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; +	int hw; + +	if (nv40->edgeflags) { +		nv40->fallback_swtnl |= NV40_NEW_ARRAYS; +		return FALSE; +	} + +	vtxbuf = so_new(20, 18); +	so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr); +	vtxfmt = so_new(17, 0); +	so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr); + +	for (hw = 0; hw < nv40->vtxelt_nr; hw++) { +		struct pipe_vertex_element *ve; +		struct pipe_vertex_buffer *vb; +		unsigned type, ncomp; + +		ve = &nv40->vtxelt[hw]; +		vb = &nv40->vtxbuf[ve->vertex_buffer_index]; + +		if (!vb->stride) { +			if (!sattr) +				sattr = so_new(16 * 5, 0); + +			if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) { +				so_data(vtxbuf, 0); +				so_data(vtxfmt, 
NV40TCL_VTXFMT_TYPE_FLOAT); +				continue; +			} +		} + +		if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { +			nv40->fallback_swtnl |= NV40_NEW_ARRAYS; +			so_ref(NULL, &vtxbuf); +			so_ref(NULL, &vtxfmt); +			return FALSE; +		} + +		so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, +			 vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, +			 0, NV40TCL_VTXBUF_ADDRESS_DMA1); +		so_data (vtxfmt, ((vb->stride << NV40TCL_VTXFMT_STRIDE_SHIFT) | +				  (ncomp << NV40TCL_VTXFMT_SIZE_SHIFT) | type)); +	} + +	if (ib) { +		so_method(vtxbuf, curie, NV40TCL_IDXBUF_ADDRESS, 2); +		so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); +		so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, +			  0, NV40TCL_IDXBUF_FORMAT_DMA1); +	} + +	so_method(vtxbuf, curie, 0x1710, 1); +	so_data  (vtxbuf, 0); + +	so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]); +	nv40->state.dirty |= (1ULL << NV40_STATE_VTXBUF); +	so_ref(vtxfmt, &nv40->state.hw[NV40_STATE_VTXFMT]); +	nv40->state.dirty |= (1ULL << NV40_STATE_VTXFMT); +	so_ref(sattr, &nv40->state.hw[NV40_STATE_VTXATTR]); +	nv40->state.dirty |= (1ULL << NV40_STATE_VTXATTR); +	return FALSE; +} + +struct nv40_state_entry nv40_state_vbo = { +	.validate = nv40_vbo_validate, +	.dirty = { +		.pipe = NV40_NEW_ARRAYS, +		.hw = 0, +	} +}; + diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c new file mode 100644 index 0000000000..0862386638 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -0,0 +1,1070 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +/* TODO (at least...): + *  1. Indexed consts  + ARL + *  3. 
NV_vp11, NV_vp2, NV_vp3 features + *       - extra arith opcodes + *       - branching + *       - texture sampling + *       - indexed attribs + *       - indexed results + *  4. bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv40_shader.h" + +#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv40_sr_neg((s)) +#define abs(s) nv40_sr_abs((s)) + +#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) + +struct nv40_vpc { +	struct nv40_vertex_program *vp; + +	struct nv40_vertex_program_exec *vpi; + +	unsigned r_temps; +	unsigned r_temps_discard; +	struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; +	struct nv40_sreg *r_address; +	struct nv40_sreg *r_temp; + +	struct nv40_sreg *imm; +	unsigned nr_imm; + +	unsigned hpos_idx; +}; + +static struct nv40_sreg +temp(struct nv40_vpc *vpc) +{ +	int idx = ffs(~vpc->r_temps) - 1; + +	if (idx < 0) { +		NOUVEAU_ERR("out of temps!!\n"); +		assert(0); +		return nv40_sr(NV40SR_TEMP, 0); +	} + +	vpc->r_temps |= (1 << idx); +	vpc->r_temps_discard |= (1 << idx); +	return nv40_sr(NV40SR_TEMP, idx); +} + +static INLINE void +release_temps(struct nv40_vpc *vpc) +{ +	vpc->r_temps &= ~vpc->r_temps_discard; +	vpc->r_temps_discard = 0; +} + +static struct nv40_sreg +constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) +{ +	struct nv40_vertex_program *vp = vpc->vp; +	struct nv40_vertex_program_data *vpd; +	int idx; + +	if (pipe >= 0) { +		for (idx = 0; idx < vp->nr_consts; idx++) { +			if (vp->consts[idx].index == pipe) +				return nv40_sr(NV40SR_CONST, idx); +		} +	} + +	idx = vp->nr_consts++; +	vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); +	vpd = &vp->consts[idx]; + +	vpd->index = pipe; +	vpd->value[0] = x; +	vpd->value[1] = y; +	vpd->value[2] = z; +	
vpd->value[3] = w; +	return nv40_sr(NV40SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ +	nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) +{ +	struct nv40_vertex_program *vp = vpc->vp; +	uint32_t sr = 0; + +	switch (src.type) { +	case NV40SR_TEMP: +		sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT); +		sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT); +		break; +	case NV40SR_INPUT: +		sr |= (NV40_VP_SRC_REG_TYPE_INPUT << +		       NV40_VP_SRC_REG_TYPE_SHIFT); +		vp->ir |= (1 << src.index); +		hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT); +		break; +	case NV40SR_CONST: +		sr |= (NV40_VP_SRC_REG_TYPE_CONST << +		       NV40_VP_SRC_REG_TYPE_SHIFT); +		assert(vpc->vpi->const_index == -1 || +		       vpc->vpi->const_index == src.index); +		vpc->vpi->const_index = src.index; +		break; +	case NV40SR_NONE: +		sr |= (NV40_VP_SRC_REG_TYPE_INPUT << +		       NV40_VP_SRC_REG_TYPE_SHIFT); +		break; +	default: +		assert(0); +	} + +	if (src.negate) +		sr |= NV40_VP_SRC_NEGATE; + +	if (src.abs) +		hw[0] |= (1 << (21 + pos)); + +	sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) | +	       (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) | +	       (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) | +	       (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT)); + +	switch (pos) { +	case 0: +		hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >> +			  NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT; +		hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) << +			  NV40_VP_INST_SRC0L_SHIFT; +		break; +	case 1: +		hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT; +		break; +	case 2: +		hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >> +			  NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT; +		hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) << +			  NV40_VP_INST_SRC2L_SHIFT; +		break; +	default: +		assert(0); +	} +} + +static void +emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) +{ +	
struct nv40_vertex_program *vp = vpc->vp; + +	switch (dst.type) { +	case NV40SR_TEMP: +		hw[3] |= NV40_VP_INST_DEST_MASK; +		if (slot == 0) { +			hw[0] |= (dst.index << +				  NV40_VP_INST_VEC_DEST_TEMP_SHIFT); +		} else { +			hw[3] |= (dst.index <<  +				  NV40_VP_INST_SCA_DEST_TEMP_SHIFT); +		} +		break; +	case NV40SR_OUTPUT: +		switch (dst.index) { +		case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; +		case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; +		case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; +		case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; +		case NV40_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; +		case NV40_VP_INST_DEST_PSZ  : vp->or |= (1 << 5); break; +		case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; +		case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; +		case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; +		case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; +		case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; +		case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; +		case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; +		case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; +		case NV40_VP_INST_DEST_CLIP(0): +			vp->or |= (1 << 6); +			vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0; +			dst.index = NV40_VP_INST_DEST_FOGC; +			break; +		case NV40_VP_INST_DEST_CLIP(1): +			vp->or |= (1 << 7); +			vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1; +			dst.index = NV40_VP_INST_DEST_FOGC; +			break; +		case NV40_VP_INST_DEST_CLIP(2): +			vp->or |= (1 << 8); +			vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2; +			dst.index = NV40_VP_INST_DEST_FOGC; +			break; +		case NV40_VP_INST_DEST_CLIP(3): +			vp->or |= (1 << 9); +			vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3; +			dst.index = NV40_VP_INST_DEST_PSZ; +			break; +		case NV40_VP_INST_DEST_CLIP(4): +			vp->or |= (1 << 10); +			vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4; +			dst.index = 
NV40_VP_INST_DEST_PSZ; +			break; +		case NV40_VP_INST_DEST_CLIP(5): +			vp->or |= (1 << 11); +			vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5; +			dst.index = NV40_VP_INST_DEST_PSZ; +			break; +		default: +			break; +		} + +		hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); +		if (slot == 0) { +			hw[0] |= NV40_VP_INST_VEC_RESULT; +			hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); +		} else { +			hw[3] |= NV40_VP_INST_SCA_RESULT; +			hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; +		} +		break; +	default: +		assert(0); +	} +} + +static void +nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, +	      struct nv40_sreg dst, int mask, +	      struct nv40_sreg s0, struct nv40_sreg s1, +	      struct nv40_sreg s2) +{ +	struct nv40_vertex_program *vp = vpc->vp; +	uint32_t *hw; + +	vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); +	vpc->vpi = &vp->insns[vp->nr_insns - 1]; +	memset(vpc->vpi, 0, sizeof(*vpc->vpi)); +	vpc->vpi->const_index = -1; + +	hw = vpc->vpi->data; + +	hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); +	hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) | +		  (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) | +		  (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) | +		  (3 << NV40_VP_INST_COND_SWZ_W_SHIFT)); + +	if (slot == 0) { +		hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); +		hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; +		hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); +	} else { +		hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); +		hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); +		hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); +	} + +	emit_dst(vpc, hw, slot, dst); +	emit_src(vpc, hw, 0, s0); +	emit_src(vpc, hw, 1, s1); +	emit_src(vpc, hw, 2, s2); +} + +static INLINE struct nv40_sreg +tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { +	struct nv40_sreg src; + +	switch (fsrc->SrcRegister.File) { +	case TGSI_FILE_INPUT: +		src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index); +		break; +	case 
TGSI_FILE_CONSTANT: +		src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); +		break; +	case TGSI_FILE_IMMEDIATE: +		src = vpc->imm[fsrc->SrcRegister.Index]; +		break; +	case TGSI_FILE_TEMPORARY: +		src = vpc->r_temp[fsrc->SrcRegister.Index]; +		break; +	default: +		NOUVEAU_ERR("bad src file\n"); +		break; +	} + +	src.abs = fsrc->SrcRegisterExtMod.Absolute; +	src.negate = fsrc->SrcRegister.Negate; +	src.swz[0] = fsrc->SrcRegister.SwizzleX; +	src.swz[1] = fsrc->SrcRegister.SwizzleY; +	src.swz[2] = fsrc->SrcRegister.SwizzleZ; +	src.swz[3] = fsrc->SrcRegister.SwizzleW; +	return src; +} + +static INLINE struct nv40_sreg +tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { +	struct nv40_sreg dst; + +	switch (fdst->DstRegister.File) { +	case TGSI_FILE_OUTPUT: +		dst = vpc->r_result[fdst->DstRegister.Index]; +		break; +	case TGSI_FILE_TEMPORARY: +		dst = vpc->r_temp[fdst->DstRegister.Index]; +		break; +	case TGSI_FILE_ADDRESS: +		dst = vpc->r_address[fdst->DstRegister.Index]; +		break; +	default: +		NOUVEAU_ERR("bad dst file\n"); +		break; +	} + +	return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ +	int mask = 0; + +	if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; +	if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; +	if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; +	if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; +	return mask; +} + +static boolean +src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, +	       struct nv40_sreg *src) +{ +	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); +	struct nv40_sreg tgsi = tgsi_src(vpc, fsrc); +	uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; +	uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, +			fsrc->SrcRegisterExtSwz.NegateY, +			fsrc->SrcRegisterExtSwz.NegateZ, +			fsrc->SrcRegisterExtSwz.NegateW }; +	uint c; + +	for (c = 0; c < 4; c++) { +		switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { +		case TGSI_EXTSWIZZLE_X: +		case TGSI_EXTSWIZZLE_Y: +		
case TGSI_EXTSWIZZLE_Z: +		case TGSI_EXTSWIZZLE_W: +			mask |= tgsi_mask(1 << c); +			break; +		case TGSI_EXTSWIZZLE_ZERO: +			zero_mask |= tgsi_mask(1 << c); +			tgsi.swz[c] = SWZ_X; +			break; +		case TGSI_EXTSWIZZLE_ONE: +			one_mask |= tgsi_mask(1 << c); +			tgsi.swz[c] = SWZ_X; +			break; +		default: +			assert(0); +		} + +		if (!tgsi.negate && neg[c]) +			neg_mask |= tgsi_mask(1 << c); +	} + +	if (mask == MASK_ALL && !neg_mask) +		return TRUE; + +	*src = temp(vpc); + +	if (mask) +		arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none); + +	if (zero_mask) +		arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none); + +	if (one_mask) +		arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none); + +	if (neg_mask) { +		struct nv40_sreg one = temp(vpc); +		arith(vpc, 0, OP_STR, one, neg_mask, one, none, none); +		arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none); +	} + +	return FALSE; +} + +static boolean +nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, +				const struct tgsi_full_instruction *finst) +{ +	struct nv40_sreg src[3], dst, tmp; +	struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); +	int mask; +	int ai = -1, ci = -1, ii = -1; +	int i; + +	if (finst->Instruction.Opcode == TGSI_OPCODE_END) +		return TRUE; + +	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fsrc; + +		fsrc = &finst->FullSrcRegisters[i]; +		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { +			src[i] = tgsi_src(vpc, fsrc); +		} +	} + +	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { +		const struct tgsi_full_src_register *fsrc; + +		fsrc = &finst->FullSrcRegisters[i]; + +		switch (fsrc->SrcRegister.File) { +		case TGSI_FILE_INPUT: +		case TGSI_FILE_CONSTANT: +		case TGSI_FILE_TEMPORARY: +			if (!src_native_swz(vpc, fsrc, &src[i])) +				continue; +			break; +		default: +			break; +		} + +		switch (fsrc->SrcRegister.File) { +		case TGSI_FILE_INPUT: +			if (ai == -1 || ai == fsrc->SrcRegister.Index) { +				ai = 
fsrc->SrcRegister.Index; +				src[i] = tgsi_src(vpc, fsrc); +			} else { +				src[i] = temp(vpc); +				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				      tgsi_src(vpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_CONSTANT: +			if ((ci == -1 && ii == -1) || +			    ci == fsrc->SrcRegister.Index) { +				ci = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(vpc, fsrc); +			} else { +				src[i] = temp(vpc); +				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				      tgsi_src(vpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_IMMEDIATE: +			if ((ci == -1 && ii == -1) || +			    ii == fsrc->SrcRegister.Index) { +				ii = fsrc->SrcRegister.Index; +				src[i] = tgsi_src(vpc, fsrc); +			} else { +				src[i] = temp(vpc); +				arith(vpc, 0, OP_MOV, src[i], MASK_ALL, +				      tgsi_src(vpc, fsrc), none, none); +			} +			break; +		case TGSI_FILE_TEMPORARY: +			/* handled above */ +			break; +		default: +			NOUVEAU_ERR("bad src file\n"); +			return FALSE; +		} +	} + +	dst  = tgsi_dst(vpc, &finst->FullDstRegisters[0]); +	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + +	switch (finst->Instruction.Opcode) { +	case TGSI_OPCODE_ABS: +		arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); +		break; +	case TGSI_OPCODE_ADD: +		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); +		break; +	case TGSI_OPCODE_ARL: +		arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_DP3: +		arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DP4: +		arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DPH: +		arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_DST: +		arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_EX2: +		arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_EXP: +		arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); +		break; +	case 
TGSI_OPCODE_FLR: +		arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_FRC: +		arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_LG2: +		arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_LIT: +		arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_LOG: +		arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_MAD: +		arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); +		break; +	case TGSI_OPCODE_MAX: +		arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_MIN: +		arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_MOV: +		arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); +		break; +	case TGSI_OPCODE_MUL: +		arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_POW: +		tmp = temp(vpc); +		arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, +		      swz(src[0], X, X, X, X)); +		arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), +		      swz(src[1], X, X, X, X), none); +		arith(vpc, 1, OP_EX2, dst, mask, none, none, +		      swz(tmp, X, X, X, X)); +		break; +	case TGSI_OPCODE_RCP: +		arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); +		break; +	case TGSI_OPCODE_RET: +		break; +	case TGSI_OPCODE_RSQ: +		arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0])); +		break; +	case TGSI_OPCODE_SGE: +		arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SLT: +		arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); +		break; +	case TGSI_OPCODE_SUB: +		arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); +		break; +	case TGSI_OPCODE_XPD: +		tmp = temp(vpc); +		arith(vpc, 0, OP_MUL, tmp, mask, +		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); +		arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), +		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), +		      
neg(tmp)); +		break; +	default: +		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); +		return FALSE; +	} + +	release_temps(vpc); +	return TRUE; +} + +static boolean +nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, +				const struct tgsi_full_declaration *fdec) +{ +	unsigned idx = fdec->DeclarationRange.First; +	int hw; + +	switch (fdec->Semantic.SemanticName) { +	case TGSI_SEMANTIC_POSITION: +		hw = NV40_VP_INST_DEST_POS; +		vpc->hpos_idx = idx; +		break; +	case TGSI_SEMANTIC_COLOR: +		if (fdec->Semantic.SemanticIndex == 0) { +			hw = NV40_VP_INST_DEST_COL0; +		} else +		if (fdec->Semantic.SemanticIndex == 1) { +			hw = NV40_VP_INST_DEST_COL1; +		} else { +			NOUVEAU_ERR("bad colour semantic index\n"); +			return FALSE; +		} +		break; +	case TGSI_SEMANTIC_BCOLOR: +		if (fdec->Semantic.SemanticIndex == 0) { +			hw = NV40_VP_INST_DEST_BFC0; +		} else +		if (fdec->Semantic.SemanticIndex == 1) { +			hw = NV40_VP_INST_DEST_BFC1; +		} else { +			NOUVEAU_ERR("bad bcolour semantic index\n"); +			return FALSE; +		} +		break; +	case TGSI_SEMANTIC_FOG: +		hw = NV40_VP_INST_DEST_FOGC; +		break; +	case TGSI_SEMANTIC_PSIZE: +		hw = NV40_VP_INST_DEST_PSZ; +		break; +	case TGSI_SEMANTIC_GENERIC: +		if (fdec->Semantic.SemanticIndex <= 7) { +			hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); +		} else { +			NOUVEAU_ERR("bad generic semantic index\n"); +			return FALSE; +		} +		break; +	default: +		NOUVEAU_ERR("bad output semantic\n"); +		return FALSE; +	} + +	vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); +	return TRUE; +} + +static boolean +nv40_vertprog_prepare(struct nv40_vpc *vpc) +{ +	struct tgsi_parse_context p; +	int high_temp = -1, high_addr = -1, nr_imm = 0, i; + +	tgsi_parse_init(&p, vpc->vp->pipe.tokens); +	while (!tgsi_parse_end_of_tokens(&p)) { +		const union tgsi_full_token *tok = &p.FullToken; + +		tgsi_parse_token(&p); +		switch(tok->Token.Type) { +		case TGSI_TOKEN_TYPE_IMMEDIATE: +			nr_imm++; +			break; +		case 
TGSI_TOKEN_TYPE_DECLARATION: +		{ +			const struct tgsi_full_declaration *fdec; + +			fdec = &p.FullToken.FullDeclaration; +			switch (fdec->Declaration.File) { +			case TGSI_FILE_TEMPORARY: +				if (fdec->DeclarationRange.Last > high_temp) { +					high_temp = +						fdec->DeclarationRange.Last; +				} +				break; +#if 0 /* this would be nice.. except gallium doesn't track it */ +			case TGSI_FILE_ADDRESS: +				if (fdec->DeclarationRange.Last > high_addr) { +					high_addr = +						fdec->DeclarationRange.Last; +				} +				break; +#endif +			case TGSI_FILE_OUTPUT: +				if (!nv40_vertprog_parse_decl_output(vpc, fdec)) +					return FALSE; +				break; +			default: +				break; +			} +		} +			break; +#if 1 /* yay, parse instructions looking for address regs instead */ +		case TGSI_TOKEN_TYPE_INSTRUCTION: +		{ +			const struct tgsi_full_instruction *finst; +			const struct tgsi_full_dst_register *fdst; + +			finst = &p.FullToken.FullInstruction; +			fdst = &finst->FullDstRegisters[0]; + +			if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) { +				if (fdst->DstRegister.Index > high_addr) +					high_addr = fdst->DstRegister.Index; +			} +		 +		} +			break; +#endif +		default: +			break; +		} +	} +	tgsi_parse_free(&p); + +	if (nr_imm) { +		vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg)); +		assert(vpc->imm); +	} + +	if (++high_temp) { +		vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); +		for (i = 0; i < high_temp; i++) +			vpc->r_temp[i] = temp(vpc); +	} + +	if (++high_addr) { +		vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg)); +		for (i = 0; i < high_addr; i++) +			vpc->r_address[i] = temp(vpc); +	} + +	vpc->r_temps_discard = 0; +	return TRUE; +} + +static void +nv40_vertprog_translate(struct nv40_context *nv40, +			struct nv40_vertex_program *vp) +{ +	struct tgsi_parse_context parse; +	struct nv40_vpc *vpc = NULL; +	struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); +	int i; + +	vpc = CALLOC(1, sizeof(struct nv40_vpc)); +	if (!vpc) +		return; +	
vpc->vp = vp; + +	if (!nv40_vertprog_prepare(vpc)) { +		FREE(vpc); +		return; +	} + +	/* Redirect post-transform vertex position to a temp if user clip +	 * planes are enabled.  We need to append code the the vtxprog +	 * to handle clip planes later. +	 */ +	if (vp->ucp.nr)  { +		vpc->r_result[vpc->hpos_idx] = temp(vpc); +		vpc->r_temps_discard = 0; +	} + +	tgsi_parse_init(&parse, vp->pipe.tokens); + +	while (!tgsi_parse_end_of_tokens(&parse)) { +		tgsi_parse_token(&parse); + +		switch (parse.FullToken.Token.Type) { +		case TGSI_TOKEN_TYPE_IMMEDIATE: +		{ +			const struct tgsi_full_immediate *imm; + +			imm = &parse.FullToken.FullImmediate; +			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); +			assert(imm->Immediate.NrTokens == 4 + 1); +			vpc->imm[vpc->nr_imm++] = +				constant(vpc, -1, +					 imm->u.ImmediateFloat32[0].Float, +					 imm->u.ImmediateFloat32[1].Float, +					 imm->u.ImmediateFloat32[2].Float, +					 imm->u.ImmediateFloat32[3].Float); +		} +			break; +		case TGSI_TOKEN_TYPE_INSTRUCTION: +		{ +			const struct tgsi_full_instruction *finst; +			finst = &parse.FullToken.FullInstruction; +			if (!nv40_vertprog_parse_instruction(vpc, finst)) +				goto out_err; +		} +			break; +		default: +			break; +		} +	} + +	/* Write out HPOS if it was redirected to a temp earlier */ +	if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) { +		struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT, +						NV40_VP_INST_DEST_POS); +		struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + +		arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none); +	} + +	/* Insert code to handle user clip planes */ +	for (i = 0; i < vp->ucp.nr; i++) { +		struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT, +						NV40_VP_INST_DEST_CLIP(i)); +		struct nv40_sreg ceqn = constant(vpc, -1, +						 nv40->clip.ucp[i][0], +						 nv40->clip.ucp[i][1], +						 nv40->clip.ucp[i][2], +						 nv40->clip.ucp[i][3]); +		struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; +		unsigned mask; + +		switch (i) { 
+		case 0: case 3: mask = MASK_Y; break; +		case 1: case 4: mask = MASK_Z; break; +		case 2: case 5: mask = MASK_W; break; +		default: +			NOUVEAU_ERR("invalid clip dist #%d\n", i); +			goto out_err; +		} + +		arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none); +	} + +	vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST; +	vp->translated = TRUE; +out_err: +	tgsi_parse_free(&parse); +	if (vpc->r_temp) +		FREE(vpc->r_temp);  +	if (vpc->r_address) +		FREE(vpc->r_address);  +	if (vpc->imm)	 +		FREE(vpc->imm);  +	FREE(vpc); +} + +static boolean +nv40_vertprog_validate(struct nv40_context *nv40) +{  +	struct nouveau_winsys *nvws = nv40->nvws; +	struct pipe_winsys *ws = nv40->pipe.winsys; +	struct nouveau_grobj *curie = nv40->screen->curie; +	struct nv40_vertex_program *vp; +	struct pipe_buffer *constbuf; +	boolean upload_code = FALSE, upload_data = FALSE; +	int i; + +	if (nv40->render_mode == HW) { +		vp = nv40->vertprog; +		constbuf = nv40->constbuf[PIPE_SHADER_VERTEX]; + +		if ((nv40->dirty & NV40_NEW_UCP) || +		    memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) { +			nv40_vertprog_destroy(nv40, vp); +			memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp)); +		} +	} else { +		vp = nv40->swtnl.vertprog; +		constbuf = NULL; +	} + +	/* Translate TGSI shader into hw bytecode */ +	if (vp->translated) +		goto check_gpu_resources; + +	nv40->fallback_swtnl &= ~NV40_NEW_VERTPROG; +	nv40_vertprog_translate(nv40, vp); +	if (!vp->translated) { +		nv40->fallback_swtnl |= NV40_NEW_VERTPROG; +		return FALSE; +	} + +check_gpu_resources: +	/* Allocate hw vtxprog exec slots */ +	if (!vp->exec) { +		struct nouveau_resource *heap = nv40->screen->vp_exec_heap; +		struct nouveau_stateobj *so; +		uint vplen = vp->nr_insns; + +		if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { +			while (heap->next && heap->size < vplen) { +				struct nv40_vertex_program *evict; +				 +				evict = heap->next->priv; +				nvws->res_free(&evict->exec); +			} + +			if (nvws->res_alloc(heap, vplen, vp, 
&vp->exec)) +				assert(0); +		} + +		so = so_new(7, 0); +		so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1); +		so_data  (so, vp->exec->start); +		so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2); +		so_data  (so, vp->ir); +		so_data  (so, vp->or); +		so_method(so, curie,  NV40TCL_CLIP_PLANE_ENABLE, 1); +		so_data  (so, vp->clip_ctrl); +		so_ref(so, &vp->so); + +		upload_code = TRUE; +	} + +	/* Allocate hw vtxprog const slots */ +	if (vp->nr_consts && !vp->data) { +		struct nouveau_resource *heap = nv40->screen->vp_data_heap; + +		if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { +			while (heap->next && heap->size < vp->nr_consts) { +				struct nv40_vertex_program *evict; +				 +				evict = heap->next->priv; +				nvws->res_free(&evict->data); +			} + +			if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) +				assert(0); +		} + +		/*XXX: handle this some day */ +		assert(vp->data->start >= vp->data_start_min); + +		upload_data = TRUE; +		if (vp->data_start != vp->data->start) +			upload_code = TRUE; +	} + +	/* If exec or data segments moved we need to patch the program to +	 * fixup offsets and register IDs. 
+	 */ +	if (vp->exec_start != vp->exec->start) { +		for (i = 0; i < vp->nr_insns; i++) { +			struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + +			if (vpi->has_branch_offset) { +				assert(0); +			} +		} + +		vp->exec_start = vp->exec->start; +	} + +	if (vp->nr_consts && vp->data_start != vp->data->start) { +		for (i = 0; i < vp->nr_insns; i++) { +			struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + +			if (vpi->const_index >= 0) { +				vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK; +				vpi->data[1] |= +					(vpi->const_index + vp->data->start) << +					NV40_VP_INST_CONST_SRC_SHIFT; + +			} +		} + +		vp->data_start = vp->data->start; +	} + +	/* Update + Upload constant values */ +	if (vp->nr_consts) { +		float *map = NULL; + +		if (constbuf) { +			map = ws->buffer_map(ws, constbuf, +					     PIPE_BUFFER_USAGE_CPU_READ); +		} + +		for (i = 0; i < vp->nr_consts; i++) { +			struct nv40_vertex_program_data *vpd = &vp->consts[i]; + +			if (vpd->index >= 0) { +				if (!upload_data && +				    !memcmp(vpd->value, &map[vpd->index * 4], +					    4 * sizeof(float))) +					continue; +				memcpy(vpd->value, &map[vpd->index * 4], +				       4 * sizeof(float)); +			} + +			BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); +			OUT_RING  (i + vp->data->start); +			OUT_RINGp ((uint32_t *)vpd->value, 4); +		} + +		if (constbuf) +			ws->buffer_unmap(ws, constbuf); +	} + +	/* Upload vtxprog */ +	if (upload_code) { +#if 0 +		for (i = 0; i < vp->nr_insns; i++) { +			NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); +			NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); +			NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); +			NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); +		} +#endif +		BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); +		OUT_RING  (vp->exec->start); +		for (i = 0; i < vp->nr_insns; i++) { +			BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4); +			OUT_RINGp (vp->insns[i].data, 4); +		} +	} + +	if (vp->so != 
nv40->state.hw[NV40_STATE_VERTPROG]) { +		so_ref(vp->so, &nv40->state.hw[NV40_STATE_VERTPROG]); +		return TRUE; +	} + +	return FALSE; +} + +void +nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) +{ +	struct nouveau_winsys *nvws = nv40->screen->nvws; + +	vp->translated = FALSE; + +	if (vp->nr_insns) { +		FREE(vp->insns); +		vp->insns = NULL; +		vp->nr_insns = 0; +	} + +	if (vp->nr_consts) { +		FREE(vp->consts); +		vp->consts = NULL; +		vp->nr_consts = 0; +	} + +	nvws->res_free(&vp->exec); +	vp->exec_start = 0; +	nvws->res_free(&vp->data); +	vp->data_start = 0; +	vp->data_start_min = 0; + +	vp->ir = vp->or = vp->clip_ctrl = 0; +	so_ref(NULL, &vp->so); +} + +struct nv40_state_entry nv40_state_vertprog = { +	.validate = nv40_vertprog_validate, +	.dirty = { +		.pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP, +		.hw = NV40_STATE_VERTPROG, +	} +}; +  | 
