diff options
| author | Luca Barbieri <luca@luca-barbieri.com> | 2010-09-05 05:42:59 +0200 | 
|---|---|---|
| committer | Luca Barbieri <luca@luca-barbieri.com> | 2010-09-05 17:52:25 +0200 | 
| commit | 8e2badfc269082f4b52a82ac1c5b4350bef0d01b (patch) | |
| tree | 29483004c748edea67ba8fd9041251c780eb882b | |
| parent | 43cfc1ed8ef489b1d6077fcabbce1b91830b5e55 (diff) | |
nvfx: add rewritten swtnl support
The old swtnl code was broken by the new shader linkage support for
GLSL.
This is a rewrite of swtnl support, which should instead work properly,
be faster, and be closer to the much better-tested hardware pipeline.
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_context.h | 12 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_draw.c | 236 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_fragprog.c | 2 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_screen.c | 2 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_state.h | 19 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_state_emit.c | 31 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_vbo.c | 38 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_vertprog.c | 159 | 
8 files changed, 237 insertions, 262 deletions
| diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index b837437c58..369c216388 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -151,14 +151,6 @@ struct nvfx_context {  	/* HW state derived from pipe states */  	struct nvfx_state state; -	struct { -		struct nvfx_vertex_program *vertprog; - -		unsigned nr_attribs; -		unsigned hw[PIPE_MAX_SHADER_INPUTS]; -		unsigned draw[PIPE_MAX_SHADER_INPUTS]; -		unsigned emit[PIPE_MAX_SHADER_INPUTS]; -	} swtnl;  	enum {  		HW, SWTNL, SWRAST @@ -170,7 +162,7 @@ struct nvfx_context {  	struct pipe_scissor_state scissor;  	unsigned stipple[32];  	struct pipe_clip_state clip; -	struct nvfx_vertex_program *vertprog; +	struct nvfx_pipe_vertex_program *vertprog;  	struct nvfx_pipe_fragment_program *fragprog;  	struct pipe_resource *constbuf[PIPE_SHADER_TYPES];  	unsigned constbuf_nr[PIPE_SHADER_TYPES]; @@ -208,6 +200,7 @@ struct nvfx_context {  	int hw_pointsprite_control;  	int hw_vp_output;  	struct nvfx_fragment_program* hw_fragprog; +	struct nvfx_vertex_program* hw_vertprog;  	unsigned relocs_needed;  }; @@ -326,6 +319,7 @@ extern void nvfx_init_transfer_functions(struct pipe_context *pipe);  /* nvfx_vbo.c */  extern boolean nvfx_vbo_validate(struct nvfx_context *nvfx); +extern void nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx);  extern void nvfx_vbo_relocate(struct nvfx_context *nvfx);  extern void nvfx_idxbuf_validate(struct nvfx_context* nvfx);  extern void nvfx_idxbuf_relocate(struct nvfx_context* nvfx); diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c index 2601d5b8e2..4bf38a9c18 100644 --- a/src/gallium/drivers/nvfx/nvfx_draw.c +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -1,6 +1,5 @@  #include "pipe/p_shader_tokens.h"  #include "util/u_inlines.h" -#include "tgsi/tgsi_ureg.h"  #include "util/u_pack_color.h" @@ -11,11 +10,6 @@  #include "nvfx_context.h"  #include "nvfx_resource.h" -/* 
Simple, but crappy, swtnl path, hopefully we wont need to hit this very - * often at all.  Uses "quadro style" vertex submission + a fixed vertex - * layout to avoid the need to generate a vertex program or vtxfmt. - */ -  struct nvfx_render_stage {  	struct draw_stage stage;  	struct nvfx_context *nvfx; @@ -28,58 +22,18 @@ nvfx_render_stage(struct draw_stage *stage)  	return (struct nvfx_render_stage *)stage;  } -static INLINE void -nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v) +static void +nvfx_render_flush(struct draw_stage *stage, unsigned flags)  { -	struct nvfx_screen *screen = nvfx->screen; -	struct nouveau_channel *chan = screen->base.channel; -	struct nouveau_grobj *eng3d = screen->eng3d; -	unsigned i; - -	for (i = 0; i < nvfx->swtnl.nr_attribs; i++) { -		unsigned idx = nvfx->swtnl.draw[i]; -		unsigned hw = nvfx->swtnl.hw[i]; +	struct nvfx_render_stage *rs = nvfx_render_stage(stage); +	struct nvfx_context *nvfx = rs->nvfx; +	struct nouveau_channel *chan = nvfx->screen->base.channel; -		WAIT_RING(chan, 5); -		switch (nvfx->swtnl.emit[i]) { -		case EMIT_OMIT: -			break; -		case EMIT_1F: -			nvfx_emit_vtx_attr(chan, hw, v->data[idx], 1); -			break; -		case EMIT_2F: -			nvfx_emit_vtx_attr(chan, hw, v->data[idx], 2); -			break; -		case EMIT_3F: -			nvfx_emit_vtx_attr(chan, hw, v->data[idx], 3); -			break; -		case EMIT_4F: -			nvfx_emit_vtx_attr(chan, hw, v->data[idx], 4); -			break; -		case 0xff: -			BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4); -			OUT_RING  (chan, fui(v->data[idx][0] / v->data[idx][3])); -			OUT_RING  (chan, fui(v->data[idx][1] / v->data[idx][3])); -			OUT_RING  (chan, fui(v->data[idx][2] / v->data[idx][3])); -			OUT_RING  (chan, fui(1.0f / v->data[idx][3])); -			break; -		case EMIT_4UB: -			BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1); -			OUT_RING  (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), -					    float_to_ubyte(v->data[idx][1]), -					    float_to_ubyte(v->data[idx][2]), -					    
float_to_ubyte(v->data[idx][3]))); -		case EMIT_4UB_BGRA: -			BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1); -			OUT_RING  (chan, pack_ub4(float_to_ubyte(v->data[idx][2]), -					    float_to_ubyte(v->data[idx][1]), -					    float_to_ubyte(v->data[idx][0]), -					    float_to_ubyte(v->data[idx][3]))); -			break; -		default: -			assert(0); -			break; -		} +	if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { +		assert(AVAIL_RING(chan) >= 2); +		OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); +		OUT_RING(chan, NV34TCL_VERTEX_BEGIN_END_STOP); +		rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP;  	}  } @@ -92,42 +46,61 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,  	struct nvfx_screen *screen = nvfx->screen;  	struct nouveau_channel *chan = screen->base.channel; -	struct nouveau_grobj *eng3d = screen->eng3d; -	unsigned i; +	boolean no_elements = nvfx->vertprog->draw_no_elements; +	unsigned num_attribs = nvfx->vertprog->draw_elements; -	/* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ -	if (AVAIL_RING(chan) < ((count * 20) + 6)) { -		if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { -			NOUVEAU_ERR("AIII, missed flush\n"); -			assert(0); -		} +	/* we need to account the flush as well here even if it is done afterthis +	 * function +	 */ +	if (AVAIL_RING(chan) < ((1 + count * num_attribs * 4) + 6 + 64)) { +		nvfx_render_flush(stage, 0);  		FIRE_RING(chan);  		nvfx_state_emit(nvfx); + +		assert(AVAIL_RING(chan) >= ((1 + count * num_attribs * 4) + 6 + 64));  	}  	/* Switch primitive modes if necessary */  	if (rs->prim != mode) {  		if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { -			BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); -			OUT_RING  (chan, NV34TCL_VERTEX_BEGIN_END_STOP); +			OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); +			OUT_RING(chan, NV34TCL_VERTEX_BEGIN_END_STOP); +		} + +		/* XXX: any command a lot of times seems to (mostly) fix corruption that would otherwise happen */ +		/* this seems to cause 
issues on nv3x, and also be unneeded there */ +		if(nvfx->is_nv4x) +		{ +			int i; +			for(i = 0; i < 32; ++i) +			{ +				OUT_RING(chan, RING_3D(0x1dac, 1)); +				OUT_RING(chan, 0); +			}  		} -		BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); +		OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1));  		OUT_RING  (chan, mode);  		rs->prim = mode;  	} -	/* Emit vertex data */ -	for (i = 0; i < count; i++) -		nvfx_render_vertex(nvfx, prim->v[i]); - -	/* If it's likely we'll need to empty the push buffer soon, finish -	 * off the primitive now. -	 */ -	if (AVAIL_RING(chan) < ((count * 20) + 6)) { -		BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); -		OUT_RING  (chan, NV34TCL_VERTEX_BEGIN_END_STOP); -		rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; +	OUT_RING(chan, RING_3D_NI(NV34TCL_VERTEX_DATA, num_attribs * 4 * count)); +	if(no_elements) { +		OUT_RING(chan, 0); +		OUT_RING(chan, 0); +		OUT_RING(chan, 0); +		OUT_RING(chan, 0); +	} else { +		for (unsigned i = 0; i < count; ++i) +		{ +			struct vertex_header* v = prim->v[i]; +			/* TODO: disable divide where it's causing the problem, and remove this hack */ +			OUT_RING(chan, fui(v->data[0][0] / v->data[0][3])); +			OUT_RING(chan, fui(v->data[0][1] / v->data[0][3])); +			OUT_RING(chan, fui(v->data[0][2] / v->data[0][3])); +			OUT_RING(chan, fui(1.0f / v->data[0][3])); +			OUT_RINGp(chan, &v->data[1][0], 4 * (num_attribs - 1)); +		}  	}  } @@ -150,24 +123,10 @@ nvfx_render_tri(struct draw_stage *draw, struct prim_header *prim)  }  static void -nvfx_render_flush(struct draw_stage *draw, unsigned flags) -{ -	struct nvfx_render_stage *rs = nvfx_render_stage(draw); -	struct nvfx_context *nvfx = rs->nvfx; -	struct nvfx_screen *screen = nvfx->screen; -	struct nouveau_channel *chan = screen->base.channel; -	struct nouveau_grobj *eng3d = screen->eng3d; - -	if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { -		BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); -		OUT_RING  (chan, NV34TCL_VERTEX_BEGIN_END_STOP); -		rs->prim 
= NV34TCL_VERTEX_BEGIN_END_STOP; -	} -} - -static void  nvfx_render_reset_stipple_counter(struct draw_stage *draw)  { +	/* this doesn't really seem to work, but it matters rather little */ +	nvfx_render_flush(draw, 0);  }  static void @@ -176,40 +135,11 @@ nvfx_render_destroy(struct draw_stage *draw)  	FREE(draw);  } -static struct nvfx_vertex_program * -nvfx_create_drawvp(struct nvfx_context *nvfx) -{ -	struct ureg_program *ureg; -	uint i; - -	ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); -	if (ureg == NULL) -		return NULL; - -	ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0), ureg_DECL_vs_input(ureg, 0)); -	ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0), ureg_DECL_vs_input(ureg, 3)); -	ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1), ureg_DECL_vs_input(ureg, 4)); -	ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 0), ureg_DECL_vs_input(ureg, 3)); -	ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 1), ureg_DECL_vs_input(ureg, 4)); -	ureg_MOV(ureg, -		   ureg_writemask(ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 1), TGSI_WRITEMASK_X), -		   ureg_DECL_vs_input(ureg, 5)); -	for (i = 0; i < 8; ++i) -		ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, i), ureg_DECL_vs_input(ureg, 8 + i)); - -	ureg_END( ureg ); - -	return ureg_create_shader_and_destroy( ureg, &nvfx->pipe ); -} -  struct draw_stage *  nvfx_draw_render_stage(struct nvfx_context *nvfx)  {  	struct nvfx_render_stage *render = CALLOC_STRUCT(nvfx_render_stage); -	if (!nvfx->swtnl.vertprog) -		nvfx->swtnl.vertprog = nvfx_create_drawvp(nvfx); -  	render->nvfx = nvfx;  	render->stage.draw = nvfx->draw;  	render->stage.point = nvfx_render_point; @@ -231,6 +161,7 @@ nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info  	if (!nvfx_state_validate_swtnl(nvfx))  		return; +  	nvfx_state_emit(nvfx);  	/* these must be passed without adding the offsets */ @@ -256,62 +187,3 @@ nvfx_draw_vbo_swtnl(struct pipe_context 
*pipe, const struct pipe_draw_info* info  	draw_flush(nvfx->draw);  } - -static INLINE void -emit_attrib(struct nvfx_context *nvfx, unsigned hw, unsigned emit, -	    unsigned semantic, unsigned index) -{ -	unsigned draw_out = draw_find_shader_output(nvfx->draw, semantic, index); -	unsigned a = nvfx->swtnl.nr_attribs++; - -	nvfx->swtnl.hw[a] = hw; -	nvfx->swtnl.emit[a] = emit; -	nvfx->swtnl.draw[a] = draw_out; -} - -void -nvfx_vtxfmt_validate(struct nvfx_context *nvfx) -{ -	struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog; -	unsigned colour = 0, texcoords = 0, fog = 0, i; - -	/* Determine needed fragprog inputs */ -	for (i = 0; i < pfp->info.num_inputs; i++) { -		switch (pfp->info.input_semantic_name[i]) { -		case TGSI_SEMANTIC_POSITION: -			break; -		case TGSI_SEMANTIC_COLOR: -			colour |= (1 << pfp->info.input_semantic_index[i]); -			break; -		case TGSI_SEMANTIC_GENERIC: -			texcoords |= (1 << pfp->info.input_semantic_index[i]); -			break; -		case TGSI_SEMANTIC_FOG: -			fog = 1; -			break; -		default: -			assert(0); -		} -	} - -	nvfx->swtnl.nr_attribs = 0; - -	/* Map draw vtxprog output to hw attribute IDs */ -	for (i = 0; i < 2; i++) { -		if (!(colour & (1 << i))) -			continue; -		emit_attrib(nvfx, 3 + i, EMIT_4F, TGSI_SEMANTIC_COLOR, i); -	} - -	for (i = 0; i < 8; i++) { -		if (!(texcoords & (1 << i))) -			continue; -		emit_attrib(nvfx, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); -	} - -	if (fog) { -		emit_attrib(nvfx, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); -	} - -	emit_attrib(nvfx, 0, 0xff, TGSI_SEMANTIC_POSITION, 0); -} diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index 93ba538241..86df7f0049 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -1263,7 +1263,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx)  		pfp->fps[key] = fp;  	} -	vp = nvfx->render_mode == HW ? 
nvfx->vertprog : nvfx->swtnl.vertprog; +	vp = nvfx->hw_vertprog;  	if (fp->last_vp_id != vp->id || fp->last_sprite_coord_enable != sprite_coord_enable) {  		int sprite_real_input = -1; diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index ac8053f26b..3900821de4 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -432,7 +432,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)  		return NULL;  	} -	screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE); +	screen->force_swtnl = debug_get_bool_option("NVFX_SWTNL", FALSE);  	screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE);  	screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384); diff --git a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h index 9200f78a54..8fafca1950 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.h +++ b/src/gallium/drivers/nvfx/nvfx_state.h @@ -17,13 +17,8 @@ struct nvfx_vertex_program_data {  };  struct nvfx_vertex_program { -	struct pipe_shader_state pipe;  	unsigned long long id; -	struct draw_vertex_shader *draw; - -	boolean translated; -  	struct nvfx_vertex_program_exec *insns;  	unsigned nr_insns;  	struct nvfx_vertex_program_data *consts; @@ -46,6 +41,20 @@ struct nvfx_vertex_program {  	struct util_dynarray const_relocs;  }; +#define NVFX_VP_FAILED ((struct nvfx_vertex_program*)-1) + +struct nvfx_pipe_vertex_program { +	struct pipe_shader_state pipe; +	struct tgsi_shader_info info; + +	unsigned draw_elements; +	boolean draw_no_elements; +	struct draw_vertex_shader *draw_vs; +	struct nvfx_vertex_program* draw_vp; + +	struct nvfx_vertex_program* vp; +}; +  struct nvfx_fragment_program_data {  	unsigned offset;  	unsigned index; diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 308c25fbe1..30ef12a95b 100644 --- 
a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -145,7 +145,7 @@ nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx)  {  	struct nouveau_channel* chan = nvfx->screen->base.channel;  	unsigned i; -	struct nvfx_vertex_program* vp = nvfx->vertprog; +	struct nvfx_vertex_program* vp = nvfx->hw_vertprog;  	if(nvfx->clip.nr != vp->clip_nr)  	{  		unsigned idx; @@ -230,7 +230,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)  	if(nvfx->render_mode == HW)  	{ -		if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_VERTCONST | NVFX_NEW_UCP)) +		if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_VERTCONST))  		{  			if(!nvfx_vertprog_validate(nvfx))  				return FALSE; @@ -252,12 +252,10 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)  	}  	else  	{ -		/* TODO: this looks a bit misdesigned */ -		if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP)) -			nvfx_vertprog_validate(nvfx); - -		if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_INDEX | NVFX_NEW_FRAGPROG)) -			nvfx_vtxfmt_validate(nvfx); +		if(dirty & NVFX_NEW_VERTPROG) { +			assert(nvfx_vertprog_validate(nvfx)); +			nvfx_vbo_swtnl_validate(nvfx); +		}  	}  	if(dirty & NVFX_NEW_RAST) @@ -284,7 +282,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)  	if(nvfx->is_nv4x)  	{ -		unsigned vp_output = nvfx->vertprog->or | nvfx->hw_fragprog->or; +		unsigned vp_output = nvfx->hw_vertprog->or | nvfx->hw_fragprog->or;  		vp_output |= (1 << (nvfx->clip.nr + 6)) - (1 << 6);  		if(vp_output != nvfx->hw_vp_output) @@ -399,8 +397,6 @@ nvfx_state_relocate(struct nvfx_context *nvfx, unsigned relocs)  boolean  nvfx_state_validate(struct nvfx_context *nvfx)  { -	boolean was_sw = nvfx->fallback_swtnl ? TRUE : FALSE; -  	if (nvfx->render_mode != HW) {  		/* Don't even bother trying to go back to hw if none  		 * of the states that caused swtnl previously have changed. 
@@ -429,7 +425,11 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx)  	/* Setup for swtnl */  	if (nvfx->render_mode == HW) { -		NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl); +		static boolean warned = FALSE; +		if(!warned) { +			NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl); +			warned = TRUE; +		}  		nvfx->pipe.flush(&nvfx->pipe, 0, NULL);  		nvfx->dirty |= (NVFX_NEW_VIEWPORT |  				NVFX_NEW_VERTPROG | @@ -437,8 +437,11 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx)  		nvfx->render_mode = SWTNL;  	} -	if (nvfx->draw_dirty & NVFX_NEW_VERTPROG) -		draw_bind_vertex_shader(draw, nvfx->vertprog->draw); +	if (nvfx->draw_dirty & NVFX_NEW_VERTPROG) { +		if(!nvfx->vertprog->draw_vs) +			nvfx->vertprog->draw_vs = draw_create_vertex_shader(draw, &nvfx->vertprog->pipe); +		draw_bind_vertex_shader(draw, nvfx->vertprog->draw_vs); +	}  	if (nvfx->draw_dirty & NVFX_NEW_RAST)             draw_set_rasterizer_state(draw, &nvfx->rasterizer->pipe, diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 611de808af..c35e926a7a 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -339,6 +339,44 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)  }  void +nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx) +{ +	struct nouveau_channel* chan = nvfx->screen->base.channel; +	unsigned num_outputs = nvfx->vertprog->draw_elements; +	int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr); + +	if (!elements) +		return; + +	WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2); + +	OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements)); +	for(unsigned i = 0; i < num_outputs; ++i) +		OUT_RING(chan, (4 << NV34TCL_VTXFMT_SIZE_SHIFT) | NV34TCL_VTXFMT_TYPE_32_FLOAT); +	for(unsigned i = num_outputs; i < elements; ++i) +		OUT_RING(chan, NV34TCL_VTXFMT_TYPE_32_FLOAT); + +	if(nvfx->is_nv4x) { +		unsigned i; +		/* seems to be some kind of cache flushing */ +		for(i = 0; i < 3; ++i) { +			OUT_RING(chan, 
RING_3D(0x1718, 1)); +			OUT_RING(chan, 0); +		} +	} + +	OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements)); +	for (unsigned i = 0; i < elements; i++) +		OUT_RING(chan, 0); + +	OUT_RING(chan, RING_3D(0x1710, 1)); +	OUT_RING(chan, 0); + +	nvfx->hw_vtxelt_nr = num_outputs; +	nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF; +} + +void  nvfx_vbo_relocate(struct nvfx_context *nvfx)  {  	struct nouveau_channel* chan; diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index bc78ed400a..30385b26f7 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -8,6 +8,7 @@  #include "tgsi/tgsi_parse.h"  #include "tgsi/tgsi_dump.h"  #include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_ureg.h"  #include "draw/draw_context.h" @@ -37,6 +38,7 @@ struct nvfx_loop_entry  struct nvfx_vpc {  	struct nvfx_context* nvfx; +	struct pipe_shader_state pipe;  	struct nvfx_vertex_program *vp;  	struct nvfx_vertex_program_exec *vpi; @@ -813,7 +815,7 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc)  	unsigned num_outputs;  	unsigned num_texcoords = nvfx->is_nv4x ? 
10 : 8; -	num_outputs = util_semantic_set_from_program_file(&set, vpc->vp->pipe.tokens, TGSI_FILE_OUTPUT); +	num_outputs = util_semantic_set_from_program_file(&set, vpc->pipe.tokens, TGSI_FILE_OUTPUT);  	if(num_outputs > num_texcoords) {  		NOUVEAU_ERR("too many vertex program outputs: %i\n", num_outputs); @@ -840,7 +842,7 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc)  		}  	} -	tgsi_parse_init(&p, vpc->vp->pipe.tokens); +	tgsi_parse_init(&p, vpc->pipe.tokens);  	while (!tgsi_parse_end_of_tokens(&p)) {  		const union tgsi_full_token *tok = &p.FullToken; @@ -917,21 +919,35 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc)  DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE) -static void -nvfx_vertprog_translate(struct nvfx_context *nvfx, -			struct nvfx_vertex_program *vp) +static struct nvfx_vertex_program* +nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_state* vps)  {  	struct tgsi_parse_context parse; +	struct nvfx_vertex_program* vp = NULL;  	struct nvfx_vpc *vpc = NULL;  	struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));  	struct util_dynarray insns;  	int i; -	vpc = CALLOC(1, sizeof(struct nvfx_vpc)); +	tgsi_parse_init(&parse, vps->tokens); + +	vp = CALLOC_STRUCT(nvfx_vertex_program); +	if(!vp) +		goto out_err; + +	vpc = CALLOC_STRUCT(nvfx_vpc);  	if (!vpc) -		return; +		goto out_err; +  	vpc->nvfx = nvfx;  	vpc->vp = vp; +	vpc->pipe = *vps; + +	{ +		// TODO: use a 64-bit atomic here! 
+		static unsigned long long id = 0; +		vp->id = ++id; +	}  	/* reserve space for ucps */  	if(nvfx->use_vp_clipping) @@ -942,7 +958,7 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,  	if (!nvfx_vertprog_prepare(nvfx, vpc)) {  		FREE(vpc); -		return; +		return NULL;  	}  	/* Redirect post-transform vertex position to a temp if user clip @@ -955,8 +971,6 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,  		vpc->r_temps_discard = 0;  	} -	tgsi_parse_init(&parse, vp->pipe.tokens); -  	util_dynarray_init(&insns);  	while (!tgsi_parse_end_of_tokens(&parse)) {  		tgsi_parse_token(&parse); @@ -1058,7 +1072,7 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,  	if(debug_get_option_nvfx_dump_vp())  	{  		debug_printf("\n"); -		tgsi_dump(vp->pipe.tokens, 0); +		tgsi_dump(vpc->pipe.tokens, 0);  		debug_printf("\n%s vertex program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x");  		for (i = 0; i < vp->nr_insns; i++) @@ -1068,20 +1082,49 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx,  	vp->clip_nr = -1;  	vp->exec_start = -1; -	vp->translated = TRUE; -out_err: + +out:  	tgsi_parse_free(&parse); -	util_dynarray_fini(&vpc->label_relocs); -	util_dynarray_fini(&vpc->loop_stack); -	if (vpc->r_temp) +	if(vpc) { +		util_dynarray_fini(&vpc->label_relocs); +		util_dynarray_fini(&vpc->loop_stack);  		FREE(vpc->r_temp); -	if (vpc->r_address)  		FREE(vpc->r_address); -	if (vpc->r_const)  		FREE(vpc->r_const); -	if (vpc->imm)  		FREE(vpc->imm); -	FREE(vpc); +		FREE(vpc); +	} +	return vp; + +out_err: +	FREE(vp); +	vp = NULL; +	goto out; +} + +static struct nvfx_vertex_program* +nvfx_vertprog_translate_draw_vp(struct nvfx_context *nvfx, struct nvfx_pipe_vertex_program* pvp) +{ +	struct nvfx_vertex_program* vp = NULL; +	struct pipe_shader_state vps; +	struct ureg_program *ureg = NULL; +	unsigned num_outputs = MIN2(pvp->info.num_outputs, 16); + +	ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); +	if(ureg == NULL) +		return 0; + +	for (unsigned i = 0; i < num_outputs; i++) +		
ureg_MOV(ureg, ureg_DECL_output(ureg, pvp->info.output_semantic_name[i], pvp->info.output_semantic_index[i]), ureg_DECL_vs_input(ureg, i)); + +	ureg_END( ureg ); + +	vps.tokens = ureg_get_tokens(ureg, 0); +	vp = nvfx_vertprog_translate(nvfx, &vps); +	ureg_free_tokens(vps.tokens); +	ureg_destroy(ureg); + +	return vp;  }  boolean @@ -1090,30 +1133,44 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)  	struct nvfx_screen *screen = nvfx->screen;  	struct nouveau_channel *chan = screen->base.channel;  	struct nouveau_grobj *eng3d = screen->eng3d; -	struct nvfx_vertex_program *vp; +	struct nvfx_pipe_vertex_program *pvp = nvfx->vertprog; +	struct nvfx_vertex_program* vp;  	struct pipe_resource *constbuf;  	boolean upload_code = FALSE, upload_data = FALSE;  	int i;  	if (nvfx->render_mode == HW) { -		vp = nvfx->vertprog; -		constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; -	} else { -		vp = nvfx->swtnl.vertprog; -		constbuf = NULL; -	} - -	/* Translate TGSI shader into hw bytecode */ -	if (!vp->translated) -	{  		nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG; -		nvfx_vertprog_translate(nvfx, vp); -		if (!vp->translated) { +		vp = pvp->vp; + +		if(!vp) { +			vp = nvfx_vertprog_translate(nvfx, &pvp->pipe); +			if(!vp) +				vp = NVFX_VP_FAILED; +			pvp->vp = vp; +		} + +		if(vp == NVFX_VP_FAILED) {  			nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;  			return FALSE;  		} + +		constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; +	} else { +		vp = pvp->draw_vp; +		if(!vp) +		{ +			pvp->draw_vp = vp = nvfx_vertprog_translate_draw_vp(nvfx, pvp); +			if(!vp) { +				_debug_printf("Error: unable to create a swtnl passthrough vertex shader: aborting."); +				abort(); +			} +		} +		constbuf = NULL;  	} +	nvfx->hw_vertprog = vp; +  	/* Allocate hw vtxprog exec slots */  	if (!vp->exec) {  		struct nouveau_resource *heap = nvfx->screen->vp_exec_heap; @@ -1294,24 +1351,22 @@ nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp)  	util_dynarray_fini(&vp->branch_relocs);  	
util_dynarray_fini(&vp->const_relocs); +	FREE(vp);  }  static void * -nvfx_vp_state_create(struct pipe_context *pipe, -                     const struct pipe_shader_state *cso) +nvfx_vp_state_create(struct pipe_context *pipe, const struct pipe_shader_state *cso)  {          struct nvfx_context *nvfx = nvfx_context(pipe); -        struct nvfx_vertex_program *vp; +        struct nvfx_pipe_vertex_program *pvp; -        // TODO: use a 64-bit atomic here! -        static unsigned long long id = 0; +        pvp = CALLOC(1, sizeof(struct nvfx_pipe_vertex_program)); +        pvp->pipe.tokens = tgsi_dup_tokens(cso->tokens); +        tgsi_scan_shader(pvp->pipe.tokens, &pvp->info); +        pvp->draw_elements = MAX2(1, MIN2(pvp->info.num_outputs, 16)); +        pvp->draw_no_elements = pvp->info.num_outputs == 0; -        vp = CALLOC(1, sizeof(struct nvfx_vertex_program)); -        vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); -        vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe); -        vp->id = ++id; - -        return (void *)vp; +        return (void *)pvp;  }  static void @@ -1327,13 +1382,17 @@ nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso)  static void  nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso)  { -        struct nvfx_context *nvfx = nvfx_context(pipe); -        struct nvfx_vertex_program *vp = hwcso; +	struct nvfx_context *nvfx = nvfx_context(pipe); +	struct nvfx_pipe_vertex_program *pvp = hwcso; -        draw_delete_vertex_shader(nvfx->draw, vp->draw); -        nvfx_vertprog_destroy(nvfx, vp); -        FREE((void*)vp->pipe.tokens); -        FREE(vp); +	if(pvp->draw_vs) +		draw_delete_vertex_shader(nvfx->draw, pvp->draw_vs); +	if(pvp->vp && pvp->vp != NVFX_VP_FAILED) +		nvfx_vertprog_destroy(nvfx, pvp->vp); +	if(pvp->draw_vp) +		nvfx_vertprog_destroy(nvfx, pvp->draw_vp); +	FREE((void*)pvp->pipe.tokens); +	FREE(pvp);  }  void | 
