diff options
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_context.c | 4 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_context.h | 6 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_fragtex.c | 4 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_miptree.c | 49 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_resource.h | 36 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_state_emit.c | 81 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_state_fb.c | 243 | ||||
| -rw-r--r-- | src/gallium/drivers/nvfx/nvfx_surface.c | 121 | 
8 files changed, 403 insertions, 141 deletions
diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c index 3d45f5f0ba..7ab81de7dd 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.c +++ b/src/gallium/drivers/nvfx/nvfx_context.c @@ -15,6 +15,7 @@ nvfx_flush(struct pipe_context *pipe, unsigned flags,  	struct nouveau_channel *chan = screen->base.channel;  	struct nouveau_grobj *eng3d = screen->eng3d; +	/* XXX: we need to actually be intelligent here */  	if (flags & PIPE_FLUSH_TEXTURE_CACHE) {  		BEGIN_RING(chan, eng3d, 0x1fd8, 1);  		OUT_RING  (chan, 2); @@ -87,5 +88,8 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)  	/* set these to that we init them on first validation */  	nvfx->state.scissor_enabled = ~0;  	nvfx->state.stipple_enabled = ~0; + +	LIST_INITHEAD(&nvfx->render_cache); +  	return &nvfx->pipe;  } diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index 278be94d52..a6ea913967 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -11,6 +11,7 @@  #include "util/u_memory.h"  #include "util/u_math.h"  #include "util/u_inlines.h" +#include "util/u_double_list.h"  #include "draw/draw_vertex.h"  #include "util/u_blitter.h" @@ -67,6 +68,7 @@ struct nvfx_state {  	unsigned scissor_enabled;  	unsigned stipple_enabled;  	unsigned fp_samplers; +	unsigned render_temps;  };  struct nvfx_vtxelt_state { @@ -90,6 +92,7 @@ struct nvfx_context {  	struct draw_context *draw;  	struct blitter_context* blitter; +	struct list_head render_cache;  	/* HW state derived from pipe states */  	struct nvfx_state state; @@ -185,7 +188,8 @@ extern void nvfx_draw_elements_swtnl(struct pipe_context *pipe,  extern void nvfx_vtxfmt_validate(struct nvfx_context *nvfx);  /* nvfx_fb.c */ -extern void nvfx_state_framebuffer_validate(struct nvfx_context *nvfx); +extern int nvfx_framebuffer_prepare(struct nvfx_context *nvfx); +extern void nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result);  void  nvfx_framebuffer_relocate(struct nvfx_context *nvfx); diff --git a/src/gallium/drivers/nvfx/nvfx_fragtex.c b/src/gallium/drivers/nvfx/nvfx_fragtex.c index 0b4a434fec..6605745433 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragtex.c +++ b/src/gallium/drivers/nvfx/nvfx_fragtex.c @@ -16,6 +16,10 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx)  		samplers &= ~(1 << unit);  		if(nvfx->fragment_sampler_views[unit] && nvfx->tex_sampler[unit]) { +			util_dirty_surfaces_use_for_sampling(&nvfx->pipe, +					&((struct nvfx_miptree*)nvfx->fragment_sampler_views[unit]->texture)->dirty_surfaces, +					nvfx_surface_flush); +  			if(!nvfx->is_nv4x)  				nv30_fragtex_set(nvfx, unit);  			else diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c index 7deb9d7b9a..530d705e13 100644 --- a/src/gallium/drivers/nvfx/nvfx_miptree.c +++ b/src/gallium/drivers/nvfx/nvfx_miptree.c @@ -11,6 +11,7 @@  #include "nvfx_screen.h"  #include "nvfx_resource.h"  #include "nvfx_transfer.h" +#include "nv04_2d.h"  static void  nvfx_miptree_choose_format(struct nvfx_miptree *mt) @@ -115,16 +116,23 @@ nvfx_miptree_get_handle(struct pipe_screen *pscreen,  static void +nvfx_miptree_surface_final_destroy(struct pipe_surface* ps) +{ +	struct nvfx_surface* ns = (struct nvfx_surface*)ps; +	pipe_resource_reference(&ps->texture, 0); +	pipe_resource_reference((struct pipe_resource**)&ns->temp, 0); +	FREE(ps); +} + +static void  nvfx_miptree_destroy(struct pipe_screen *screen, struct pipe_resource *pt)  {  	struct nvfx_miptree *mt = (struct nvfx_miptree *)pt; +	util_surfaces_destroy(&mt->surfaces, pt, nvfx_miptree_surface_final_destroy);  	nouveau_screen_bo_release(screen, mt->base.bo);  	FREE(mt);  } - - -  struct u_resource_vtbl nvfx_miptree_vtbl =   {     nvfx_miptree_get_handle,	      /* get_handle */ @@ -152,6 +160,8 @@ nvfx_miptree_create_skeleton(struct pipe_screen *pscreen, const struct pipe_reso          mt->base.base = *pt;          mt->base.vtbl = &nvfx_miptree_vtbl; +        util_dirty_surfaces_init(&mt->dirty_surfaces); +          pipe_reference_init(&mt->base.base.reference, 1);          mt->base.base.screen = pscreen; @@ -218,29 +228,28 @@ nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,  			 unsigned face, unsigned level, unsigned zslice,  			 unsigned flags)  { +	struct nvfx_miptree* mt = (struct nvfx_miptree*)pt;  	struct nvfx_surface *ns; -	ns = CALLOC_STRUCT(nvfx_surface); -	if (!ns) -		return NULL; -	pipe_resource_reference(&ns->base.texture, pt); -	ns->base.format = pt->format; -	ns->base.width = u_minify(pt->width0, level); -	ns->base.height = u_minify(pt->height0, level); -	ns->base.usage = flags; -	pipe_reference_init(&ns->base.reference, 1); -	ns->base.face = face; -	ns->base.level = level; -	ns->base.zslice = zslice; -	ns->pitch = nvfx_subresource_pitch(pt, level); -	ns->base.offset = nvfx_subresource_offset(pt, face, level, zslice); +	ns = (struct nvfx_surface*)util_surfaces_get(&mt->surfaces, sizeof(struct nvfx_surface), pscreen, pt, face, level, zslice, flags); +	if(ns->base.base.offset == ~0) { +		util_dirty_surface_init(&ns->base); +		ns->pitch = nvfx_subresource_pitch(pt, level); +		ns->base.base.offset = nvfx_subresource_offset(pt, face, level, zslice); +	} -	return &ns->base; +	return &ns->base.base;  }  void  nvfx_miptree_surface_del(struct pipe_surface *ps)  { -	pipe_resource_reference(&ps->texture, NULL); -	FREE(ps); +	struct nvfx_surface* ns = (struct nvfx_surface*)ps; + +	if(!ns->temp) +	{ +		util_surfaces_detach(&((struct nvfx_miptree*)ps->texture)->surfaces, ps); +		pipe_resource_reference(&ps->texture, 0); +		FREE(ps); +	}  } diff --git a/src/gallium/drivers/nvfx/nvfx_resource.h b/src/gallium/drivers/nvfx/nvfx_resource.h index 42d04ebb37..be1845dd9c 100644 --- a/src/gallium/drivers/nvfx/nvfx_resource.h +++ b/src/gallium/drivers/nvfx/nvfx_resource.h @@ -1,13 +1,16 @@ -  #ifndef NVFX_RESOURCE_H  #define NVFX_RESOURCE_H  #include "util/u_transfer.h"  #include "util/u_format.h"  #include "util/u_math.h" +#include "util/u_double_list.h" +#include "util/u_surfaces.h" +#include "util/u_dirty_surfaces.h"  #include <nouveau/nouveau_bo.h>  struct pipe_resource; +struct nv04_region;  /* This gets further specialized into either buffer or texture @@ -38,17 +41,34 @@ nvfx_resource_on_gpu(struct pipe_resource* pr)  #define NVFX_MAX_TEXTURE_LEVELS  16 +/* We have the following invariants for render temporaries + * + * 1. Render temporaries are always linear + * 2. Render temporaries are always up to date + * 3. Currently, render temporaries are destroyed when the resource is used for sampling, but kept for any other use + * + * Also, we do NOT flush temporaries on any pipe->flush(). + * This is fine, as long as scanout targets and shared resources never need temps. + * + * TODO: we may want to also support swizzled temporaries to improve performance in some cases. + */ +  struct nvfx_miptree {          struct nvfx_resource base;          unsigned linear_pitch; /* for linear textures, 0 for swizzled and compressed textures with level-dependent minimal pitch */          unsigned face_size; /* 128-byte aligned face/total size */          unsigned level_offset[NVFX_MAX_TEXTURE_LEVELS]; + +        struct util_surfaces surfaces; +        struct util_dirty_surfaces dirty_surfaces;  };  struct nvfx_surface { -	struct pipe_surface base; +	struct util_dirty_surface base;  	unsigned pitch; + +	struct nvfx_miptree* temp;  };  static INLINE  @@ -65,6 +85,12 @@ nvfx_surface_buffer(struct pipe_surface *surf)  	return mt->bo;  } +static INLINE struct util_dirty_surfaces* +nvfx_surface_get_dirty_surfaces(struct pipe_surface* surf) +{ +	struct nvfx_miptree *mt = (struct nvfx_miptree *)surf->texture; +	return &mt->dirty_surfaces; +}  void  nvfx_init_resource_functions(struct pipe_context *pipe); @@ -141,4 +167,10 @@ nvfx_subresource_pitch(struct pipe_resource* pt, unsigned level)  	}  } +void +nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf); + +void +nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf); +  #endif diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index f91ae19ecd..dc70f3de87 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -1,15 +1,48 @@  #include "nvfx_context.h"  #include "nvfx_state.h" +#include "nvfx_resource.h"  #include "draw/draw_context.h"  static boolean  nvfx_state_validate_common(struct nvfx_context *nvfx)  {  	struct nouveau_channel* chan = nvfx->screen->base.channel; -	unsigned dirty = nvfx->dirty; +	unsigned dirty; +	int all_swizzled = -1; +	boolean flush_tex_cache = FALSE;  	if(nvfx != nvfx->screen->cur_ctx) -		dirty = ~0; +	{ +		nvfx->dirty = ~0; +		nvfx->screen->cur_ctx = nvfx; +	} + +	/* These can trigger use the of 3D engine to copy temporaries. +	 * That will recurse here and thus dirty all 3D state, so we need to this before anything else, and in a loop.. +	 * This converges to having clean temps, then binding both fragtexes and framebuffers. +	 */ +	while(nvfx->dirty & (NVFX_NEW_FB | NVFX_NEW_SAMPLER)) +	{ +		if(nvfx->dirty & NVFX_NEW_SAMPLER) +		{ +			nvfx->dirty &=~ NVFX_NEW_SAMPLER; +			nvfx_fragtex_validate(nvfx); + +			// TODO: only set this if really necessary +			flush_tex_cache = TRUE; +		} + +		if(nvfx->dirty & NVFX_NEW_FB) +		{ +			nvfx->dirty &=~ NVFX_NEW_FB; +			all_swizzled = nvfx_framebuffer_prepare(nvfx); + +			// TODO: make sure this doesn't happen, i.e. fbs have matching formats +			assert(all_swizzled >= 0); +		} +	} + +	dirty = nvfx->dirty;  	if(nvfx->render_mode == HW)  	{ @@ -35,9 +68,6 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)  			nvfx_vtxfmt_validate(nvfx);  	} -	if(dirty & NVFX_NEW_FB) -		nvfx_state_framebuffer_validate(nvfx); -  	if(dirty & NVFX_NEW_RAST)  		sb_emit(chan, nvfx->rasterizer->sb, nvfx->rasterizer->sb_len); @@ -48,10 +78,14 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)  		nvfx_state_stipple_validate(nvfx);  	if(dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_FRAGCONST)) +	{  		nvfx_fragprog_validate(nvfx); +		if(dirty & NVFX_NEW_FRAGPROG) +			flush_tex_cache = TRUE; // TODO: do we need this? +	} -	if(dirty & NVFX_NEW_SAMPLER) -		nvfx_fragtex_validate(nvfx); +	if(all_swizzled >= 0) +		nvfx_framebuffer_validate(nvfx, all_swizzled);  	if(dirty & NVFX_NEW_BLEND)  		sb_emit(chan, nvfx->blend->sb, nvfx->blend->sb_len); @@ -72,13 +106,17 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)  	if(dirty & (NVFX_NEW_VIEWPORT | NVFX_NEW_FB))  		nvfx_state_viewport_validate(nvfx); -	/* TODO: could nv30 need this or something similar too? */ -	if((dirty & (NVFX_NEW_FRAGPROG | NVFX_NEW_SAMPLER)) && nvfx->is_nv4x) { -		WAIT_RING(chan, 4); -		OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); -		OUT_RING(chan, 2); -		OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); -		OUT_RING(chan, 1); +	if(flush_tex_cache) +	{ +		// TODO: what about nv30? +		if(nvfx->is_nv4x) +		{ +			WAIT_RING(chan, 4); +			OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); +			OUT_RING(chan, 2); +			OUT_RING(chan, RING_3D(NV40TCL_TEX_CACHE_CTL, 1)); +			OUT_RING(chan, 1); +		}  	}  	nvfx->dirty = 0;  	return TRUE; @@ -99,6 +137,21 @@ nvfx_state_emit(struct nvfx_context *nvfx)  	      ;  	MARK_RING(chan, max_relocs * 2, max_relocs * 2);  	nvfx_state_relocate(nvfx); + +	unsigned render_temps = nvfx->state.render_temps; +	if(render_temps) +	{ +		for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i) +		{ +			if(render_temps & (1 << i)) +				util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.cbufs[i]), +						(struct util_dirty_surface*)nvfx->framebuffer.cbufs[i]); +		} + +		if(render_temps & 0x80) +			util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(nvfx->framebuffer.zsbuf), +					(struct util_dirty_surface*)nvfx->framebuffer.zsbuf); +	}  }  void diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c index e111d11627..80b0f21575 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_fb.c +++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c @@ -1,19 +1,56 @@  #include "nvfx_context.h"  #include "nvfx_resource.h"  #include "nouveau/nouveau_util.h" +#include "util/u_format.h" -void -nvfx_state_framebuffer_validate(struct nvfx_context *nvfx) +static inline boolean +nvfx_surface_linear_renderable(struct pipe_surface* surf) +{ +	return (surf->texture->flags & NVFX_RESOURCE_FLAG_LINEAR) +		&& !(surf->offset & 63) +		&& !(((struct nvfx_surface*)surf)->pitch & 63); +} + +static inline boolean +nvfx_surface_swizzled_renderable(struct pipe_framebuffer_state* fb, struct pipe_surface* surf) +{ +	/* TODO: return FALSE if we have a format not supporting swizzled rendering (e.g. r8); currently those are not supported at all */ +	return !((struct nvfx_miptree*)surf->texture)->linear_pitch +		&& (surf->texture->target != PIPE_TEXTURE_3D || u_minify(surf->texture->depth0, surf->level) <= 1) +		&& !(surf->offset & 127) +		&& (surf->width == fb->width) +		&& (surf->height == fb->height) +		&& !((struct nvfx_surface*)surf)->temp; +} + +static boolean +nvfx_surface_get_render_target(struct pipe_surface* surf, int all_swizzled, struct nvfx_render_target* target) +{ +	struct nvfx_surface* ns = (struct nvfx_surface*)surf; +	if(!ns->temp) +	{ +		target->bo = ((struct nvfx_miptree*)surf->texture)->base.bo; +		target->offset = surf->offset; +		target->pitch = align(ns->pitch, 64); +		assert(target->pitch); +		return FALSE; +	} +	else +	{ +		target->offset = 0; +		target->pitch = ns->temp->linear_pitch; +		target->bo = ns->temp->base.bo; +		assert(target->pitch); +		return TRUE; +	} +} + +int +nvfx_framebuffer_prepare(struct nvfx_context *nvfx)  {  	struct pipe_framebuffer_state *fb = &nvfx->framebuffer; -	struct nouveau_channel *chan = nvfx->screen->base.channel; -	uint32_t rt_enable = 0, rt_format = 0; -	int i, colour_format = 0, zeta_format = 0; -	int depth_only = 0; -	unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; -	unsigned w = fb->width; -	unsigned h = fb->height; -	int colour_bits = 32, zeta_bits = 32; +	int i, color_format = 0, zeta_format = 0; +	int all_swizzled = 1;  	if(!nvfx->is_nv4x)  		assert(fb->nr_cbufs <= 2); @@ -21,113 +58,135 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)  		assert(fb->nr_cbufs <= 4);  	for (i = 0; i < fb->nr_cbufs; i++) { -		if (colour_format) -			assert(colour_format == fb->cbufs[i]->format); -		else -			colour_format = fb->cbufs[i]->format; +		if (color_format) { +			if(color_format != fb->cbufs[i]->format) +				return -1; +		} else +			color_format = fb->cbufs[i]->format; -		rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); -		nvfx->hw_rt[i].bo = ((struct nvfx_miptree*)fb->cbufs[i]->texture)->base.bo; -		nvfx->hw_rt[i].offset = fb->cbufs[i]->offset; -		nvfx->hw_rt[i].pitch = ((struct nvfx_surface *)fb->cbufs[i])->pitch; +		if(!nvfx_surface_swizzled_renderable(fb, fb->cbufs[i])) +			all_swizzled = 0;  	} -	for(; i < 4; ++i) -		nvfx->hw_rt[i].bo = 0; +	if (fb->zsbuf) { +		/* TODO: return FALSE if we have a format not supporting a depth buffer (e.g. r8); currently those are not supported at all */ +		if(!nvfx_surface_swizzled_renderable(fb, fb->zsbuf)) +			all_swizzled = 0; + +		if(all_swizzled && util_format_get_blocksize(color_format) != util_format_get_blocksize(zeta_format)) +			all_swizzled = 0; +	} + +	for (i = 0; i < fb->nr_cbufs; i++) { +		if(!((struct nvfx_surface*)fb->cbufs[i])->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->cbufs[i])) +			nvfx_surface_create_temp(&nvfx->pipe, fb->cbufs[i]); +	} + +	if(fb->zsbuf) { +		if(!((struct nvfx_surface*)fb->zsbuf)->temp && !all_swizzled && !nvfx_surface_linear_renderable(fb->zsbuf)) +			nvfx_surface_create_temp(&nvfx->pipe, fb->zsbuf); +	} + +	return all_swizzled; +} + +void +nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result) +{ +	struct pipe_framebuffer_state *fb = &nvfx->framebuffer; +	struct nouveau_channel *chan = nvfx->screen->base.channel; +	uint32_t rt_enable, rt_format; +	int i; +	unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; +	unsigned w = fb->width; +	unsigned h = fb->height; + +	rt_enable = (NV34TCL_RT_ENABLE_COLOR0 << fb->nr_cbufs) - 1;  	if (rt_enable & (NV34TCL_RT_ENABLE_COLOR1 |  			 NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3))  		rt_enable |= NV34TCL_RT_ENABLE_MRT; -	if (fb->zsbuf) { -		zeta_format = fb->zsbuf->format; -		nvfx->hw_zeta.bo = ((struct nvfx_miptree*)fb->zsbuf->texture)->base.bo; -		nvfx->hw_zeta.offset = fb->zsbuf->offset; -		nvfx->hw_zeta.pitch = ((struct nvfx_surface *)fb->zsbuf)->pitch; -	} -	else -		nvfx->hw_zeta.bo = 0; +	nvfx->state.render_temps = 0; -	if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0 | NV34TCL_RT_ENABLE_COLOR1 | -		NV40TCL_RT_ENABLE_COLOR2 | NV40TCL_RT_ENABLE_COLOR3)) { -		/* Render to at least a colour buffer */ -		if (!(fb->cbufs[0]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) { -			assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); -			for (i = 1; i < fb->nr_cbufs; i++) -				assert(!(fb->cbufs[i]->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)); +	for (i = 0; i < fb->nr_cbufs; i++) +		nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->cbufs[i], prepare_result, &nvfx->hw_rt[i]) << i; -			rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | -				(log2i(fb->cbufs[0]->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | -				(log2i(fb->cbufs[0]->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); -		} -		else -			rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; -	} else if (fb->zsbuf) { -		depth_only = 1; +	for(; i < 4; ++i) +		nvfx->hw_rt[i].bo = 0; -		/* Render to depth buffer only */ -		if (!(fb->zsbuf->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) { -			assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); +	if (fb->zsbuf) { +		nvfx->state.render_temps |= nvfx_surface_get_render_target(fb->zsbuf, prepare_result, &nvfx->hw_zeta) << 7; -			rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | -				(log2i(fb->zsbuf->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | -				(log2i(fb->zsbuf->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); -		} -		else -			rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; -	} else { -		return; +		assert(util_format_get_stride(fb->zsbuf->format, fb->width) <= nvfx->hw_zeta.pitch); +		assert(nvfx->hw_zeta.offset + nvfx->hw_zeta.pitch * fb->height <= nvfx->hw_zeta.bo->size);  	} -	switch (colour_format) { -	case PIPE_FORMAT_B8G8R8X8_UNORM: -		rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8; -		break; -	case PIPE_FORMAT_B8G8R8A8_UNORM: -	case 0: -		rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; -		break; -	case PIPE_FORMAT_B5G6R5_UNORM: +	if (prepare_result) { +		assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + +		rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | +			(log2i(fb->width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) | +			(log2i(fb->height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); +	} else +		rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + +	if(fb->nr_cbufs > 0) { +		switch (fb->cbufs[0]->format) { +		case PIPE_FORMAT_B8G8R8X8_UNORM: +			rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8; +			break; +		case PIPE_FORMAT_B8G8R8A8_UNORM: +		case 0: +			rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; +			break; +		case PIPE_FORMAT_B5G6R5_UNORM: +			rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; +			break; +		default: +			assert(0); +		} +	} else if(fb->zsbuf && util_format_get_blocksize(fb->zsbuf->format) == 2)  		rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; -		colour_bits = 16; -		break; -	default: -		assert(0); -	} +	else +		rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; -	switch (zeta_format) { -	case PIPE_FORMAT_Z16_UNORM: +	if(fb->zsbuf) { +		switch (fb->zsbuf->format) { +		case PIPE_FORMAT_Z16_UNORM: +			rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; +			break; +		case PIPE_FORMAT_S8_USCALED_Z24_UNORM: +		case PIPE_FORMAT_X8Z24_UNORM: +		case 0: +			rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; +			break; +		default: +			assert(0); +		} +	} else if(fb->nr_cbufs && util_format_get_blocksize(fb->cbufs[0]->format) == 2)  		rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; -		zeta_bits = 16; -		break; -	case PIPE_FORMAT_S8_USCALED_Z24_UNORM: -	case PIPE_FORMAT_X8Z24_UNORM: -	case 0: +	else  		rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; -		break; -	default: -		assert(0); -	} -	if ((!nvfx->is_nv4x) && colour_bits > zeta_bits) { -		/* TODO: does this limitation really exist? -		   TODO: can it be worked around somehow? */ -		assert(0); -	} +	if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0) || fb->zsbuf) { +		struct nvfx_render_target *rt0 = &nvfx->hw_rt[0]; +		uint32_t pitch; -	if ((rt_enable & NV34TCL_RT_ENABLE_COLOR0) -		|| ((!nvfx->is_nv4x) && depth_only)) { -		struct nvfx_render_target *rt0 = (depth_only ? &nvfx->hw_zeta : &nvfx->hw_rt[0]); -		uint32_t pitch = rt0->pitch; +		if(!(rt_enable & NV34TCL_RT_ENABLE_COLOR0)) +			rt0 = &nvfx->hw_zeta; + +		pitch = rt0->pitch;  		if(!nvfx->is_nv4x)  		{ -			if (nvfx->hw_zeta.bo) { +			if (nvfx->hw_zeta.bo)  				pitch |= (nvfx->hw_zeta.pitch << 16); -			} else { +			else  				pitch |= (pitch << 16); -			}  		} +		//printf("rendering to bo %p [%i] at offset %i with pitch %i\n", rt0->bo, rt0->bo->handle, rt0->offset, pitch); +  		OUT_RING(chan, RING_3D(NV34TCL_DMA_COLOR0, 1));  		OUT_RELOC(chan, rt0->bo, 0,  			      rt_flags | NOUVEAU_BO_OR, @@ -180,7 +239,7 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)  		}  	} -	if (zeta_format) { +	if (fb->zsbuf) {  		OUT_RING(chan, RING_3D(NV34TCL_DMA_ZETA, 1));  		OUT_RELOC(chan, nvfx->hw_zeta.bo, 0,  			      rt_flags | NOUVEAU_BO_OR, diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index a97f342c64..8208c67f2a 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -94,23 +94,44 @@ nvfx_region_fixup_swizzled(struct nv04_region* rgn, unsigned zslice, unsigned wi  }  static INLINE void -nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y) +nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, bool for_write)  { -	rgn->bo = ((struct nvfx_resource*)surf->base.texture)->bo; -	rgn->offset = surf->base.offset; -	rgn->pitch = surf->pitch;  	rgn->x = x;  	rgn->y = y;  	rgn->z = 0; +	nvfx_region_set_format(rgn, surf->base.base.format); -	nvfx_region_set_format(rgn, surf->base.format); -	if(!(surf->base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) -		nvfx_region_fixup_swizzled(rgn, surf->base.zslice, surf->base.width, surf->base.height, u_minify(surf->base.texture->depth0, surf->base.level)); +	if(surf->temp) +	{ +		rgn->bo = surf->temp->base.bo; +		rgn->offset = 0; +		rgn->pitch = surf->temp->linear_pitch; + +		if(for_write) +			util_dirty_surface_set_dirty(nvfx_surface_get_dirty_surfaces(&surf->base.base), &surf->base); +	} else { +		rgn->bo = ((struct nvfx_resource*)surf->base.base.texture)->bo; +		rgn->offset = surf->base.base.offset; +		rgn->pitch = surf->pitch; + +	        if(!(surf->base.base.texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) +		        nvfx_region_fixup_swizzled(rgn, surf->base.base.zslice, surf->base.base.width, surf->base.base.height, u_minify(surf->base.base.texture->depth0, surf->base.base.level)); +	}  }  static INLINE void -nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z) +nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, struct pipe_subresource sub, unsigned x, unsigned y, unsigned z, bool for_write)  { +	if(pt->target != PIPE_BUFFER) +	{ +		struct nvfx_surface* ns = (struct nvfx_surface*)util_surfaces_peek(&((struct nvfx_miptree*)pt)->surfaces, pt, sub.face, sub.level, z); +		if(ns && util_dirty_surface_is_dirty(&ns->base)) +		{ +			nvfx_region_init_for_surface(rgn, ns, x, y, for_write); +			return; +		} +	} +  	rgn->bo = ((struct nvfx_resource*)pt)->bo;  	rgn->offset = nvfx_subresource_offset(pt, sub.face, sub.level, z);  	rgn->pitch = nvfx_subresource_pitch(pt, sub.level); @@ -165,6 +186,7 @@ nv04_scaled_image_format(enum pipe_format format)  	}  } +// XXX: must save index buffer too!  static struct blitter_context*  nvfx_get_blitter(struct pipe_context* pipe, int copy)  { @@ -237,8 +259,8 @@ nvfx_resource_copy_region(struct pipe_context *pipe,  	int dst_to_gpu = dstr->usage != PIPE_USAGE_DYNAMIC && dstr->usage != PIPE_USAGE_STAGING;  	int src_on_gpu = nvfx_resource_on_gpu(srcr); -	nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz); -	nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz); +	nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz, TRUE); +	nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz, FALSE);  	w = util_format_get_stride(dstr->format, w) >> dst.bpps;  	h = util_format_get_nblocksy(dstr->format, h); @@ -293,10 +315,11 @@ nvfx_surface_fill(struct pipe_context* pipe, struct pipe_surface *dsts,  	struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d;  	struct nv04_region dst;  	/* Always try to use the GPU right now, if possible -	 * If the user wanted the surface data on the CPU, he would have cleared with memset */ +	 * If the user wanted the surface data on the CPU, he would have cleared with memset (hopefully) */  	// we don't care about interior pixel order since we set all them to the same value -	nvfx_region_init_for_surface(&dst, (struct nvfx_surface*)dsts, dx, dy); +	nvfx_region_init_for_surface(&dst, (struct nvfx_surface*)dsts, dx, dy, TRUE); +  	w = util_format_get_stride(dsts->format, w) >> dst.bpps;  	h = util_format_get_nblocksy(dsts->format, h); @@ -342,6 +365,80 @@ nvfx_screen_surface_init(struct pipe_screen *pscreen)  }  static void +nvfx_surface_copy_temp(struct pipe_context* pipe, struct pipe_surface* surf, int to_temp) +{ +	struct nvfx_surface* ns = (struct nvfx_surface*)surf; +	struct pipe_subresource tempsr, surfsr; +	struct pipe_resource *idxbuf_buffer; +	unsigned idxbuf_format; + +	tempsr.face = 0; +	tempsr.level = 0; +	surfsr.face = surf->face; +	surfsr.level = surf->level; + +	// TODO: do this properly, in blitter save +	idxbuf_buffer = ((struct nvfx_context*)pipe)->idxbuf_buffer; +	idxbuf_format = ((struct nvfx_context*)pipe)->idxbuf_format; + +	if(to_temp) +		nvfx_resource_copy_region(pipe, &ns->temp->base.base, tempsr, 0, 0, 0, surf->texture, surfsr, 0, 0, surf->zslice, surf->width, surf->height); +	else +		nvfx_resource_copy_region(pipe, surf->texture, surfsr, 0, 0, surf->zslice, &ns->temp->base.base, tempsr, 0, 0, 0, surf->width, surf->height); + +	((struct nvfx_context*)pipe)->idxbuf_buffer = idxbuf_buffer; +	((struct nvfx_context*)pipe)->idxbuf_format = idxbuf_format; +} + +void +nvfx_surface_create_temp(struct pipe_context* pipe, struct pipe_surface* surf) +{ +	struct nvfx_surface* ns = (struct nvfx_surface*)surf; +	struct pipe_resource template; +	memset(&template, 0, sizeof(struct pipe_resource)); +	template.target = PIPE_TEXTURE_2D; +	template.format = surf->format; +	template.width0 = surf->width; +	template.height0 = surf->height; +	template.depth0 = 1; +	template.nr_samples = surf->texture->nr_samples; +	template.flags = NVFX_RESOURCE_FLAG_LINEAR; + +	ns->temp = (struct nvfx_miptree*)nvfx_miptree_create(pipe->screen, &template); +	nvfx_surface_copy_temp(pipe, surf, 1); +} + +void +nvfx_surface_flush(struct pipe_context* pipe, struct pipe_surface* surf) +{ +	struct nvfx_context* nvfx = (struct nvfx_context*)pipe; +	struct nvfx_surface* ns = (struct nvfx_surface*)surf; +	boolean bound = FALSE; + +	/* must be done before the copy, otherwise the copy will use the temp as destination */ +	util_dirty_surface_set_clean(nvfx_surface_get_dirty_surfaces(surf), &ns->base); + +	nvfx_surface_copy_temp(pipe, surf, 0); + +	if(nvfx->framebuffer.zsbuf == surf) +		bound = TRUE; +	else +	{ +		for(unsigned i = 0; i < nvfx->framebuffer.nr_cbufs; ++i) +		{ +			if(nvfx->framebuffer.cbufs[i] == surf) +			{ +				bound = TRUE; +				break; +			} +		} +	} + +	if(!bound) +		pipe_resource_reference((struct pipe_resource**)&ns->temp, 0); +} + +static void  nvfx_clear_render_target(struct pipe_context *pipe,  			 struct pipe_surface *dst,  			 const float *rgba,  | 
