From 94b5c28a98850f42fbcdab9ceda1450279e1e6fd Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Wed, 2 Dec 2009 16:55:33 +0100
Subject: gallium: adapt nv drivers to interface cleanups

---
 src/gallium/drivers/nv50/nv50_miptree.c  | 10 +++----
 src/gallium/drivers/nv50/nv50_transfer.c | 48 ++++++++++++++------------------
 2 files changed, 25 insertions(+), 33 deletions(-)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 3d58746793..40ee665999 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -91,13 +91,11 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 
 	for (l = 0; l <= pt->last_level; l++) {
 		struct nv50_miptree_level *lvl = &mt->level[l];
-
-		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
-		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+		unsigned nblocksy = pf_get_nblocksy(pt->format, height);
 
 		lvl->image_offset = CALLOC(mt->image_nr, sizeof(int));
-		lvl->pitch = align(pt->nblocksx[l] * pt->block.size, 64);
-		lvl->tile_mode = get_tile_mode(pt->nblocksy[l], depth);
+		lvl->pitch = align(pf_get_stride(pt->format, width), 64);
+		lvl->tile_mode = get_tile_mode(nblocksy, depth);
 
 		width = u_minify(width, 1);
 		height = u_minify(height, 1);
@@ -118,7 +116,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 			unsigned tile_d = get_tile_depth(lvl->tile_mode);
 
 			size  = lvl->pitch;
-			size *= align(pt->nblocksy[l], tile_h);
+			size *= align(pf_get_nblocksy(pt->format, u_minify(pt->height0, l)), tile_h);
 			size *= align(u_minify(pt->depth0, l), tile_d);
 
 			lvl->image_offset[i] = mt->total_size;
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 39d65279fc..4705f96f57 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -16,6 +16,8 @@ struct nv50_transfer {
 	int level_depth;
 	int level_x;
 	int level_y;
+	unsigned nblocksx;
+	unsigned nblocksy;
 };
 
 static void
@@ -151,20 +153,11 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 
 	pipe_texture_reference(&tx->base.texture, pt);
-	tx->base.format = pt->format;
+	tx->nblocksx = pf_get_nblocksx(pt->format, u_minify(pt->width0, level));
+	tx->nblocksy = pf_get_nblocksy(pt->format, u_minify(pt->height0, level));
 	tx->base.width = w;
 	tx->base.height = h;
-	tx->base.block = pt->block;
-	if (!pt->nblocksx[level]) {
-		tx->base.nblocksx = pf_get_nblocksx(&pt->block,
-						    u_minify(pt->width0, level));
-		tx->base.nblocksy = pf_get_nblocksy(&pt->block,
-						    u_minify(pt->height0, level));
-	} else {
-		tx->base.nblocksx = pt->nblocksx[level];
-		tx->base.nblocksy = pt->nblocksy[level];
-	}
-	tx->base.stride = tx->base.nblocksx * pt->block.size;
+	tx->base.stride = tx->nblocksx * pf_get_blocksize(pt->format);
 	tx->base.usage = usage;
 
 	tx->level_pitch = lvl->pitch;
@@ -173,10 +166,10 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	tx->level_depth = u_minify(mt->base.base.depth0, level);
 	tx->level_offset = lvl->image_offset[image];
 	tx->level_tiling = lvl->tile_mode;
-	tx->level_x = pf_get_nblocksx(&tx->base.block, x);
-	tx->level_y = pf_get_nblocksy(&tx->base.block, y);
+	tx->level_x = pf_get_nblocksx(pt->format, x);
+	tx->level_y = pf_get_nblocksy(pt->format, y);
 	ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
-			     tx->base.nblocksy * tx->base.stride, &tx->bo);
+			     tx->nblocksy * tx->base.stride, &tx->bo);
 	if (ret) {
 		FREE(tx);
 		return NULL;
@@ -185,22 +178,22 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	if (pt->target == PIPE_TEXTURE_3D)
 		tx->level_offset += get_zslice_offset(lvl->tile_mode, zslice,
 						      lvl->pitch,
-						      tx->base.nblocksy);
+						      tx->nblocksy);
 
 	if (usage & PIPE_TRANSFER_READ) {
-		nx = pf_get_nblocksx(&tx->base.block, tx->base.width);
-		ny = pf_get_nblocksy(&tx->base.block, tx->base.height);
+		nx = pf_get_nblocksx(pt->format, tx->base.width);
+		ny = pf_get_nblocksy(pt->format, tx->base.height);
 
 		nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset,
 					tx->level_pitch, tx->level_tiling,
 					x, y,
-					tx->base.nblocksx, tx->base.nblocksy,
+					tx->nblocksx, tx->nblocksy,
 					tx->level_depth,
 					tx->bo, 0,
 					tx->base.stride, tx->bo->tile_mode,
 					0, 0,
-					tx->base.nblocksx, tx->base.nblocksy, 1,
-					tx->base.block.size, nx, ny,
+					tx->nblocksx, tx->nblocksy, 1,
+					pf_get_blocksize(pt->format), nx, ny,
 					NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
 					NOUVEAU_BO_GART);
 	}
@@ -213,23 +206,24 @@ nv50_transfer_del(struct pipe_transfer *ptx)
 {
 	struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
 	struct nv50_miptree *mt = nv50_miptree(ptx->texture);
+	struct pipe_texture *pt = ptx->texture;
 
-	unsigned nx = pf_get_nblocksx(&tx->base.block, tx->base.width);
-	unsigned ny = pf_get_nblocksy(&tx->base.block, tx->base.height);
+	unsigned nx = pf_get_nblocksx(pt->format, tx->base.width);
+	unsigned ny = pf_get_nblocksy(pt->format, tx->base.height);
 
 	if (ptx->usage & PIPE_TRANSFER_WRITE) {
-		struct pipe_screen *pscreen = ptx->texture->screen;
+		struct pipe_screen *pscreen = pt->screen;
 
 		nv50_transfer_rect_m2mf(pscreen, tx->bo, 0,
 					tx->base.stride, tx->bo->tile_mode,
 					0, 0,
-					tx->base.nblocksx, tx->base.nblocksy, 1,
+					tx->nblocksx, tx->nblocksy, 1,
 					mt->base.bo, tx->level_offset,
 					tx->level_pitch, tx->level_tiling,
 					tx->level_x, tx->level_y,
-					tx->base.nblocksx, tx->base.nblocksy,
+					tx->nblocksx, tx->nblocksy,
 					tx->level_depth,
-					tx->base.block.size, nx, ny,
+					pf_get_blocksize(pt->format), nx, ny,
 					NOUVEAU_BO_GART, NOUVEAU_BO_VRAM |
 					NOUVEAU_BO_GART);
 	}
-- 
cgit v1.2.3


From 47c780180b888e115b630cd940fe9c29dd53b4c5 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 8 Dec 2009 17:51:19 +0100
Subject: nouveau: use boolean instead of bool

---
 src/gallium/drivers/nv04/nv04_transfer.c | 2 +-
 src/gallium/drivers/nv10/nv10_transfer.c | 2 +-
 src/gallium/drivers/nv20/nv20_transfer.c | 2 +-
 src/gallium/drivers/nv30/nv30_transfer.c | 2 +-
 src/gallium/drivers/nv40/nv40_transfer.c | 2 +-
 src/gallium/drivers/nv50/nv50_context.h  | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c
index e8ff686b4a..d66d6c6346 100644
--- a/src/gallium/drivers/nv04/nv04_transfer.c
+++ b/src/gallium/drivers/nv04/nv04_transfer.c
@@ -11,7 +11,7 @@
 struct nv04_transfer {
 	struct pipe_transfer base;
 	struct pipe_surface *surface;
-	bool direct;
+	boolean direct;
 };
 
 static void
diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c
index 9e44d37367..06bb513417 100644
--- a/src/gallium/drivers/nv10/nv10_transfer.c
+++ b/src/gallium/drivers/nv10/nv10_transfer.c
@@ -11,7 +11,7 @@
 struct nv10_transfer {
 	struct pipe_transfer base;
 	struct pipe_surface *surface;
-	bool direct;
+	boolean direct;
 };
 
 static void
diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c
index f2e0a34db9..26a73c5143 100644
--- a/src/gallium/drivers/nv20/nv20_transfer.c
+++ b/src/gallium/drivers/nv20/nv20_transfer.c
@@ -11,7 +11,7 @@
 struct nv20_transfer {
 	struct pipe_transfer base;
 	struct pipe_surface *surface;
-	bool direct;
+	boolean direct;
 };
 
 static void
diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c
index c8c3bd1f17..e29bfbd3ef 100644
--- a/src/gallium/drivers/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nv30/nv30_transfer.c
@@ -11,7 +11,7 @@
 struct nv30_transfer {
 	struct pipe_transfer base;
 	struct pipe_surface *surface;
-	bool direct;
+	boolean direct;
 };
 
 static void
diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c
index 1ee5cf39e0..ed5be1cf87 100644
--- a/src/gallium/drivers/nv40/nv40_transfer.c
+++ b/src/gallium/drivers/nv40/nv40_transfer.c
@@ -11,7 +11,7 @@
 struct nv40_transfer {
 	struct pipe_transfer base;
 	struct pipe_surface *surface;
-	bool direct;
+	boolean direct;
 };
 
 static void
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 4b0f062295..79135f2f36 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -65,7 +65,7 @@ struct nv50_rasterizer_stateobj {
 };
 
 struct nv50_sampler_stateobj {
-	bool normalized;
+	boolean normalized;
 	unsigned tsc[8];
 };
 
-- 
cgit v1.2.3


From 6a15ec9141b070b088d03d87673d0d2741b7db6b Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 10 Dec 2009 20:50:02 +0100
Subject: nv50: support vertex program textures

---
 src/gallium/drivers/nv50/nv50_context.h        | 12 ++--
 src/gallium/drivers/nv50/nv50_screen.c         | 17 +++--
 src/gallium/drivers/nv50/nv50_state.c          | 59 ++++++++++++-----
 src/gallium/drivers/nv50/nv50_state_validate.c | 52 ++++++++++-----
 src/gallium/drivers/nv50/nv50_tex.c            | 90 +++++++++++++++++---------
 5 files changed, 156 insertions(+), 74 deletions(-)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 79135f2f36..5578a5838f 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -126,7 +126,7 @@ struct nv50_state {
 	unsigned viewport_bypass;
 	struct nouveau_stateobj *tsc_upload;
 	struct nouveau_stateobj *tic_upload;
-	unsigned miptree_nr;
+	unsigned miptree_nr[PIPE_SHADER_TYPES];
 	struct nouveau_stateobj *vertprog;
 	struct nouveau_stateobj *fragprog;
 	struct nouveau_stateobj *programs;
@@ -162,10 +162,10 @@ struct nv50_context {
 	unsigned vtxbuf_nr;
 	struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
 	unsigned vtxelt_nr;
-	struct nv50_sampler_stateobj *sampler[PIPE_MAX_SAMPLERS];
-	unsigned sampler_nr;
-	struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS];
-	unsigned miptree_nr;
+	struct nv50_sampler_stateobj *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+	unsigned sampler_nr[PIPE_SHADER_TYPES];
+	struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+	unsigned miptree_nr[PIPE_SHADER_TYPES];
 
 	uint16_t vbo_fifo;
 };
@@ -218,7 +218,7 @@ extern void nv50_state_flush_notify(struct nouveau_channel *chan);
 extern void nv50_so_init_sifc(struct nv50_context *nv50,
 			      struct nouveau_stateobj *so,
 			      struct nouveau_bo *bo, unsigned reloc,
-			      unsigned size);
+			      unsigned offset, unsigned size);
 
 /* nv50_tex.c */
 extern void nv50_tex_validate(struct nv50_context *);
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index e1b2f11239..862be46a9e 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -97,6 +97,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
 	switch (param) {
 	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
 		return 32;
+	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+		return 32;
+	case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+		return 64;
 	case PIPE_CAP_NPOT_TEXTURES:
 		return 1;
 	case PIPE_CAP_TWO_SIDED_STENCIL:
@@ -122,8 +126,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
 	case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
 	case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
 		return 1;
-	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
-		return 0;
 	case PIPE_CAP_TGSI_CONT_SUPPORTED:
 		return 0;
 	case PIPE_CAP_BLEND_EQUATION_SEPARATE:
@@ -315,6 +317,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_method(so, screen->tesla, 0x1400, 1);
 	so_data  (so, 0xf);
 
+	/* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */
+	so_method(so, screen->tesla, 0x13b4, 1);
+	so_data  (so, 0x54);
 	so_method(so, screen->tesla, 0x13bc, 1);
 	so_data  (so, 0x54);
 	/* origin is top left (set to 1 for bottom left) */
@@ -387,7 +392,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
 	so_data  (so, 0x00000131 | (NV50_CB_PFP << 12));
 
-	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tic);
+	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+			     &screen->tic);
 	if (ret) {
 		nv50_screen_destroy(pscreen);
 		return NULL;
@@ -398,9 +404,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
 	so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
 		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-	so_data  (so, 0x000007ff);
+	so_data  (so, PIPE_SHADER_TYPES * 32 - 1);
 
-	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tsc);
+	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+			     &screen->tsc);
 	if (ret) {
 		nv50_screen_destroy(pscreen);
 		return NULL;
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 07318f2394..9c8c0c261e 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -215,41 +215,66 @@ nv50_sampler_state_create(struct pipe_context *pipe,
 	return (void *)sso;
 }
 
-static void
-nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+static INLINE void
+nv50_sampler_state_bind(struct pipe_context *pipe, unsigned type,
+			unsigned nr, void **sampler)
 {
 	struct nv50_context *nv50 = nv50_context(pipe);
-	int i;
 
-	nv50->sampler_nr = nr;
-	for (i = 0; i < nv50->sampler_nr; i++)
-		nv50->sampler[i] = sampler[i];
+	memcpy(nv50->sampler[type], sampler, nr * sizeof(void *));
 
+	nv50->sampler_nr[type] = nr;
 	nv50->dirty |= NV50_NEW_SAMPLER;
 }
 
+static void
+nv50_vp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+	nv50_sampler_state_bind(pipe, PIPE_SHADER_VERTEX, nr, s);
+}
+
+static void
+nv50_fp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+	nv50_sampler_state_bind(pipe, PIPE_SHADER_FRAGMENT, nr, s);
+}
+
 static void
 nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
 {
 	FREE(hwcso);
 }
 
-static void
-nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
-			 struct pipe_texture **pt)
+static INLINE void
+nv50_set_sampler_texture(struct pipe_context *pipe, unsigned type,
+			 unsigned nr, struct pipe_texture **pt)
 {
 	struct nv50_context *nv50 = nv50_context(pipe);
-	int i;
+	unsigned i;
 
 	for (i = 0; i < nr; i++)
-		pipe_texture_reference((void *)&nv50->miptree[i], pt[i]);
-	for (i = nr; i < nv50->miptree_nr; i++)
-		pipe_texture_reference((void *)&nv50->miptree[i], NULL);
+		pipe_texture_reference((void *)&nv50->miptree[type][i], pt[i]);
+	for (i = nr; i < nv50->miptree_nr[type]; i++)
+		pipe_texture_reference((void *)&nv50->miptree[type][i], NULL);
 
-	nv50->miptree_nr = nr;
+	nv50->miptree_nr[type] = nr;
 	nv50->dirty |= NV50_NEW_TEXTURE;
 }
 
+static void
+nv50_set_vp_sampler_textures(struct pipe_context *pipe,
+			     unsigned nr, struct pipe_texture **pt)
+{
+	nv50_set_sampler_texture(pipe, PIPE_SHADER_VERTEX, nr, pt);
+}
+
+static void
+nv50_set_fp_sampler_textures(struct pipe_context *pipe,
+			     unsigned nr, struct pipe_texture **pt)
+{
+	nv50_set_sampler_texture(pipe, PIPE_SHADER_FRAGMENT, nr, pt);
+}
+
 static void *
 nv50_rasterizer_state_create(struct pipe_context *pipe,
 			     const struct pipe_rasterizer_state *cso)
@@ -648,9 +673,11 @@ nv50_init_state_functions(struct nv50_context *nv50)
 	nv50->pipe.delete_blend_state = nv50_blend_state_delete;
 
 	nv50->pipe.create_sampler_state = nv50_sampler_state_create;
-	nv50->pipe.bind_fragment_sampler_states = nv50_sampler_state_bind;
 	nv50->pipe.delete_sampler_state = nv50_sampler_state_delete;
-	nv50->pipe.set_fragment_sampler_textures = nv50_set_sampler_texture;
+	nv50->pipe.bind_fragment_sampler_states = nv50_fp_sampler_state_bind;
+	nv50->pipe.bind_vertex_sampler_states   = nv50_vp_sampler_state_bind;
+	nv50->pipe.set_fragment_sampler_textures = nv50_set_fp_sampler_textures;
+	nv50->pipe.set_vertex_sampler_textures   = nv50_set_vp_sampler_textures;
 
 	nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create;
 	nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind;
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index c871acaab8..871e8097b6 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -155,6 +155,30 @@ nv50_state_validate_fb(struct nv50_context *nv50)
 	so_ref(NULL, &so);
 }
 
+static void
+nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so,
+		       unsigned p)
+{
+	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+	unsigned i, j, dw = nv50->sampler_nr[p] * 8;
+
+	if (!dw)
+		return;
+	nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM,
+			  p * (32 * 8 * 4), dw * 4);
+
+	so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), dw);
+
+	for (i = 0; i < nv50->sampler_nr[p]; ++i) {
+		if (nv50->sampler[p][i])
+			so_datap(so, nv50->sampler[p][i]->tsc, 8);
+		else {
+			for (j = 0; j < 8; ++j) /* you get punished */
+				so_data(so, 0); /* ... for leaving holes */
+		}
+	}
+}
+
 static void
 nv50_state_emit(struct nv50_context *nv50)
 {
@@ -246,7 +270,6 @@ boolean
 nv50_state_validate(struct nv50_context *nv50)
 {
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
 	struct nouveau_stateobj *so;
 	unsigned i;
 
@@ -369,22 +392,16 @@ scissor_uptodate:
 viewport_uptodate:
 
 	if (nv50->dirty & NV50_NEW_SAMPLER) {
-		unsigned i;
+		unsigned nr = 0;
 
-		so = so_new(nv50->sampler_nr * 9 + 23 + 4, 2);
+		for (i = 0; i < PIPE_SHADER_TYPES; ++i)
+			nr += nv50->sampler_nr[i];
 
-		nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM,
-				  nv50->sampler_nr * 8 * 4);
+		so = so_new(nr * 8 + 24 * PIPE_SHADER_TYPES + 2, 4);
 
-		for (i = 0; i < nv50->sampler_nr; i++) {
-			if (!nv50->sampler[i])
-				continue;
-			so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
-			so_datap (so, nv50->sampler[i]->tsc, 8);
-		}
+		nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
+		nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
 
-		so_method(so, tesla, 0x1440, 1); /* sync SIFC */
-		so_data  (so, 0);
 		so_method(so, tesla, 0x1334, 1); /* flush TSC */
 		so_data  (so, 0);
 
@@ -407,10 +424,13 @@ viewport_uptodate:
 
 void nv50_so_init_sifc(struct nv50_context *nv50,
 		       struct nouveau_stateobj *so,
-		       struct nouveau_bo *bo, unsigned reloc, unsigned size)
+		       struct nouveau_bo *bo, unsigned reloc,
+		       unsigned offset, unsigned size)
 {
 	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
 
+	reloc |= NOUVEAU_BO_WR;
+
 	so_method(so, eng2d, NV50_2D_DST_FORMAT, 2);
 	so_data  (so, NV50_2D_DST_FORMAT_R8_UNORM);
 	so_data  (so, 1);
@@ -418,8 +438,8 @@ void nv50_so_init_sifc(struct nv50_context *nv50,
 	so_data  (so, 262144);
 	so_data  (so, 65536);
 	so_data  (so, 1);
-	so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_HIGH, 0, 0);
-	so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_LOW, 0, 0);
+	so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0);
+	so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0);
 	so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2);
 	so_data  (so, 0);
 	so_data  (so, NV50_2D_SIFC_FORMAT_R8_UNORM);
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 417d367942..60b0ca7159 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -85,7 +85,7 @@ static const struct nv50_texture_format nv50_tex_format_list[] =
 
 static int
 nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
-		   struct nv50_miptree *mt, int unit)
+		   struct nv50_miptree *mt, int unit, unsigned p)
 {
 	unsigned i;
 	uint32_t mode;
@@ -96,7 +96,7 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
 	if (i == NV50_TEX_FORMAT_LIST_SIZE)
                 return 1;
 
-	if (nv50->sampler[unit]->normalized)
+	if (nv50->sampler[p][unit]->normalized)
 		mode = 0x50001000 | (1 << 31);
 	else {
 		mode = 0x50001000 | (7 << 14);
@@ -140,48 +140,78 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
 	return 0;
 }
 
-void
-nv50_tex_validate(struct nv50_context *nv50)
+#ifndef NV50TCL_BIND_TIC
+#define NV50TCL_BIND_TIC(n) (0x1448 + 8 * (n))
+#endif
+
+static boolean
+nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so,
+		       unsigned p)
 {
+	static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2 };
+
 	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nouveau_stateobj *so;
-	unsigned i, unit, push;
-
-	push = MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2 + 23 + 6;
-	so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr * 2 + 2);
+	unsigned unit, j, p_hw = p_remap[p];
 
 	nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM,
-			  nv50->miptree_nr * 8 * 4);
+			  p * (32 * 8 * 4), nv50->miptree_nr[p] * 8 * 4);
 
-	for (i = 0, unit = 0; unit < nv50->miptree_nr; ++unit) {
-		struct nv50_miptree *mt = nv50->miptree[unit];
-
-		if (!mt)
-			continue;
+	for (unit = 0; unit < nv50->miptree_nr[p]; ++unit) {
+		struct nv50_miptree *mt = nv50->miptree[p][unit];
 
 		so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
-		if (nv50_tex_construct(nv50, so, mt, unit)) {
-			NOUVEAU_ERR("failed tex validate\n");
-			so_ref(NULL, &so);
-			return;
+		if (mt) {
+			if (nv50_tex_construct(nv50, so, mt, unit, p))
+				return FALSE;
+			/* Set TEX insn $t src binding $unit in program type p
+			 * to TIC, TSC entry (32 * p + unit), mark valid (1).
+			 */
+			so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+			so_data  (so, ((32 * p + unit) << 9) | (unit << 1) | 1);
+		} else {
+			for (j = 0; j < 8; ++j)
+				so_data(so, 0);
+			so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+			so_data  (so, (unit << 1) | 0);
 		}
+	}
+
+	for (; unit < nv50->state.miptree_nr[p]; unit++) {
+		/* Make other bindings invalid. */
+		so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+		so_data  (so, (unit << 1) | 0);
+	}
+
+	nv50->state.miptree_nr[p] = nv50->miptree_nr[p];
+	return TRUE;
+}
 
-		so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
-		so_data  (so, (i++ << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) |
-			  (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) |
-			  NV50TCL_SET_SAMPLER_TEX_VALID);
+void
+nv50_tex_validate(struct nv50_context *nv50)
+{
+	struct nouveau_stateobj *so;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	unsigned p, push, nrlc;
+
+	for (nrlc = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) {
+		push += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]);
+		nrlc += nv50->miptree_nr[p];
 	}
+	push = push * 11 + 23 * PIPE_SHADER_TYPES + 4;
+	nrlc = nrlc * 2 + 2 * PIPE_SHADER_TYPES;
+
+	so = so_new(push, nrlc);
+
+	if (nv50_validate_textures(nv50, so, PIPE_SHADER_VERTEX) == FALSE ||
+	    nv50_validate_textures(nv50, so, PIPE_SHADER_FRAGMENT) == FALSE) {
+		so_ref(NULL, &so);
 
-	for (; unit < nv50->state.miptree_nr; unit++) {
-		so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
-		so_data  (so,
-			  (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0);
+		NOUVEAU_ERR("failed tex validate\n");
+		return;
 	}
 
 	/* not sure if the following really do what I think: */
-	so_method(so, tesla, 0x1440, 1); /* sync SIFC */
-	so_data  (so, 0);
 	so_method(so, tesla, 0x1330, 1); /* flush TIC */
 	so_data  (so, 0);
 	so_method(so, tesla, 0x1338, 1); /* flush texture caches */
@@ -189,6 +219,4 @@ nv50_tex_validate(struct nv50_context *nv50)
 
 	so_ref(so, &nv50->state.tic_upload);
 	so_ref(NULL, &so);
-	nv50->state.miptree_nr = nv50->miptree_nr;
 }
-
-- 
cgit v1.2.3


From f7a97344924461d64bfa5bd1b6a2c1151b70cc7c Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 2 Dec 2009 19:59:07 +0100
Subject: nv50: use copies of tgsi src nv50_regs

So we can use the 'mod' member without concern
if a source is used multiple times in 1 insn.
---
 src/gallium/drivers/nv50/nv50_program.c | 48 +++++++++++++++------------------
 1 file changed, 22 insertions(+), 26 deletions(-)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index f0fe7e6168..6116056857 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -131,6 +131,9 @@ struct nv50_pc {
 	struct nv50_reg *r_brdc;
 	struct nv50_reg *r_dst[4];
 
+	struct nv50_reg reg_instances[16];
+	unsigned reg_instance_nr;
+
 	unsigned interp_mode[32];
 	/* perspective interpolation registers */
 	struct nv50_reg *iv_p;
@@ -150,6 +153,19 @@ struct nv50_pc {
 	boolean allow32;
 };
 
+static INLINE struct nv50_reg *
+reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
+{
+	struct nv50_reg *dup = NULL;
+	if (reg) {
+		assert(pc->reg_instance_nr < 16);
+		dup = &pc->reg_instances[pc->reg_instance_nr++];
+		*dup = *reg;
+		reg->mod = 0;
+	}
+	return dup;
+}
+
 static INLINE void
 ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
 {
@@ -898,7 +914,6 @@ static INLINE void
 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	 struct nv50_reg *src1)
 {
-	assert(src0 != src1);
 	src1->mod ^= NV50_MOD_NEG;
 	emit_add(pc, dst, src0, src1);
 	src1->mod ^= NV50_MOD_NEG;
@@ -967,7 +982,6 @@ static INLINE void
 emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	 struct nv50_reg *src1, struct nv50_reg *src2)
 {
-	assert(src2 != src0 && src2 != src1);
 	src2->mod ^= NV50_MOD_NEG;
 	emit_mad(pc, dst, src0, src1, src2);
 	src2->mod ^= NV50_MOD_NEG;
@@ -1515,8 +1529,6 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
 static boolean
 negate_supported(const struct tgsi_full_instruction *insn, int i)
 {
-	int s;
-
 	switch (insn->Instruction.Opcode) {
 	case TGSI_OPCODE_DDY:
 	case TGSI_OPCODE_DP3:
@@ -1526,29 +1538,14 @@ negate_supported(const struct tgsi_full_instruction *insn, int i)
 	case TGSI_OPCODE_ADD:
 	case TGSI_OPCODE_SUB:
 	case TGSI_OPCODE_MAD:
-		break;
+		return TRUE;
 	case TGSI_OPCODE_POW:
 		if (i == 1)
-			break;
+			return TRUE;
 		return FALSE;
 	default:
 		return FALSE;
 	}
-
-	/* Watch out for possible multiple uses of an nv50_reg, we
-	 * can't use nv50_reg::neg in these cases.
-	 */
-	for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) {
-		if (s == i)
-			continue;
-		if ((insn->Src[s].Register.Index ==
-		     insn->Src[i].Register.Index) &&
-		    (insn->Src[s].Register.File ==
-		     insn->Src[i].Register.File))
-			return FALSE;
-	}
-
-	return TRUE;
 }
 
 /* Return a read mask for source registers deduced from opcode & write mask. */
@@ -1882,7 +1879,8 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 
 		for (c = 0; c < 4; c++)
 			if (src_mask & (1 << c))
-				src[i][c] = tgsi_src(pc, c, fs, neg_supp);
+				src[i][c] = reg_instance(pc,
+					tgsi_src(pc, c, fs, neg_supp));
 	}
 
 	brdc = temp = pc->r_brdc;
@@ -2249,16 +2247,14 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		for (c = 0; c < 4; c++) {
 			if (!src[i][c])
 				continue;
-			src[i][c]->mod = 0;
-			if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
-				FREE(src[i][c]);
-			else
 			if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST)
 				FREE(src[i][c]); /* indirect constant */
 		}
 	}
 
 	kill_temp_temp(pc);
+	pc->reg_instance_nr = 0;
+
 	return TRUE;
 }
 
-- 
cgit v1.2.3


From 9f3644c42350fec2cda17e66548c517d9d00e47f Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 4 Dec 2009 23:16:32 +0100
Subject: nv50: plug memory leak in miptree creation/destruction

Keeping this dynamically allocated for texture arrays.
Since we don't use it to store zslice offsets anymore
it's either 1 or 6 integers (cube) ...
---
 src/gallium/drivers/nv50/nv50_miptree.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 40ee665999..795db5872d 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -130,6 +130,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 				  mt->level[0].tile_mode, tile_flags,
 				  &mt->base.bo);
 	if (ret) {
+		for (l = 0; l <= pt->last_level; ++l) /* last_level is inclusive */
+			FREE(mt->level[l].image_offset);
 		FREE(mt);
 		return NULL;
 	}
@@ -169,6 +171,10 @@ static void
 nv50_miptree_destroy(struct pipe_texture *pt)
 {
 	struct nv50_miptree *mt = nv50_miptree(pt);
+	unsigned l;
+
+	for (l = 0; l <= pt->last_level; ++l) /* last_level is inclusive */
+		FREE(mt->level[l].image_offset);
 
 	nouveau_bo_ref(NULL, &mt->base.bo);
 	FREE(mt);
-- 
cgit v1.2.3


From 6a689783b9f61fc12e35f7e613697a3f4b07766b Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Mon, 7 Dec 2009 20:40:39 +0100
Subject: nv50: add src_mask case for IF opcode

---
 src/gallium/drivers/nv50/nv50_program.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 6116056857..8c82652913 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1573,6 +1573,8 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
 	case TGSI_OPCODE_RSQ:
 	case TGSI_OPCODE_SCS:
 		return 0x1;
+	case TGSI_OPCODE_IF:
+		return 0x1;
 	case TGSI_OPCODE_LIT:
 		return 0xb;
 	case TGSI_OPCODE_TEX:
-- 
cgit v1.2.3


From cc0ffaba7d1df234b3c62769ade9dee712117d2f Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 10 Dec 2009 20:54:18 +0100
Subject: nv50: fix depth comparison func TSC bits

Unfortunately it seems that if depth comparison is
active and we read a 2D texture, i.e. provide only
2 inputs, the second is used for comparison ...
---
 src/gallium/drivers/nv50/nv50_state.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 9c8c0c261e..88aef52d08 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -196,8 +196,9 @@ nv50_sampler_state_create(struct pipe_context *pipe,
 	}
 
 	if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-		tsc[0] |= (1 << 8);
-		tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7);
+		/* XXX: must be deactivated for non-shadow textures */
+		tsc[0] |= (1 << 9);
+		tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
 	}
 
 	limit = CLAMP(cso->lod_bias, -16.0, 15.0);
-- 
cgit v1.2.3


From b0036f391a1862c15c4e33d221314926dba3213b Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 9 Dec 2009 23:45:52 +0100
Subject: nv50: add S8Z24 depth texture format too

---
 src/gallium/drivers/nv50/nv50_screen.c  | 1 +
 src/gallium/drivers/nv50/nv50_tex.c     | 1 +
 src/gallium/drivers/nv50/nv50_texture.h | 1 +
 3 files changed, 3 insertions(+)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 862be46a9e..9e05745349 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -76,6 +76,7 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
 		case PIPE_FORMAT_DXT3_RGBA:
 		case PIPE_FORMAT_DXT5_RGBA:
 		case PIPE_FORMAT_Z24S8_UNORM:
+		case PIPE_FORMAT_S8Z24_UNORM:
 		case PIPE_FORMAT_Z32_FLOAT:
 		case PIPE_FORMAT_R16G16B16A16_SNORM:
 		case PIPE_FORMAT_R16G16B16A16_UNORM:
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 60b0ca7159..120aa6f362 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -68,6 +68,7 @@ static const struct nv50_texture_format nv50_tex_format_list[] =
 	_(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5),
 
 	_MIXED(Z24S8_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8),
+	_MIXED(S8Z24_UNORM, UNORM, UINT, UINT, UINT, C0, C0, C0, ONE, 8_24),
 
 	_(R16G16B16A16_SNORM, UNORM, C0, C1, C2, C3, 16_16_16_16),
 	_(R16G16B16A16_UNORM, SNORM, C0, C1, C2, C3, 16_16_16_16),
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
index d531e61132..b870302019 100644
--- a/src/gallium/drivers/nv50/nv50_texture.h
+++ b/src/gallium/drivers/nv50/nv50_texture.h
@@ -82,6 +82,7 @@
 #define NV50TIC_0_0_FMT_RGTC1                                     0x00000027
 #define NV50TIC_0_0_FMT_RGTC2                                     0x00000028
 #define NV50TIC_0_0_FMT_24_8                                      0x00000029
+#define NV50TIC_0_0_FMT_8_24                                      0x0000002a
 #define NV50TIC_0_0_FMT_32_DEPTH                                  0x0000002f
 #define NV50TIC_0_0_FMT_32_8                                      0x00000030
 
-- 
cgit v1.2.3


From d80778218d512f51e1b52e2fe652021ecefd724a Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 10 Dec 2009 00:36:03 +0100
Subject: nv50: support TXB and TXL

... and don't set the 'live' flag for TEX anymore, we'd
have to know if results affect the inputs for another TEX,
and I'm not going to do that kind of analysis now.
---
 src/gallium/drivers/nv50/nv50_program.c | 162 +++++++++++++++++++++-----------
 src/gallium/drivers/nv50/nv50_screen.c  |   3 +-
 2 files changed, 108 insertions(+), 57 deletions(-)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 8c82652913..ddb049f391 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1273,7 +1273,7 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
 
 static void
 load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
-		     struct nv50_reg **src, boolean proj)
+		     struct nv50_reg **src, unsigned arg, boolean proj)
 {
 	int mod[3] = { src[0]->mod, src[1]->mod, src[2]->mod };
 
@@ -1290,6 +1290,10 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
 
 	if (proj && 0 /* looks more correct without this */)
 		emit_mul(pc, t[2], t[2], src[3]);
+	else
+	if (arg == 4) /* there is no textureProj(samplerCubeShadow) */
+		emit_mov(pc, t[3], src[3]);
+
 	emit_flop(pc, 0, t[2], t[2]);
 
 	emit_mul(pc, t[0], src[0], t[2]);
@@ -1298,85 +1302,115 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
 }
 
 static void
-emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
-	 struct nv50_reg **src, unsigned unit, unsigned type, boolean proj)
+load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
+		     struct nv50_reg **src, unsigned dim, unsigned arg)
 {
-	struct nv50_reg *t[4];
-	struct nv50_program_exec *e;
+	unsigned c, mode;
+
+	if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
+		mode = pc->interp_mode[src[0]->index] | INTERP_PERSPECTIVE;
+
+		t[3]->rhw = src[3]->rhw;
+		emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
+		emit_flop(pc, 0, t[3], t[3]);
 
-	unsigned c, mode, dim;
+		for (c = 0; c < dim; ++c) {
+			t[c]->rhw = src[c]->rhw;
+			emit_interp(pc, t[c], t[3], mode);
+		}
+		if (arg != dim) { /* depth reference value */
+			t[dim]->rhw = src[2]->rhw;
+			emit_interp(pc, t[dim], t[3], mode);
+		}
+	} else {
+		/* XXX: for some reason the blob sometimes uses MAD
+		 * (mad f32 $rX $rY $rZ neg $r63)
+		 */
+		emit_flop(pc, 0, t[3], src[3]);
+		for (c = 0; c < dim; ++c)
+			emit_mul(pc, t[c], src[c], t[3]);
+		if (arg != dim) /* depth reference value */
+			emit_mul(pc, t[dim], src[2], t[3]);
+	}
+}
 
+static INLINE void
+get_tex_dim(unsigned type, unsigned *dim, unsigned *arg)
+{
 	switch (type) {
 	case TGSI_TEXTURE_1D:
-		dim = 1;
+		*arg = *dim = 1;
+		break;
+	case TGSI_TEXTURE_SHADOW1D:
+		*dim = 1;
+		*arg = 2;
 		break;
 	case TGSI_TEXTURE_UNKNOWN:
 	case TGSI_TEXTURE_2D:
-	case TGSI_TEXTURE_SHADOW1D: /* XXX: x, z */
 	case TGSI_TEXTURE_RECT:
-		dim = 2;
+		*arg = *dim = 2;
+		break;
+	case TGSI_TEXTURE_SHADOW2D:
+	case TGSI_TEXTURE_SHADOWRECT:
+		*dim = 2;
+		*arg = 3;
 		break;
 	case TGSI_TEXTURE_3D:
 	case TGSI_TEXTURE_CUBE:
-	case TGSI_TEXTURE_SHADOW2D:
-	case TGSI_TEXTURE_SHADOWRECT: /* XXX */
-		dim = 3;
+		*dim = *arg = 3;
 		break;
 	default:
 		assert(0);
 		break;
 	}
+}
+
+static void
+emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
+	 struct nv50_reg **src, unsigned unit, unsigned type,
+	 boolean proj, int bias_lod)
+{
+	struct nv50_reg *t[4];
+	struct nv50_program_exec *e;
+	unsigned c, dim, arg;
 
-	/* some cards need t[0]'s hw index to be a multiple of 4 */
+	/* t[i] must be within a single 128 bit super-reg */
 	alloc_temp4(pc, t, 0);
 
+	e = exec(pc);
+	e->inst[0] = 0xf0000000;
+	set_long(pc, e);
+	set_dst(pc, t[0], e);
+
+	/* TIC and TSC binding indices (TSC is ignored as TSC_LINKED = TRUE): */
+	e->inst[0] |= (unit << 9) /* | (unit << 17) */;
+
+	/* live flag (don't set if TEX results affect input to another TEX): */
+	/* e->inst[0] |= 0x00000004; */
+
+	get_tex_dim(type, &dim, &arg);
+
 	if (type == TGSI_TEXTURE_CUBE) {
-		load_cube_tex_coords(pc, t, src, proj);
+		e->inst[0] |= 0x08000000;
+		load_cube_tex_coords(pc, t, src, arg, proj);
 	} else
-	if (proj) {
-		if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
-			mode = pc->interp_mode[src[0]->index];
-
-			t[3]->rhw = src[3]->rhw;
-			emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
-			emit_flop(pc, 0, t[3], t[3]);
-
-			for (c = 0; c < dim; c++) {
-				t[c]->rhw = src[c]->rhw;
-				emit_interp(pc, t[c], t[3],
-					    (mode | INTERP_PERSPECTIVE));
-			}
-		} else {
-			emit_flop(pc, 0, t[3], src[3]);
-			for (c = 0; c < dim; c++)
-				emit_mul(pc, t[c], src[c], t[3]);
-
-			/* XXX: for some reason the blob sometimes uses MAD:
-			 * emit_mad(pc, t[c], src[0][c], t[3], t[3])
-			 * pc->p->exec_tail->inst[1] |= 0x080fc000;
-			 */
-		}
-	} else {
+	if (proj)
+		load_proj_tex_coords(pc, t, src, dim, arg);
+	else {
 		for (c = 0; c < dim; c++)
 			emit_mov(pc, t[c], src[c]);
+		if (arg != dim) /* depth reference value (always src.z here) */
+			emit_mov(pc, t[dim], src[2]);
 	}
 
-	e = exec(pc);
-	set_long(pc, e);
-	e->inst[0] |= 0xf0000000;
-	e->inst[1] |= 0x00000004;
-	set_dst(pc, t[0], e);
-	e->inst[0] |= (unit << 9);
-
-	if (dim == 2)
-		e->inst[0] |= 0x00400000;
-	else
-	if (dim == 3) {
-		e->inst[0] |= 0x00800000;
-		if (type == TGSI_TEXTURE_CUBE)
-			e->inst[0] |= 0x08000000;
+	if (bias_lod) {
+		assert(arg < 4);
+		emit_mov(pc, t[arg++], src[3]);
+		e->inst[1] |= (bias_lod < 0) ? 0x20000000 : 0x40000000;
 	}
 
+	e->inst[0] |= (arg - 1) << 22;
+
 	e->inst[0] |= (mask & 0x3) << 25;
 	e->inst[1] |= (mask & 0xc) << 12;
 
@@ -1578,6 +1612,8 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
 	case TGSI_OPCODE_LIT:
 		return 0xb;
 	case TGSI_OPCODE_TEX:
+	case TGSI_OPCODE_TXB:
+	case TGSI_OPCODE_TXL:
 	case TGSI_OPCODE_TXP:
 	{
 		const struct tgsi_instruction_texture *tex;
@@ -1586,13 +1622,17 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
 		tex = &insn->Texture;
 
 		mask = 0x7;
-		if (insn->Instruction.Opcode == TGSI_OPCODE_TXP)
-			mask |= 0x8;
+		if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
+		    insn->Instruction.Opcode != TGSI_OPCODE_TXD)
+			mask |= 0x8; /* bias, lod or proj */
 
 		switch (tex->Texture) {
 		case TGSI_TEXTURE_1D:
 			mask &= 0x9;
 			break;
+		case TGSI_TEXTURE_SHADOW1D:
+			mask &= 0x5;
+			break;
 		case TGSI_TEXTURE_2D:
 			mask &= 0xb;
 			break;
@@ -1784,6 +1824,8 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c)
 	case TGSI_OPCODE_LIT:
 	case TGSI_OPCODE_SCS:
 	case TGSI_OPCODE_TEX:
+	case TGSI_OPCODE_TXB:
+	case TGSI_OPCODE_TXL:
 	case TGSI_OPCODE_TXP:
 		/* these take care of dangerous swizzles themselves */
 		return 0x0;
@@ -2187,11 +2229,19 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		break;
 	case TGSI_OPCODE_TEX:
 		emit_tex(pc, dst, mask, src[0], unit,
-			 inst->Texture.Texture, FALSE);
+			 inst->Texture.Texture, FALSE, 0);
+		break;
+	case TGSI_OPCODE_TXB:
+		emit_tex(pc, dst, mask, src[0], unit,
+			 inst->Texture.Texture, FALSE, -1);
+		break;
+	case TGSI_OPCODE_TXL:
+		emit_tex(pc, dst, mask, src[0], unit,
+			 inst->Texture.Texture, FALSE, 1);
 		break;
 	case TGSI_OPCODE_TXP:
 		emit_tex(pc, dst, mask, src[0], unit,
-			 inst->Texture.Texture, TRUE);
+			 inst->Texture.Texture, TRUE, 0);
 		break;
 	case TGSI_OPCODE_TRUNC:
 		for (c = 0; c < 4; c++) {
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 9e05745349..d443ca3ad0 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -419,7 +419,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
 	so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
 		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-	so_data  (so, 0x00000000);
+	so_data  (so, 0x00000000); /* ignored if TSC_LINKED (0x1234) = 1 */
 
 
 	/* Vertex array limits - max them out */
@@ -433,6 +433,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_data  (so, fui(0.0));
 	so_data  (so, fui(1.0));
 
+	/* no dynamic combination of TIC & TSC entries => only BIND_TIC used */
 	so_method(so, screen->tesla, 0x1234, 1);
 	so_data  (so, 1);
 
-- 
cgit v1.2.3


From 44d8c9add2f095fc365ede751253d9fb7fc5c6e1 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 13 Dec 2009 13:44:49 +0100
Subject: nv50: add craziness for non-constant TXB and TXL

If lod or bias can be non-constant across a quad of fragments,
we need to execute TEX separately for each value.
Don't ask why.
---
 src/gallium/drivers/nv50/nv50_program.c | 248 ++++++++++++++++++++++++++------
 1 file changed, 204 insertions(+), 44 deletions(-)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index ddb049f391..2e4279ff83 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -660,7 +660,7 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 	if (src->type == P_IMMD || src->type == P_CONST) {
 		set_long(pc, e);
 		set_data(pc, src, 0x7f, 9, e);
-		e->inst[1] |= 0x20000000; /* src0 const? */
+		e->inst[1] |= 0x20000000; /* mov from c[] */
 	} else {
 		if (src->type == P_ATTR) {
 			set_long(pc, e);
@@ -675,9 +675,9 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 
 	if (is_long(e) && !is_immd(e)) {
 		e->inst[1] |= 0x04000000; /* 32-bit */
-		e->inst[1] |= 0x0000c000; /* "subsubop" 0x3 */
+		e->inst[1] |= 0x0000c000; /* 32-bit c[] load / lane mask 0:1 */
 		if (!(e->inst[1] & 0x20000000))
-			e->inst[1] |= 0x00030000; /* "subsubop" 0xf */
+			e->inst[1] |= 0x00030000; /* lane mask 2:3 */
 	} else
 		e->inst[0] |= 0x00008000;
 
@@ -692,6 +692,17 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
 	FREE(imm);
 }
 
+static void
+emit_nop(struct nv50_pc *pc)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	e->inst[0] = 0xf0000000;
+	set_long(pc, e);
+	e->inst[1] = 0xe0000000;
+	emit(pc, e);
+}
+
 static boolean
 check_swap_src_0_1(struct nv50_pc *pc,
 		   struct nv50_reg **s0, struct nv50_reg **s1)
@@ -810,6 +821,33 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
 	e->inst[1] |= ((src->hw & 127) << 14);
 }
 
+static void
+emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	assert(dst->type == P_TEMP);
+	e->inst[1] = 0x20000000 | (pred << 12);
+	set_long(pc, e);
+	set_dst(pc, dst, e);
+
+	emit(pc, e);
+}
+
+static void
+emit_mov_to_pred(struct nv50_pc *pc, int pred, struct nv50_reg *src)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	e->inst[0] = 0x000001fc;
+	e->inst[1] = 0xa0000008;
+	set_long(pc, e);
+	set_pred_wr(pc, 1, pred, e);
+	set_src_0_restricted(pc, src, e);
+
+	emit(pc, e);
+}
+
 static void
 emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	 struct nv50_reg *src1)
@@ -1271,6 +1309,65 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
 	emit(pc, e);
 }
 
+static struct nv50_program_exec *
+emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
+	    struct nv50_program_exec **join)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	if (join) {
+		set_long(pc, e);
+		e->inst[0] |= 0xa0000002;
+		emit(pc, e);
+		*join = e;
+		e = exec(pc);
+	}
+
+	set_long(pc, e);
+	e->inst[0] |= 0x10000002;
+	if (pred >= 0)
+		set_pred(pc, cc, pred, e);
+	emit(pc, e);
+	return pc->p->exec_tail;
+}
+
+#define QOP_ADD 0
+#define QOP_SUBR 1
+#define QOP_SUB 2
+#define QOP_MOV_SRC1 3
+
+/* For a quad of threads / top left, top right, bottom left, bottom right
+ * pixels, do a different operation, and take src0 from a specific thread.
+ */
+static void
+emit_quadop(struct nv50_pc *pc, struct nv50_reg *dst, int wp, int lane_src0,
+	    struct nv50_reg *src0, struct nv50_reg *src1, ubyte qop)
+{
+       struct nv50_program_exec *e = exec(pc);
+
+       e->inst[0] = 0xc0000000;
+       e->inst[1] = 0x80000000;
+       set_long(pc, e);
+       e->inst[0] |= lane_src0 << 16;
+       set_src_0(pc, src0, e);
+       set_src_2(pc, src1, e);
+
+       if (wp >= 0)
+	       set_pred_wr(pc, 1, wp, e);
+
+       if (dst)
+	       set_dst(pc, dst, e);
+       else {
+	       e->inst[0] |= 0x000001fc;
+	       e->inst[1] |= 0x00000008;
+       }
+
+       e->inst[0] |= (qop & 3) << 20;
+       e->inst[1] |= (qop >> 2) << 22;
+
+       emit(pc, e);
+}
+
 static void
 load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
 		     struct nv50_reg **src, unsigned arg, boolean proj)
@@ -1365,6 +1462,94 @@ get_tex_dim(unsigned type, unsigned *dim, unsigned *arg)
 	}
 }
 
+/* We shouldn't execute TEXLOD if any of the pixels in a quad have
+ * different LOD values, so branch off groups of equal LOD. tlod is the
+ * TEX input reg holding the lod, src its source value, tex the TEX insn. */
+static void
+emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod,
+		     struct nv50_reg *src, struct nv50_program_exec *tex)
+{
+	struct nv50_program_exec *join_at;
+	unsigned i, target = pc->p->exec_size + 9 * 2;
+
+	/* Subtract lod of each pixel from lod of lane 0, jump to the texlod
+	 * insn if result is 0, then repeat for lanes 1 to 3. target skips the
+	 * 9 long insns (quadop, join, bra, 3 * (quadop, bra)) emitted here. */
+	emit_quadop(pc, NULL, 0, 0, tlod, tlod, 0x55);
+	emit_branch(pc, 0, 2, &join_at)->param.index = target;
+
+	for (i = 1; i < 4; ++i) {
+		emit_quadop(pc, NULL, 0, i, tlod, tlod, 0x55);
+		emit_branch(pc, 0, 2, NULL)->param.index = target;
+	}
+
+	emit_mov(pc, tlod, src); /* target */
+	emit(pc, tex); /* texlod */
+
+	join_at->param.index = target + 2 * 2;
+	emit_nop(pc);
+	pc->p->exec_tail->inst[1] |= 2; /* join _after_ tex */
+}
+
+static void
+emit_texbias_sequence(struct nv50_pc *pc, struct nv50_reg *t[4], unsigned arg,
+		      struct nv50_program_exec *tex)
+{
+	struct nv50_program_exec *e;
+	struct nv50_reg imm_1248, *t123[4][4], *r_bits = alloc_temp(pc, NULL);
+	int r_pred = 0;
+	unsigned n, c, i, cc[4] = { 0x0a, 0x13, 0x11, 0x10 };
+
+	pc->allow32 = FALSE;
+	ctor_reg(&imm_1248, P_IMMD, -1, ctor_immd_4u32(pc, 1, 2, 4, 8) * 4);
+
+	/* Subtract bias value of thread i from bias values of each thread,
+	 * store result in r_pred, and set bit i in r_bits if result was 0.
+	 */
+	assert(arg < 4);
+	for (i = 0; i < 4; ++i, ++imm_1248.hw) {
+		emit_quadop(pc, NULL, r_pred, i, t[arg], t[arg], 0x55);
+		emit_mov(pc, r_bits, &imm_1248);
+		set_pred(pc, 2, r_pred, pc->p->exec_tail);
+	}
+	emit_mov_to_pred(pc, r_pred, r_bits);
+
+	/* The lanes of a quad are now grouped by the bit in r_pred they have
+	 * set. Put the input values for TEX into a new register set for each
+	 * group and execute TEX only for a specific group.
+	 * We cannot use the same register set for each group because we need
+	 * the derivatives, which are implicitly calculated, to be correct.
+	 */
+	for (i = 1; i < 4; ++i) {
+		alloc_temp4(pc, t123[i], 0);
+
+		for (c = 0; c <= arg; ++c)
+			emit_mov(pc, t123[i][c], t[c]);
+
+		*(e = exec(pc)) = *(tex);
+		e->inst[0] &= ~0x01fc;
+		set_dst(pc, t123[i][0], e);
+		set_pred(pc, cc[i], r_pred, e);
+		emit(pc, e);
+	}
+	/* finally TEX on the original regs (where we kept the input) */
+	set_pred(pc, cc[0], r_pred, tex);
+	emit(pc, tex);
+
+	/* put the 3 * n other results into regs for lane 0 */
+	n = popcnt4(((e->inst[0] >> 25) & 0x3) | ((e->inst[1] >> 12) & 0xc));
+	for (i = 1; i < 4; ++i) {
+		for (c = 0; c < n; ++c) {
+			emit_mov(pc, t[c], t123[i][c]);
+			set_pred(pc, cc[i], r_pred, pc->p->exec_tail);
+		}
+		free_temp4(pc, t123[i]);
+	}
+
+	emit_nop(pc);
+	free_temp(pc, r_bits);
+}
+
 static void
 emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 	 struct nv50_reg **src, unsigned unit, unsigned type,
@@ -1403,18 +1588,25 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 			emit_mov(pc, t[dim], src[2]);
 	}
 
-	if (bias_lod) {
-		assert(arg < 4);
-		emit_mov(pc, t[arg++], src[3]);
-		e->inst[1] |= (bias_lod < 0) ? 0x20000000 : 0x40000000;
-	}
-
-	e->inst[0] |= (arg - 1) << 22;
-
 	e->inst[0] |= (mask & 0x3) << 25;
 	e->inst[1] |= (mask & 0xc) << 12;
 
-	emit(pc, e);
+	if (!bias_lod) {
+		e->inst[0] |= (arg - 1) << 22;
+		emit(pc, e);
+	} else
+	if (bias_lod < 0) {
+		e->inst[0] |= arg << 22;
+		e->inst[1] |= 0x20000000; /* texbias */
+		emit_mov(pc, t[arg], src[3]);
+		emit_texbias_sequence(pc, t, arg, e);
+	} else {
+		e->inst[0] |= arg << 22;
+		e->inst[1] |= 0x40000000; /* texlod */
+		emit_mov(pc, t[arg], src[3]);
+		emit_texlod_sequence(pc, t[arg], src[3], e);
+	}
+
 #if 1
 	c = 0;
 	if (mask & 1) emit_mov(pc, dst[0], t[c++]);
@@ -1436,38 +1628,6 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 #endif
 }
 
-static void
-emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
-	    struct nv50_program_exec **join)
-{
-	struct nv50_program_exec *e = exec(pc);
-
-	if (join) {
-		set_long(pc, e);
-		e->inst[0] |= 0xa0000002;
-		emit(pc, e);
-		*join = e;
-		e = exec(pc);
-	}
-
-	set_long(pc, e);
-	e->inst[0] |= 0x10000002;
-	if (pred >= 0)
-		set_pred(pc, cc, pred, e);
-	emit(pc, e);
-}
-
-static void
-emit_nop(struct nv50_pc *pc)
-{
-	struct nv50_program_exec *e = exec(pc);
-
-	e->inst[0] = 0xf0000000;
-	set_long(pc, e);
-	e->inst[1] = 0xe0000000;
-	emit(pc, e);
-}
-
 static void
 emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
-- 
cgit v1.2.3


From 7b5a6fa0c87a821835161494987994a781401303 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 13 Dec 2009 14:14:41 +0100
Subject: nv50: use m2mf z pos instead of calculating offset manually

---
 src/gallium/drivers/nv50/nv50_transfer.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 4705f96f57..6a98d806d0 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -16,6 +16,7 @@ struct nv50_transfer {
 	int level_depth;
 	int level_x;
 	int level_y;
+	int level_z;
 	unsigned nblocksx;
 	unsigned nblocksy;
 };
@@ -24,10 +25,10 @@ static void
 nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 			struct nouveau_bo *src_bo, unsigned src_offset,
 			int src_pitch, unsigned src_tile_mode,
-			int sx, int sy, int sw, int sh, int sd,
+			int sx, int sy, int sz, int sw, int sh, int sd,
 			struct nouveau_bo *dst_bo, unsigned dst_offset,
 			int dst_pitch, unsigned dst_tile_mode,
-			int dx, int dy, int dw, int dh, int dd,
+			int dx, int dy, int dz, int dw, int dh, int dd,
 			int cpp, int width, int height,
 			unsigned src_reloc, unsigned dst_reloc)
 {
@@ -56,7 +57,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 		OUT_RING  (chan, sw * cpp);
 		OUT_RING  (chan, sh);
 		OUT_RING  (chan, sd);
-		OUT_RING  (chan, 0);
+		OUT_RING  (chan, sz); /* copying only 1 zslice per call */
 	}
 
 	if (!dst_bo->tile_flags) {
@@ -75,7 +76,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 		OUT_RING  (chan, dw * cpp);
 		OUT_RING  (chan, dh);
 		OUT_RING  (chan, dd);
-		OUT_RING  (chan, 0);
+		OUT_RING  (chan, dz); /* copying only 1 zslice per call */
 	}
 
 	while (height) {
@@ -166,6 +167,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	tx->level_depth = u_minify(mt->base.base.depth0, level);
 	tx->level_offset = lvl->image_offset[image];
 	tx->level_tiling = lvl->tile_mode;
+	tx->level_z = zslice;
 	tx->level_x = pf_get_nblocksx(pt->format, x);
 	tx->level_y = pf_get_nblocksy(pt->format, y);
 	ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
@@ -175,23 +177,18 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	}
 
-	if (pt->target == PIPE_TEXTURE_3D)
-		tx->level_offset += get_zslice_offset(lvl->tile_mode, zslice,
-						      lvl->pitch,
-						      tx->nblocksy);
-
 	if (usage & PIPE_TRANSFER_READ) {
 		nx = pf_get_nblocksx(pt->format, tx->base.width);
 		ny = pf_get_nblocksy(pt->format, tx->base.height);
 
 		nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset,
 					tx->level_pitch, tx->level_tiling,
-					x, y,
+					x, y, zslice,
 					tx->nblocksx, tx->nblocksy,
 					tx->level_depth,
 					tx->bo, 0,
 					tx->base.stride, tx->bo->tile_mode,
-					0, 0,
+					0, 0, 0,
 					tx->nblocksx, tx->nblocksy, 1,
 					pf_get_blocksize(pt->format), nx, ny,
 					NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
@@ -216,11 +213,11 @@ nv50_transfer_del(struct pipe_transfer *ptx)
 
 		nv50_transfer_rect_m2mf(pscreen, tx->bo, 0,
 					tx->base.stride, tx->bo->tile_mode,
-					0, 0,
+					0, 0, 0,
 					tx->nblocksx, tx->nblocksy, 1,
 					mt->base.bo, tx->level_offset,
 					tx->level_pitch, tx->level_tiling,
-					tx->level_x, tx->level_y,
+					tx->level_x, tx->level_y, tx->level_z,
 					tx->nblocksx, tx->nblocksy,
 					tx->level_depth,
 					pf_get_blocksize(pt->format), nx, ny,
-- 
cgit v1.2.3


From 079b670111fe41cabf700d089f489d4b116af5eb Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 13 Dec 2009 14:36:54 +0100
Subject: nv50: add proper zslice offset in miptree_surface

---
 src/gallium/drivers/nv50/nv50_miptree.c  | 27 +++++++++++++++++++++------
 src/gallium/drivers/nv50/nv50_transfer.c | 14 --------------
 2 files changed, 21 insertions(+), 20 deletions(-)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 795db5872d..9e083b662d 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -55,6 +55,20 @@ get_tile_mode(unsigned ny, unsigned d)
 	return tile_mode | 0x10;
 }
 
+static INLINE unsigned
+get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned nb_h)
+{
+	unsigned tile_h = get_tile_height(tile_mode);
+	unsigned tile_d = get_tile_depth(tile_mode);
+
+	/* pitch_2d == to next slice within this volume-tile */
+	/* pitch_3d == size (in bytes) of a volume-tile */
+	unsigned pitch_2d = tile_h * 64;
+	unsigned pitch_3d = tile_d * align(nb_h, tile_h) * pitch;
+
+	return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d;
+}
+
 static struct pipe_texture *
 nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 {
@@ -188,15 +202,10 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	struct nv50_miptree *mt = nv50_miptree(pt);
 	struct nv50_miptree_level *lvl = &mt->level[level];
 	struct pipe_surface *ps;
-	int img;
+	unsigned img = 0;
 
 	if (pt->target == PIPE_TEXTURE_CUBE)
 		img = face;
-	else
-	if (pt->target == PIPE_TEXTURE_3D)
-		img = zslice;
-	else
-		img = 0;
 
 	ps = CALLOC_STRUCT(pipe_surface);
 	if (!ps)
@@ -212,6 +221,12 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	ps->zslice = zslice;
 	ps->offset = lvl->image_offset[img];
 
+	if (pt->target == PIPE_TEXTURE_3D) {
+		unsigned nb_h = pf_get_nblocksy(pt->format, ps->height);
+		ps->offset += get_zslice_offset(lvl->tile_mode, zslice,
+						lvl->pitch, nb_h);
+	}
+
 	return ps;
 }
 
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 6a98d806d0..104d29a003 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -119,20 +119,6 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 	}
 }
 
-static INLINE unsigned
-get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned ny)
-{
-	unsigned tile_h = get_tile_height(tile_mode);
-	unsigned tile_d = get_tile_depth(tile_mode);
-
-	/* pitch_2d == to next slice within this volume-tile */
-	/* pitch_3d == to next slice in next 2D array of blocks */
-	unsigned pitch_2d = tile_h * 64;
-	unsigned pitch_3d = tile_d * align(ny, tile_h) * pitch;
-
-	return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d;
-}
-
 static struct pipe_transfer *
 nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		  unsigned face, unsigned level, unsigned zslice,
-- 
cgit v1.2.3


From d4d880199ead954e79cad141f7a29f7dd17fe7fc Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Sun, 13 Dec 2009 20:09:33 +0100
Subject: nouveau: nv50: Add missing ctor_immd_4u32 function

---
 src/gallium/drivers/nv50/nv50_program.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 2e4279ff83..feb3d42286 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -358,7 +358,7 @@ static void
 kill_temp_temp(struct nv50_pc *pc)
 {
 	int i;
-	
+
 	for (i = 0; i < pc->temp_temp_nr; i++)
 		free_temp(pc, pc->temp_temp[i]);
 	pc->temp_temp_nr = 0;
@@ -373,7 +373,20 @@ ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
 	pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
 	pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
 	pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
-	
+
+	return pc->immd_nr++;
+}
+
+static int
+ctor_immd_4u32(struct nv50_pc *pc, uint32_t x, uint32_t y, uint32_t z, uint32_t w)
+{
+	pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(uint32_t)),
+			       (pc->immd_nr + 1) * 4 * sizeof(uint32_t));
+	pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
+	pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
+	pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
+	pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
+
 	return pc->immd_nr++;
 }
 
-- 
cgit v1.2.3


From 1778ddaf74aba72df167769bf42150810aac91a3 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Mon, 14 Dec 2009 11:10:16 +0100
Subject: nv50: store immediates as uint32

Sometimes we want non-float immediates, hacking them into
floats isn't nice.

Sorry, this should have already been committed before.
---
 src/gallium/drivers/nv50/nv50_program.c | 62 ++++++++++++++++-----------------
 src/gallium/drivers/nv50/nv50_program.h |  2 +-
 2 files changed, 31 insertions(+), 33 deletions(-)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index feb3d42286..fe8ccd0349 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -119,7 +119,7 @@ struct nv50_pc {
 	struct nv50_reg *param;
 	int param_nr;
 	struct nv50_reg *immd;
-	float *immd_buf;
+	uint32_t *immd_buf;
 	int immd_nr;
 	struct nv50_reg **addr;
 	int addr_nr;
@@ -365,10 +365,13 @@ kill_temp_temp(struct nv50_pc *pc)
 }
 
 static int
-ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
+ctor_immd_4u32(struct nv50_pc *pc,
+	       uint32_t x, uint32_t y, uint32_t z, uint32_t w)
 {
-	pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(float)),
-			       (pc->immd_nr + 1) * 4 * sizeof(float));
+	unsigned size = pc->immd_nr * 4 * sizeof(uint32_t);
+
+	pc->immd_buf = REALLOC(pc->immd_buf, size, size + 4 * sizeof(uint32_t));
+
 	pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
 	pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
 	pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
@@ -377,17 +380,10 @@ ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
 	return pc->immd_nr++;
 }
 
-static int
-ctor_immd_4u32(struct nv50_pc *pc, uint32_t x, uint32_t y, uint32_t z, uint32_t w)
+static INLINE int
+ctor_immd_4f32(struct nv50_pc *pc, float x, float y, float z, float w)
 {
-	pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(uint32_t)),
-			       (pc->immd_nr + 1) * 4 * sizeof(uint32_t));
-	pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
-	pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
-	pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
-	pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
-
-	return pc->immd_nr++;
+	return ctor_immd_4u32(pc, fui(x), fui(y), fui(z), fui(w));
 }
 
 static struct nv50_reg *
@@ -397,11 +393,11 @@ alloc_immd(struct nv50_pc *pc, float f)
 	unsigned hw;
 
 	for (hw = 0; hw < pc->immd_nr * 4; hw++)
-		if (pc->immd_buf[hw] == f)
+		if (pc->immd_buf[hw] == fui(f))
 			break;
 
 	if (hw == pc->immd_nr * 4)
-		hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4;
+		hw = ctor_immd_4f32(pc, f, -f, 0.5 * f, 0) * 4;
 
 	ctor_reg(r, P_IMMD, -1, hw);
 	return r;
@@ -493,22 +489,24 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
 static INLINE void
 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
 {
-	unsigned val;
-	float f = pc->immd_buf[imm->hw];
+	union {
+		float f;
+		uint32_t ui;
+	} u;
+	u.ui = pc->immd_buf[imm->hw];
 
-	if (imm->mod & NV50_MOD_ABS)
-		f = fabsf(f);
-	val = fui((imm->mod & NV50_MOD_NEG) ? -f : f);
+	u.f = (imm->mod & NV50_MOD_ABS) ? fabsf(u.f) : u.f;
+	u.f = (imm->mod & NV50_MOD_NEG) ? -u.f : u.f;
 
 	set_long(pc, e);
-	/*XXX: can't be predicated - bits overlap.. catch cases where both
-	 *     are required and avoid them. */
+	/* XXX: can't be predicated - bits overlap; cases where both
+	 * are required should be avoided by using pc->allow32 */
 	set_pred(pc, 0, 0, e);
 	set_pred_wr(pc, 0, 0, e);
 
 	e->inst[1] |= 0x00000002 | 0x00000001;
-	e->inst[0] |= (val & 0x3f) << 16;
-	e->inst[1] |= (val >> 6) << 2;
+	e->inst[0] |= (u.ui & 0x3f) << 16;
+	e->inst[1] |= (u.ui >> 6) << 2;
 }
 
 static INLINE void
@@ -2762,10 +2760,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			const struct tgsi_full_immediate *imm =
 				&tp.FullToken.FullImmediate;
 
-			ctor_immd(pc, imm->u[0].Float,
-				      imm->u[1].Float,
-				      imm->u[2].Float,
-				      imm->u[3].Float);
+			ctor_immd_4f32(pc, imm->u[0].Float,
+				       imm->u[1].Float,
+				       imm->u[2].Float,
+				       imm->u[3].Float);
 		}
 			break;
 		case TGSI_TOKEN_TYPE_DECLARATION:
@@ -3245,7 +3243,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
 }
 
 static void
-nv50_program_upload_data(struct nv50_context *nv50, float *map,
+nv50_program_upload_data(struct nv50_context *nv50, uint32_t *map,
 			unsigned start, unsigned count, unsigned cbuf)
 {
 	struct nouveau_channel *chan = nv50->screen->base.channel;
@@ -3293,8 +3291,8 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
 
 	if (p->param_nr) {
 		unsigned cb;
-		float *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
-					     PIPE_BUFFER_USAGE_CPU_READ);
+		uint32_t *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
+						PIPE_BUFFER_USAGE_CPU_READ);
 
 		if (p->type == PIPE_SHADER_VERTEX)
 			cb = NV50_CB_PVP;
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 255c7c737e..4a90c372ce 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -37,7 +37,7 @@ struct nv50_program {
 
 	struct nouveau_bo *bo;
 
-	float *immd;
+	uint32_t *immd;
 	unsigned immd_nr;
 	unsigned param_nr;
 
-- 
cgit v1.2.3


From 2677f199a547f6e44d964b8c34dd7f60d9523ab2 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Mon, 14 Dec 2009 18:39:13 +0100
Subject: nv50: be more cautious about using reg_instance

Trying to free part of nv50_pc->reg_instances[] for an
nv50_reg representing an indirect constant resulted in
a segmentation fault.
---
 src/gallium/drivers/nv50/nv50_program.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index fe8ccd0349..e496cf4cad 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -156,14 +156,15 @@ struct nv50_pc {
 static INLINE struct nv50_reg *
 reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
 {
-	struct nv50_reg *dup = NULL;
+	struct nv50_reg *ri;
+
+	assert(pc->reg_instance_nr < 16);
+	ri = &pc->reg_instances[pc->reg_instance_nr++];
 	if (reg) {
-		assert(pc->reg_instance_nr < 16);
-		dup = &pc->reg_instances[pc->reg_instance_nr++];
-		*dup = *reg;
+		*ri = *reg;
 		reg->mod = 0;
 	}
-	return dup;
+	return ri;
 }
 
 static INLINE void
@@ -1886,7 +1887,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 			/* Indicate indirection by setting r->acc < 0 and
 			 * use the index field to select the address reg.
 			 */
-			r = MALLOC_STRUCT(nv50_reg);
+			r = reg_instance(pc, NULL);
 			swz = tgsi_util_get_src_register_swizzle(
 						 &src->Indirect, 0);
 			ctor_reg(r, P_CONST,
@@ -1940,6 +1941,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 		break;
 	}
 
+	if (r && r->acc >= 0 && r != temp)
+		return reg_instance(pc, r);
 	return r;
 }
 
@@ -2094,8 +2097,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 
 		for (c = 0; c < 4; c++)
 			if (src_mask & (1 << c))
-				src[i][c] = reg_instance(pc,
-					tgsi_src(pc, c, fs, neg_supp));
+				src[i][c] = tgsi_src(pc, c, fs, neg_supp);
 	}
 
 	brdc = temp = pc->r_brdc;
@@ -2466,15 +2468,6 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		}
 	}
 
-	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-		for (c = 0; c < 4; c++) {
-			if (!src[i][c])
-				continue;
-			if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST)
-				FREE(src[i][c]); /* indirect constant */
-		}
-	}
-
 	kill_temp_temp(pc);
 	pc->reg_instance_nr = 0;
 
-- 
cgit v1.2.3


From 41b52aa3362665e08bdc2f75cc9bfdc4debc6eb0 Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Fri, 4 Dec 2009 22:58:22 +0100
Subject: nouveau: avoid running out of relocs

- Added flush notify functions for NV30 and NV40.
- Flushing mid frame will call flush notify, which will resubmit all
relocs.
- We don't try to recover from reloc failure yet.
---
 src/gallium/drivers/nouveau/nouveau_stateobj.h | 49 ++++++++++++++++++++------
 src/gallium/drivers/nv04/nv04_surface_2d.c     |  9 +++--
 src/gallium/drivers/nv30/nv30_context.c        |  3 ++
 src/gallium/drivers/nv30/nv30_context.h        |  1 +
 src/gallium/drivers/nv30/nv30_state_emit.c     | 10 +++++-
 src/gallium/drivers/nv40/nv40_context.c        |  3 ++
 src/gallium/drivers/nv40/nv40_context.h        |  1 +
 src/gallium/drivers/nv40/nv40_state_emit.c     | 10 +++++-
 src/gallium/drivers/nv50/nv50_query.c          |  2 +-
 src/gallium/drivers/nv50/nv50_surface.c        |  2 ++
 src/gallium/drivers/nv50/nv50_transfer.c       |  4 +--
 11 files changed, 76 insertions(+), 18 deletions(-)

(limited to 'src/gallium/drivers/nv50')

diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
index 62990f9b6a..9aee9e4956 100644
--- a/src/gallium/drivers/nouveau/nouveau_stateobj.h
+++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h
@@ -112,20 +112,30 @@ so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so)
 {
 	struct nouveau_pushbuf *pb = chan->pushbuf;
 	unsigned nr, i;
+	int ret = 0;
 
 	nr = so->cur - so->push;
-	if (pb->remaining < nr)
-		nouveau_pushbuf_flush(chan, nr);
+	/* This will flush if we need space.
+	 * We don't actually need the marker.
+	 */
+	if ((ret = nouveau_pushbuf_marker_emit(chan, nr, so->cur_reloc))) {
+		debug_printf("so_emit failed marker emit with error %d\n", ret);
+		return;
+	}
 	pb->remaining -= nr;
 
 	memcpy(pb->cur, so->push, nr * 4);
 	for (i = 0; i < so->cur_reloc; i++) {
 		struct nouveau_stateobj_reloc *r = &so->reloc[i];
 
-		nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset,
+		if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset,
 					   r->bo, r->data, 0, r->flags,
-					   r->vor, r->tor);
+					   r->vor, r->tor))) {
+			debug_printf("so_emit failed reloc with error %d\n", ret);
+			goto out;
+		}
 	}
+out:
 	pb->cur += nr;
 }
 
@@ -134,26 +144,45 @@ so_emit_reloc_markers(struct nouveau_channel *chan, struct nouveau_stateobj *so)
 {
 	struct nouveau_pushbuf *pb = chan->pushbuf;
 	unsigned i;
+	int ret = 0;
 
 	if (!so)
 		return;
 
 	i = so->cur_reloc << 1;
-	if (pb->remaining < i)
-		nouveau_pushbuf_flush(chan, i);
+	/* This will flush if we need space.
+	 * We don't actually need the marker.
+	 */
+	if ((ret = nouveau_pushbuf_marker_emit(chan, i, i))) {
+		debug_printf("so_emit_reloc_markers failed marker emit with " \
+			"error %d\n", ret);
+		return;
+	}
 	pb->remaining -= i;
 
 	for (i = 0; i < so->cur_reloc; i++) {
 		struct nouveau_stateobj_reloc *r = &so->reloc[i];
 
-		nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, r->packet, 0,
+		if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo,
+					   r->packet, 0,
 					   (r->flags & (NOUVEAU_BO_VRAM |
 							NOUVEAU_BO_GART |
 							NOUVEAU_BO_RDWR)) |
-					   NOUVEAU_BO_DUMMY, 0, 0);
-		nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, r->data, 0,
+					   NOUVEAU_BO_DUMMY, 0, 0))) {
+			debug_printf("so_emit_reloc_markers failed reloc " \
+						"with error %d\n", ret);
+			pb->remaining += ((so->cur_reloc - i) << 1);
+			return;
+		}
+		if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo,
+					   r->data, 0,
 					   r->flags | NOUVEAU_BO_DUMMY,
-					   r->vor, r->tor);
+					   r->vor, r->tor))) {
+			debug_printf("so_emit_reloc_markers failed reloc " \
+						"with error %d\n", ret);
+			pb->remaining += ((so->cur_reloc - i) << 1) - 1;
+			return;
+		}
 	}
 }
 
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index 932893eef5..3020806c5d 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -133,6 +133,9 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
 	assert(sub_w == w || util_is_pot(sub_w));
 	assert(sub_h == h || util_is_pot(sub_h));
 
+	MARK_RING (chan, 8 + ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*17, 2 +
+			 ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*2);
+
 	BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
 	OUT_RELOCo(chan, dst_bo,
 	                 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
@@ -202,7 +205,7 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
 	unsigned src_offset = src->offset + sy * src_pitch +
 	                      sx * pf_get_blocksize(src->texture->format);
 
-	WAIT_RING (chan, 3 + ((h / 2047) + 1) * 9);
+	MARK_RING (chan, 3 + ((h / 2047) + 1) * 9, 2 + ((h / 2047) + 1) * 2);
 	BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
 	OUT_RELOCo(chan, src_bo,
 		   NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
@@ -250,7 +253,7 @@ nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
 	if (format < 0)
 		return 1;
 
-	WAIT_RING (chan, 12);
+	MARK_RING (chan, 12, 4);
 	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
 	OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
 	OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
@@ -315,7 +318,7 @@ nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
 	gdirect_format = nv04_rect_format(dst->format);
 	assert(gdirect_format >= 0);
 
-	WAIT_RING (chan, 16);
+	MARK_RING (chan, 16, 4);
 	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
 	OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 	OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
index d8300fd69f..46a821a48b 100644
--- a/src/gallium/drivers/nv30/nv30_context.c
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -58,6 +58,9 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id)
 	nv30->pipe.is_texture_referenced = nouveau_is_texture_referenced;
 	nv30->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
+	screen->base.channel->user_private = nv30;
+	screen->base.channel->flush_notify = nv30_state_flush_notify;
+
 	nv30_init_query_functions(nv30);
 	nv30_init_surface_functions(nv30);
 	nv30_init_state_functions(nv30);
diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
index 8d49366dfc..6f44b1c7fe 100644
--- a/src/gallium/drivers/nv30/nv30_context.h
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -184,6 +184,7 @@ extern void nv30_fragtex_bind(struct nv30_context *);
 /* nv30_state.c and friends */
 extern boolean nv30_state_validate(struct nv30_context *nv30);
 extern void nv30_state_emit(struct nv30_context *nv30);
+extern void nv30_state_flush_notify(struct nouveau_channel *chan);
 extern struct nv30_state_entry nv30_state_rasterizer;
 extern struct nv30_state_entry nv30_state_scissor;
 extern struct nv30_state_entry nv30_state_stipple;
diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c
index 621b8846c8..ac52d946f0 100644
--- a/src/gallium/drivers/nv30/nv30_state_emit.c
+++ b/src/gallium/drivers/nv30/nv30_state_emit.c
@@ -41,7 +41,7 @@ nv30_state_emit(struct nv30_context *nv30)
 	struct nouveau_channel *chan = nv30->screen->base.channel;
 	struct nv30_state *state = &nv30->state;
 	struct nv30_screen *screen = nv30->screen;
-	unsigned i, samplers;
+	unsigned i;
 	uint64_t states;
 
 	if (nv30->pctx_id != screen->cur_pctx) {
@@ -63,6 +63,14 @@ nv30_state_emit(struct nv30_context *nv30)
 	}
 
 	state->dirty = 0;
+}
+
+void
+nv30_state_flush_notify(struct nouveau_channel *chan)
+{
+	struct nv30_context *nv30 = chan->user_private;
+	struct nv30_state *state = &nv30->state;
+	unsigned i, samplers;
 
 	so_emit_reloc_markers(chan, state->hw[NV30_STATE_FB]);
 	for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
index 7f008274a4..eb9cce4c78 100644
--- a/src/gallium/drivers/nv40/nv40_context.c
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -58,6 +58,9 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
 	nv40->pipe.is_texture_referenced = nouveau_is_texture_referenced;
 	nv40->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
+	screen->base.channel->user_private = nv40;
+	screen->base.channel->flush_notify = nv40_state_flush_notify;
+
 	nv40_init_query_functions(nv40);
 	nv40_init_surface_functions(nv40);
 	nv40_init_state_functions(nv40);
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
index a3d594167a..cf33b64a86 100644
--- a/src/gallium/drivers/nv40/nv40_context.h
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -204,6 +204,7 @@ extern void nv40_fragtex_bind(struct nv40_context *);
 extern boolean nv40_state_validate(struct nv40_context *nv40);
 extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40);
 extern void nv40_state_emit(struct nv40_context *nv40);
+extern void nv40_state_flush_notify(struct nouveau_channel *chan);
 extern struct nv40_state_entry nv40_state_rasterizer;
 extern struct nv40_state_entry nv40_state_scissor;
 extern struct nv40_state_entry nv40_state_stipple;
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
index 198692965d..ba0fbcb26a 100644
--- a/src/gallium/drivers/nv40/nv40_state_emit.c
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -57,7 +57,7 @@ nv40_state_emit(struct nv40_context *nv40)
 	struct nouveau_channel *chan = nv40->screen->base.channel;
 	struct nv40_state *state = &nv40->state;
 	struct nv40_screen *screen = nv40->screen;
-	unsigned i, samplers;
+	unsigned i;
 	uint64_t states;
 
 	if (nv40->pctx_id != screen->cur_pctx) {
@@ -87,6 +87,14 @@ nv40_state_emit(struct nv40_context *nv40)
 	}
 
 	state->dirty = 0;
+}
+
+void
+nv40_state_flush_notify(struct nouveau_channel *chan)
+{
+	struct nv40_context *nv40 = chan->user_private;
+	struct nv40_state *state = &nv40->state;
+	unsigned i, samplers;
 
 	so_emit_reloc_markers(chan, state->hw[NV40_STATE_FB]);
 	for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 5305c93d59..268c9823f7 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -93,7 +93,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nv50_query *q = nv50_query(pq);
 
-	WAIT_RING (chan, 5);
+	MARK_RING (chan, 5, 2); /* flush on lack of space or relocs */
 	BEGIN_RING(chan, tesla, NV50TCL_QUERY_ADDRESS_HIGH, 4);
 	OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 	OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 6bf6f773b0..79655fc08d 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -62,6 +62,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
  		return 1;
 
  	if (!bo->tile_flags) {
+		MARK_RING (chan, 9, 2); /* flush on lack of space or relocs */
  		BEGIN_RING(chan, eng2d, mthd, 2);
  		OUT_RING  (chan, format);
  		OUT_RING  (chan, 1);
@@ -72,6 +73,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
  		OUT_RELOCh(chan, bo, ps->offset, flags);
  		OUT_RELOCl(chan, bo, ps->offset, flags);
  	} else {
+		MARK_RING (chan, 11, 2); /* flush on lack of space or relocs */
  		BEGIN_RING(chan, eng2d, mthd, 5);
  		OUT_RING  (chan, format);
  		OUT_RING  (chan, 0);
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 104d29a003..6240a0c757 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -82,7 +82,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 	while (height) {
 		int line_count = height > 2047 ? 2047 : height;
 
-		WAIT_RING (chan, 15);
+		MARK_RING (chan, 15, 4); /* flush on lack of space or relocs */
 		BEGIN_RING(chan, m2mf,
 			NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH, 2);
 		OUT_RELOCh(chan, src_bo, src_offset, src_reloc);
@@ -265,7 +265,7 @@ nv50_upload_sifc(struct nv50_context *nv50,
 
 	reloc |= NOUVEAU_BO_WR;
 
-	WAIT_RING (chan, 32);
+	MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */
 
 	if (bo->tile_flags) {
 		BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5);
-- 
cgit v1.2.3