summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/nv50
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/nv50')
-rw-r--r--src/gallium/drivers/nv50/Makefile3
-rw-r--r--src/gallium/drivers/nv50/nv50_clear.c6
-rw-r--r--src/gallium/drivers/nv50/nv50_context.c44
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h122
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c5
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c40
-rw-r--r--src/gallium/drivers/nv50/nv50_push.c326
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c305
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.h4
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c110
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c436
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c31
-rw-r--r--src/gallium/drivers/nv50/nv50_tex.c225
-rw-r--r--src/gallium/drivers/nv50/nv50_texture.h32
-rw-r--r--src/gallium/drivers/nv50/nv50_transfer.c21
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c1112
16 files changed, 1363 insertions, 1459 deletions
diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile
index 612aea28a3..5d622e1c13 100644
--- a/src/gallium/drivers/nv50/Makefile
+++ b/src/gallium/drivers/nv50/Makefile
@@ -16,6 +16,7 @@ C_SOURCES = \
nv50_surface.c \
nv50_tex.c \
nv50_transfer.c \
- nv50_vbo.c
+ nv50_vbo.c \
+ nv50_push.c
include ../../Makefile.template
diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c
index e0b2d2880b..5447904e9c 100644
--- a/src/gallium/drivers/nv50/nv50_clear.c
+++ b/src/gallium/drivers/nv50/nv50_clear.c
@@ -35,8 +35,11 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct pipe_framebuffer_state *fb = &nv50->framebuffer;
unsigned mode = 0, i;
+ const unsigned dirty = nv50->dirty;
- if (!nv50_state_validate(nv50))
+ /* don't need NEW_BLEND, NV50TCL_COLOR_MASK doesn't affect CLEAR_BUFFERS */
+ nv50->dirty &= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
+ if (!nv50_state_validate(nv50, 64))
return;
if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
@@ -64,5 +67,6 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1);
OUT_RING (chan, (i << 6) | 0x3c);
}
+ nv50->dirty = dirty;
}
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index 7be12fcdef..aa14e17872 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -46,43 +46,13 @@ static void
nv50_destroy(struct pipe_context *pipe)
{
struct nv50_context *nv50 = nv50_context(pipe);
+ int i;
- if (nv50->state.fb)
- so_ref(NULL, &nv50->state.fb);
- if (nv50->state.blend)
- so_ref(NULL, &nv50->state.blend);
- if (nv50->state.blend_colour)
- so_ref(NULL, &nv50->state.blend_colour);
- if (nv50->state.zsa)
- so_ref(NULL, &nv50->state.zsa);
- if (nv50->state.rast)
- so_ref(NULL, &nv50->state.rast);
- if (nv50->state.stipple)
- so_ref(NULL, &nv50->state.stipple);
- if (nv50->state.scissor)
- so_ref(NULL, &nv50->state.scissor);
- if (nv50->state.viewport)
- so_ref(NULL, &nv50->state.viewport);
- if (nv50->state.tsc_upload)
- so_ref(NULL, &nv50->state.tsc_upload);
- if (nv50->state.tic_upload)
- so_ref(NULL, &nv50->state.tic_upload);
- if (nv50->state.vertprog)
- so_ref(NULL, &nv50->state.vertprog);
- if (nv50->state.fragprog)
- so_ref(NULL, &nv50->state.fragprog);
- if (nv50->state.geomprog)
- so_ref(NULL, &nv50->state.geomprog);
- if (nv50->state.fp_linkage)
- so_ref(NULL, &nv50->state.fp_linkage);
- if (nv50->state.gp_linkage)
- so_ref(NULL, &nv50->state.gp_linkage);
- if (nv50->state.vtxfmt)
- so_ref(NULL, &nv50->state.vtxfmt);
- if (nv50->state.vtxbuf)
- so_ref(NULL, &nv50->state.vtxbuf);
- if (nv50->state.vtxattr)
- so_ref(NULL, &nv50->state.vtxattr);
+ for (i = 0; i < 64; i++) {
+ if (!nv50->state.hw[i])
+ continue;
+ so_ref(NULL, &nv50->state.hw[i]);
+ }
draw_destroy(nv50->draw);
@@ -123,11 +93,11 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
nv50->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
screen->base.channel->user_private = nv50;
- screen->base.channel->flush_notify = nv50_state_flush_notify;
nv50_init_surface_functions(nv50);
nv50_init_state_functions(nv50);
nv50_init_query_functions(nv50);
+ nv50_init_transfer_functions(nv50);
nv50->draw = draw_create();
assert(nv50->draw);
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index c540594b94..bc7831d9ac 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -72,6 +72,23 @@ struct nv50_sampler_stateobj {
unsigned tsc[8];
};
+struct nv50_sampler_view { /* driver-side wrapper around a gallium sampler view */
+ struct pipe_sampler_view pipe; /* kept as first member: nv50_sampler_view() casts a pipe_sampler_view pointer to this type */
+ uint32_t tic[8]; /* eight 32-bit words; presumably the hardware texture image control entry - TODO confirm in nv50_tex.c */
+};
+
+struct nv50_vtxelt_stateobj { /* vertex element state object: gallium elements plus one driver word per element */
+ struct pipe_vertex_element pipe[16]; /* gallium vertex element descriptions */
+ unsigned num_elements; /* number of entries of pipe[] in use (loop bound in the push path) */
+ uint32_t hw[16]; /* one 32-bit word per element; presumably the translated hardware format - TODO confirm in nv50_vbo.c */
+};
+
+/*
+ * Downcast a gallium sampler view to the nv50 wrapper that embeds it
+ * as its first member.
+ */
+static INLINE struct nv50_sampler_view *
+nv50_sampler_view(struct pipe_sampler_view *view)
+{
+	struct nv50_sampler_view *wrapped = (struct nv50_sampler_view *)view;
+
+	return wrapped;
+}
+
static INLINE unsigned
get_tile_height(uint32_t tile_mode)
{
@@ -90,10 +107,12 @@ struct nv50_miptree_level {
unsigned tile_mode;
};
+#define NV50_MAX_TEXTURE_LEVELS 16
+
struct nv50_miptree {
struct nouveau_miptree base;
- struct nv50_miptree_level level[PIPE_MAX_TEXTURE_LEVELS];
+ struct nv50_miptree_level level[NV50_MAX_TEXTURE_LEVELS];
int image_nr;
int total_size;
};
@@ -115,30 +134,12 @@ nv50_surface(struct pipe_surface *pt)
}
struct nv50_state {
- unsigned dirty;
+ struct nouveau_stateobj *hw[64];
+ uint64_t hw_dirty;
- struct nouveau_stateobj *fb;
- struct nouveau_stateobj *blend;
- struct nouveau_stateobj *blend_colour;
- struct nouveau_stateobj *zsa;
- struct nouveau_stateobj *stencil_ref;
- struct nouveau_stateobj *rast;
- struct nouveau_stateobj *stipple;
- struct nouveau_stateobj *scissor;
- unsigned scissor_enabled;
- struct nouveau_stateobj *viewport;
- struct nouveau_stateobj *tsc_upload;
- struct nouveau_stateobj *tic_upload;
- unsigned miptree_nr[PIPE_SHADER_TYPES];
- struct nouveau_stateobj *vertprog;
- struct nouveau_stateobj *fragprog;
- struct nouveau_stateobj *geomprog;
- struct nouveau_stateobj *fp_linkage;
- struct nouveau_stateobj *gp_linkage;
- struct nouveau_stateobj *vtxfmt;
+ unsigned sampler_view_nr[3];
struct nouveau_stateobj *vtxbuf;
struct nouveau_stateobj *vtxattr;
- struct nouveau_stateobj *instbuf;
unsigned vtxelt_nr;
};
@@ -167,14 +168,13 @@ struct nv50_context {
struct pipe_buffer *constbuf[PIPE_SHADER_TYPES];
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned vtxbuf_nr;
- struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
- unsigned vtxelt_nr;
- struct nv50_sampler_stateobj *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
- unsigned sampler_nr[PIPE_SHADER_TYPES];
- struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
- unsigned miptree_nr[PIPE_SHADER_TYPES];
+ struct nv50_vtxelt_stateobj *vtxelt;
+ struct nv50_sampler_stateobj *sampler[3][PIPE_MAX_SAMPLERS];
+ unsigned sampler_nr[3];
+ struct pipe_sampler_view *sampler_views[3][PIPE_MAX_SAMPLERS];
+ unsigned sampler_view_nr[3];
- uint16_t vbo_fifo;
+ unsigned vbo_fifo;
};
static INLINE struct nv50_context *
@@ -186,6 +186,7 @@ nv50_context(struct pipe_context *pipe)
extern void nv50_init_surface_functions(struct nv50_context *nv50);
extern void nv50_init_state_functions(struct nv50_context *nv50);
extern void nv50_init_query_functions(struct nv50_context *nv50);
+extern void nv50_init_transfer_functions(struct nv50_context *nv50);
extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen);
@@ -216,24 +217,36 @@ extern void nv50_draw_elements_instanced(struct pipe_context *pipe,
unsigned count,
unsigned startInstance,
unsigned instanceCount);
-extern void nv50_vbo_validate(struct nv50_context *nv50);
+extern void nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso);
+extern struct nouveau_stateobj *nv50_vbo_validate(struct nv50_context *nv50);
+
+/* nv50_push.c */
+extern void
+nv50_push_elements_instanced(struct pipe_context *, struct pipe_buffer *,
+ unsigned idxsize, unsigned mode, unsigned start,
+ unsigned count, unsigned i_start,
+ unsigned i_count);
/* nv50_clear.c */
extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
const float *rgba, double depth, unsigned stencil);
/* nv50_program.c */
-extern void nv50_vertprog_validate(struct nv50_context *nv50);
-extern void nv50_fragprog_validate(struct nv50_context *nv50);
-extern void nv50_geomprog_validate(struct nv50_context *nv50);
-extern void nv50_fp_linkage_validate(struct nv50_context *nv50);
-extern void nv50_gp_linkage_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_vertprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_fragprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_geomprog_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_fp_linkage_validate(struct nv50_context *nv50);
+extern struct nouveau_stateobj *
+nv50_gp_linkage_validate(struct nv50_context *nv50);
extern void nv50_program_destroy(struct nv50_context *nv50,
struct nv50_program *p);
/* nv50_state_validate.c */
-extern boolean nv50_state_validate(struct nv50_context *nv50);
-extern void nv50_state_flush_notify(struct nouveau_channel *chan);
+extern boolean nv50_state_validate(struct nv50_context *nv50, unsigned dwords);
extern void nv50_so_init_sifc(struct nv50_context *nv50,
struct nouveau_stateobj *so,
@@ -241,7 +254,9 @@ extern void nv50_so_init_sifc(struct nv50_context *nv50,
unsigned offset, unsigned size);
/* nv50_tex.c */
-extern void nv50_tex_validate(struct nv50_context *);
+extern boolean nv50_tex_construct(struct nv50_sampler_view *view);
+extern void nv50_tex_relocs(struct nv50_context *);
+extern struct nouveau_stateobj *nv50_tex_validate(struct nv50_context *);
/* nv50_transfer.c */
extern void
@@ -255,4 +270,35 @@ nv50_upload_sifc(struct nv50_context *nv50,
struct pipe_context *
nv50_create(struct pipe_screen *pscreen, void *priv);
+/*
+ * Translate a gallium PIPE_PRIM_* primitive type into the matching
+ * NV50TCL_VERTEX_BEGIN_* value. An unrecognised type is reported through
+ * NOUVEAU_ERR and mapped to points.
+ */
+static INLINE unsigned
+nv50_prim(unsigned mode)
+{
+	unsigned hw_prim;
+
+	switch (mode) {
+	case PIPE_PRIM_POINTS:
+		hw_prim = NV50TCL_VERTEX_BEGIN_POINTS;
+		break;
+	case PIPE_PRIM_LINES:
+		hw_prim = NV50TCL_VERTEX_BEGIN_LINES;
+		break;
+	case PIPE_PRIM_LINE_LOOP:
+		hw_prim = NV50TCL_VERTEX_BEGIN_LINE_LOOP;
+		break;
+	case PIPE_PRIM_LINE_STRIP:
+		hw_prim = NV50TCL_VERTEX_BEGIN_LINE_STRIP;
+		break;
+	case PIPE_PRIM_TRIANGLES:
+		hw_prim = NV50TCL_VERTEX_BEGIN_TRIANGLES;
+		break;
+	case PIPE_PRIM_TRIANGLE_STRIP:
+		hw_prim = NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
+		break;
+	case PIPE_PRIM_TRIANGLE_FAN:
+		hw_prim = NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
+		break;
+	case PIPE_PRIM_QUADS:
+		hw_prim = NV50TCL_VERTEX_BEGIN_QUADS;
+		break;
+	case PIPE_PRIM_QUAD_STRIP:
+		hw_prim = NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
+		break;
+	case PIPE_PRIM_POLYGON:
+		hw_prim = NV50TCL_VERTEX_BEGIN_POLYGON;
+		break;
+	case PIPE_PRIM_LINES_ADJACENCY:
+		hw_prim = NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
+		break;
+	case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+		hw_prim = NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
+		break;
+	case PIPE_PRIM_TRIANGLES_ADJACENCY:
+		hw_prim = NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
+		break;
+	case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+		hw_prim = NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
+		break;
+	default:
+		/* unknown primitive: complain and fall back to points */
+		NOUVEAU_ERR("invalid primitive type %d\n", mode);
+		hw_prim = NV50TCL_VERTEX_BEGIN_POINTS;
+		break;
+	}
+
+	return hw_prim;
+}
+
#endif
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 3f9d869d7a..e091cae602 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -104,7 +104,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
tile_flags = 0x7400;
break;
default:
- if ((pt->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) &&
+ if ((pt->tex_usage & PIPE_TEXTURE_USAGE_SCANOUT) &&
util_format_get_blocksizebits(pt->format) == 32)
tile_flags = 0x7a00;
else
@@ -255,9 +255,10 @@ void
nv50_screen_init_miptree_functions(struct pipe_screen *pscreen)
{
pscreen->texture_create = nv50_miptree_create;
- pscreen->texture_blanket = nv50_miptree_blanket;
pscreen->texture_destroy = nv50_miptree_destroy;
pscreen->get_tex_surface = nv50_miptree_surface_new;
pscreen->tex_surface_destroy = nv50_miptree_surface_del;
+
+ nouveau_screen(pscreen)->texture_blanket = nv50_miptree_blanket;
}
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 2372cbbef6..c857816b31 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -4270,7 +4270,7 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
FREE(up);
}
-void
+struct nouveau_stateobj *
nv50_vertprog_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4286,6 +4286,9 @@ nv50_vertprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
+ if (!(nv50->dirty & NV50_NEW_VERTPROG))
+ return NULL;
+
so = so_new(5, 7, 2);
so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
@@ -4301,11 +4304,10 @@ nv50_vertprog_validate(struct nv50_context *nv50)
so_data (so, p->cfg.high_temp);
so_method(so, tesla, NV50TCL_VP_START_ID, 1);
so_data (so, 0); /* program start offset */
- so_ref(so, &nv50->state.vertprog);
- so_ref(NULL, &so);
+ return so;
}
-void
+struct nouveau_stateobj *
nv50_fragprog_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4321,6 +4323,9 @@ nv50_fragprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
+ if (!(nv50->dirty & NV50_NEW_FRAGPROG))
+ return NULL;
+
so = so_new(6, 7, 2);
so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
@@ -4337,11 +4342,10 @@ nv50_fragprog_validate(struct nv50_context *nv50)
so_data (so, p->cfg.regs[3]);
so_method(so, tesla, NV50TCL_FP_START_ID, 1);
so_data (so, 0); /* program start offset */
- so_ref(so, &nv50->state.fragprog);
- so_ref(NULL, &so);
+ return so;
}
-void
+struct nouveau_stateobj *
nv50_geomprog_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4357,6 +4361,9 @@ nv50_geomprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
+ if (!(nv50->dirty & NV50_NEW_GEOMPROG))
+ return NULL;
+
so = so_new(6, 7, 2);
so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2);
so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
@@ -4373,8 +4380,7 @@ nv50_geomprog_validate(struct nv50_context *nv50)
so_data (so, p->cfg.vert_count);
so_method(so, tesla, NV50TCL_GP_START_ID, 1);
so_data (so, 0);
- so_ref(so, &nv50->state.geomprog);
- so_ref(NULL, &so);
+ return so;
}
static uint32_t
@@ -4454,7 +4460,7 @@ nv50_vec4_map(uint32_t *map32, int mid, uint8_t zval, uint32_t lin[4],
return mid;
}
-void
+struct nouveau_stateobj *
nv50_fp_linkage_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4580,8 +4586,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
so_method(so, tesla, NV50TCL_GP_ENABLE, 1);
so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0);
- so_ref(so, &nv50->state.fp_linkage);
- so_ref(NULL, &so);
+ return so;
}
static int
@@ -4615,7 +4620,7 @@ construct_vp_gp_mapping(uint32_t *map32, int m,
return m;
}
-void
+struct nouveau_stateobj *
nv50_gp_linkage_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -4625,10 +4630,8 @@ nv50_gp_linkage_validate(struct nv50_context *nv50)
uint32_t map[16];
int m = 0;
- if (!gp) {
- so_ref(NULL, &nv50->state.gp_linkage);
- return;
- }
+ if (!gp)
+ return NULL;
memset(map, 0, sizeof(map));
m = construct_vp_gp_mapping(map, m, vp, gp);
@@ -4646,8 +4649,7 @@ nv50_gp_linkage_validate(struct nv50_context *nv50)
so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m);
so_datap (so, map, m);
- so_ref(so, &nv50->state.gp_linkage);
- so_ref(NULL, &so);
+ return so;
}
void
diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c
new file mode 100644
index 0000000000..96a1f32d30
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_push.c
@@ -0,0 +1,326 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nouveau/nouveau_util.h"
+#include "nv50_context.h"
+
+struct push_context { /* per-draw state shared by the vertex push callbacks in this file */
+ struct nv50_context *nv50; /* context the draw belongs to; used to reach screen->tesla */
+
+ unsigned vtx_size; /* number of 32-bit words emitted per vertex in one VERTEX_DATA packet */
+
+ void *idxbuf; /* CPU pointer to the index data, NULL for non-indexed draws */
+ unsigned idxsize; /* index size in bytes as passed by the caller (1 and 2 are handled explicitly, anything else as 32-bit) */
+
+ float edgeflag; /* last edge flag value sent; starts at 0.5f so the first comparison in emit_vertex differs */
+ int edgeflag_attr; /* attr[] slot holding the edge flag; emit_vertex only reads it when this is < 16 */
+
+ struct {
+ void *map; /* CPU pointer to this attribute's first element */
+ unsigned stride; /* bytes between consecutive elements */
+ unsigned divisor; /* instance divisor; 0 means the attribute is not advanced per instance */
+ unsigned step; /* instance counter compared against divisor; reset to 0 each time map advances */
+ void (*push)(struct nouveau_channel *, void *); /* writes one element's words to the channel */
+ } attr[16];
+ unsigned attr_nr; /* number of attr[] entries filled in */
+};
+
+/* Push a single 32-bit word of attribute data. */
+static void
+emit_b32_1(struct nouveau_channel *chan, void *data)
+{
+   const uint32_t *word = data;
+
+   OUT_RING(chan, *word);
+}
+
+/* Push two consecutive 32-bit words of attribute data. */
+static void
+emit_b32_2(struct nouveau_channel *chan, void *data)
+{
+   const uint32_t *words = data;
+   unsigned k;
+
+   for (k = 0; k < 2; k++)
+      OUT_RING(chan, words[k]);
+}
+
+/* Push three consecutive 32-bit words of attribute data. */
+static void
+emit_b32_3(struct nouveau_channel *chan, void *data)
+{
+   const uint32_t *words = data;
+   unsigned k;
+
+   for (k = 0; k < 3; k++)
+      OUT_RING(chan, words[k]);
+}
+
+/* Push four consecutive 32-bit words of attribute data. */
+static void
+emit_b32_4(struct nouveau_channel *chan, void *data)
+{
+   const uint32_t *words = data;
+   unsigned k;
+
+   for (k = 0; k < 4; k++)
+      OUT_RING(chan, words[k]);
+}
+
+/* Push one 16-bit value, zero-extended into a single ring word. */
+static void
+emit_b16_1(struct nouveau_channel *chan, void *data)
+{
+   const uint16_t *half = data;
+
+   OUT_RING(chan, *half);
+}
+
+/*
+ * Push three 16-bit components as two ring words: components 0 and 1 are
+ * packed into the first word (component 1 in the upper half), component 2
+ * goes alone into the second.
+ */
+static void
+emit_b16_3(struct nouveau_channel *chan, void *data)
+{
+   const uint16_t *v = data;
+
+   /* Widen before shifting: v[1] would otherwise promote to a signed int,
+    * and shifting a value >= 0x8000 left by 16 overflows it. */
+   OUT_RING(chan, ((uint32_t)v[1] << 16) | v[0]);
+   OUT_RING(chan, v[2]);
+}
+
+/* Push one 8-bit value, zero-extended into a single ring word. */
+static void
+emit_b08_1(struct nouveau_channel *chan, void *data)
+{
+   const uint8_t *byte = data;
+
+   OUT_RING(chan, *byte);
+}
+
+/*
+ * Push three 8-bit components packed into one ring word, component 0 in
+ * the lowest byte.
+ */
+static void
+emit_b08_3(struct nouveau_channel *chan, void *data)
+{
+   const uint8_t *bytes = data;
+   const int lo = bytes[0];
+   const int mid = bytes[1] << 8;
+   const int hi = bytes[2] << 16;
+
+   OUT_RING(chan, hi | mid | lo);
+}
+
+/*
+ * Emit vertex number n of the current draw.
+ *
+ * If an edge flag attribute is configured (edgeflag_attr < 16) and its value
+ * for this vertex differs from the last one sent, EDGEFLAG_ENABLE is updated
+ * first. Then one VERTEX_DATA packet of ctx->vtx_size words is opened and
+ * every attribute's push callback writes its element for vertex n.
+ *
+ * NOTE(review): edgeflag_attr is only checked against 16, not against
+ * ctx->attr_nr - confirm that callers guarantee the slot is populated.
+ */
+static INLINE void
+emit_vertex(struct push_context *ctx, unsigned n)
+{
+   struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+   struct nouveau_channel *chan = tesla->channel;
+   unsigned i;
+
+   if (ctx->edgeflag_attr < 16) {
+      /* Offsets are in bytes; go through uint8_t because arithmetic on
+       * void * is a GNU extension rather than standard C. */
+      const uint8_t *base = ctx->attr[ctx->edgeflag_attr].map;
+      const float *edgeflag = (const float *)
+         (base + ctx->attr[ctx->edgeflag_attr].stride * n);
+
+      if (*edgeflag != ctx->edgeflag) {
+         BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+         OUT_RING  (chan, *edgeflag ? 1 : 0);
+         ctx->edgeflag = *edgeflag;
+      }
+   }
+
+   BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, ctx->vtx_size);
+   for (i = 0; i < ctx->attr_nr; i++) {
+      uint8_t *elem = (uint8_t *)ctx->attr[i].map + ctx->attr[i].stride * n;
+
+      ctx->attr[i].push(chan, elem);
+   }
+}
+
+/*
+ * Edge callback installed in struct u_split_prim: write 1 or 0 to
+ * EDGEFLAG_ENABLE depending on `enabled`.
+ */
+static void
+emit_edgeflag(void *priv, boolean enabled)
+{
+   struct push_context *push = priv;
+   struct nouveau_grobj *tesla = push->nv50->screen->tesla;
+   struct nouveau_channel *chan = tesla->channel;
+   const int value = enabled ? 1 : 0;
+
+   BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+   OUT_RING  (chan, value);
+}
+
+/* Emit `count` vertices selected by 8-bit indices, beginning at index `start`. */
+static void
+emit_elt08(void *priv, unsigned start, unsigned count)
+{
+   struct push_context *push = priv;
+   const uint8_t *indices = push->idxbuf;
+   unsigned k;
+
+   for (k = 0; k < count; k++)
+      emit_vertex(push, indices[start + k]);
+}
+
+/* Emit `count` vertices selected by 16-bit indices, beginning at index `start`. */
+static void
+emit_elt16(void *priv, unsigned start, unsigned count)
+{
+   struct push_context *push = priv;
+   const uint16_t *indices = push->idxbuf;
+   unsigned k;
+
+   for (k = 0; k < count; k++)
+      emit_vertex(push, indices[start + k]);
+}
+
+/* Emit `count` vertices selected by 32-bit indices, beginning at index `start`. */
+static void
+emit_elt32(void *priv, unsigned start, unsigned count)
+{
+   struct push_context *push = priv;
+   const uint32_t *indices = push->idxbuf;
+   unsigned k;
+
+   for (k = 0; k < count; k++)
+      emit_vertex(push, indices[start + k]);
+}
+
+/* Emit `count` consecutive vertices of a non-indexed draw, beginning at `start`. */
+static void
+emit_verts(void *priv, unsigned start, unsigned count)
+{
+   const unsigned end = start + count;
+   unsigned vtx;
+
+   for (vtx = start; vtx != end; vtx++)
+      emit_vertex(priv, vtx);
+}
+
+/*
+ * Draw by writing vertex data directly into the command stream.
+ *
+ * pipe     context to draw with
+ * idxbuf   index buffer, or NULL for a non-indexed draw
+ * idxsize  index size in bytes (1, 2, anything else is read as 32-bit)
+ * mode     PIPE_PRIM_* primitive type
+ * start    first vertex / first index
+ * count    number of vertices / indices
+ * i_start  first instance
+ * i_count  number of instances
+ *
+ * Steps: map every vertex buffer whose element bit is set in nv50->vbo_fifo
+ * and pick a push callback from its component size and count; map the index
+ * buffer if there is one; write i_start through NV50TCL_CB_ADDR; then, per
+ * instance, split the primitive into pieces that fit the available ring
+ * space and wrap each piece in VERTEX_BEGIN / VERTEX_END.
+ *
+ * The function returns early, after asserting, if a buffer cannot be mapped,
+ * a vertex format is not one of the handled size/count combinations, or
+ * state re-validation after a flush fails.
+ */
+void
+nv50_push_elements_instanced(struct pipe_context *pipe,
+                             struct pipe_buffer *idxbuf, unsigned idxsize,
+                             unsigned mode, unsigned start, unsigned count,
+                             unsigned i_start, unsigned i_count)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   struct nouveau_grobj *tesla = nv50->screen->tesla;
+   struct nouveau_channel *chan = tesla->channel;
+   struct push_context ctx;
+   const unsigned p_overhead = 4 + /* begin/end */
+                               4; /* potential edgeflag enable/disable */
+   const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
+                               2; /* potential edgeflag modification */
+   struct u_split_prim s;
+   unsigned vtx_size;
+   boolean nzi = FALSE;
+   unsigned i;
+
+   ctx.nv50 = nv50;
+   ctx.attr_nr = 0;
+   ctx.idxbuf = NULL;
+   ctx.idxsize = 0;
+   ctx.vtx_size = 0;
+   ctx.edgeflag = 0.5f;
+   ctx.edgeflag_attr = nv50->vertprog->cfg.edgeflag_in;
+
+   /* map vertex buffers, determine vertex size */
+   for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+      struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
+      struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
+      struct nouveau_bo *bo = nouveau_bo(vb->buffer);
+      unsigned size, nr_components, n;
+
+      if (!(nv50->vbo_fifo & (1 << i)))
+         continue;
+      n = ctx.attr_nr++;
+
+      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
+         assert(bo->map);
+         return;
+      }
+      /* byte offsets: use uint8_t, arithmetic on void * is a GNU extension */
+      ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset +
+                        ve->src_offset;
+      nouveau_bo_unmap(bo);
+
+      ctx.attr[n].stride = vb->stride;
+      ctx.attr[n].divisor = ve->instance_divisor;
+      if (ctx.attr[n].divisor) {
+         ctx.attr[n].step = i_start % ve->instance_divisor;
+         ctx.attr[n].map = (uint8_t *)ctx.attr[n].map + i_start * vb->stride;
+      }
+
+      /* stays NULL if the component count matches none of the cases below */
+      ctx.attr[n].push = NULL;
+
+      size = util_format_get_component_bits(ve->src_format,
+                                            UTIL_FORMAT_COLORSPACE_RGB, 0);
+      nr_components = util_format_get_nr_components(ve->src_format);
+      switch (size) {
+      case 8:
+         switch (nr_components) {
+         case 1: ctx.attr[n].push = emit_b08_1; break;
+         case 2: ctx.attr[n].push = emit_b16_1; break;
+         case 3: ctx.attr[n].push = emit_b08_3; break;
+         case 4: ctx.attr[n].push = emit_b32_1; break;
+         }
+         ctx.vtx_size++;
+         break;
+      case 16:
+         switch (nr_components) {
+         case 1: ctx.attr[n].push = emit_b16_1; break;
+         case 2: ctx.attr[n].push = emit_b32_1; break;
+         case 3: ctx.attr[n].push = emit_b16_3; break;
+         case 4: ctx.attr[n].push = emit_b32_2; break;
+         }
+         ctx.vtx_size += (nr_components + 1) >> 1;
+         break;
+      case 32:
+         switch (nr_components) {
+         case 1: ctx.attr[n].push = emit_b32_1; break;
+         case 2: ctx.attr[n].push = emit_b32_2; break;
+         case 3: ctx.attr[n].push = emit_b32_3; break;
+         case 4: ctx.attr[n].push = emit_b32_4; break;
+         }
+         ctx.vtx_size += nr_components;
+         break;
+      default:
+         assert(0);
+         return;
+      }
+
+      /* never leave an unset callback behind for emit_vertex to call */
+      if (!ctx.attr[n].push) {
+         assert(0);
+         return;
+      }
+   }
+   vtx_size = ctx.vtx_size + v_overhead;
+
+   /* map index buffer, if present */
+   if (idxbuf) {
+      struct nouveau_bo *bo = nouveau_bo(idxbuf);
+
+      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
+         assert(bo->map);
+         return;
+      }
+      ctx.idxbuf = bo->map;
+      ctx.idxsize = idxsize;
+      nouveau_bo_unmap(bo);
+   }
+
+   s.priv = &ctx;
+   s.edge = emit_edgeflag;
+   if (idxbuf) {
+      if (idxsize == 1)
+         s.emit = emit_elt08;
+      else
+      if (idxsize == 2)
+         s.emit = emit_elt16;
+      else
+         s.emit = emit_elt32;
+   } else
+      s.emit = emit_verts;
+
+   /* per-instance loop */
+   BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
+   OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
+   OUT_RING  (chan, i_start);
+   while (i_count--) {
+      unsigned max_verts;
+      boolean done;
+
+      /* NOTE(review): the step counter is incremented before the first
+       * instance is drawn, so with divisor 1 the pointer advances ahead of
+       * instance i_start - confirm this is intended. */
+      for (i = 0; i < ctx.attr_nr; i++) {
+         if (!ctx.attr[i].divisor ||
+              ctx.attr[i].divisor != ++ctx.attr[i].step)
+            continue;
+         ctx.attr[i].step = 0;
+         ctx.attr[i].map = (uint8_t *)ctx.attr[i].map + ctx.attr[i].stride;
+      }
+
+      u_split_prim_init(&s, mode, start, count);
+      do {
+         /* require room for at least six vertices plus begin/end overhead */
+         if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) {
+            FIRE_RING(chan);
+            if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) {
+               assert(0);
+               return;
+            }
+         }
+
+         max_verts  = AVAIL_RING(chan);
+         max_verts -= p_overhead;
+         max_verts /= vtx_size;
+
+         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+         OUT_RING  (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
+         done = u_split_prim_next(&s, max_verts);
+         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+         OUT_RING  (chan, 0);
+      } while (!done);
+
+      /* bit 28 of VERTEX_BEGIN is set for every instance after the first */
+      nzi = TRUE;
+   }
+}
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index eed6031eaf..1a4606d9e2 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -95,6 +95,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
static int
nv50_screen_get_param(struct pipe_screen *pscreen, int param)
{
+ struct nv50_screen *screen = nv50_screen(pscreen);
+
switch (param) {
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return 32;
@@ -107,7 +109,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
case PIPE_CAP_TWO_SIDED_STENCIL:
return 1;
case PIPE_CAP_GLSL:
- return 0;
+ return 1;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 1;
case PIPE_CAP_POINT_SPRITE:
@@ -132,9 +134,9 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
return 1;
case NOUVEAU_CAP_HW_VTXBUF:
- return 1;
+ return screen->force_push ? 0 : 1;
case NOUVEAU_CAP_HW_IDXBUF:
- return 1;
+ return screen->force_push ? 0 : 1;
case PIPE_CAP_INDEP_BLEND_ENABLE:
return 1;
case PIPE_CAP_INDEP_BLEND_FUNC:
@@ -188,8 +190,6 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
nouveau_bo_ref(NULL, &screen->tic);
if (screen->tsc)
nouveau_bo_ref(NULL, &screen->tsc);
- if (screen->static_init)
- so_ref(NULL, &screen->static_init);
nouveau_notifier_free(&screen->sync);
nouveau_grobj_free(&screen->tesla);
@@ -202,26 +202,53 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
FREE(screen);
}
-static int
-nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb,
- unsigned usage)
+/*
+ * Emit, through OUT_RELOC against bo, one word that packs n (shifted by 18),
+ * the subchannel of gr (shifted by 13) and m. Every argument is
+ * parenthesised so that expressions can be passed safely.
+ */
+#define BGN_RELOC(ch, bo, gr, m, n, fl) \
+   OUT_RELOC((ch), (bo), ((n) << 18) | ((gr)->subc << 13) | (m), (fl), 0, 0)
+
+void
+nv50_screen_relocs(struct nv50_screen *screen)
{
- struct nv50_screen *screen = nv50_screen(pscreen);
- struct nv50_context *ctx = screen->cur_ctx;
+ struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_grobj *tesla = screen->tesla;
+ unsigned i;
+ const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
- if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX))
- return 0;
+ MARK_RING (chan, 28, 26);
- /* Our vtxbuf got mapped, it can no longer be considered part of current
- * state, remove it to avoid emitting reloc markers.
- */
- if (ctx && ctx->state.vtxbuf && so_bo_is_reloc(ctx->state.vtxbuf,
- nouveau_bo(pb))) {
- so_ref(NULL, &ctx->state.vtxbuf);
- ctx->dirty |= NV50_NEW_ARRAYS;
- }
+ /* cause grobj autobind */
+ BEGIN_RING(chan, tesla, 0x0100, 1);
+ OUT_RING (chan, 0);
+
+ BGN_RELOC (chan, screen->tic, tesla, NV50TCL_TIC_ADDRESS_HIGH, 2, rl);
+ OUT_RELOCh(chan, screen->tic, 0, rl);
+ OUT_RELOCl(chan, screen->tic, 0, rl);
+
+ BGN_RELOC (chan, screen->tsc, tesla, NV50TCL_TSC_ADDRESS_HIGH, 2, rl);
+ OUT_RELOCh(chan, screen->tsc, 0, rl);
+ OUT_RELOCl(chan, screen->tsc, 0, rl);
- return 0;
+ BGN_RELOC (chan, screen->constbuf_misc[0],
+ tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl);
+ OUT_RELOCh(chan, screen->constbuf_misc[0], 0, rl);
+ OUT_RELOCl(chan, screen->constbuf_misc[0], 0, rl);
+ OUT_RELOC (chan, screen->constbuf_misc[0],
+ (NV50_CB_PMISC << 16) | 0x0200, rl, 0, 0);
+
+ BGN_RELOC (chan, screen->constbuf_misc[0],
+ tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl);
+ OUT_RELOCh(chan, screen->constbuf_misc[0], 0x200, rl);
+ OUT_RELOCl(chan, screen->constbuf_misc[0], 0x200, rl);
+ OUT_RELOC (chan, screen->constbuf_misc[0],
+ (NV50_CB_AUX << 16) | 0x0200, rl, 0, 0);
+
+ for (i = 0; i < 3; ++i) {
+ BGN_RELOC (chan, screen->constbuf_parm[i],
+ tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3, rl);
+ OUT_RELOCh(chan, screen->constbuf_parm[i], 0, rl);
+ OUT_RELOCl(chan, screen->constbuf_parm[i], 0, rl);
+ OUT_RELOC (chan, screen->constbuf_parm[i],
+ ((NV50_CB_PVP + i) << 16) | 0x0800, rl, 0, 0);
+ }
}
struct pipe_screen *
@@ -230,10 +257,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen);
struct nouveau_channel *chan;
struct pipe_screen *pscreen;
- struct nouveau_stateobj *so;
unsigned chipset = dev->chipset;
unsigned tesla_class = 0;
int ret, i;
+ const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
if (!screen)
return NULL;
@@ -252,10 +279,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
pscreen->get_paramf = nv50_screen_get_paramf;
pscreen->is_format_supported = nv50_screen_is_format_supported;
pscreen->context_create = nv50_create;
- screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map;
nv50_screen_init_miptree_functions(pscreen);
- nv50_transfer_init_screen_functions(pscreen);
/* DMA engine object */
ret = nouveau_grobj_alloc(chan, 0xbeef5039,
@@ -318,64 +343,58 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
}
/* Static M2MF init */
- so = so_new(1, 3, 0);
- so_method(so, screen->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3);
- so_data (so, screen->sync->handle);
- so_data (so, chan->vram->handle);
- so_data (so, chan->vram->handle);
- so_emit(chan, so);
- so_ref (NULL, &so);
+ BEGIN_RING(chan, screen->m2mf,
+ NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3);
+ OUT_RING (chan, screen->sync->handle);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->vram->handle);
/* Static 2D init */
- so = so_new(4, 7, 0);
- so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4);
- so_data (so, screen->sync->handle);
- so_data (so, chan->vram->handle);
- so_data (so, chan->vram->handle);
- so_data (so, chan->vram->handle);
- so_method(so, screen->eng2d, NV50_2D_OPERATION, 1);
- so_data (so, NV50_2D_OPERATION_SRCCOPY);
- so_method(so, screen->eng2d, NV50_2D_CLIP_ENABLE, 1);
- so_data (so, 0);
- so_method(so, screen->eng2d, 0x0888, 1);
- so_data (so, 1);
- so_emit(chan, so);
- so_ref(NULL, &so);
+ BEGIN_RING(chan, screen->eng2d, NV50_2D_DMA_NOTIFY, 4);
+ OUT_RING (chan, screen->sync->handle);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->vram->handle);
+ OUT_RING (chan, chan->vram->handle);
+ BEGIN_RING(chan, screen->eng2d, NV50_2D_OPERATION, 1);
+ OUT_RING (chan, NV50_2D_OPERATION_SRCCOPY);
+ BEGIN_RING(chan, screen->eng2d, NV50_2D_CLIP_ENABLE, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, screen->eng2d, 0x0888, 1);
+ OUT_RING (chan, 1);
/* Static tesla init */
- so = so_new(47, 95, 24);
-
- so_method(so, screen->tesla, NV50TCL_COND_MODE, 1);
- so_data (so, NV50TCL_COND_MODE_ALWAYS);
- so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1);
- so_data (so, screen->sync->handle);
- so_method(so, screen->tesla, NV50TCL_DMA_ZETA, 11);
+ BEGIN_RING(chan, screen->tesla, NV50TCL_COND_MODE, 1);
+ OUT_RING (chan, NV50TCL_COND_MODE_ALWAYS);
+ BEGIN_RING(chan, screen->tesla, NV50TCL_DMA_NOTIFY, 1);
+ OUT_RING (chan, screen->sync->handle);
+ BEGIN_RING(chan, screen->tesla, NV50TCL_DMA_ZETA, 11);
for (i = 0; i < 11; i++)
- so_data(so, chan->vram->handle);
- so_method(so, screen->tesla, NV50TCL_DMA_COLOR(0),
- NV50TCL_DMA_COLOR__SIZE);
+ OUT_RING (chan, chan->vram->handle);
+ BEGIN_RING(chan, screen->tesla,
+ NV50TCL_DMA_COLOR(0), NV50TCL_DMA_COLOR__SIZE);
for (i = 0; i < NV50TCL_DMA_COLOR__SIZE; i++)
- so_data(so, chan->vram->handle);
- so_method(so, screen->tesla, NV50TCL_RT_CONTROL, 1);
- so_data (so, 1);
+ OUT_RING (chan, chan->vram->handle);
+
+ BEGIN_RING(chan, screen->tesla, NV50TCL_RT_CONTROL, 1);
+ OUT_RING (chan, 1);
/* activate all 32 lanes (threads) in a warp */
- so_method(so, screen->tesla, NV50TCL_WARP_HALVES, 1);
- so_data (so, 0x2);
- so_method(so, screen->tesla, 0x1400, 1);
- so_data (so, 0xf);
+ BEGIN_RING(chan, screen->tesla, NV50TCL_WARP_HALVES, 1);
+ OUT_RING (chan, 2);
+ BEGIN_RING(chan, screen->tesla, 0x1400, 1);
+ OUT_RING (chan, 0xf);
/* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */
for (i = 0; i < 3; ++i) {
- so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(i), 1);
- so_data (so, 0x54);
+ BEGIN_RING(chan, screen->tesla, NV50TCL_TEX_LIMITS(i), 1);
+ OUT_RING (chan, 0x54);
}
/* origin is top left (set to 1 for bottom left) */
- so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1);
- so_data (so, 0);
- so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
- so_data (so, 8);
+ BEGIN_RING(chan, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
+ OUT_RING (chan, 8);
/* constant buffers for immediates and VP/FP parameters */
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4,
@@ -384,6 +403,14 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
nv50_screen_destroy(pscreen);
return NULL;
}
+ BEGIN_RING(chan, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
+ OUT_RELOCh(chan, screen->constbuf_misc[0], 0, rl);
+ OUT_RELOCl(chan, screen->constbuf_misc[0], 0, rl);
+ OUT_RING (chan, (NV50_CB_PMISC << 16) | 0x0200);
+ BEGIN_RING(chan, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
+ OUT_RELOCh(chan, screen->constbuf_misc[0], 0x200, rl);
+ OUT_RELOCl(chan, screen->constbuf_misc[0], 0x200, rl);
+ OUT_RING (chan, (NV50_CB_AUX << 16) | 0x0200);
for (i = 0; i < 3; i++) {
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (256 * 4) * 4,
@@ -392,6 +419,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
nv50_screen_destroy(pscreen);
return NULL;
}
+ BEGIN_RING(chan, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
+ OUT_RELOCh(chan, screen->constbuf_parm[i], 0, rl);
+ OUT_RELOCl(chan, screen->constbuf_parm[i], 0, rl);
+ OUT_RING (chan, ((NV50_CB_PVP + i) << 16) | 0x0800);
}
if (nouveau_resource_init(&screen->immd_heap[0], 0, 128) ||
@@ -403,123 +434,69 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
return NULL;
}
- /*
- // map constant buffers:
- // B = buffer ID (maybe more than 1 byte)
- // N = CB index used in shader instruction
- // P = program type (0 = VP, 2 = GP, 3 = FP)
- so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
- so_data (so, 0x000BBNP1);
- */
-
- so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
- so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, (NV50_CB_PMISC << 16) | 0x00000200);
- so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
- so_data (so, 0x00000001 | (NV50_CB_PMISC << 12));
- so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
- so_data (so, 0x00000021 | (NV50_CB_PMISC << 12));
- so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
- so_data (so, 0x00000031 | (NV50_CB_PMISC << 12));
-
- /* bind auxiliary constbuf to immediate data bo */
- so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
- so_reloc (so, screen->constbuf_misc[0], (128 * 4) * 4,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, screen->constbuf_misc[0], (128 * 4) * 4,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, (NV50_CB_AUX << 16) | 0x00000200);
- so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
- so_data (so, 0x00000201 | (NV50_CB_AUX << 12));
- so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
- so_data (so, 0x00000221 | (NV50_CB_AUX << 12));
-
- so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
- so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, (NV50_CB_PVP << 16) | 0x00000800);
- so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
- so_data (so, 0x00000101 | (NV50_CB_PVP << 12));
-
- so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
- so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, (NV50_CB_PGP << 16) | 0x00000800);
- so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
- so_data (so, 0x00000121 | (NV50_CB_PGP << 12));
-
- so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
- so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0,
- NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, (NV50_CB_PFP << 16) | 0x00000800);
- so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
- so_data (so, 0x00000131 | (NV50_CB_PFP << 12));
-
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 3 * 32 * (8 * 4),
&screen->tic);
if (ret) {
nv50_screen_destroy(pscreen);
return NULL;
}
+ BEGIN_RING(chan, screen->tesla, NV50TCL_TIC_ADDRESS_HIGH, 3);
+ OUT_RELOCh(chan, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, 3 * 32 - 1);
- so_method(so, screen->tesla, NV50TCL_TIC_ADDRESS_HIGH, 3);
- so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, PIPE_SHADER_TYPES * 32 - 1);
-
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 3 * 32 * (8 * 4),
&screen->tsc);
if (ret) {
nv50_screen_destroy(pscreen);
return NULL;
}
-
- so_method(so, screen->tesla, NV50TCL_TSC_ADDRESS_HIGH, 3);
- so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
- NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, 0x00000000); /* ignored if TSC_LINKED (0x1234) = 1 */
-
+ BEGIN_RING(chan, screen->tesla, NV50TCL_TSC_ADDRESS_HIGH, 3);
+ OUT_RELOCh(chan, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+ OUT_RING (chan, 0); /* ignored if TSC_LINKED (0x1234) == 1 */
+
+ /* map constant buffers:
+ * B = buffer ID (maybe more than 1 byte)
+ * N = CB index used in shader instruction
+ * P = program type (0 = VP, 2 = GP, 3 = FP)
+ * SET_PROGRAM_CB = 0x000BBNP1
+ */
+ BEGIN_RING_NI(chan, screen->tesla, NV50TCL_SET_PROGRAM_CB, 8);
+ /* bind immediate buffer */
+ OUT_RING (chan, 0x001 | (NV50_CB_PMISC << 12));
+ OUT_RING (chan, 0x021 | (NV50_CB_PMISC << 12));
+ OUT_RING (chan, 0x031 | (NV50_CB_PMISC << 12));
+ /* bind auxiliary constbuf to immediate data bo */
+ OUT_RING (chan, 0x201 | (NV50_CB_AUX << 12));
+ OUT_RING (chan, 0x221 | (NV50_CB_AUX << 12));
+ /* bind parameter buffers */
+ OUT_RING (chan, 0x101 | (NV50_CB_PVP << 12));
+ OUT_RING (chan, 0x121 | (NV50_CB_PGP << 12));
+ OUT_RING (chan, 0x131 | (NV50_CB_PFP << 12));
/* Vertex array limits - max them out */
for (i = 0; i < 16; i++) {
- so_method(so, screen->tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
- so_data (so, 0x000000ff);
- so_data (so, 0xffffffff);
+ BEGIN_RING(chan, screen->tesla,
+ NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
+ OUT_RING (chan, 0x000000ff);
+ OUT_RING (chan, 0xffffffff);
}
- so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2);
- so_data (so, fui(0.0));
- so_data (so, fui(1.0));
+ BEGIN_RING(chan, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2);
+ OUT_RINGf (chan, 0.0f);
+ OUT_RINGf (chan, 1.0f);
/* no dynamic combination of TIC & TSC entries => only BIND_TIC used */
- so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1);
- so_data (so, 1);
-
- /* activate first scissor rectangle */
- so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
- so_data (so, 1);
+ BEGIN_RING(chan, screen->tesla, NV50TCL_LINKED_TSC, 1);
+ OUT_RING (chan, 1);
- so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
- so_data (so, 1); /* default edgeflag to TRUE */
+ BEGIN_RING(chan, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+ OUT_RING (chan, 1); /* default edgeflag to TRUE */
- so_emit(chan, so);
- so_ref (so, &screen->static_init);
- so_ref (NULL, &so);
- nouveau_pushbuf_flush(chan, 0);
+ FIRE_RING (chan);
+ screen->force_push = debug_get_bool_option("NV50_ALWAYS_PUSH", FALSE);
return pscreen;
}
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index 2687b72127..15bd4eed39 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -27,7 +27,7 @@ struct nv50_screen {
struct nouveau_bo *tic;
struct nouveau_bo *tsc;
- struct nouveau_stateobj *static_init;
+ boolean force_push;
};
static INLINE struct nv50_screen *
@@ -36,6 +36,6 @@ nv50_screen(struct pipe_screen *screen)
return (struct nv50_screen *)screen;
}
-void nv50_transfer_init_screen_functions(struct pipe_screen *);
+extern void nv50_screen_relocs(struct nv50_screen *);
#endif
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 7d304907b6..c162808928 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -238,6 +238,9 @@ nv50_sampler_state_create(struct pipe_context *pipe,
return (void *)sso;
}
+/* type == 0 for VPs, 1 for GPs, 2 for FPs, which is how the
+ * relevant tesla methods are indexed (NV50TCL_BIND_TSC etc.)
+ */
static INLINE void
nv50_sampler_state_bind(struct pipe_context *pipe, unsigned type,
unsigned nr, void **sampler)
@@ -253,13 +256,13 @@ nv50_sampler_state_bind(struct pipe_context *pipe, unsigned type,
static void
nv50_vp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
{
- nv50_sampler_state_bind(pipe, PIPE_SHADER_VERTEX, nr, s);
+ nv50_sampler_state_bind(pipe, 0, nr, s);
}
static void
nv50_fp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
{
- nv50_sampler_state_bind(pipe, PIPE_SHADER_FRAGMENT, nr, s);
+ nv50_sampler_state_bind(pipe, 2, nr, s);
}
static void
@@ -269,40 +272,74 @@ nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
}
static INLINE void
-nv50_set_sampler_texture(struct pipe_context *pipe, unsigned type,
- unsigned nr, struct pipe_texture **pt)
+nv50_set_sampler_views(struct pipe_context *pipe, unsigned p,
+ unsigned nr,
+ struct pipe_sampler_view **views)
{
struct nv50_context *nv50 = nv50_context(pipe);
unsigned i;
for (i = 0; i < nr; i++)
- pipe_texture_reference((void *)&nv50->miptree[type][i], pt[i]);
- for (i = nr; i < nv50->miptree_nr[type]; i++)
- pipe_texture_reference((void *)&nv50->miptree[type][i], NULL);
+ pipe_sampler_view_reference(&nv50->sampler_views[p][i],
+ views[i]);
+
+ for (i = nr; i < nv50->sampler_view_nr[p]; i++)
+ pipe_sampler_view_reference(&nv50->sampler_views[p][i], NULL);
- nv50->miptree_nr[type] = nr;
+ nv50->sampler_view_nr[p] = nr;
nv50->dirty |= NV50_NEW_TEXTURE;
}
static void
-nv50_set_vp_sampler_textures(struct pipe_context *pipe,
- unsigned nr, struct pipe_texture **pt)
+nv50_set_vp_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
{
- nv50_set_sampler_texture(pipe, PIPE_SHADER_VERTEX, nr, pt);
+ nv50_set_sampler_views(pipe, 0, nr, views);
}
static void
-nv50_set_fp_sampler_textures(struct pipe_context *pipe,
- unsigned nr, struct pipe_texture **pt)
+nv50_set_fp_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nv50_set_sampler_views(pipe, 2, nr, views);
+}
+
+/* Destroy a sampler view created by nv50_create_sampler_view():
+ * drop the texture reference held in view->texture, then free the
+ * nv50_sampler_view object that wraps the pipe_sampler_view.
+ */
+static void
+nv50_sampler_view_destroy(struct pipe_context *pipe,
+ struct pipe_sampler_view *view)
+{
+ /* release the texture first, the pointer lives inside the view */
+ pipe_texture_reference(&view->texture, NULL);
+ FREE(nv50_sampler_view(view));
+}
+
+/* Create a sampler view of `texture`, initialised from the template
+ * `templ`. The new view starts with a reference count of 1, holds its
+ * own reference on `texture` and records `pipe` as its context.
+ *
+ * Returns NULL if the view cannot be allocated or if
+ * nv50_tex_construct() fails; in the latter case the partially set up
+ * view is destroyed again before returning.
+ */
+static struct pipe_sampler_view *
+nv50_create_sampler_view(struct pipe_context *pipe,
+ struct pipe_texture *texture,
+ const struct pipe_sampler_view *templ)
{
- nv50_set_sampler_texture(pipe, PIPE_SHADER_FRAGMENT, nr, pt);
+ struct nv50_sampler_view *view = CALLOC_STRUCT(nv50_sampler_view);
+
+ /* allocation can fail; don't write through a NULL view */
+ if (!view)
+ return NULL;
+
+ view->pipe = *templ;
+ view->pipe.reference.count = 1;
+ /* clear the pointer copied from templ before taking our own ref */
+ view->pipe.texture = NULL;
+ pipe_texture_reference(&view->pipe.texture, texture);
+ view->pipe.context = pipe;
+
+ if (!nv50_tex_construct(view)) {
+ nv50_sampler_view_destroy(pipe, &view->pipe);
+ return NULL;
+ }
+ return &view->pipe;
}
+
static void *
nv50_rasterizer_state_create(struct pipe_context *pipe,
const struct pipe_rasterizer_state *cso)
{
- struct nouveau_stateobj *so = so_new(15, 21, 0);
+ struct nouveau_stateobj *so = so_new(16, 22, 0);
struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
struct nv50_rasterizer_stateobj *rso =
CALLOC_STRUCT(nv50_rasterizer_stateobj);
@@ -314,6 +351,9 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
* - point_sprite / sprite_coord_mode
*/
+ so_method(so, tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
+ so_data (so, cso->scissor);
+
so_method(so, tesla, NV50TCL_SHADE_MODEL, 1);
so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT :
NV50TCL_SHADE_MODEL_SMOOTH);
@@ -720,15 +760,34 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
nv50->dirty |= NV50_NEW_ARRAYS;
}
+/* Create a vertex elements CSO: copy the `num_elements` entries of
+ * `elements` into a new nv50_vtxelt_stateobj and let
+ * nv50_vtxelt_construct() finish setting it up.
+ *
+ * Returns the new state object, or NULL if it cannot be allocated.
+ */
+static void *
+nv50_vtxelts_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct nv50_vtxelt_stateobj *cso = CALLOC_STRUCT(nv50_vtxelt_stateobj);
+
+ /* allocation can fail; report it instead of writing through NULL */
+ if (!cso)
+ return NULL;
+
+ assert(num_elements < 16); /* not doing fallbacks yet */
+ cso->num_elements = num_elements;
+ memcpy(cso->pipe, elements, num_elements * sizeof(*elements));
+
+ nv50_vtxelt_construct(cso);
+
+ return (void *)cso;
+}
+
static void
-nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count,
- const struct pipe_vertex_element *ve)
+nv50_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso)
{
- struct nv50_context *nv50 = nv50_context(pipe);
+ FREE(hwcso);
+}
- memcpy(nv50->vtxelt, ve, sizeof(*ve) * count);
- nv50->vtxelt_nr = count;
+static void
+nv50_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ nv50->vtxelt = hwcso;
nv50->dirty |= NV50_NEW_ARRAYS;
}
@@ -743,8 +802,10 @@ nv50_init_state_functions(struct nv50_context *nv50)
nv50->pipe.delete_sampler_state = nv50_sampler_state_delete;
nv50->pipe.bind_fragment_sampler_states = nv50_fp_sampler_state_bind;
nv50->pipe.bind_vertex_sampler_states = nv50_vp_sampler_state_bind;
- nv50->pipe.set_fragment_sampler_textures = nv50_set_fp_sampler_textures;
- nv50->pipe.set_vertex_sampler_textures = nv50_set_vp_sampler_textures;
+ nv50->pipe.set_fragment_sampler_views = nv50_set_fp_sampler_views;
+ nv50->pipe.set_vertex_sampler_views = nv50_set_vp_sampler_views;
+ nv50->pipe.create_sampler_view = nv50_create_sampler_view;
+ nv50->pipe.sampler_view_destroy = nv50_sampler_view_destroy;
nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create;
nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind;
@@ -778,7 +839,10 @@ nv50_init_state_functions(struct nv50_context *nv50)
nv50->pipe.set_scissor_state = nv50_set_scissor_state;
nv50->pipe.set_viewport_state = nv50_set_viewport_state;
+ nv50->pipe.create_vertex_elements_state = nv50_vtxelts_state_create;
+ nv50->pipe.delete_vertex_elements_state = nv50_vtxelts_state_delete;
+ nv50->pipe.bind_vertex_elements_state = nv50_vtxelts_state_bind;
+
nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers;
- nv50->pipe.set_vertex_elements = nv50_set_vertex_elements;
}
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index c974cc92dc..b7e355283c 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -25,8 +25,8 @@
#include "nv50_context.h"
#include "nouveau/nouveau_stateobj.h"
-static void
-nv50_state_validate_fb(struct nv50_context *nv50)
+static struct nouveau_stateobj *
+validate_fb(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nouveau_stateobj *so = so_new(32, 79, 18);
@@ -167,12 +167,7 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_data (so, w << 16);
so_data (so, h << 16);
- /* we set scissors to framebuffer size when they're 'turned off' */
- nv50->dirty |= NV50_NEW_SCISSOR;
- so_ref(NULL, &nv50->state.scissor);
-
- so_ref(so, &nv50->state.fb);
- so_ref(NULL, &so);
+ return so;
}
static void
@@ -199,263 +194,254 @@ nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so,
}
}
-static void
-nv50_state_emit(struct nv50_context *nv50)
+/* validate_list[] entry for NV50_NEW_BLEND: hand back the state object
+ * of the currently bound blend CSO. The reference is taken with
+ * so_ref() into a NULL-initialised local; nv50_state_validate() stores
+ * the result in state.hw[] and drops the returned reference again.
+ */
+static struct nouveau_stateobj *
+validate_blend(struct nv50_context *nv50)
{
- struct nv50_screen *screen = nv50->screen;
- struct nouveau_channel *chan = screen->base.channel;
+ struct nouveau_stateobj *so = NULL;
+ so_ref(nv50->blend->so, &so);
+ return so;
+}
- /* XXX: this is racy for multiple contexts active on separate
- * threads.
- */
- if (screen->cur_ctx != nv50) {
- if (nv50->state.fb)
- nv50->state.dirty |= NV50_NEW_FRAMEBUFFER;
- if (nv50->state.blend)
- nv50->state.dirty |= NV50_NEW_BLEND;
- if (nv50->state.zsa)
- nv50->state.dirty |= NV50_NEW_ZSA;
- if (nv50->state.vertprog)
- nv50->state.dirty |= NV50_NEW_VERTPROG;
- if (nv50->state.fragprog)
- nv50->state.dirty |= NV50_NEW_FRAGPROG;
- if (nv50->state.geomprog)
- nv50->state.dirty |= NV50_NEW_GEOMPROG;
- if (nv50->state.rast)
- nv50->state.dirty |= NV50_NEW_RASTERIZER;
- if (nv50->state.blend_colour)
- nv50->state.dirty |= NV50_NEW_BLEND_COLOUR;
- if (nv50->state.stencil_ref)
- nv50->state.dirty |= NV50_NEW_STENCIL_REF;
- if (nv50->state.stipple)
- nv50->state.dirty |= NV50_NEW_STIPPLE;
- if (nv50->state.scissor)
- nv50->state.dirty |= NV50_NEW_SCISSOR;
- if (nv50->state.viewport)
- nv50->state.dirty |= NV50_NEW_VIEWPORT;
- if (nv50->state.tsc_upload)
- nv50->state.dirty |= NV50_NEW_SAMPLER;
- if (nv50->state.tic_upload)
- nv50->state.dirty |= NV50_NEW_TEXTURE;
- if (nv50->state.vtxfmt && nv50->state.vtxbuf)
- nv50->state.dirty |= NV50_NEW_ARRAYS;
- screen->cur_ctx = nv50;
- }
+/* validate_list[] entry for NV50_NEW_ZSA: hand back the state object
+ * of the currently bound zsa CSO (nv50->zsa->so), referenced via
+ * so_ref() into a NULL-initialised local.
+ */
+static struct nouveau_stateobj *
+validate_zsa(struct nv50_context *nv50)
+{
+ struct nouveau_stateobj *so = NULL;
+ so_ref(nv50->zsa->so, &so);
+ return so;
+}
- if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER)
- so_emit(chan, nv50->state.fb);
- if (nv50->state.dirty & NV50_NEW_BLEND)
- so_emit(chan, nv50->state.blend);
- if (nv50->state.dirty & NV50_NEW_ZSA)
- so_emit(chan, nv50->state.zsa);
- if (nv50->state.dirty & NV50_NEW_VERTPROG)
- so_emit(chan, nv50->state.vertprog);
- if (nv50->state.dirty & NV50_NEW_FRAGPROG)
- so_emit(chan, nv50->state.fragprog);
- if (nv50->state.dirty & NV50_NEW_GEOMPROG && nv50->state.geomprog)
- so_emit(chan, nv50->state.geomprog);
- if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
- NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER))
- so_emit(chan, nv50->state.fp_linkage);
- if ((nv50->state.dirty & (NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG))
- && nv50->state.gp_linkage)
- so_emit(chan, nv50->state.gp_linkage);
- if (nv50->state.dirty & NV50_NEW_RASTERIZER)
- so_emit(chan, nv50->state.rast);
- if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR)
- so_emit(chan, nv50->state.blend_colour);
- if (nv50->state.dirty & NV50_NEW_STENCIL_REF)
- so_emit(chan, nv50->state.stencil_ref);
- if (nv50->state.dirty & NV50_NEW_STIPPLE)
- so_emit(chan, nv50->state.stipple);
- if (nv50->state.dirty & NV50_NEW_SCISSOR)
- so_emit(chan, nv50->state.scissor);
- if (nv50->state.dirty & NV50_NEW_VIEWPORT)
- so_emit(chan, nv50->state.viewport);
- if (nv50->state.dirty & NV50_NEW_SAMPLER)
- so_emit(chan, nv50->state.tsc_upload);
- if (nv50->state.dirty & NV50_NEW_TEXTURE)
- so_emit(chan, nv50->state.tic_upload);
- if (nv50->state.dirty & NV50_NEW_ARRAYS) {
- so_emit(chan, nv50->state.vtxfmt);
- so_emit(chan, nv50->state.vtxbuf);
- if (nv50->state.vtxattr)
- so_emit(chan, nv50->state.vtxattr);
- }
- nv50->state.dirty = 0;
+/* validate_list[] entry for NV50_NEW_RASTERIZER: hand back the state
+ * object of the currently bound rasterizer CSO (nv50->rasterizer->so),
+ * referenced via so_ref() into a NULL-initialised local.
+ */
+static struct nouveau_stateobj *
+validate_rast(struct nv50_context *nv50)
+{
+ struct nouveau_stateobj *so = NULL;
+ so_ref(nv50->rasterizer->so, &so);
+ return so;
+}
+
+static struct nouveau_stateobj *
+validate_blend_colour(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so = so_new(1, 4, 0);
+
+ so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
+ so_data (so, fui(nv50->blend_colour.color[0]));
+ so_data (so, fui(nv50->blend_colour.color[1]));
+ so_data (so, fui(nv50->blend_colour.color[2]));
+ so_data (so, fui(nv50->blend_colour.color[3]));
+ return so;
}
-void
-nv50_state_flush_notify(struct nouveau_channel *chan)
+/* validate_list[] entry for NV50_NEW_STENCIL_REF: build a fresh state
+ * object that programs the front and back stencil reference values
+ * from nv50->stencil_ref.ref_value[0] and [1]. Returns the new object.
+ */
+static struct nouveau_stateobj *
+validate_stencil_ref(struct nv50_context *nv50)
{
- struct nv50_context *nv50 = chan->user_private;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so = so_new(2, 2, 0);
- if (nv50->state.tic_upload && !(nv50->dirty & NV50_NEW_TEXTURE))
- so_emit(chan, nv50->state.tic_upload);
+ so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 1);
+ so_data (so, nv50->stencil_ref.ref_value[0]);
+ so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 1);
+ so_data (so, nv50->stencil_ref.ref_value[1]);
+ return so;
+}
- so_emit_reloc_markers(chan, nv50->state.fb);
- so_emit_reloc_markers(chan, nv50->state.vertprog);
- so_emit_reloc_markers(chan, nv50->state.fragprog);
- so_emit_reloc_markers(chan, nv50->state.vtxbuf);
- so_emit_reloc_markers(chan, nv50->screen->static_init);
+static struct nouveau_stateobj *
+validate_stipple(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so = so_new(1, 32, 0);
+ int i;
- if (nv50->state.instbuf)
- so_emit_reloc_markers(chan, nv50->state.instbuf);
+ so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+ for (i = 0; i < 32; i++)
+ so_data(so, util_bswap32(nv50->stipple.stipple[i]));
+ return so;
}
-boolean
-nv50_state_validate(struct nv50_context *nv50)
+static struct nouveau_stateobj *
+validate_scissor(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct pipe_scissor_state *s = &nv50->scissor;
struct nouveau_stateobj *so;
- unsigned i;
-
- if (nv50->dirty & NV50_NEW_FRAMEBUFFER)
- nv50_state_validate_fb(nv50);
- if (nv50->dirty & NV50_NEW_BLEND)
- so_ref(nv50->blend->so, &nv50->state.blend);
+ so = so_new(1, 2, 0);
+ so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
+ so_data (so, (s->maxx << 16) | s->minx);
+ so_data (so, (s->maxy << 16) | s->miny);
+ return so;
+}
- if (nv50->dirty & NV50_NEW_ZSA)
- so_ref(nv50->zsa->so, &nv50->state.zsa);
+static struct nouveau_stateobj *
+validate_viewport(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so = so_new(5, 9, 0);
+
+ so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
+ so_data (so, fui(nv50->viewport.translate[0]));
+ so_data (so, fui(nv50->viewport.translate[1]));
+ so_data (so, fui(nv50->viewport.translate[2]));
+ so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
+ so_data (so, fui(nv50->viewport.scale[0]));
+ so_data (so, fui(nv50->viewport.scale[1]));
+ so_data (so, fui(nv50->viewport.scale[2]));
+
+ so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
+ so_data (so, 1);
+ /* 0x0000 = remove whole primitive only (xyz)
+ * 0x1018 = remove whole primitive only (xy), clamp z
+ * 0x1080 = clip primitive (xyz)
+ * 0x1098 = clip primitive (xy), clamp z
+ */
+ so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
+ so_data (so, 0x1080);
+ /* no idea what 0f90 does */
+ so_method(so, tesla, 0x0f90, 1);
+ so_data (so, 0);
- if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB))
- nv50_vertprog_validate(nv50);
+ return so;
+}
- if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
- nv50_fragprog_validate(nv50);
+static struct nouveau_stateobj *
+validate_sampler(struct nv50_context *nv50)
+{
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nouveau_stateobj *so;
+ unsigned nr = 0, i;
- if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB))
- nv50_geomprog_validate(nv50);
+ for (i = 0; i < 3; ++i)
+ nr += nv50->sampler_nr[i];
- if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
- NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER))
- nv50_fp_linkage_validate(nv50);
+ so = so_new(1 + 5 * 3, 1 + 19 * 3 + nr * 8, 3 * 2);
- if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_VERTPROG))
- nv50_gp_linkage_validate(nv50);
+ nv50_validate_samplers(nv50, so, 0); /* VP */
+ nv50_validate_samplers(nv50, so, 2); /* FP */
- if (nv50->dirty & NV50_NEW_RASTERIZER)
- so_ref(nv50->rasterizer->so, &nv50->state.rast);
+ so_method(so, tesla, 0x1334, 1); /* flush TSC */
+ so_data (so, 0);
- if (nv50->dirty & NV50_NEW_BLEND_COLOUR) {
- so = so_new(1, 4, 0);
- so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
- so_data (so, fui(nv50->blend_colour.color[0]));
- so_data (so, fui(nv50->blend_colour.color[1]));
- so_data (so, fui(nv50->blend_colour.color[2]));
- so_data (so, fui(nv50->blend_colour.color[3]));
- so_ref(so, &nv50->state.blend_colour);
- so_ref(NULL, &so);
- }
+ return so;
+}
- if (nv50->dirty & NV50_NEW_STENCIL_REF) {
- so = so_new(2, 2, 0);
- so_method(so, tesla, NV50TCL_STENCIL_FRONT_FUNC_REF, 1);
- so_data (so, nv50->stencil_ref.ref_value[0]);
- so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 1);
- so_data (so, nv50->stencil_ref.ref_value[1]);
- so_ref(so, &nv50->state.stencil_ref);
- so_ref(NULL, &so);
- }
+/* validate_list[] entry for NV50_NEW_ARRAYS: hand back a reference to
+ * nv50->state.vtxbuf. Listed after nv50_vbo_validate in validate_list[]
+ * (same dirty mask), which presumably is what fills state.vtxbuf --
+ * TODO confirm against nv50_vbo.c.
+ */
+static struct nouveau_stateobj *
+validate_vtxbuf(struct nv50_context *nv50)
+{
+ struct nouveau_stateobj *so = NULL;
+ so_ref(nv50->state.vtxbuf, &so);
+ return so;
+}
- if (nv50->dirty & NV50_NEW_STIPPLE) {
- so = so_new(1, 32, 0);
- so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
- for (i = 0; i < 32; i++)
- so_data(so, util_bswap32(nv50->stipple.stipple[i]));
- so_ref(so, &nv50->state.stipple);
- so_ref(NULL, &so);
- }
+/* validate_list[] entry for NV50_NEW_ARRAYS: hand back a reference to
+ * nv50->state.vtxattr. nv50_state_validate() skips the entry when the
+ * returned pointer is NULL.
+ */
+static struct nouveau_stateobj *
+validate_vtxattr(struct nv50_context *nv50)
+{
+ struct nouveau_stateobj *so = NULL;
+ so_ref(nv50->state.vtxattr, &so);
+ return so;
+}
- if (nv50->dirty & (NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER)) {
- struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe;
- struct pipe_scissor_state *s = &nv50->scissor;
+/* Table driving nv50_state_validate(): `func` is run when any bit of
+ * `states` is set in nv50->dirty, and the state object it returns is
+ * stored in nv50->state.hw[] at the entry's index (the same index is
+ * the bit used in nv50->state.hw_dirty).
+ *
+ * The ORDER of the entries matters: nv50_state_validate() emits reloc
+ * markers for hw[0] (fb), hw[3] (vp), hw[4] (fp) and hw[17] (vb) by
+ * literal index, so those numbers must be updated whenever entries are
+ * added, removed or moved.
+ */
+struct state_validate {
+ struct nouveau_stateobj *(*func)(struct nv50_context *nv50);
+ unsigned states;
+} validate_list[] = {
+ { validate_fb , NV50_NEW_FRAMEBUFFER },
+ { validate_blend , NV50_NEW_BLEND },
+ { validate_zsa , NV50_NEW_ZSA },
+ { nv50_vertprog_validate , NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB },
+ { nv50_fragprog_validate , NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB },
+ { nv50_geomprog_validate , NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB },
+ { nv50_fp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG |
+ NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER },
+ { nv50_gp_linkage_validate, NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG },
+ { validate_rast , NV50_NEW_RASTERIZER },
+ { validate_blend_colour , NV50_NEW_BLEND_COLOUR },
+ { validate_stencil_ref , NV50_NEW_STENCIL_REF },
+ { validate_stipple , NV50_NEW_STIPPLE },
+ { validate_scissor , NV50_NEW_SCISSOR },
+ { validate_viewport , NV50_NEW_VIEWPORT },
+ { validate_sampler , NV50_NEW_SAMPLER },
+ { nv50_tex_validate , NV50_NEW_TEXTURE | NV50_NEW_SAMPLER },
+ { nv50_vbo_validate , NV50_NEW_ARRAYS },
+ { validate_vtxbuf , NV50_NEW_ARRAYS },
+ { validate_vtxattr , NV50_NEW_ARRAYS },
+ /* NOTE(review): this empty entry is counted by validate_list_len;
+ * its `states` mask is 0, so the dirty test never selects it, but
+ * the loops still visit its index. Empty braces are also a
+ * compiler extension before C23 -- consider dropping the entry.
+ */
+ {}
+};
+/* number of entries in validate_list[], including the trailing {} */
+#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
- if (nv50->state.scissor &&
- (rast->scissor == 0 && nv50->state.scissor_enabled == 0))
- goto scissor_uptodate;
- nv50->state.scissor_enabled = rast->scissor;
+boolean
+nv50_state_validate(struct nv50_context *nv50, unsigned wait_dwords)
+{
+ struct nouveau_channel *chan = nv50->screen->base.channel;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ unsigned nr_relocs = 128, nr_dwords = wait_dwords + 128 + 4;
+ int ret, i;
- so = so_new(1, 2, 0);
- so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
- if (nv50->state.scissor_enabled) {
- so_data(so, (s->maxx << 16) | s->minx);
- so_data(so, (s->maxy << 16) | s->miny);
- } else {
- so_data(so, (nv50->framebuffer.width << 16));
- so_data(so, (nv50->framebuffer.height << 16));
- }
- so_ref(so, &nv50->state.scissor);
- so_ref(NULL, &so);
- nv50->state.dirty |= NV50_NEW_SCISSOR;
- }
-scissor_uptodate:
-
- if (nv50->dirty & (NV50_NEW_VIEWPORT | NV50_NEW_RASTERIZER)) {
- if (nv50->state.viewport &&
- !(nv50->dirty & NV50_NEW_VIEWPORT))
- goto viewport_uptodate;
-
- so = so_new(5, 9, 0);
- so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
- so_data (so, fui(nv50->viewport.translate[0]));
- so_data (so, fui(nv50->viewport.translate[1]));
- so_data (so, fui(nv50->viewport.translate[2]));
- so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
- so_data (so, fui(nv50->viewport.scale[0]));
- so_data (so, fui(nv50->viewport.scale[1]));
- so_data (so, fui(nv50->viewport.scale[2]));
-
- so_method(so, tesla, NV50TCL_VIEWPORT_TRANSFORM_EN, 1);
- so_data (so, 1);
- /* 0x0000 = remove whole primitive only (xyz)
- * 0x1018 = remove whole primitive only (xy), clamp z
- * 0x1080 = clip primitive (xyz)
- * 0x1098 = clip primitive (xy), clamp z
- */
- so_method(so, tesla, NV50TCL_VIEW_VOLUME_CLIP_CTRL, 1);
- so_data (so, 0x1080);
- /* no idea what 0f90 does */
- so_method(so, tesla, 0x0f90, 1);
- so_data (so, 0);
+ for (i = 0; i < validate_list_len; i++) {
+ struct state_validate *validate = &validate_list[i];
+ struct nouveau_stateobj *so;
- so_ref(so, &nv50->state.viewport);
- so_ref(NULL, &so);
- nv50->state.dirty |= NV50_NEW_VIEWPORT;
- }
-viewport_uptodate:
+ if (!(nv50->dirty & validate->states))
+ continue;
- if (nv50->dirty & NV50_NEW_SAMPLER) {
- unsigned nr = 0;
+ so = validate->func(nv50);
+ if (!so)
+ continue;
- for (i = 0; i < PIPE_SHADER_TYPES; ++i)
- nr += nv50->sampler_nr[i];
+ nr_dwords += (so->total + so->cur);
+ nr_relocs += so->cur_reloc;
- so = so_new(1 + 5 * PIPE_SHADER_TYPES,
- 1 + 19 * PIPE_SHADER_TYPES + nr * 8,
- PIPE_SHADER_TYPES * 2);
+ so_ref(so, &nv50->state.hw[i]);
+ so_ref(NULL, &so);
+ nv50->state.hw_dirty |= (1 << i);
+ }
+ nv50->dirty = 0;
- nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
- nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
+ if (nv50->screen->cur_ctx != nv50) {
+ for (i = 0; i < validate_list_len; i++) {
+ if (!nv50->state.hw[i] ||
+ (nv50->state.hw_dirty & (1 << i)))
+ continue;
- so_method(so, tesla, 0x1334, 1); /* flush TSC */
- so_data (so, 0);
+ nr_dwords += (nv50->state.hw[i]->total +
+ nv50->state.hw[i]->cur);
+ nr_relocs += nv50->state.hw[i]->cur_reloc;
+ nv50->state.hw_dirty |= (1 << i);
+ }
- so_ref(so, &nv50->state.tsc_upload);
- so_ref(NULL, &so);
+ nv50->screen->cur_ctx = nv50;
}
- if (nv50->dirty & (NV50_NEW_TEXTURE | NV50_NEW_SAMPLER))
- nv50_tex_validate(nv50);
+ ret = MARK_RING(chan, nr_dwords, nr_relocs);
+ if (ret) {
+ debug_printf("MARK_RING(%d, %d) failed: %d\n",
+ nr_dwords, nr_relocs, ret);
+ return FALSE;
+ }
- if (nv50->dirty & NV50_NEW_ARRAYS)
- nv50_vbo_validate(nv50);
+ while (nv50->state.hw_dirty) {
+ i = ffs(nv50->state.hw_dirty) - 1;
+ nv50->state.hw_dirty &= ~(1 << i);
- nv50->state.dirty |= nv50->dirty;
- nv50->dirty = 0;
- nv50_state_emit(nv50);
+ so_emit(chan, nv50->state.hw[i]);
+ }
+ /* Yes, really, we need to do this. If a buffer that is referenced
+ * on the hardware isn't part of changed state above, without doing
+ * this the kernel is given no clue that the buffer is being used
+ * still. This can cause all sorts of fun issues.
+ */
+ nv50_tex_relocs(nv50);
+ so_emit_reloc_markers(chan, nv50->state.hw[0]); /* fb */
+ so_emit_reloc_markers(chan, nv50->state.hw[3]); /* vp */
+ so_emit_reloc_markers(chan, nv50->state.hw[4]); /* fp */
+ so_emit_reloc_markers(chan, nv50->state.hw[17]); /* vb */
+ nv50_screen_relocs(nv50->screen);
+
+ /* No idea.. */
+ BEGIN_RING(chan, tesla, 0x142c, 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, tesla, 0x142c, 1);
+ OUT_RING (chan, 0);
return TRUE;
}
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index cabd148bc5..6467c48a32 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -28,6 +28,7 @@
#include "util/u_inlines.h"
#include "util/u_tile.h"
+#include "util/u_format.h"
static INLINE int
nv50_format(enum pipe_format format)
@@ -37,10 +38,35 @@ nv50_format(enum pipe_format format)
return NV50_2D_DST_FORMAT_A8R8G8B8_UNORM;
case PIPE_FORMAT_B8G8R8X8_UNORM:
return NV50_2D_DST_FORMAT_X8R8G8B8_UNORM;
+ case PIPE_FORMAT_B8G8R8A8_SRGB:
+ return NV50_2D_DST_FORMAT_A8R8G8B8_SRGB;
+ case PIPE_FORMAT_B8G8R8X8_SRGB:
+ return NV50_2D_DST_FORMAT_X8R8G8B8_SRGB;
case PIPE_FORMAT_B5G6R5_UNORM:
return NV50_2D_DST_FORMAT_R5G6B5_UNORM;
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ return NV50_2D_DST_FORMAT_A1R5G5B5_UNORM;
case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
return NV50_2D_DST_FORMAT_R8_UNORM;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return NV50_2D_DST_FORMAT_R32G32B32A32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return NV50_2D_DST_FORMAT_R32G32B32X32_FLOAT;
+ case PIPE_FORMAT_Z32_FLOAT:
+ return NV50_2D_DST_FORMAT_R32_FLOAT;
+
+ /* only because we require src format == dst format: */
+ case PIPE_FORMAT_R16G16_SNORM:
+ case PIPE_FORMAT_R16G16_UNORM:
+ case PIPE_FORMAT_S8Z24_UNORM:
+ case PIPE_FORMAT_Z24S8_UNORM:
+ return NV50_2D_DST_FORMAT_A8R8G8B8_UNORM;
+ case PIPE_FORMAT_L8A8_UNORM:
+ case PIPE_FORMAT_B4G4R4A4_UNORM:
+ return NV50_2D_DST_FORMAT_R16_UNORM;
+
default:
return -1;
}
@@ -57,8 +83,11 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD);
format = nv50_format(ps->format);
- if (format < 0)
+ if (format < 0) {
+ NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
+ util_format_name(ps->format));
return 1;
+ }
if (!bo->tile_flags) {
MARK_RING (chan, 9, 2); /* flush on lack of space or relocs */
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index de0560e20c..85ab947c00 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -24,30 +24,21 @@
#include "nv50_texture.h"
#include "nouveau/nouveau_stateobj.h"
+#include "nouveau/nouveau_reloc.h"
#include "util/u_format.h"
#define _MIXED(pf, t0, t1, t2, t3, cr, cg, cb, ca, f) \
-{ \
- PIPE_FORMAT_##pf, \
+[PIPE_FORMAT_##pf] = ( \
NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 | \
NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 | \
NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 | \
NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 | \
- NV50TIC_0_0_FMT_##f \
-}
+ NV50TIC_0_0_FMT_##f)
#define _(pf, t, cr, cg, cb, ca, f) _MIXED(pf, t, t, t, t, cr, cg, cb, ca, f)
-struct nv50_texture_format {
- enum pipe_format pf;
- uint32_t hw;
-};
-
-#define NV50_TEX_FORMAT_LIST_SIZE \
- (sizeof(nv50_tex_format_list) / sizeof(struct nv50_texture_format))
-
-static const struct nv50_texture_format nv50_tex_format_list[] =
+static const uint32_t nv50_texture_formats[PIPE_FORMAT_COUNT] =
{
_(B8G8R8A8_UNORM, UNORM, C2, C1, C0, C3, 8_8_8_8),
_(B8G8R8A8_SRGB, UNORM, C2, C1, C0, C3, 8_8_8_8),
@@ -59,10 +50,12 @@ static const struct nv50_texture_format nv50_tex_format_list[] =
_(B5G6R5_UNORM, UNORM, C2, C1, C0, ONE, 5_6_5),
_(L8_UNORM, UNORM, C0, C0, C0, ONE, 8),
+ _(L8_SRGB, UNORM, C0, C0, C0, ONE, 8),
_(A8_UNORM, UNORM, ZERO, ZERO, ZERO, C0, 8),
_(I8_UNORM, UNORM, C0, C0, C0, C0, 8),
_(L8A8_UNORM, UNORM, C0, C0, C0, C1, 8_8),
+ _(L8A8_SRGB, UNORM, C0, C0, C0, C1, 8_8),
_(DXT1_RGB, UNORM, C0, C1, C2, ONE, DXT1),
_(DXT1_RGBA, UNORM, C0, C1, C2, C3, DXT1),
@@ -80,149 +73,207 @@ static const struct nv50_texture_format nv50_tex_format_list[] =
_(R16G16_UNORM, UNORM, C0, C1, ZERO, ONE, 16_16),
_MIXED(Z32_FLOAT, FLOAT, UINT, UINT, UINT, C0, C0, C0, ONE, 32_DEPTH)
-
};
#undef _
#undef _MIXED
-static int
-nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
- struct nv50_miptree *mt, int unit, unsigned p)
+static INLINE uint32_t
+nv50_tic_swizzle(uint32_t tc, unsigned swz)
+{
+ switch (swz) {
+ case PIPE_SWIZZLE_RED:
+ return (tc & NV50TIC_0_0_MAPR_MASK) >> NV50TIC_0_0_MAPR_SHIFT;
+ case PIPE_SWIZZLE_GREEN:
+ return (tc & NV50TIC_0_0_MAPG_MASK) >> NV50TIC_0_0_MAPG_SHIFT;
+ case PIPE_SWIZZLE_BLUE:
+ return (tc & NV50TIC_0_0_MAPB_MASK) >> NV50TIC_0_0_MAPB_SHIFT;
+ case PIPE_SWIZZLE_ALPHA:
+ return (tc & NV50TIC_0_0_MAPA_MASK) >> NV50TIC_0_0_MAPA_SHIFT;
+ case PIPE_SWIZZLE_ONE:
+ return 7;
+ case PIPE_SWIZZLE_ZERO:
+ default:
+ return 0;
+ }
+}
+
+boolean
+nv50_tex_construct(struct nv50_sampler_view *view)
{
- unsigned i;
- uint32_t mode;
const struct util_format_description *desc;
+ struct nv50_miptree *mt = nv50_miptree(view->pipe.texture);
+ uint32_t swz[4], *tic = view->tic;
- for (i = 0; i < NV50_TEX_FORMAT_LIST_SIZE; i++)
- if (nv50_tex_format_list[i].pf == mt->base.base.format)
- break;
- if (i == NV50_TEX_FORMAT_LIST_SIZE)
- return 1;
-
- if (nv50->sampler[p][unit]->normalized)
- mode = 0x50001000 | (1 << 31);
- else {
- mode = 0x50001000 | (7 << 14);
- assert(mt->base.base.target == PIPE_TEXTURE_2D);
- }
+ tic[0] = nv50_texture_formats[view->pipe.format];
- mode |= ((mt->base.bo->tile_mode & 0x0f) << 22) |
- ((mt->base.bo->tile_mode & 0xf0) << 21);
+ swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r);
+ swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g);
+ swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b);
+ swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a);
+ view->tic[0] = (tic[0] & ~NV50TIC_0_0_SWIZZLE_MASK) |
+ (swz[0] << NV50TIC_0_0_MAPR_SHIFT) |
+ (swz[1] << NV50TIC_0_0_MAPG_SHIFT) |
+ (swz[2] << NV50TIC_0_0_MAPB_SHIFT) |
+ (swz[3] << NV50TIC_0_0_MAPA_SHIFT);
- desc = util_format_description(mt->base.base.format);
- assert(desc);
+ tic[2] = 0x50001000;
+ tic[2] |= ((mt->base.bo->tile_mode & 0x0f) << 22) |
+ ((mt->base.bo->tile_mode & 0xf0) << 21);
+ desc = util_format_description(mt->base.base.format);
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
- mode |= 0x0400;
+ tic[2] |= NV50TIC_0_2_COLORSPACE_SRGB;
switch (mt->base.base.target) {
case PIPE_TEXTURE_1D:
+ tic[2] |= NV50TIC_0_2_TARGET_1D;
break;
case PIPE_TEXTURE_2D:
- mode |= (1 << 14);
+ tic[2] |= NV50TIC_0_2_TARGET_2D;
break;
case PIPE_TEXTURE_3D:
- mode |= (2 << 14);
+ tic[2] |= NV50TIC_0_2_TARGET_3D;
break;
case PIPE_TEXTURE_CUBE:
- mode |= (3 << 14);
+ tic[2] |= NV50TIC_0_2_TARGET_CUBE;
break;
default:
- assert(!"unsupported texture target");
- break;
+ NOUVEAU_ERR("invalid texture target: %d\n",
+ mt->base.base.target);
+ return FALSE;
}
- so_data (so, nv50_tex_format_list[i].hw);
- so_reloc(so, mt->base.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
- NOUVEAU_BO_RD, 0, 0);
- so_data (so, mode);
- so_data (so, 0x00300000);
- so_data (so, mt->base.base.width0 | (1 << 31));
- so_data (so, (mt->base.base.last_level << 28) |
- (mt->base.base.depth0 << 16) | mt->base.base.height0);
- so_data (so, 0x03000000);
- so_data (so, mt->base.base.last_level << 4);
-
- return 0;
-}
+ tic[3] = 0x00300000;
+
+ tic[4] = (1 << 31) | mt->base.base.width0;
+ tic[5] = (mt->base.base.last_level << 28) |
+ (mt->base.base.depth0 << 16) | mt->base.base.height0;
-#ifndef NV50TCL_BIND_TIC
-#define NV50TCL_BIND_TIC(n) (0x1448 + 8 * n)
-#endif
+ tic[6] = 0x03000000;
-static boolean
+ tic[7] = (view->pipe.last_level << 4) | view->pipe.first_level;
+
+ return TRUE;
+}
+
+static int
nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so,
unsigned p)
{
- static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2, 1 };
-
struct nouveau_grobj *eng2d = nv50->screen->eng2d;
struct nouveau_grobj *tesla = nv50->screen->tesla;
- unsigned unit, j, p_hw = p_remap[p];
+ unsigned unit, j;
+
+ const unsigned rll = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW;
+ const unsigned rlh = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH
+ | NOUVEAU_BO_OR;
nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM,
- p * (32 * 8 * 4), nv50->miptree_nr[p] * 8 * 4);
+ p * (32 * 8 * 4), nv50->sampler_view_nr[p] * 8 * 4);
- for (unit = 0; unit < nv50->miptree_nr[p]; ++unit) {
- struct nv50_miptree *mt = nv50->miptree[p][unit];
+ for (unit = 0; unit < nv50->sampler_view_nr[p]; ++unit) {
+ struct nv50_sampler_view *view =
+ nv50_sampler_view(nv50->sampler_views[p][unit]);
so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
- if (mt) {
- if (nv50_tex_construct(nv50, so, mt, unit, p))
- return FALSE;
+ if (view) {
+ uint32_t tic2 = view->tic[2];
+ struct nv50_miptree *mt =
+ nv50_miptree(view->pipe.texture);
+
+ if (nv50->sampler[p][unit]->normalized)
+ tic2 |= NV50TIC_0_2_NORMALIZED_COORDS;
+
+ so_data (so, view->tic[0]);
+ so_reloc (so, mt->base.bo, 0, rll, 0, 0);
+ so_reloc (so, mt->base.bo, 0, rlh, tic2, tic2);
+ so_datap (so, &view->tic[3], 5);
+
/* Set TEX insn $t src binding $unit in program type p
* to TIC, TSC entry (32 * p + unit), mark valid (1).
*/
- so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+ so_method(so, tesla, NV50TCL_BIND_TIC(p), 1);
so_data (so, ((32 * p + unit) << 9) | (unit << 1) | 1);
} else {
for (j = 0; j < 8; ++j)
so_data(so, 0);
- so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+ so_method(so, tesla, NV50TCL_BIND_TIC(p), 1);
so_data (so, (unit << 1) | 0);
}
}
- for (; unit < nv50->state.miptree_nr[p]; unit++) {
+ for (; unit < nv50->state.sampler_view_nr[p]; unit++) {
/* Make other bindings invalid. */
- so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+ so_method(so, tesla, NV50TCL_BIND_TIC(p), 1);
so_data (so, (unit << 1) | 0);
}
- nv50->state.miptree_nr[p] = nv50->miptree_nr[p];
+ nv50->state.sampler_view_nr[p] = nv50->sampler_view_nr[p];
return TRUE;
}
void
+nv50_tex_relocs(struct nv50_context *nv50)
+{
+ struct nouveau_channel *chan = nv50->screen->tesla->channel;
+ int p, unit;
+
+ p = PIPE_SHADER_FRAGMENT;
+ for (unit = 0; unit < nv50->sampler_view_nr[p]; unit++) {
+ struct pipe_sampler_view *view = nv50->sampler_views[p][unit];
+ if (!view)
+ continue;
+ nouveau_reloc_emit(chan, nv50->screen->tic,
+ ((p * 32) + unit) * 32, NULL,
+ nv50_miptree(view->texture)->base.bo, 0, 0,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
+ NOUVEAU_BO_RD, 0, 0);
+ }
+
+ p = PIPE_SHADER_VERTEX;
+ for (unit = 0; unit < nv50->sampler_view_nr[p]; unit++) {
+ struct pipe_sampler_view *view = nv50->sampler_views[p][unit];
+ if (!view)
+ continue;
+ nouveau_reloc_emit(chan, nv50->screen->tic,
+ ((p * 32) + unit) * 32, NULL,
+ nv50_miptree(view->texture)->base.bo, 0, 0,
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
+ NOUVEAU_BO_RD, 0, 0);
+ }
+}
+
+struct nouveau_stateobj *
nv50_tex_validate(struct nv50_context *nv50)
{
struct nouveau_stateobj *so;
struct nouveau_grobj *tesla = nv50->screen->tesla;
- unsigned p, start, push, nrlc;
-
- for (nrlc = 0, start = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) {
- start += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]);
- push += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]);
- nrlc += nv50->miptree_nr[p];
+ unsigned p, m = 0, d = 0, r = 0;
+
+ for (p = 0; p < 3; ++p) {
+ unsigned nr = MAX2(nv50->sampler_view_nr[p],
+ nv50->state.sampler_view_nr[p]);
+ m += nr;
+ d += nr;
+ r += nv50->sampler_view_nr[p];
}
- start = start * 2 + 4 * PIPE_SHADER_TYPES + 2;
- push = push * 9 + 19 * PIPE_SHADER_TYPES + 2;
- nrlc = nrlc * 2 + 2 * PIPE_SHADER_TYPES;
+ m = m * 2 + 3 * 4 + 1;
+ d = d * 9 + 3 * 19 + 1;
+ r = r * 2 + 3 * 2;
- so = so_new(start, push, nrlc);
+ so = so_new(m, d, r);
- if (nv50_validate_textures(nv50, so, PIPE_SHADER_VERTEX) == FALSE ||
- nv50_validate_textures(nv50, so, PIPE_SHADER_FRAGMENT) == FALSE) {
+ if (nv50_validate_textures(nv50, so, 0) == FALSE ||
+ nv50_validate_textures(nv50, so, 2) == FALSE) {
so_ref(NULL, &so);
NOUVEAU_ERR("failed tex validate\n");
- return;
+ return NULL;
}
so_method(so, tesla, 0x1330, 1); /* flush TIC */
so_data (so, 0);
- so_ref(so, &nv50->state.tic_upload);
- so_ref(NULL, &so);
+ return so;
}
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
index b870302019..3475d3e432 100644
--- a/src/gallium/drivers/nv50/nv50_texture.h
+++ b/src/gallium/drivers/nv50/nv50_texture.h
@@ -7,7 +7,9 @@
*/
/* Texture image control block */
+#define NV50TIC_0_0_SWIZZLE_MASK 0x3ffc0000
#define NV50TIC_0_0_MAPA_MASK 0x38000000
+#define NV50TIC_0_0_MAPA_SHIFT 27
#define NV50TIC_0_0_MAPA_ZERO 0x00000000
#define NV50TIC_0_0_MAPA_C0 0x10000000
#define NV50TIC_0_0_MAPA_C1 0x18000000
@@ -15,6 +17,7 @@
#define NV50TIC_0_0_MAPA_C3 0x28000000
#define NV50TIC_0_0_MAPA_ONE 0x38000000
#define NV50TIC_0_0_MAPB_MASK 0x07000000
+#define NV50TIC_0_0_MAPB_SHIFT 24
#define NV50TIC_0_0_MAPB_ZERO 0x00000000
#define NV50TIC_0_0_MAPB_C0 0x02000000
#define NV50TIC_0_0_MAPB_C1 0x03000000
@@ -22,6 +25,7 @@
#define NV50TIC_0_0_MAPB_C3 0x05000000
#define NV50TIC_0_0_MAPB_ONE 0x07000000
#define NV50TIC_0_0_MAPG_MASK 0x00e00000
+#define NV50TIC_0_0_MAPG_SHIFT 21
#define NV50TIC_0_0_MAPG_ZERO 0x00000000
#define NV50TIC_0_0_MAPG_C0 0x00400000
#define NV50TIC_0_0_MAPG_C1 0x00600000
@@ -29,6 +33,7 @@
#define NV50TIC_0_0_MAPG_C3 0x00a00000
#define NV50TIC_0_0_MAPG_ONE 0x00e00000
#define NV50TIC_0_0_MAPR_MASK 0x001c0000
+#define NV50TIC_0_0_MAPR_SHIFT 18
#define NV50TIC_0_0_MAPR_ZERO 0x00000000
#define NV50TIC_0_0_MAPR_C0 0x00080000
#define NV50TIC_0_0_MAPR_C1 0x000c0000
@@ -89,22 +94,39 @@
#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff
#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0
-#define NV50TIC_0_2_UNKNOWN_MASK 0xffffffff
+#define NV50TIC_0_2_COLORSPACE_SRGB 0x00000400
+#define NV50TIC_0_2_TARGET_1D 0x00000000
+#define NV50TIC_0_2_TARGET_2D 0x00004000
+#define NV50TIC_0_2_TARGET_3D 0x00008000
+#define NV50TIC_0_2_TARGET_CUBE 0x0000c000
+#define NV50TIC_0_2_TARGET_1D_ARRAY 0x00010000
+#define NV50TIC_0_2_TARGET_2D_ARRAY 0x00014000
+#define NV50TIC_0_2_TARGET_BUFFER 0x00018000
+#define NV50TIC_0_2_TARGET_RECT 0x0001c000
+/* #define NV50TIC_0_0_TILE_MODE_LINEAR 0x00040000 */
+#define NV50TIC_0_2_TILE_MODE_Y_MASK 0x01c00000
+#define NV50TIC_0_2_TILE_MODE_Y_SHIFT 22
+#define NV50TIC_0_2_TILE_MODE_Z_MASK 0x0e000000
+#define NV50TIC_0_2_TILE_MODE_Z_SHIFT 25
+#define NV50TIC_0_2_NORMALIZED_COORDS 0x80000000
#define NV50TIC_0_3_UNKNOWN_MASK 0xffffffff
#define NV50TIC_0_4_WIDTH_MASK 0x0000ffff
#define NV50TIC_0_4_WIDTH_SHIFT 0
-#define NV50TIC_0_5_DEPTH_MASK 0xffff0000
+#define NV50TIC_0_5_LAST_LEVEL_MASK 0xf0000000
+#define NV50TIC_0_5_LAST_LEVEL_SHIFT 28
+#define NV50TIC_0_5_DEPTH_MASK 0x0fff0000
#define NV50TIC_0_5_DEPTH_SHIFT 16
#define NV50TIC_0_5_HEIGHT_MASK 0x0000ffff
#define NV50TIC_0_5_HEIGHT_SHIFT 0
-
#define NV50TIC_0_6_UNKNOWN_MASK 0xffffffff
-#define NV50TIC_0_7_OFFSET_HIGH_MASK 0xffffffff
-#define NV50TIC_0_7_OFFSET_HIGH_SHIFT 0
+#define NV50TIC_0_7_BASE_LEVEL_MASK 0x0000000f
+#define NV50TIC_0_7_BASE_LEVEL_SHIFT 0
+#define NV50TIC_0_7_MAX_LEVEL_MASK 0x000000f0
+#define NV50TIC_0_7_MAX_LEVEL_SHIFT 4
/* Texture sampler control block */
#define NV50TSC_1_0_WRAPS_MASK 0x00000007
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 7c360e9e73..9eb223eca6 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -121,11 +121,12 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
}
static struct pipe_transfer *
-nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+nv50_transfer_new(struct pipe_context *pcontext, struct pipe_texture *pt,
unsigned face, unsigned level, unsigned zslice,
enum pipe_transfer_usage usage,
unsigned x, unsigned y, unsigned w, unsigned h)
{
+ struct pipe_screen *pscreen = pcontext->screen;
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
struct nv50_miptree *mt = nv50_miptree(pt);
struct nv50_miptree_level *lvl = &mt->level[level];
@@ -186,7 +187,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
}
static void
-nv50_transfer_del(struct pipe_transfer *ptx)
+nv50_transfer_del(struct pipe_context *pcontext, struct pipe_transfer *ptx)
{
struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
struct nv50_miptree *mt = nv50_miptree(ptx->texture);
@@ -196,7 +197,7 @@ nv50_transfer_del(struct pipe_transfer *ptx)
unsigned ny = util_format_get_nblocksy(pt->format, tx->base.height);
if (ptx->usage & PIPE_TRANSFER_WRITE) {
- struct pipe_screen *pscreen = pt->screen;
+ struct pipe_screen *pscreen = pcontext->screen;
nv50_transfer_rect_m2mf(pscreen, tx->bo, 0,
tx->base.stride, tx->bo->tile_mode,
@@ -218,7 +219,7 @@ nv50_transfer_del(struct pipe_transfer *ptx)
}
static void *
-nv50_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
+nv50_transfer_map(struct pipe_context *pcontext, struct pipe_transfer *ptx)
{
struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
unsigned flags = 0;
@@ -236,7 +237,7 @@ nv50_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
}
static void
-nv50_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
+nv50_transfer_unmap(struct pipe_context *pcontext, struct pipe_transfer *ptx)
{
struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
@@ -244,12 +245,12 @@ nv50_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
}
void
-nv50_transfer_init_screen_functions(struct pipe_screen *pscreen)
+nv50_init_transfer_functions(struct nv50_context *nv50)
{
- pscreen->get_tex_transfer = nv50_transfer_new;
- pscreen->tex_transfer_destroy = nv50_transfer_del;
- pscreen->transfer_map = nv50_transfer_map;
- pscreen->transfer_unmap = nv50_transfer_unmap;
+ nv50->pipe.get_tex_transfer = nv50_transfer_new;
+ nv50->pipe.tex_transfer_destroy = nv50_transfer_del;
+ nv50->pipe.transfer_map = nv50_transfer_map;
+ nv50->pipe.transfer_unmap = nv50_transfer_unmap;
}
void
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 1c8ee0b9ad..5047286806 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -25,53 +25,9 @@
#include "util/u_inlines.h"
#include "util/u_format.h"
+#include "nouveau/nouveau_util.h"
#include "nv50_context.h"
-static boolean
-nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned);
-
-static boolean
-nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned);
-
-static boolean
-nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned);
-
-static boolean
-nv50_push_arrays(struct nv50_context *, unsigned, unsigned);
-
-#define NV50_USING_LOATHED_EDGEFLAG(ctx) ((ctx)->vertprog->cfg.edgeflag_in < 16)
-
-static INLINE unsigned
-nv50_prim(unsigned mode)
-{
- switch (mode) {
- case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS;
- case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES;
- case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP;
- case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP;
- case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES;
- case PIPE_PRIM_TRIANGLE_STRIP:
- return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP;
- case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN;
- case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS;
- case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP;
- case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON;
- case PIPE_PRIM_LINES_ADJACENCY:
- return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY;
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY;
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY;
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY;
- default:
- break;
- }
-
- NOUVEAU_ERR("invalid primitive type %d\n", mode);
- return NV50TCL_VERTEX_BEGIN_POINTS;
-}
-
static INLINE uint32_t
nv50_vbo_type_to_hw(enum pipe_format format)
{
@@ -139,15 +95,16 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
uint32_t hw_type, hw_size;
enum pipe_format pf = ve->src_format;
const struct util_format_description *desc;
- unsigned size;
+ unsigned size, nr_components;
desc = util_format_description(pf);
assert(desc);
size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);
+ nr_components = util_format_get_nr_components(pf);
hw_type = nv50_vbo_type_to_hw(pf);
- hw_size = nv50_vbo_size_to_hw(size, ve->nr_components);
+ hw_size = nv50_vbo_size_to_hw(size, nr_components);
if (!hw_type || !hw_size) {
NOUVEAU_ERR("unsupported vbo format: %s\n", util_format_name(pf));
@@ -161,250 +118,58 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
return (hw_type | hw_size);
}
-/* For instanced drawing from user buffers, hitting the FIFO repeatedly
- * with the same vertex data is probably worse than uploading all data.
- */
-static boolean
-nv50_upload_vtxbuf(struct nv50_context *nv50, unsigned i)
-{
- struct nv50_screen *nscreen = nv50->screen;
- struct pipe_screen *pscreen = &nscreen->base.base;
- struct pipe_buffer *buf = nscreen->strm_vbuf[i];
- struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
- uint8_t *src;
- unsigned size = align(vb->buffer->size, 4096);
-
- if (buf && buf->size < size)
- pipe_buffer_reference(&nscreen->strm_vbuf[i], NULL);
-
- if (!nscreen->strm_vbuf[i]) {
- nscreen->strm_vbuf[i] = pipe_buffer_create(
- pscreen, 0, PIPE_BUFFER_USAGE_VERTEX, size);
- buf = nscreen->strm_vbuf[i];
- }
-
- src = pipe_buffer_map(pscreen, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ);
- if (!src)
- return FALSE;
- src += vb->buffer_offset;
-
- size = (vb->max_index + 1) * vb->stride + 16; /* + 16 is for stride 0 */
- if (vb->buffer_offset + size > vb->buffer->size)
- size = vb->buffer->size - vb->buffer_offset;
-
- pipe_buffer_write(pscreen, buf, vb->buffer_offset, size, src);
- pipe_buffer_unmap(pscreen, vb->buffer);
-
- vb->buffer = buf; /* don't pipe_reference, this is a private copy */
- return TRUE;
-}
-
-static void
-nv50_upload_user_vbufs(struct nv50_context *nv50)
-{
- unsigned i;
-
- if (nv50->vbo_fifo)
- nv50->dirty |= NV50_NEW_ARRAYS;
- if (!(nv50->dirty & NV50_NEW_ARRAYS))
- return;
-
- for (i = 0; i < nv50->vtxbuf_nr; ++i) {
- if (nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX)
- continue;
- nv50_upload_vtxbuf(nv50, i);
- }
-}
-
-static void
-nv50_set_static_vtxattr(struct nv50_context *nv50, unsigned i, void *data)
-{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
- float v[4];
-
- util_format_read_4f(nv50->vtxelt[i].src_format,
- v, 0, data, 0, 0, 0, 1, 1);
-
- switch (nv50->vtxelt[i].nr_components) {
- case 4:
- BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_4F_X(i), 4);
- OUT_RINGf (chan, v[0]);
- OUT_RINGf (chan, v[1]);
- OUT_RINGf (chan, v[2]);
- OUT_RINGf (chan, v[3]);
- break;
- case 3:
- BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_3F_X(i), 3);
- OUT_RINGf (chan, v[0]);
- OUT_RINGf (chan, v[1]);
- OUT_RINGf (chan, v[2]);
- break;
- case 2:
- BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_2F_X(i), 2);
- OUT_RINGf (chan, v[0]);
- OUT_RINGf (chan, v[1]);
- break;
- case 1:
- BEGIN_RING(chan, tesla, NV50TCL_VTX_ATTR_1F(i), 1);
- OUT_RINGf (chan, v[0]);
- break;
- default:
- assert(0);
- break;
- }
-}
-
-static unsigned
-init_per_instance_arrays_immd(struct nv50_context *nv50,
- unsigned startInstance,
- unsigned pos[16], unsigned step[16])
-{
- struct nouveau_bo *bo;
- unsigned i, b, count = 0;
-
- for (i = 0; i < nv50->vtxelt_nr; ++i) {
- if (!nv50->vtxelt[i].instance_divisor)
- continue;
- ++count;
- b = nv50->vtxelt[i].vertex_buffer_index;
-
- pos[i] = nv50->vtxelt[i].src_offset +
- nv50->vtxbuf[b].buffer_offset +
- startInstance * nv50->vtxbuf[b].stride;
- step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
-
- bo = nouveau_bo(nv50->vtxbuf[b].buffer);
- if (!bo->map)
- nouveau_bo_map(bo, NOUVEAU_BO_RD);
-
- nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
- }
-
- return count;
-}
-
-static unsigned
-init_per_instance_arrays(struct nv50_context *nv50,
- unsigned startInstance,
- unsigned pos[16], unsigned step[16])
-{
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
+struct instance {
struct nouveau_bo *bo;
- struct nouveau_stateobj *so;
- unsigned i, b, count = 0;
- const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
-
- if (nv50->vbo_fifo)
- return init_per_instance_arrays_immd(nv50, startInstance,
- pos, step);
-
- so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
-
- for (i = 0; i < nv50->vtxelt_nr; ++i) {
- if (!nv50->vtxelt[i].instance_divisor)
- continue;
- ++count;
- b = nv50->vtxelt[i].vertex_buffer_index;
-
- pos[i] = nv50->vtxelt[i].src_offset +
- nv50->vtxbuf[b].buffer_offset +
- startInstance * nv50->vtxbuf[b].stride;
-
- if (!startInstance) {
- step[i] = 0;
- continue;
- }
- step[i] = startInstance % nv50->vtxelt[i].instance_divisor;
-
- bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-
- so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
- so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
- }
-
- if (count && startInstance) {
- so_ref (so, &nv50->state.instbuf); /* for flush notify */
- so_emit(chan, nv50->state.instbuf);
- }
- so_ref (NULL, &so);
-
- return count;
-}
+ unsigned delta;
+ unsigned stride;
+ unsigned step;
+ unsigned divisor;
+};
static void
-step_per_instance_arrays_immd(struct nv50_context *nv50,
- unsigned pos[16], unsigned step[16])
+instance_init(struct nv50_context *nv50, struct instance *a, unsigned first)
{
- struct nouveau_bo *bo;
- unsigned i, b;
+ int i;
- for (i = 0; i < nv50->vtxelt_nr; ++i) {
- if (!nv50->vtxelt[i].instance_divisor)
- continue;
- if (++step[i] != nv50->vtxelt[i].instance_divisor)
- continue;
- b = nv50->vtxelt[i].vertex_buffer_index;
- bo = nouveau_bo(nv50->vtxbuf[b].buffer);
+ for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+ struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
+ struct pipe_vertex_buffer *vb;
- step[i] = 0;
- pos[i] += nv50->vtxbuf[b].stride;
+ a[i].divisor = ve->instance_divisor;
+ if (a[i].divisor) {
+ vb = &nv50->vtxbuf[ve->vertex_buffer_index];
- nv50_set_static_vtxattr(nv50, i, (uint8_t *)bo->map + pos[i]);
+ a[i].bo = nouveau_bo(vb->buffer);
+ a[i].stride = vb->stride;
+ a[i].step = first % a[i].divisor;
+ a[i].delta = vb->buffer_offset + ve->src_offset +
+ (first * a[i].stride);
+ }
}
}
static void
-step_per_instance_arrays(struct nv50_context *nv50,
- unsigned pos[16], unsigned step[16])
+instance_step(struct nv50_context *nv50, struct instance *a)
{
+ struct nouveau_channel *chan = nv50->screen->tesla->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
- struct nouveau_bo *bo;
- struct nouveau_stateobj *so;
- unsigned i, b;
- const uint32_t rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
-
- if (nv50->vbo_fifo) {
- step_per_instance_arrays_immd(nv50, pos, step);
- return;
- }
-
- so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 2, nv50->vtxelt_nr * 2);
+ int i;
- for (i = 0; i < nv50->vtxelt_nr; ++i) {
- if (!nv50->vtxelt[i].instance_divisor)
+ for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+ if (!a[i].divisor)
continue;
- b = nv50->vtxelt[i].vertex_buffer_index;
- if (++step[i] == nv50->vtxelt[i].instance_divisor) {
- step[i] = 0;
- pos[i] += nv50->vtxbuf[b].stride;
+ BEGIN_RING(chan, tesla,
+ NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
+ OUT_RELOCh(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
+ OUT_RELOCl(chan, a[i].bo, a[i].delta, NOUVEAU_BO_RD |
+ NOUVEAU_BO_VRAM | NOUVEAU_BO_GART);
+ if (++a[i].step == a[i].divisor) {
+ a[i].step = 0;
+ a[i].delta += a[i].stride;
}
-
- bo = nouveau_bo(nv50->vtxbuf[b].buffer);
-
- so_method(so, tesla, NV50TCL_VERTEX_ARRAY_START_HIGH(i), 2);
- so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, bo, pos[i], rl | NOUVEAU_BO_LOW, 0, 0);
}
-
- so_ref (so, &nv50->state.instbuf); /* for flush notify */
- so_ref (NULL, &so);
-
- so_emit(chan, nv50->state.instbuf);
-}
-
-static INLINE void
-nv50_unmap_vbufs(struct nv50_context *nv50)
-{
- unsigned i;
-
- for (i = 0; i < nv50->vtxbuf_nr; ++i)
- if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
- nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
}
void
@@ -415,198 +180,207 @@ nv50_draw_arrays_instanced(struct pipe_context *pipe,
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_channel *chan = nv50->screen->tesla->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
- unsigned i, nz_divisors;
- unsigned step[16], pos[16];
-
- if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
- nv50_upload_user_vbufs(nv50);
+ struct instance a[16];
+ unsigned prim = nv50_prim(mode);
- nv50_state_validate(nv50);
+ instance_init(nv50, a, startInstance);
+ if (!nv50_state_validate(nv50, 10 + 16*3))
+ return;
- nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
+ if (nv50->vbo_fifo) {
+ nv50_push_elements_instanced(pipe, NULL, 0, mode, start,
+ count, startInstance,
+ instanceCount);
+ return;
+ }
BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
OUT_RING (chan, NV50_CB_AUX | (24 << 8));
OUT_RING (chan, startInstance);
+ while (instanceCount--) {
+ if (AVAIL_RING(chan) < (7 + 16*3)) {
+ FIRE_RING(chan);
+ if (!nv50_state_validate(nv50, 7 + 16*3)) {
+ assert(0);
+ return;
+ }
+ }
+ instance_step(nv50, a);
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(mode));
-
- if (nv50->vbo_fifo)
- nv50_push_arrays(nv50, start, count);
- else {
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+ OUT_RING (chan, prim);
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
OUT_RING (chan, start);
OUT_RING (chan, count);
- }
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
- OUT_RING (chan, 0);
-
- for (i = 1; i < instanceCount; i++) {
- if (nz_divisors) /* any non-zero array divisors ? */
- step_per_instance_arrays(nv50, pos, step);
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(mode) | (1 << 28));
-
- if (nv50->vbo_fifo)
- nv50_push_arrays(nv50, start, count);
- else {
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
- OUT_RING (chan, start);
- OUT_RING (chan, count);
- }
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
- }
- nv50_unmap_vbufs(nv50);
- so_ref(NULL, &nv50->state.instbuf);
+ prim |= (1 << 28);
+ }
}
void
nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
unsigned count)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->tesla->channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- boolean ret;
-
- nv50_state_validate(nv50);
-
- BEGIN_RING(chan, tesla, 0x142c, 1);
- OUT_RING (chan, 0);
- BEGIN_RING(chan, tesla, 0x142c, 1);
- OUT_RING (chan, 0);
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(mode));
-
- if (nv50->vbo_fifo)
- ret = nv50_push_arrays(nv50, start, count);
- else {
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
- OUT_RING (chan, start);
- OUT_RING (chan, count);
- ret = TRUE;
- }
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
- OUT_RING (chan, 0);
-
- nv50_unmap_vbufs(nv50);
-
- /* XXX: not sure what to do if ret != TRUE: flush and retry?
- */
- assert(ret);
+ nv50_draw_arrays_instanced(pipe, mode, start, count, 0, 1);
}
-static INLINE boolean
-nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
- unsigned start, unsigned count)
-{
- struct nouveau_channel *chan = nv50->screen->tesla->channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
-
- map += start;
+struct inline_ctx {
+ struct nv50_context *nv50;
+ void *map;
+};
- if (nv50->vbo_fifo)
- return nv50_push_elements_u08(nv50, map, count);
+static void
+inline_elt08(void *priv, unsigned start, unsigned count)
+{
+ struct inline_ctx *ctx = priv;
+ struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+ struct nouveau_channel *chan = tesla->channel;
+ uint8_t *map = (uint8_t *)ctx->map + start;
if (count & 1) {
BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
OUT_RING (chan, map[0]);
map++;
- count--;
+ count &= ~1;
}
- while (count) {
- unsigned nr = count > 2046 ? 2046 : count;
- int i;
-
- BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
- for (i = 0; i < nr; i += 2)
- OUT_RING (chan, (map[i + 1] << 16) | map[i]);
+ count >>= 1;
+ if (!count)
+ return;
- count -= nr;
- map += nr;
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count);
+ while (count--) {
+ OUT_RING(chan, (map[1] << 16) | map[0]);
+ map += 2;
}
- return TRUE;
}
-static INLINE boolean
-nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
- unsigned start, unsigned count)
+static void
+inline_elt16(void *priv, unsigned start, unsigned count)
{
- struct nouveau_channel *chan = nv50->screen->tesla->channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
-
- map += start;
-
- if (nv50->vbo_fifo)
- return nv50_push_elements_u16(nv50, map, count);
+ struct inline_ctx *ctx = priv;
+ struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+ struct nouveau_channel *chan = tesla->channel;
+ uint16_t *map = (uint16_t *)ctx->map + start;
if (count & 1) {
BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
OUT_RING (chan, map[0]);
+ count &= ~1;
map++;
- count--;
}
- while (count) {
- unsigned nr = count > 2046 ? 2046 : count;
- int i;
-
- BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, nr >> 1);
- for (i = 0; i < nr; i += 2)
- OUT_RING (chan, (map[i + 1] << 16) | map[i]);
+ count >>= 1;
+ if (!count)
+ return;
- count -= nr;
- map += nr;
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U16, count);
+ while (count--) {
+ OUT_RING(chan, (map[1] << 16) | map[0]);
+ map += 2;
}
- return TRUE;
}
-static INLINE boolean
-nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
- unsigned start, unsigned count)
+static void
+inline_elt32(void *priv, unsigned start, unsigned count)
+{
+ struct inline_ctx *ctx = priv;
+ struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+ struct nouveau_channel *chan = tesla->channel;
+
+ BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, count);
+ OUT_RINGp (chan, (uint32_t *)ctx->map + start, count);
+}
+
+static void
+inline_edgeflag(void *priv, boolean enabled)
{
+ struct inline_ctx *ctx = priv;
+ struct nouveau_grobj *tesla = ctx->nv50->screen->tesla;
+ struct nouveau_channel *chan = tesla->channel;
+
+ BEGIN_RING(chan, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
+ OUT_RING (chan, enabled ? 1 : 0);
+}
+
+static void
+nv50_draw_elements_inline(struct pipe_context *pipe,
+ struct pipe_buffer *indexBuffer, unsigned indexSize,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned startInstance, unsigned instanceCount)
+{
+ struct pipe_screen *pscreen = pipe->screen;
+ struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_channel *chan = nv50->screen->tesla->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct instance a[16];
+ struct inline_ctx ctx;
+ struct u_split_prim s;
+ boolean nzi = FALSE;
+ unsigned overhead;
+
+ overhead = 16*3; /* potential instance adjustments */
+ overhead += 4; /* Begin()/End() */
+ overhead += 4; /* potential edgeflag disable/reenable */
+ overhead += 3; /* potentially 3 VTX_ELT_U16/U32 packet headers */
+
+ s.priv = &ctx;
+ if (indexSize == 1)
+ s.emit = inline_elt08;
+ else
+ if (indexSize == 2)
+ s.emit = inline_elt16;
+ else
+ s.emit = inline_elt32;
+ s.edge = inline_edgeflag;
+
+ ctx.nv50 = nv50;
+ ctx.map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
+ assert(ctx.map);
+ if (!ctx.map)
+ return;
- map += start;
+ instance_init(nv50, a, startInstance);
+ if (!nv50_state_validate(nv50, overhead + 6 + 3))
+ return;
- if (nv50->vbo_fifo)
- return nv50_push_elements_u32(nv50, map, count);
+ BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
+ OUT_RING (chan, NV50_CB_AUX | (24 << 8));
+ OUT_RING (chan, startInstance);
+ while (instanceCount--) {
+ unsigned max_verts;
+ boolean done;
+
+ u_split_prim_init(&s, mode, start, count);
+ do {
+ if (AVAIL_RING(chan) < (overhead + 6)) {
+ FIRE_RING(chan);
+ if (!nv50_state_validate(nv50, (overhead + 6))) {
+ assert(0);
+ return;
+ }
+ }
- while (count) {
- unsigned nr = count > 2047 ? 2047 : count;
+ max_verts = AVAIL_RING(chan) - overhead;
+ if (max_verts > 2047)
+ max_verts = 2047;
+ if (indexSize != 4)
+ max_verts <<= 1;
+ instance_step(nv50, a);
- BEGIN_RING_NI(chan, tesla, NV50TCL_VB_ELEMENT_U32, nr);
- OUT_RINGp (chan, map, nr);
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
+ OUT_RING (chan, nv50_prim(s.mode) | (nzi ? (1<<28) : 0));
+ done = u_split_prim_next(&s, max_verts);
+ BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
+ OUT_RING (chan, 0);
+ } while (!done);
- count -= nr;
- map += nr;
+ nzi = TRUE;
}
- return TRUE;
-}
-static INLINE void
-nv50_draw_elements_inline(struct nv50_context *nv50,
- void *map, unsigned indexSize,
- unsigned start, unsigned count)
-{
- switch (indexSize) {
- case 1:
- nv50_draw_elements_inline_u08(nv50, map, start, count);
- break;
- case 2:
- nv50_draw_elements_inline_u16(nv50, map, start, count);
- break;
- case 4:
- nv50_draw_elements_inline_u32(nv50, map, start, count);
- break;
- }
+ pipe_buffer_unmap(pscreen, indexBuffer);
}
void
@@ -617,49 +391,68 @@ nv50_draw_elements_instanced(struct pipe_context *pipe,
unsigned startInstance, unsigned instanceCount)
{
struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_channel *chan = nv50->screen->tesla->channel;
struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nouveau_channel *chan = tesla->channel;
- struct pipe_screen *pscreen = pipe->screen;
- void *map;
- unsigned i, nz_divisors;
- unsigned step[16], pos[16];
+ struct instance a[16];
+ unsigned prim = nv50_prim(mode);
- map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
-
- if (!NV50_USING_LOATHED_EDGEFLAG(nv50))
- nv50_upload_user_vbufs(nv50);
-
- nv50_state_validate(nv50);
+ instance_init(nv50, a, startInstance);
+ if (!nv50_state_validate(nv50, 13 + 16*3))
+ return;
- nz_divisors = init_per_instance_arrays(nv50, startInstance, pos, step);
+ if (nv50->vbo_fifo) {
+ nv50_push_elements_instanced(pipe, indexBuffer, indexSize,
+ mode, start, count, startInstance,
+ instanceCount);
+ return;
+ } else
+ if (!(indexBuffer->usage & PIPE_BUFFER_USAGE_INDEX) || indexSize == 1) {
+ nv50_draw_elements_inline(pipe, indexBuffer, indexSize,
+ mode, start, count, startInstance,
+ instanceCount);
+ return;
+ }
BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
OUT_RING (chan, NV50_CB_AUX | (24 << 8));
OUT_RING (chan, startInstance);
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(mode));
-
- nv50_draw_elements_inline(nv50, map, indexSize, start, count);
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
- OUT_RING (chan, 0);
-
- for (i = 1; i < instanceCount; ++i) {
- if (nz_divisors) /* any non-zero array divisors ? */
- step_per_instance_arrays(nv50, pos, step);
+ while (instanceCount--) {
+ if (AVAIL_RING(chan) < (7 + 16*3)) {
+ FIRE_RING(chan);
+ if (!nv50_state_validate(nv50, 10 + 16*3)) {
+ assert(0);
+ return;
+ }
+ }
+ instance_step(nv50, a);
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(mode) | (1 << 28));
-
- nv50_draw_elements_inline(nv50, map, indexSize, start, count);
-
+ OUT_RING (chan, prim);
+ if (indexSize == 4) {
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0);
+ OUT_RING (chan, count);
+ nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
+ start << 2, count << 2);
+ } else
+ if (indexSize == 2) {
+ unsigned vb_start = (start & ~1);
+ unsigned vb_end = (start + count + 1) & ~1;
+ unsigned dwords = (vb_end - vb_start) >> 1;
+
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
+ OUT_RING (chan, ((start & 1) << 31) | count);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0);
+ OUT_RING (chan, dwords);
+ nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
+ vb_start << 1, dwords << 2);
+ BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
+ OUT_RING (chan, 0);
+ }
BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
OUT_RING (chan, 0);
- }
- nv50_unmap_vbufs(nv50);
- so_ref(NULL, &nv50->state.instbuf);
+ prim |= (1 << 28);
+ }
}
void
@@ -667,51 +460,8 @@ nv50_draw_elements(struct pipe_context *pipe,
struct pipe_buffer *indexBuffer, unsigned indexSize,
unsigned mode, unsigned start, unsigned count)
{
- struct nv50_context *nv50 = nv50_context(pipe);
- struct nouveau_channel *chan = nv50->screen->tesla->channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct pipe_screen *pscreen = pipe->screen;
- void *map;
-
- nv50_state_validate(nv50);
-
- BEGIN_RING(chan, tesla, 0x142c, 1);
- OUT_RING (chan, 0);
- BEGIN_RING(chan, tesla, 0x142c, 1);
- OUT_RING (chan, 0);
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
- OUT_RING (chan, nv50_prim(mode));
-
- if (!nv50->vbo_fifo && indexSize == 4) {
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x30000, 0);
- OUT_RING (chan, count);
- nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
- start << 2, count << 2);
- } else
- if (!nv50->vbo_fifo && indexSize == 2) {
- unsigned vb_start = (start & ~1);
- unsigned vb_end = (start + count + 1) & ~1;
- unsigned dwords = (vb_end - vb_start) >> 1;
-
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
- OUT_RING (chan, ((start & 1) << 31) | count);
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x30000, 0);
- OUT_RING (chan, dwords);
- nouveau_pushbuf_submit(chan, nouveau_bo(indexBuffer),
- vb_start << 1, dwords << 2);
- BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16_SETUP, 1);
- OUT_RING (chan, 0);
- } else {
- map = pipe_buffer_map(pscreen, indexBuffer,
- PIPE_BUFFER_USAGE_CPU_READ);
- nv50_draw_elements_inline(nv50, map, indexSize, start, count);
- nv50_unmap_vbufs(nv50);
- pipe_buffer_unmap(pscreen, indexBuffer);
- }
-
- BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
- OUT_RING (chan, 0);
+ nv50_draw_elements_instanced(pipe, indexBuffer, indexSize,
+ mode, start, count, 0, 1);
}
static INLINE boolean
@@ -726,6 +476,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
struct nouveau_bo *bo = nouveau_bo(vb->buffer);
float v[4];
int ret;
+ unsigned nr_components = util_format_get_nr_components(ve->src_format);
ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
if (ret)
@@ -736,9 +487,10 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
0, 0, 1, 1);
so = *pso;
if (!so)
- *pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0);
+ *pso = so = so_new(nv50->vtxelt->num_elements,
+ nv50->vtxelt->num_elements * 4, 0);
- switch (ve->nr_components) {
+ switch (nr_components) {
case 4:
so_method(so, tesla, NV50TCL_VTX_ATTR_4F_X(attrib), 4);
so_data (so, fui(v[0]));
@@ -775,6 +527,18 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
}
void
+nv50_vtxelt_construct(struct nv50_vtxelt_stateobj *cso)
+{
+ unsigned i;
+
+ for (i = 0; i < cso->num_elements; ++i) {
+ struct pipe_vertex_element *ve = &cso->pipe[i];
+
+ cso->hw[i] = nv50_vbo_vtxelt_to_hw(ve);
+ }
+}
+
+struct nouveau_stateobj *
nv50_vbo_validate(struct nv50_context *nv50)
{
struct nouveau_grobj *tesla = nv50->screen->tesla;
@@ -783,30 +547,32 @@ nv50_vbo_validate(struct nv50_context *nv50)
/* don't validate if Gallium took away our buffers */
if (nv50->vtxbuf_nr == 0)
- return;
+ return NULL;
+
nv50->vbo_fifo = 0;
+ if (nv50->screen->force_push ||
+ nv50->vertprog->cfg.edgeflag_in < 16)
+ nv50->vbo_fifo = 0xffff;
- for (i = 0; i < nv50->vtxbuf_nr; ++i)
+ for (i = 0; i < nv50->vtxbuf_nr; i++) {
if (nv50->vtxbuf[i].stride &&
!(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
nv50->vbo_fifo = 0xffff;
+ }
- if (NV50_USING_LOATHED_EDGEFLAG(nv50))
- nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */
-
- n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);
+ n_ve = MAX2(nv50->vtxelt->num_elements, nv50->state.vtxelt_nr);
vtxattr = NULL;
- vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4);
+ vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt->num_elements * 4);
vtxfmt = so_new(1, n_ve, 0);
so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);
- for (i = 0; i < nv50->vtxelt_nr; i++) {
- struct pipe_vertex_element *ve = &nv50->vtxelt[i];
+ for (i = 0; i < nv50->vtxelt->num_elements; i++) {
+ struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
struct pipe_vertex_buffer *vb =
&nv50->vtxbuf[ve->vertex_buffer_index];
struct nouveau_bo *bo = nouveau_bo(vb->buffer);
- uint32_t hw = nv50_vbo_vtxelt_to_hw(ve);
+ uint32_t hw = nv50->vtxelt->hw[i];
if (!vb->stride &&
nv50_vbo_static_attrib(nv50, i, &vtxattr, ve, vb)) {
@@ -821,13 +587,13 @@ nv50_vbo_validate(struct nv50_context *nv50)
}
if (nv50->vbo_fifo) {
- so_data (vtxfmt, hw |
- (ve->instance_divisor ? (1 << 4) : i));
+ so_data (vtxfmt, hw | (ve->instance_divisor ? (1 << 4) : i));
so_method(vtxbuf, tesla,
NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
so_data (vtxbuf, 0);
continue;
}
+
so_data(vtxfmt, hw | i);
so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
@@ -855,355 +621,13 @@ nv50_vbo_validate(struct nv50_context *nv50)
so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
so_data (vtxbuf, 0);
}
- nv50->state.vtxelt_nr = nv50->vtxelt_nr;
+ nv50->state.vtxelt_nr = nv50->vtxelt->num_elements;
- so_ref (vtxfmt, &nv50->state.vtxfmt);
so_ref (vtxbuf, &nv50->state.vtxbuf);
so_ref (vtxattr, &nv50->state.vtxattr);
so_ref (NULL, &vtxbuf);
- so_ref (NULL, &vtxfmt);
so_ref (NULL, &vtxattr);
+ return vtxfmt;
}
-typedef void (*pfn_push)(struct nouveau_channel *, void *);
-
-struct nv50_vbo_emitctx
-{
- pfn_push push[16];
- uint8_t *map[16];
- unsigned stride[16];
- unsigned nr_ve;
- unsigned vtx_dwords;
- unsigned vtx_max;
-
- float edgeflag;
- unsigned ve_edgeflag;
-};
-
-static INLINE void
-emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit)
-{
- unsigned i;
-
- for (i = 0; i < emit->nr_ve; ++i) {
- emit->push[i](chan, emit->map[i]);
- emit->map[i] += emit->stride[i];
- }
-}
-
-static INLINE void
-emit_vtx(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit,
- uint32_t vi)
-{
- unsigned i;
-
- for (i = 0; i < emit->nr_ve; ++i)
- emit->push[i](chan, emit->map[i] + emit->stride[i] * vi);
-}
-
-static INLINE boolean
-nv50_map_vbufs(struct nv50_context *nv50)
-{
- int i;
-
- for (i = 0; i < nv50->vtxbuf_nr; ++i) {
- struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
- unsigned size = vb->stride * (vb->max_index + 1) + 16;
-
- if (nouveau_bo(vb->buffer)->map)
- continue;
-
- size = vb->stride * (vb->max_index + 1) + 16;
- size = MIN2(size, vb->buffer->size);
- if (!size)
- size = vb->buffer->size;
-
- if (nouveau_bo_map_range(nouveau_bo(vb->buffer),
- 0, size, NOUVEAU_BO_RD))
- break;
- }
-
- if (i == nv50->vtxbuf_nr)
- return TRUE;
- for (; i >= 0; --i)
- nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
- return FALSE;
-}
-
-static void
-emit_b32_1(struct nouveau_channel *chan, void *data)
-{
- uint32_t *v = data;
-
- OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b32_2(struct nouveau_channel *chan, void *data)
-{
- uint32_t *v = data;
-
- OUT_RING(chan, v[0]);
- OUT_RING(chan, v[1]);
-}
-
-static void
-emit_b32_3(struct nouveau_channel *chan, void *data)
-{
- uint32_t *v = data;
-
- OUT_RING(chan, v[0]);
- OUT_RING(chan, v[1]);
- OUT_RING(chan, v[2]);
-}
-
-static void
-emit_b32_4(struct nouveau_channel *chan, void *data)
-{
- uint32_t *v = data;
-
- OUT_RING(chan, v[0]);
- OUT_RING(chan, v[1]);
- OUT_RING(chan, v[2]);
- OUT_RING(chan, v[3]);
-}
-
-static void
-emit_b16_1(struct nouveau_channel *chan, void *data)
-{
- uint16_t *v = data;
-
- OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b16_3(struct nouveau_channel *chan, void *data)
-{
- uint16_t *v = data;
-
- OUT_RING(chan, (v[1] << 16) | v[0]);
- OUT_RING(chan, v[2]);
-}
-
-static void
-emit_b08_1(struct nouveau_channel *chan, void *data)
-{
- uint8_t *v = data;
-
- OUT_RING(chan, v[0]);
-}
-
-static void
-emit_b08_3(struct nouveau_channel *chan, void *data)
-{
- uint8_t *v = data;
-
- OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
-}
-
-static boolean
-emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
- unsigned start)
-{
- unsigned i;
-
- if (nv50_map_vbufs(nv50) == FALSE)
- return FALSE;
-
- emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in;
-
- emit->edgeflag = 0.5f;
- emit->nr_ve = 0;
- emit->vtx_dwords = 0;
-
- for (i = 0; i < nv50->vtxelt_nr; ++i) {
- struct pipe_vertex_element *ve;
- struct pipe_vertex_buffer *vb;
- unsigned n, size;
- const struct util_format_description *desc;
-
- ve = &nv50->vtxelt[i];
- vb = &nv50->vtxbuf[ve->vertex_buffer_index];
- if (!(nv50->vbo_fifo & (1 << i)) || ve->instance_divisor)
- continue;
- n = emit->nr_ve++;
-
- emit->stride[n] = vb->stride;
- emit->map[n] = (uint8_t *)nouveau_bo(vb->buffer)->map +
- vb->buffer_offset +
- (start * vb->stride + ve->src_offset);
-
- desc = util_format_description(ve->src_format);
- assert(desc);
-
- size = util_format_get_component_bits(
- ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
-
- assert(ve->nr_components > 0 && ve->nr_components <= 4);
-
- /* It shouldn't be necessary to push the implicit 1s
- * for case 3 and size 8 cases 1, 2, 3.
- */
- switch (size) {
- default:
- NOUVEAU_ERR("unsupported vtxelt size: %u\n", size);
- return FALSE;
- case 32:
- switch (ve->nr_components) {
- case 1: emit->push[n] = emit_b32_1; break;
- case 2: emit->push[n] = emit_b32_2; break;
- case 3: emit->push[n] = emit_b32_3; break;
- case 4: emit->push[n] = emit_b32_4; break;
- }
- emit->vtx_dwords += ve->nr_components;
- break;
- case 16:
- switch (ve->nr_components) {
- case 1: emit->push[n] = emit_b16_1; break;
- case 2: emit->push[n] = emit_b32_1; break;
- case 3: emit->push[n] = emit_b16_3; break;
- case 4: emit->push[n] = emit_b32_2; break;
- }
- emit->vtx_dwords += (ve->nr_components + 1) >> 1;
- break;
- case 8:
- switch (ve->nr_components) {
- case 1: emit->push[n] = emit_b08_1; break;
- case 2: emit->push[n] = emit_b16_1; break;
- case 3: emit->push[n] = emit_b08_3; break;
- case 4: emit->push[n] = emit_b32_1; break;
- }
- emit->vtx_dwords += 1;
- break;
- }
- }
-
- emit->vtx_max = 512 / emit->vtx_dwords;
- if (emit->ve_edgeflag < 16)
- emit->vtx_max = 1;
-
- return TRUE;
-}
-
-static INLINE void
-set_edgeflag(struct nouveau_channel *chan,
- struct nouveau_grobj *tesla,
- struct nv50_vbo_emitctx *emit, uint32_t index)
-{
- unsigned i = emit->ve_edgeflag;
-
- if (i < 16) {
- float f = *((float *)(emit->map[i] + index * emit->stride[i]));
-
- if (emit->edgeflag != f) {
- emit->edgeflag = f;
-
- BEGIN_RING(chan, tesla, 0x15e4, 1);
- OUT_RING (chan, f ? 1 : 0);
- }
- }
-}
-
-static boolean
-nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
-{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_vbo_emitctx emit;
- if (emit_prepare(nv50, &emit, start) == FALSE)
- return FALSE;
-
- while (count) {
- unsigned i, dw, nr = MIN2(count, emit.vtx_max);
- dw = nr * emit.vtx_dwords;
-
- set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */
-
- BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
- for (i = 0; i < nr; ++i)
- emit_vtx_next(chan, &emit);
-
- count -= nr;
- }
-
- return TRUE;
-}
-
-static boolean
-nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
-{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_vbo_emitctx emit;
-
- if (emit_prepare(nv50, &emit, 0) == FALSE)
- return FALSE;
-
- while (count) {
- unsigned i, dw, nr = MIN2(count, emit.vtx_max);
- dw = nr * emit.vtx_dwords;
-
- set_edgeflag(chan, tesla, &emit, *map);
-
- BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
- for (i = 0; i < nr; ++i)
- emit_vtx(chan, &emit, *map++);
-
- count -= nr;
- }
-
- return TRUE;
-}
-
-static boolean
-nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
-{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_vbo_emitctx emit;
-
- if (emit_prepare(nv50, &emit, 0) == FALSE)
- return FALSE;
-
- while (count) {
- unsigned i, dw, nr = MIN2(count, emit.vtx_max);
- dw = nr * emit.vtx_dwords;
-
- set_edgeflag(chan, tesla, &emit, *map);
-
- BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
- for (i = 0; i < nr; ++i)
- emit_vtx(chan, &emit, *map++);
-
- count -= nr;
- }
-
- return TRUE;
-}
-
-static boolean
-nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
-{
- struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
- struct nv50_vbo_emitctx emit;
-
- if (emit_prepare(nv50, &emit, 0) == FALSE)
- return FALSE;
-
- while (count) {
- unsigned i, dw, nr = MIN2(count, emit.vtx_max);
- dw = nr * emit.vtx_dwords;
-
- set_edgeflag(chan, tesla, &emit, *map);
-
- BEGIN_RING_NI(chan, tesla, NV50TCL_VERTEX_DATA, dw);
- for (i = 0; i < nr; ++i)
- emit_vtx(chan, &emit, *map++);
-
- count -= nr;
- }
-
- return TRUE;
-}