summaryrefslogtreecommitdiff
path: root/src/mesa/pipe/nv40
diff options
context:
space:
mode:
authorBen Skeggs <skeggsb@gmail.com>2007-11-18 17:08:06 +1100
committerBen Skeggs <skeggsb@gmail.com>2007-11-18 17:34:06 +1100
commit2f33b5b56e9221f2613b34cd1a1a9d82d5ed4303 (patch)
tree9bcdd27b60eaf4c3d608b4dd2f582fcee7c39f11 /src/mesa/pipe/nv40
parent193c85ec7a1aec44eebc67c6224fb6ecbb4607a5 (diff)
nouveau: Very rough cut at gallium winsys + nv40 pipe driver.
Diffstat (limited to 'src/mesa/pipe/nv40')
-rw-r--r--src/mesa/pipe/nv40/Makefile30
-rw-r--r--src/mesa/pipe/nv40/nv40_clear.c21
-rw-r--r--src/mesa/pipe/nv40/nv40_context.c277
-rw-r--r--src/mesa/pipe/nv40/nv40_context.h111
-rw-r--r--src/mesa/pipe/nv40/nv40_dma.h62
-rw-r--r--src/mesa/pipe/nv40/nv40_draw.c63
-rw-r--r--src/mesa/pipe/nv40/nv40_fragprog.c642
-rw-r--r--src/mesa/pipe/nv40/nv40_miptree.c60
-rw-r--r--src/mesa/pipe/nv40/nv40_query.c98
-rw-r--r--src/mesa/pipe/nv40/nv40_region.c85
-rw-r--r--src/mesa/pipe/nv40/nv40_shader.h546
-rw-r--r--src/mesa/pipe/nv40/nv40_state.c674
-rw-r--r--src/mesa/pipe/nv40/nv40_state.h173
-rw-r--r--src/mesa/pipe/nv40/nv40_state_emit.c112
-rw-r--r--src/mesa/pipe/nv40/nv40_state_tex.c140
-rw-r--r--src/mesa/pipe/nv40/nv40_surface.c229
-rw-r--r--src/mesa/pipe/nv40/nv40_vbo.c222
-rw-r--r--src/mesa/pipe/nv40/nv40_vertprog.c594
-rw-r--r--src/mesa/pipe/nv40/nvgl_pipe.h198
19 files changed, 4337 insertions, 0 deletions
diff --git a/src/mesa/pipe/nv40/Makefile b/src/mesa/pipe/nv40/Makefile
new file mode 100644
index 0000000000..90c8542da4
--- /dev/null
+++ b/src/mesa/pipe/nv40/Makefile
@@ -0,0 +1,30 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = nv40
+
+DRIVER_SOURCES = \
+ nv40_clear.c \
+ nv40_context.c \
+ nv40_draw.c \
+ nv40_fragprog.c \
+ nv40_miptree.c \
+ nv40_query.c \
+ nv40_region.c \
+ nv40_state.c \
+ nv40_state_emit.c \
+ nv40_state_tex.c \
+ nv40_surface.c \
+ nv40_vbo.c \
+ nv40_vertprog.c
+
+C_SOURCES = \
+ $(COMMON_SOURCES) \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+include ../Makefile.template
+
+symlinks:
+
diff --git a/src/mesa/pipe/nv40/nv40_clear.c b/src/mesa/pipe/nv40/nv40_clear.c
new file mode 100644
index 0000000000..f3b7a23689
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_clear.c
@@ -0,0 +1,21 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "nv40_context.h"
+#include "nv40_dma.h"
+
+
+/**
+ * Fill an entire surface with one packed clear value.
+ *
+ * For PIPE_FORMAT_S8_Z24 surfaces the top byte of clearValue is moved to
+ * the low byte and the low 24 bits are shifted up by 8 before filling
+ * (per the XXX note below, the surface is actually Z24_S8).
+ */
+void
+nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+           unsigned clearValue)
+{
+    unsigned fill = clearValue;
+
+    /*XXX: We're actually Z24_S8... */
+    if (ps->format == PIPE_FORMAT_S8_Z24) {
+        const unsigned top_byte = (clearValue & 0xff000000) >> 24;
+        const unsigned low_bits = (clearValue & 0x00ffffff) << 8;
+
+        fill = top_byte | low_bits;
+    }
+
+    pipe->region_fill(pipe, ps->region, 0, 0, 0, ps->width, ps->height,
+                      fill);
+}
diff --git a/src/mesa/pipe/nv40/nv40_context.c b/src/mesa/pipe/nv40/nv40_context.c
new file mode 100644
index 0000000000..ff66095c5f
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_context.c
@@ -0,0 +1,277 @@
+#include "pipe/draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_util.h"
+
+#include "nv40_context.h"
+#include "nv40_dma.h"
+
+/* Report whether the driver accepts the given pipe format. */
+static boolean
+nv40_is_format_supported(struct pipe_context *pipe, uint format)
+{
+    if (format == PIPE_FORMAT_U_A8_R8_G8_B8 ||
+        format == PIPE_FORMAT_U_R5_G6_B5 ||
+        format == PIPE_FORMAT_S8_Z24)
+        return TRUE;
+
+    return FALSE;
+}
+
+/*
+ * Return the renderer name, "NV" followed by the chipset id in hex.
+ *
+ * The string lives in a function-static buffer, so it is overwritten by
+ * the next call.
+ */
+static const char *
+nv40_get_name(struct pipe_context *pipe)
+{
+    static char name[128];
+    struct nv40_context *ctx = (struct nv40_context *)pipe;
+
+    snprintf(name, sizeof(name), "NV%02X", ctx->chipset);
+    return name;
+}
+
+/* Return the constant vendor string for this driver. */
+static const char *
+nv40_get_vendor(struct pipe_context *pipe)
+{
+ return "nouveau";
+}
+
+/*
+ * Query an integer capability (PIPE_CAP_*).
+ *
+ * Unknown capabilities are logged and reported as 0.
+ */
+static int
+nv40_get_param(struct pipe_context *pipe, int param)
+{
+    switch (param) {
+    /* boolean capabilities that are advertised */
+    case PIPE_CAP_NPOT_TEXTURES:
+    case PIPE_CAP_TWO_SIDED_STENCIL:
+    case PIPE_CAP_OCCLUSION_QUERY:
+        return 1;
+    /* boolean capabilities that are not advertised */
+    case PIPE_CAP_GLSL:
+    case PIPE_CAP_S3TC:
+    case PIPE_CAP_ANISOTROPIC_FILTER:
+    case PIPE_CAP_POINT_SPRITE:
+    case PIPE_CAP_TEXTURE_SHADOW_MAP:
+        return 0;
+    /* numeric limits */
+    case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+        return 16;
+    case PIPE_CAP_MAX_RENDER_TARGETS:
+        return 4;
+    case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+    case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+        return 13;
+    case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+        return 10;
+    default:
+        break;
+    }
+
+    NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+    return 0;
+}
+
+/*
+ * Query a floating-point capability (PIPE_CAP_*).
+ *
+ * No float capability is implemented yet: every case listed below falls
+ * through to the default branch, which logs the parameter and returns 0.0.
+ */
+static float
+nv40_get_paramf(struct pipe_context *pipe, int param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ default:
+ NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
+ return 0.0;
+ }
+}
+
+/*
+ * Submit all queued commands to the hardware.
+ *
+ * flags is a mask of PIPE_FLUSH_* bits:
+ *  - PIPE_FLUSH_TEXTURE_CACHE: method 0x1fd8 is emitted twice, with data 2
+ *    and then 1, before submission.
+ *  - PIPE_FLUSH_WAIT: the sync notifier is reset, methods 0x104 and 0x100
+ *    are queued, and after submission the function blocks in
+ *    notifier_wait() on the sync notifier.
+ */
+static void
+nv40_flush(struct pipe_context *pipe, unsigned flags)
+{
+ struct nv40_context *nv40 = (struct nv40_context *)pipe;
+ struct nouveau_winsys *nvws = nv40->nvws;
+
+ if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
+ BEGIN_RING(curie, 0x1fd8, 1);
+ OUT_RING (2);
+ BEGIN_RING(curie, 0x1fd8, 1);
+ OUT_RING (1);
+ }
+
+ if (flags & PIPE_FLUSH_WAIT) {
+ /* Must be reset before the commands that signal it are queued. */
+ nvws->notifier_reset(nv40->sync, 0);
+ /* NOTE(review): 0x104/0x100 are presumably NOTIFY/NOP - confirm. */
+ BEGIN_RING(curie, 0x104, 1);
+ OUT_RING (0);
+ BEGIN_RING(curie, 0x100, 1);
+ OUT_RING (0);
+ }
+
+ FIRE_RING();
+
+ /* NOTE(review): 2000 is presumably a timeout - confirm the unit. */
+ if (flags & PIPE_FLUSH_WAIT)
+ nvws->notifier_wait(nv40->sync, 0, 0, 2000);
+}
+
+/*
+ * Destroy an nv40 context: tear down the draw module and release the
+ * memory owned directly by the context.
+ *
+ * NOTE(review): the notifiers (sync, query) and the curie object allocated
+ * at creation time are not released here; no matching free entry point is
+ * visible in this file - confirm how the winsys expects them to be freed.
+ */
+static void
+nv40_destroy(struct pipe_context *pipe)
+{
+    struct nv40_context *nv40 = (struct nv40_context *)pipe;
+
+    draw_destroy(nv40->draw);
+    /* Allocated with calloc() in nv40_create(); was previously leaked. */
+    free(nv40->query_objects);
+    free(nv40);
+}
+
+/*
+ * Allocate the hardware objects used by the context and emit the initial
+ * 3D object setup.
+ *
+ * Allocates the query notifier (num_query_objects entries) and the 3D
+ * object of class curie_class, then queues the DMA object bindings and a
+ * set of fixed register writes and submits them.
+ *
+ * Returns TRUE on success, FALSE if either allocation fails.
+ *
+ * NOTE(review): when grobj_alloc() fails the query notifier allocated just
+ * before is not released here.
+ */
+static boolean
+nv40_init_hwctx(struct nv40_context *nv40, int curie_class)
+{
+ struct nouveau_winsys *nvws = nv40->nvws;
+ int ret;
+
+ if ((ret = nvws->notifier_alloc(nvws, nv40->num_query_objects,
+ &nv40->query))) {
+ NOUVEAU_ERR("Error creating query notifier objects: %d\n", ret);
+ return FALSE;
+ }
+
+ if ((ret = nvws->grobj_alloc(nvws, curie_class,
+ &nv40->curie))) {
+ NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
+ return FALSE;
+ }
+
+ /* Bind DMA objects: sync notifier, VRAM/GART handles, query notifier. */
+ BEGIN_RING(curie, NV40TCL_DMA_NOTIFY, 1);
+ OUT_RING (nv40->sync->handle);
+ BEGIN_RING(curie, NV40TCL_DMA_TEXTURE0, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->gart->handle);
+ BEGIN_RING(curie, NV40TCL_DMA_COLOR1, 1);
+ OUT_RING (nvws->channel->vram->handle);
+ BEGIN_RING(curie, NV40TCL_DMA_COLOR0, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->vram->handle);
+ BEGIN_RING(curie, NV40TCL_DMA_VTXBUF0, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->gart->handle);
+ BEGIN_RING(curie, NV40TCL_DMA_FENCE, 2);
+ OUT_RING (0);
+ OUT_RING (nv40->query->handle);
+ BEGIN_RING(curie, NV40TCL_DMA_UNK01AC, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->vram->handle);
+ BEGIN_RING(curie, NV40TCL_DMA_COLOR2, 2);
+ OUT_RING (nvws->channel->vram->handle);
+ OUT_RING (nvws->channel->vram->handle);
+
+ /* NOTE(review): unnamed method 0x1ea4; meaning of the values unknown. */
+ BEGIN_RING(curie, 0x1ea4, 3);
+ OUT_RING (0x00000010);
+ OUT_RING (0x01000100);
+ OUT_RING (0xff800006);
+
+ /* vtxprog output routing */
+ BEGIN_RING(curie, 0x1fc4, 1);
+ OUT_RING (0x06144321);
+ BEGIN_RING(curie, 0x1fc8, 2);
+ OUT_RING (0xedcba987);
+ OUT_RING (0x00000021);
+ BEGIN_RING(curie, 0x1fd0, 1);
+ OUT_RING (0x00171615);
+ BEGIN_RING(curie, 0x1fd4, 1);
+ OUT_RING (0x001b1a19);
+
+ /* NOTE(review): further unnamed methods with fixed values. */
+ BEGIN_RING(curie, 0x1ef8, 1);
+ OUT_RING (0x0020ffff);
+ BEGIN_RING(curie, 0x1d64, 1);
+ OUT_RING (0x00d30000);
+ BEGIN_RING(curie, 0x1e94, 1);
+ OUT_RING (0x00000001);
+
+ FIRE_RING ();
+ return TRUE;
+}
+
+#define GRCLASS4097_CHIPSETS 0x00000baf
+#define GRCLASS4497_CHIPSETS 0x00005450
+/*
+ * Create an nv40 pipe context.
+ *
+ * pipe_winsys  generic winsys stored in the returned pipe_context
+ * nvws         nouveau winsys used for notifiers, objects and the ring
+ * chipset      chipset id; must be 0x4X and present in one of the
+ *              GRCLASS4097_CHIPSETS / GRCLASS4497_CHIPSETS bitmasks
+ *
+ * Returns the new context, or NULL on an unsupported chipset or on any
+ * allocation/initialisation failure. All failure paths after the context
+ * has been allocated go through one cleanup label so that the query object
+ * table is not leaked.
+ *
+ * NOTE(review): notifier/grobj objects already allocated when a later step
+ * fails are not released; no free entry point is visible in this file.
+ */
+struct pipe_context *
+nv40_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws,
+            unsigned chipset)
+{
+    struct nv40_context *nv40;
+    int curie_class, ret;
+
+    if ((chipset & 0xf0) != 0x40) {
+        NOUVEAU_ERR("Not a NV4X chipset\n");
+        return NULL;
+    }
+
+    /* The low nibble of the chipset id selects the 3D object class. */
+    if (GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f))) {
+        curie_class = 0x4097;
+    } else
+    if (GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) {
+        curie_class = 0x4497;
+    } else {
+        NOUVEAU_ERR("Unknown NV4X chipset: NV%02x\n", chipset);
+        return NULL;
+    }
+
+    nv40 = CALLOC_STRUCT(nv40_context);
+    if (!nv40)
+        return NULL;
+    nv40->chipset = chipset;
+    nv40->nvws = nvws;
+
+    if ((ret = nvws->notifier_alloc(nvws, 1, &nv40->sync))) {
+        NOUVEAU_ERR("Error creating notifier object: %d\n", ret);
+        goto fail;
+    }
+
+    nv40->num_query_objects = 32;
+    nv40->query_objects = calloc(nv40->num_query_objects,
+                                 sizeof(struct pipe_query_object *));
+    if (!nv40->query_objects)
+        goto fail;
+
+    if (!nv40_init_hwctx(nv40, curie_class))
+        goto fail;
+
+    nv40->pipe.winsys = pipe_winsys;
+
+    nv40->pipe.destroy = nv40_destroy;
+    nv40->pipe.is_format_supported = nv40_is_format_supported;
+    nv40->pipe.get_name = nv40_get_name;
+    nv40->pipe.get_vendor = nv40_get_vendor;
+    nv40->pipe.get_param = nv40_get_param;
+    nv40->pipe.get_paramf = nv40_get_paramf;
+
+    nv40->pipe.draw_arrays = nv40_draw_arrays;
+    nv40->pipe.draw_elements = nv40_draw_elements;
+    nv40->pipe.clear = nv40_clear;
+
+    nv40->pipe.begin_query = nv40_query_begin;
+    nv40->pipe.end_query = nv40_query_end;
+    nv40->pipe.wait_query = nv40_query_wait;
+
+    nv40->pipe.mipmap_tree_layout = nv40_miptree_layout;
+
+    nv40->pipe.flush = nv40_flush;
+
+    nv40_init_region_functions(nv40);
+    nv40_init_surface_functions(nv40);
+    nv40_init_state_functions(nv40);
+
+    /* Checked explicitly: an assert() alone vanishes in NDEBUG builds. */
+    nv40->draw = draw_create();
+    if (!nv40->draw)
+        goto fail;
+    draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40));
+
+    return &nv40->pipe;
+
+fail:
+    /* query_objects is still NULL (zeroed struct) if never allocated. */
+    free(nv40->query_objects);
+    free(nv40);
+    return NULL;
+}
+
+
diff --git a/src/mesa/pipe/nv40/nv40_context.h b/src/mesa/pipe/nv40/nv40_context.h
new file mode 100644
index 0000000000..63be38299f
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_context.h
@@ -0,0 +1,111 @@
+#ifndef __NV40_CONTEXT_H__
+#define __NV40_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/draw/draw_vertex.h"
+
+#include "pipe/nouveau/nouveau_winsys.h"
+
+#include "nv40_state.h"
+
+/*
+ * Diagnostic helpers writing to stderr. NOUVEAU_ERR prefixes the message
+ * with the calling function and line, NOUVEAU_MSG with "nouveau: ".
+ *
+ * The expansions carry no trailing semicolon (the caller supplies it), so
+ * "if (x) NOUVEAU_ERR(...); else ..." parses as intended.
+ */
+#define NOUVEAU_ERR(fmt, args...) \
+ fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args)
+#define NOUVEAU_MSG(fmt, args...) \
+ fprintf(stderr, "nouveau: "fmt, ##args)
+
+#define NV40_NEW_TEXTURE (1 << 0)
+#define NV40_NEW_VERTPROG (1 << 1)
+#define NV40_NEW_FRAGPROG (1 << 2)
+#define NV40_NEW_ARRAYS (1 << 3)
+
+/*
+ * Per-context driver state. The embedded pipe_context is the first member
+ * so a struct pipe_context * can be cast to struct nv40_context *.
+ */
+struct nv40_context {
+ struct pipe_context pipe;
+ struct nouveau_winsys *nvws;
+
+ struct draw_context *draw;
+
+ int chipset;
+ /* 3D object and the notifier used to wait for a flush to complete. */
+ struct nouveau_grobj *curie;
+ struct nouveau_notifier *sync;
+ /* Write cursor into the ring; set by BEGIN_RING, advanced by OUT_RING. */
+ uint32_t *pushbuf;
+
+ /* query objects */
+ struct nouveau_notifier *query;
+ struct pipe_query_object **query_objects;
+ uint num_query_objects;
+
+ /* NOTE(review): presumably a mask of the NV40_NEW_* flags - confirm. */
+ uint32_t dirty;
+
+ struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS];
+ struct pipe_mipmap_tree *tex_miptree[PIPE_MAX_SAMPLERS];
+ uint32_t tex_dirty;
+
+ struct {
+ struct nv40_vertex_program *vp;
+ struct nv40_vertex_program *active_vp;
+
+ struct pipe_buffer_handle *constant_buf;
+ } vertprog;
+
+ struct {
+ struct nv40_fragment_program *fp;
+ /* Program most recently bound by nv40_fragprog_bind(). */
+ struct nv40_fragment_program *active_fp;
+
+ struct pipe_buffer_handle *constant_buf;
+ } fragprog;
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX];
+ struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX];
+};
+
+
+extern void nv40_init_region_functions(struct nv40_context *nv40);
+extern void nv40_init_surface_functions(struct nv40_context *nv40);
+extern void nv40_init_state_functions(struct nv40_context *nv40);
+
+/* nv40_draw.c */
+extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40);
+
+/* nv40_miptree.c */
+extern boolean nv40_miptree_layout(struct pipe_context *,
+ struct pipe_mipmap_tree *);
+
+/* nv40_vertprog.c */
+extern void nv40_vertprog_translate(struct nv40_context *,
+ struct nv40_vertex_program *);
+extern void nv40_vertprog_bind(struct nv40_context *,
+ struct nv40_vertex_program *);
+
+/* nv40_fragprog.c */
+extern void nv40_fragprog_translate(struct nv40_context *,
+ struct nv40_fragment_program *);
+extern void nv40_fragprog_bind(struct nv40_context *,
+ struct nv40_fragment_program *);
+
+/* nv40_state.c and friends */
+extern void nv40_emit_hw_state(struct nv40_context *nv40);
+extern void nv40_state_tex_update(struct nv40_context *nv40);
+
+/* nv40_vbo.c */
+extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode,
+ unsigned start, unsigned count);
+extern boolean nv40_draw_elements(struct pipe_context *pipe,
+ struct pipe_buffer_handle *indexBuffer,
+ unsigned indexSize,
+ unsigned mode, unsigned start,
+ unsigned count);
+extern void nv40_vbo_arrays_update(struct nv40_context *nv40);
+
+/* nv40_clear.c */
+extern void nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps,
+ unsigned clearValue);
+
+/* nv40_query.c */
+extern void nv40_query_begin(struct pipe_context *, struct pipe_query_object *);
+extern void nv40_query_end(struct pipe_context *, struct pipe_query_object *);
+extern void nv40_query_wait(struct pipe_context *, struct pipe_query_object *);
+
+#endif
diff --git a/src/mesa/pipe/nv40/nv40_dma.h b/src/mesa/pipe/nv40/nv40_dma.h
new file mode 100644
index 0000000000..3775ce6e72
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_dma.h
@@ -0,0 +1,62 @@
+#ifndef __NV40_DMA_H__
+#define __NV40_DMA_H__
+
+#include "pipe/nouveau/nouveau_winsys.h"
+
+/*
+ * Command submission helpers. Every macro below expands to code that uses
+ * a variable named "nv40" (struct nv40_context *) from the caller's scope.
+ */
+
+/* Start a packet of "size" dwords for method "mthd" on nv40->obj; the
+ * returned write pointer is stored in nv40->pushbuf. */
+#define BEGIN_RING(obj,mthd,size) do { \
+ nv40->pushbuf = nv40->nvws->begin_ring(nv40->obj, (mthd), (size)); \
+} while(0)
+
+/* As BEGIN_RING, with bit 30 set in the method.
+ * NOTE(review): presumably the non-incrementing method flag - confirm. */
+#define BEGIN_RING_NI(obj,mthd,size) do { \
+ BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \
+} while(0)
+
+/* Write one dword at the cursor and advance it. */
+#define OUT_RING(data) do { \
+ (*nv40->pushbuf++) = (data); \
+} while(0)
+
+/* Copy "size" dwords from src to the cursor and advance it. */
+#define OUT_RINGp(src,size) do { \
+ memcpy(nv40->pushbuf, (src), (size) * 4); \
+ nv40->pushbuf += (size); \
+} while(0)
+
+/* Write a float as its raw 32-bit pattern (converted through a union). */
+#define OUT_RINGf(data) do { \
+ union { float v; uint32_t u; } c; \
+ c.v = (data); \
+ OUT_RING(c.u); \
+} while(0)
+
+/* Submit the queued commands on the winsys channel. */
+#define FIRE_RING() do { \
+ nv40->nvws->fire_ring(nv40->nvws->channel); \
+} while(0)
+
+/* Register a relocation for the dword at the cursor, then emit a 0
+ * placeholder in that slot. */
+#define OUT_RELOC(bo,data,flags,vor,tor) do { \
+ nv40->nvws->out_reloc(nv40->nvws->channel, nv40->pushbuf, \
+ (struct nouveau_bo *)(bo), \
+ (data), (flags), (vor), (tor)); \
+ OUT_RING(0); \
+} while(0)
+
+/* Raw data + flags depending on FB/TT buffer */
+#define OUT_RELOCd(bo,data,flags,vor,tor) do { \
+ OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \
+} while(0)
+
+/* FB/TT object handle */
+#define OUT_RELOCo(bo,flags) do { \
+ OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \
+ nv40->nvws->channel->vram->handle, \
+ nv40->nvws->channel->gart->handle); \
+} while(0)
+
+/* Low 32-bits of offset */
+#define OUT_RELOCl(bo,delta,flags) do { \
+ OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \
+} while(0)
+
+/* High 32-bits of offset */
+#define OUT_RELOCh(bo,delta,flags) do { \
+ OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \
+} while(0)
+
+#endif
diff --git a/src/mesa/pipe/nv40/nv40_draw.c b/src/mesa/pipe/nv40/nv40_draw.c
new file mode 100644
index 0000000000..52ce493ea2
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_draw.c
@@ -0,0 +1,63 @@
+#include "pipe/draw/draw_private.h"
+#include "pipe/p_util.h"
+
+#include "nv40_context.h"
+
+/* Draw-module stage with a back-pointer to the owning nv40 context. */
+struct nv40_draw_stage {
+ struct draw_stage draw;
+ struct nv40_context *nv40;
+};
+
+/* Stage "begin" hook: not implemented, only logs the call site. */
+static void
+nv40_draw_begin(struct draw_stage *draw)
+{
+ NOUVEAU_ERR("\n");
+}
+
+/* Stage "end" hook: not implemented, only logs the call site. */
+static void
+nv40_draw_end(struct draw_stage *draw)
+{
+ NOUVEAU_ERR("\n");
+}
+
+/* Stage "point" hook: not implemented, only logs the call site. */
+static void
+nv40_draw_point(struct draw_stage *draw, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+/* Stage "line" hook: not implemented, only logs the call site. */
+static void
+nv40_draw_line(struct draw_stage *draw, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+/* Stage "tri" hook: not implemented, only logs the call site. */
+static void
+nv40_draw_tri(struct draw_stage *draw, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+/* Stage "reset_stipple_counter" hook: not implemented, only logs. */
+static void
+nv40_draw_reset_stipple_counter(struct draw_stage *draw)
+{
+ NOUVEAU_ERR("\n");
+}
+
+/*
+ * Allocate the draw stage for this context and hook up its (currently
+ * stubbed) callbacks.
+ *
+ * Returns the embedded draw_stage, or NULL if the allocation fails; the
+ * result was previously dereferenced without a check.
+ */
+struct draw_stage *
+nv40_draw_render_stage(struct nv40_context *nv40)
+{
+    struct nv40_draw_stage *nv40draw = CALLOC_STRUCT(nv40_draw_stage);
+
+    if (!nv40draw)
+        return NULL;
+
+    nv40draw->nv40 = nv40;
+    nv40draw->draw.draw = nv40->draw;
+    nv40draw->draw.begin = nv40_draw_begin;
+    nv40draw->draw.point = nv40_draw_point;
+    nv40draw->draw.line = nv40_draw_line;
+    nv40draw->draw.tri = nv40_draw_tri;
+    nv40draw->draw.end = nv40_draw_end;
+    nv40draw->draw.reset_stipple_counter = nv40_draw_reset_stipple_counter;
+
+    return &nv40draw->draw;
+}
+
diff --git a/src/mesa/pipe/nv40/nv40_fragprog.c b/src/mesa/pipe/nv40/nv40_fragprog.c
new file mode 100644
index 0000000000..48b783eebe
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_fragprog.c
@@ -0,0 +1,642 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/tgsi/exec/tgsi_token.h"
+#include "pipe/tgsi/exec/tgsi_parse.h"
+
+#include "nv40_context.h"
+#include "nv40_dma.h"
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 1
+#define MASK_Y 2
+#define MASK_Z 4
+#define MASK_W 8
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X
+#define DEF_CTEST NV40_FP_OP_COND_TR
+#include "nv40_shader.h"
+
+#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv40_sr_neg((s))
+#define abs(s) nv40_sr_abs((s))
+#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v)
+
+/* Pre-encoded instruction (4 dwords) for the fallback fragment program. */
+static uint32_t
+passthrough_fp_data[] = {
+ 0x01403e81, 0x1c9dc801, 0x0001c800, 0x3fe1c800
+};
+
+/*
+ * Fallback program that nv40_fragprog_bind() substitutes when the program
+ * it is asked to bind has not been translated.
+ */
+static struct nv40_fragment_program
+passthrough_fp = {
+ .pipe = NULL,
+ .translated = TRUE,
+ .insn = passthrough_fp_data,
+ .insn_len = sizeof(passthrough_fp_data) / sizeof(uint32_t),
+ .buffer = NULL,
+ .uses_kil = 0,
+ .num_regs = 2,
+};
+
+/* Transient state used while translating one TGSI fragment program. */
+struct nv40_fpc {
+ /* Program being filled in. */
+ struct nv40_fragment_program *fp;
+
+ /* TGSI input register index -> hardware input source id. */
+ uint attrib_map[PIPE_MAX_SHADER_INPUTS];
+
+ /* Highest hardware temp index referenced by the program so far. */
+ int high_temp;
+ /* Scratch temps handed out for the current TGSI instruction. */
+ int temp_temp_count;
+
+ /* TGSI output register indices declared as depth / colour. */
+ uint depth_id;
+ uint colour_id;
+
+ /* Set while encoding an instruction that has a constant source. */
+ boolean inst_has_const;
+ int inst_const_id;
+};
+
+/*
+ * Hand out the next scratch temporary for the current instruction.
+ * Scratch registers are numbered directly above fpc->high_temp.
+ */
+static INLINE struct nv40_sreg
+nv40_sr_temp(struct nv40_fpc *fpc)
+{
+    const int slot = fpc->temp_temp_count;
+
+    fpc->temp_temp_count = slot + 1;
+    return nv40_sr(0, NV40_FP_REG_TYPE_TEMP, slot + fpc->high_temp + 1);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \
+ (d), (m), (s0), (s1), (s2))
+#define tex(cc,s,o,u,d,m,s0,s1,s2) \
+ nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \
+ (d), (m), (s0), none, none)
+#define temp(fpc) nv40_sr_temp((fpc))
+
+/*
+ * Encode source operand "pos" (0..2) of the instruction at hw.
+ *
+ * The operand word is OR-ed into hw[pos + 1]. Input sources also store
+ * their index in hw[0], and the absolute-value modifier is bit (29 + pos)
+ * of hw[1]. For a constant source the instruction is flagged as carrying
+ * an inline constant and the constant's index is recorded in
+ * fpc->inst_const_id, which nv40_fp_arith() stores as the pipe_id of the
+ * constant slot. That field was previously never written, so every
+ * constant was recorded as index 0.
+ */
+static void
+emit_src(struct nv40_fpc *fpc, uint32_t *hw, int pos, struct nv40_sreg src)
+{
+    uint32_t sr = 0;
+
+    sr |= (src.type << NV40_FP_REG_TYPE_SHIFT);
+    if (src.type == NV40_FP_REG_TYPE_INPUT) {
+        hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT);
+    } else
+    if (src.type == NV40_FP_REG_TYPE_CONST) {
+        fpc->inst_has_const = TRUE;
+        fpc->inst_const_id = src.index;
+    } else
+    if (src.type == NV40_FP_REG_TYPE_TEMP) {
+        sr |= (src.index << NV40_FP_REG_SRC_SHIFT);
+    }
+
+    if (src.negate)
+        sr |= NV40_FP_REG_NEGATE;
+
+    if (src.abs)
+        hw[1] |= (1 << (29 + pos));
+
+    sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) |
+           (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) |
+           (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) |
+           (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT));
+
+    hw[pos + 1] |= sr;
+}
+
+/*
+ * Encode the destination register of the instruction at hw and update
+ * the program's bookkeeping: output index 1 marks the program as writing
+ * depth, any other output sets NV40_FP_OP_UNK0_7, and a temp destination
+ * raises fp->num_regs so that it covers the register.
+ */
+static void
+emit_dst(struct nv40_fpc *fpc, uint32_t *hw, struct nv40_sreg dst)
+{
+    struct nv40_fragment_program *prog = fpc->fp;
+
+    if (dst.output && dst.index == 1) {
+        prog->writes_depth = 1;
+    } else if (dst.output) {
+        hw[0] |= NV40_FP_OP_UNK0_7;
+    } else if (prog->num_regs < (dst.index + 1)) {
+        prog->num_regs = dst.index + 1;
+    }
+
+    hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT);
+}
+
+/*
+ * Append one instruction (4 dwords) at fp->insn[fp->insn_len].
+ *
+ * sat   non-zero to set the saturate bit
+ * op    hardware opcode (NV40_FP_OP_OPCODE_*)
+ * dst   destination register, including condition-code and scale fields
+ * mask  component write mask (MASK_*)
+ * s0-s2 source operands
+ *
+ * All fields are OR-ed into the instruction words, so the target dwords
+ * must already be zero. If any source is a constant, a further 4 dwords
+ * are reserved right after the instruction and a (pipe_id, hw_id) pair is
+ * appended to fp->consts, with hw_id being the dword offset of that slot.
+ *
+ * NOTE(review): no bound check on fp->insn or fp->consts is visible here.
+ */
+static void
+nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,
+ struct nv40_sreg dst, int mask,
+ struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
+{
+ struct nv40_fragment_program *fp = fpc->fp;
+ uint32_t *hw = &fp->insn[fp->insn_len];
+
+ /* emit_src() sets this again if a source is a constant. */
+ fpc->inst_has_const = FALSE;
+
+ if (op == NV40_FP_OP_OPCODE_KIL)
+ fp->uses_kil = TRUE;
+ hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT);
+ hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT);
+ hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT);
+
+ if (sat)
+ hw[0] |= NV40_FP_OP_OUT_SAT;
+
+ if (dst.cc_update)
+ hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE;
+ hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT);
+ hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) |
+ (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) |
+ (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) |
+ (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT));
+
+ emit_dst(fpc, hw, dst);
+ emit_src(fpc, hw, 0, s0);
+ emit_src(fpc, hw, 1, s1);
+ emit_src(fpc, hw, 2, s2);
+
+ fp->insn_len += 4;
+ if (fpc->inst_has_const) {
+ /* insn_len now points at the 4-dword slot kept for the constant. */
+ fp->consts[fp->num_consts].pipe_id = fpc->inst_const_id;
+ fp->consts[fp->num_consts].hw_id = fp->insn_len;
+ fp->num_consts++;
+ fp->insn_len += 4;
+ }
+}
+
+/*
+ * Append a texture instruction: encoded like an arithmetic instruction,
+ * with the texture unit number added to the first dword.
+ */
+static void
+nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit,
+ struct nv40_sreg dst, int mask,
+ struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
+{
+ struct nv40_fragment_program *fp = fpc->fp;
+ /* Taken before nv40_fp_arith() advances fp->insn_len, so hw keeps
+  * pointing at the first dword of the instruction emitted below. */
+ uint32_t *hw = &fp->insn[fp->insn_len];
+
+ nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
+ hw[0] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT);
+}
+
+/*
+ * Convert a TGSI source register into a hardware source operand.
+ *
+ * Inputs are remapped through fpc->attrib_map; temporaries are shifted up
+ * by one and raise fpc->high_temp when needed. Absolute value, negate and
+ * swizzle are copied from the TGSI register.
+ *
+ * An unsupported register file is logged and falls back to input 0
+ * instead of reading uninitialised locals.
+ */
+static INLINE struct nv40_sreg
+tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
+{
+    struct nv40_sreg src;
+    uint type = NV40_FP_REG_TYPE_INPUT;
+    uint index = 0;
+
+    switch (fsrc->SrcRegister.File) {
+    case TGSI_FILE_INPUT:
+        type = NV40_FP_REG_TYPE_INPUT;
+        index = fpc->attrib_map[fsrc->SrcRegister.Index];
+        break;
+    case TGSI_FILE_CONSTANT:
+        type = NV40_FP_REG_TYPE_CONST;
+        index = fsrc->SrcRegister.Index;
+        break;
+    case TGSI_FILE_TEMPORARY:
+        type = NV40_FP_REG_TYPE_TEMP;
+        index = fsrc->SrcRegister.Index + 1;
+        /* Compare as int: high_temp starts at -1, which an unsigned
+         * comparison would turn into UINT_MAX and never update. */
+        if (fpc->high_temp < (int)index)
+            fpc->high_temp = (int)index;
+        break;
+    default:
+        NOUVEAU_ERR("bad src file\n");
+        break;
+    }
+
+    src = nv40_sr(0, type, index);
+    src.abs = fsrc->SrcRegisterExtMod.Absolute;
+    src.negate = fsrc->SrcRegister.Negate;
+    src.swz[0] = fsrc->SrcRegister.SwizzleX;
+    src.swz[1] = fsrc->SrcRegister.SwizzleY;
+    src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+    src.swz[3] = fsrc->SrcRegister.SwizzleW;
+    return src;
+}
+
+/*
+ * Convert a TGSI destination register into a hardware destination.
+ *
+ * Outputs map to index 0 when the register is the declared colour output
+ * and to index 1 otherwise. Temporaries are shifted up by one and raise
+ * fpc->high_temp when needed.
+ *
+ * TGSI_FILE_NULL and unsupported files now produce a deterministic
+ * non-output register 0 instead of reading uninitialised locals.
+ * NOTE(review): confirm register 0 is an acceptable sink for NULL
+ * destinations.
+ */
+static INLINE struct nv40_sreg
+tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
+    int out = 0;
+    int idx = 0;
+
+    switch (fdst->DstRegister.File) {
+    case TGSI_FILE_OUTPUT:
+        out = 1;
+        if (fdst->DstRegister.Index == fpc->colour_id)
+            idx = 0;
+        else
+            idx = 1;
+        break;
+    case TGSI_FILE_TEMPORARY:
+        out = 0;
+        idx = fdst->DstRegister.Index + 1;
+        if (fpc->high_temp < idx)
+            fpc->high_temp = idx;
+        break;
+    case TGSI_FILE_NULL:
+        break;
+    default:
+        NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
+        break;
+    }
+
+    return nv40_sr(out, NV40_FP_REG_TYPE_TEMP, idx);
+}
+
+/* Translate a TGSI write mask into the driver's MASK_* bits. */
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+    int hw_mask = 0;
+
+    hw_mask |= (tgsi & TGSI_WRITEMASK_X) ? MASK_X : 0;
+    hw_mask |= (tgsi & TGSI_WRITEMASK_Y) ? MASK_Y : 0;
+    hw_mask |= (tgsi & TGSI_WRITEMASK_Z) ? MASK_Z : 0;
+    hw_mask |= (tgsi & TGSI_WRITEMASK_W) ? MASK_W : 0;
+
+    return hw_mask;
+}
+
+/*
+ * Translate one TGSI instruction into hardware instructions.
+ *
+ * Source operands are resolved in two passes: temporaries first, so that
+ * fpc->high_temp is final before any scratch temp is handed out, then
+ * inputs and constants. An instruction may reference only one distinct
+ * input and one distinct constant directly; further ones are first copied
+ * into scratch temps with a MOV.
+ *
+ * Returns TRUE on success, FALSE on an unsupported source file or opcode.
+ *
+ * Changes from the previous version: "unit" and the unused entries of
+ * src[] are initialised so nothing indeterminate is ever encoded, and CMP
+ * uses the fragment-program condition constant NV40_FP_OP_COND_LT (as KIL
+ * does) rather than the vertex-program one.
+ */
+static boolean
+nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
+                                const struct tgsi_full_instruction *finst)
+{
+    struct nv40_sreg src[3], dst, tmp;
+    struct nv40_sreg none = nv40_sr(0, NV40_FP_REG_TYPE_INPUT, 0);
+    int mask, sat, unit = 0;
+    int ai = -1, ci = -1;
+    int i;
+
+    if (finst->Instruction.Opcode == TGSI_OPCODE_RET)
+        return TRUE;
+
+    for (i = 0; i < 3; i++)
+        src[i] = none;
+
+    /* Pass 1: temporaries. */
+    fpc->temp_temp_count = 0;
+    for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+        const struct tgsi_full_src_register *fsrc;
+
+        fsrc = &finst->FullSrcRegisters[i];
+        if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+            src[i] = tgsi_src(fpc, fsrc);
+        }
+    }
+
+    /* Pass 2: inputs, constants and the sampler unit. */
+    for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+        const struct tgsi_full_src_register *fsrc;
+
+        fsrc = &finst->FullSrcRegisters[i];
+        switch (fsrc->SrcRegister.File) {
+        case TGSI_FILE_INPUT:
+            if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+                ai = fsrc->SrcRegister.Index;
+                src[i] = tgsi_src(fpc, fsrc);
+            } else {
+                NOUVEAU_MSG("extra src attr %d\n",
+                            fsrc->SrcRegister.Index);
+                src[i] = temp(fpc);
+                arith(fpc, 0, MOV, src[i], MASK_ALL,
+                      tgsi_src(fpc, fsrc), none, none);
+            }
+            break;
+        case TGSI_FILE_CONSTANT:
+            if (ci == -1 || ci == fsrc->SrcRegister.Index) {
+                ci = fsrc->SrcRegister.Index;
+                src[i] = tgsi_src(fpc, fsrc);
+            } else {
+                src[i] = temp(fpc);
+                arith(fpc, 0, MOV, src[i], MASK_ALL,
+                      tgsi_src(fpc, fsrc), none, none);
+            }
+            break;
+        case TGSI_FILE_TEMPORARY:
+            /* handled above */
+            break;
+        case TGSI_FILE_SAMPLER:
+            unit = fsrc->SrcRegister.Index;
+            break;
+        default:
+            NOUVEAU_ERR("bad src file\n");
+            return FALSE;
+        }
+    }
+
+    dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
+    mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+    sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
+
+    switch (finst->Instruction.Opcode) {
+    case TGSI_OPCODE_ABS:
+        arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
+        break;
+    case TGSI_OPCODE_ADD:
+        arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
+        break;
+    case TGSI_OPCODE_CMP:
+        /* dst = src2; then overwrite with src1 where src0 < 0 */
+        tmp = temp(fpc);
+        arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+        tmp.cc_update = 1;
+        arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+        dst.cc_test = NV40_FP_OP_COND_LT;
+        arith(fpc, sat, MOV, dst, mask, src[1], none, none);
+        break;
+    case TGSI_OPCODE_COS:
+        arith(fpc, sat, COS, dst, mask, src[0], none, none);
+        break;
+    case TGSI_OPCODE_DP3:
+        arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
+        break;
+    case TGSI_OPCODE_DP4:
+        arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
+        break;
+    case TGSI_OPCODE_DPH:
+        tmp = temp(fpc);
+        arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none);
+        arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
+              swz(src[1], W, W, W, W), none);
+        break;
+    case TGSI_OPCODE_DST:
+        arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
+        break;
+    case TGSI_OPCODE_EX2:
+        arith(fpc, sat, EX2, dst, mask, src[0], none, none);
+        break;
+    case TGSI_OPCODE_FLR:
+        arith(fpc, sat, FLR, dst, mask, src[0], none, none);
+        break;
+    case TGSI_OPCODE_FRC:
+        arith(fpc, sat, FRC, dst, mask, src[0], none, none);
+        break;
+    case TGSI_OPCODE_KIL:
+    case TGSI_OPCODE_KILP:
+        /*XXX: Which is NV, which is ARB kil? ARB implemented here.
+         *XXX: Don't need temp, can update CC0 without writing dst
+         */
+        tmp = temp(fpc);
+        tmp.cc_update = 1;
+        arith(fpc, 0, MOV, tmp, MASK_ALL, src[0], none, none);
+        dst.cc_test = NV40_FP_OP_COND_LT;
+        arith(fpc, 0, KIL, dst, 0, none, none, none);
+        break;
+    case TGSI_OPCODE_LG2:
+        arith(fpc, sat, LG2, dst, mask, src[0], none, none);
+        break;
+//  case TGSI_OPCODE_LIT:
+    case TGSI_OPCODE_LRP:
+        /* tmp = src2 - src0*src2; dst = src0*src1 + tmp */
+        tmp = temp(fpc);
+        arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
+        arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp);
+        break;
+    case TGSI_OPCODE_MAD:
+        arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
+        break;
+    case TGSI_OPCODE_MAX:
+        arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
+        break;
+    case TGSI_OPCODE_MIN:
+        arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
+        break;
+    case TGSI_OPCODE_MOV:
+        arith(fpc, sat, MOV, dst, mask, src[0], none, none);
+        break;
+    case TGSI_OPCODE_MUL:
+        arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
+        break;
+    case TGSI_OPCODE_POW:
+        /* dst = 2^(log2(src0.x) * src1.x) */
+        tmp = temp(fpc);
+        arith(fpc, 0, LG2, tmp, MASK_X,
+              swz(src[0], X, X, X, X), none, none);
+        arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+              swz(src[1], X, X, X, X), none);
+        arith(fpc, sat, EX2, dst, mask,
+              swz(tmp, X, X, X, X), none, none);
+        break;
+    case TGSI_OPCODE_RCP:
+        arith(fpc, sat, RCP, dst, mask, src[0], none, none);
+        break;
+    case TGSI_OPCODE_RFL:
+        tmp = temp(fpc);
+        arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none);
+        arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none);
+        arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z,
+              swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
+        arith(fpc, sat, MAD, dst, mask,
+              swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]));
+        break;
+    case TGSI_OPCODE_RSQ:
+        /* dst = 2^(-0.5 * log2(|src0.x|)) */
+        tmp = temp(fpc);
+        arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X,
+              abs(swz(src[0], X, X, X, X)), none, none);
+        arith(fpc, sat, EX2, dst, mask,
+              neg(swz(tmp, X, X, X, X)), none, none);
+        break;
+    case TGSI_OPCODE_SCS:
+        if (mask & MASK_X) {
+            arith(fpc, sat, COS, dst, MASK_X,
+                  swz(src[0], X, X, X, X), none, none);
+        }
+        if (mask & MASK_Y) {
+            arith(fpc, sat, SIN, dst, MASK_Y,
+                  swz(src[0], X, X, X, X), none, none);
+        }
+        break;
+    case TGSI_OPCODE_SIN:
+        arith(fpc, sat, SIN, dst, mask, src[0], none, none);
+        break;
+    case TGSI_OPCODE_SGE:
+        arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
+        break;
+    case TGSI_OPCODE_SLT:
+        arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
+        break;
+    case TGSI_OPCODE_SUB:
+        arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
+        break;
+    case TGSI_OPCODE_TEX:
+        tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
+        break;
+    case TGSI_OPCODE_TXB:
+        tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
+        break;
+#if 0 /* XXX: reimplement on top of TEX */
+    case TGSI_OPCODE_TXP:
+        tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
+        break;
+#endif
+    case TGSI_OPCODE_XPD:
+        /* tmp = src0.zxy * src1.yzx; dst = src0.yzx * src1.zxy - tmp */
+        tmp = temp(fpc);
+        arith(fpc, 0, MUL, tmp, mask,
+              swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+        arith(fpc, sat, MAD, dst, (mask & ~MASK_W),
+              swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+              neg(tmp));
+        break;
+    default:
+        NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+/*
+ * Record the hardware input source for a declared fragment input.
+ *
+ * The hardware id is chosen from the declaration's semantic name/index
+ * and stored in fpc->attrib_map at the first register of the declared
+ * range. Returns FALSE (after logging) for unsupported semantics.
+ */
+static boolean
+nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
+                                const struct tgsi_full_declaration *fdec)
+{
+    int hw;
+
+    switch (fdec->Semantic.SemanticName) {
+    case TGSI_SEMANTIC_POSITION:
+        hw = NV40_FP_OP_INPUT_SRC_POSITION;
+        break;
+    case TGSI_SEMANTIC_COLOR:
+        switch (fdec->Semantic.SemanticIndex) {
+        case 0:
+            hw = NV40_FP_OP_INPUT_SRC_COL0;
+            break;
+        case 1:
+            hw = NV40_FP_OP_INPUT_SRC_COL1;
+            break;
+        default:
+            NOUVEAU_ERR("bad colour semantic index\n");
+            return FALSE;
+        }
+        break;
+    case TGSI_SEMANTIC_FOG:
+        hw = NV40_FP_OP_INPUT_SRC_FOGC;
+        break;
+    case TGSI_SEMANTIC_GENERIC:
+        if (fdec->Semantic.SemanticIndex > 7) {
+            NOUVEAU_ERR("bad generic semantic index\n");
+            return FALSE;
+        }
+        hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic.SemanticIndex);
+        break;
+    default:
+        NOUVEAU_ERR("bad input semantic\n");
+        return FALSE;
+    }
+
+    fpc->attrib_map[fdec->u.DeclarationRange.First] = hw;
+    return TRUE;
+}
+
+/*
+ * Remember which TGSI output register carries depth (POSITION semantic)
+ * and which carries colour. Any other output semantic is logged and
+ * rejected with FALSE.
+ */
+static boolean
+nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc,
+                                const struct tgsi_full_declaration *fdec)
+{
+    if (fdec->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+        fpc->depth_id = fdec->u.DeclarationRange.First;
+        return TRUE;
+    }
+
+    if (fdec->Semantic.SemanticName == TGSI_SEMANTIC_COLOR) {
+        fpc->colour_id = fdec->u.DeclarationRange.First;
+        return TRUE;
+    }
+
+    NOUVEAU_ERR("bad output semantic\n");
+    return FALSE;
+}
+
+/*
+ * Translate the TGSI tokens of fp->pipe into hardware instructions.
+ *
+ * On success fp->insn holds the encoded program, fp->translated is TRUE
+ * and fp->on_hw is FALSE. On any failure fp->translated is left
+ * untouched, so nv40_fragprog_bind() falls back to the passthrough
+ * program.
+ *
+ * Changes from the previous version: the instruction buffer allocation is
+ * checked, and a token stream that produced no instruction is rejected
+ * instead of indexing fp->insn before its start.
+ */
+void
+nv40_fragprog_translate(struct nv40_context *nv40,
+                        struct nv40_fragment_program *fp)
+{
+    struct tgsi_parse_context parse;
+    struct nv40_fpc *fpc;
+
+    fpc = calloc(1, sizeof(struct nv40_fpc));
+    if (!fpc)
+        return;
+
+    /* Zeroed on purpose: the encoder OR-s fields into these dwords. */
+    fp->insn = calloc(1, 128*4*sizeof(uint32_t));
+    if (!fp->insn) {
+        free(fpc);
+        return;
+    }
+    fpc->fp = fp;
+    fpc->high_temp = -1;
+    fp->num_regs = 2;
+
+    tgsi_parse_init(&parse, fp->pipe->tokens);
+
+    while (!tgsi_parse_end_of_tokens(&parse)) {
+        tgsi_parse_token(&parse);
+
+        switch (parse.FullToken.Token.Type) {
+        case TGSI_TOKEN_TYPE_DECLARATION:
+        {
+            const struct tgsi_full_declaration *fdec;
+            fdec = &parse.FullToken.FullDeclaration;
+            switch (fdec->Declaration.File) {
+            case TGSI_FILE_INPUT:
+                if (!nv40_fragprog_parse_decl_attrib(fpc, fdec))
+                    goto out_err;
+                break;
+            case TGSI_FILE_OUTPUT:
+                if (!nv40_fragprog_parse_decl_output(fpc, fdec))
+                    goto out_err;
+                break;
+            default:
+                break;
+            }
+        }
+            break;
+        case TGSI_TOKEN_TYPE_IMMEDIATE:
+            break;
+        case TGSI_TOKEN_TYPE_INSTRUCTION:
+        {
+            const struct tgsi_full_instruction *finst;
+
+            finst = &parse.FullToken.FullInstruction;
+            if (!nv40_fragprog_parse_instruction(fpc, finst))
+                goto out_err;
+        }
+            break;
+        default:
+            break;
+        }
+    }
+
+    /* Nothing was emitted: there is no last instruction to flag. */
+    if (fp->insn_len < 4) {
+        NOUVEAU_ERR("empty fragment program\n");
+        goto out_err;
+    }
+
+    /*
+     * Set bit 0 in the first dword of the last instruction, which sits 8
+     * dwords back when that instruction is followed by a constant slot.
+     * NOTE(review): presumably the end-of-program marker - confirm.
+     */
+    if (fpc->inst_has_const == FALSE)
+        fp->insn[fp->insn_len - 4] |= 0x00000001;
+    else
+        fp->insn[fp->insn_len - 8] |= 0x00000001;
+    fp->insn[fp->insn_len++] = 0x00000001;
+
+    fp->translated = TRUE;
+    fp->on_hw = FALSE;
+out_err:
+    tgsi_parse_free(&parse);
+    free(fpc);
+}
+
+/*
+ * Make fp the active fragment program.
+ *
+ * An untranslated program is replaced by the built-in passthrough
+ * program. The instructions are uploaded to a winsys buffer the first
+ * time the program is bound (fp->on_hw is FALSE), then the program
+ * address and control word are queued on the ring.
+ *
+ * NOTE(review): the result of buffer_create() is not checked.
+ */
+void
+nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp)
+{
+ struct pipe_winsys *ws = nv40->pipe.winsys;
+ uint32_t fp_control;
+
+ if (!fp->translated) {
+ NOUVEAU_ERR("fragprog invalid, using passthrough shader\n");
+ fp = &passthrough_fp;
+ }
+
+ if (!fp->on_hw) {
+ /* NOTE(review): 0x100 is presumably an alignment - confirm. */
+ if (!fp->buffer)
+ fp->buffer = ws->buffer_create(ws, 0x100);
+
+ nv40->pipe.winsys->buffer_data(nv40->pipe.winsys, fp->buffer,
+ fp->insn_len * sizeof(uint32_t),
+ fp->insn,
+ PIPE_BUFFER_USAGE_PIXEL);
+ fp->on_hw = TRUE;
+ }
+
+ /* Control word: temp register count plus KIL / depth-write flags. */
+ fp_control = fp->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT;
+ if (fp->uses_kil)
+ fp_control |= NV40TCL_FP_CONTROL_KIL;
+ if (fp->writes_depth)
+ fp_control |= 0xe;
+
+ BEGIN_RING(curie, NV40TCL_FP_ADDRESS, 1);
+ OUT_RELOC (fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
+ NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1);
+ BEGIN_RING(curie, NV40TCL_FP_CONTROL, 1);
+ OUT_RING (fp_control);
+
+ nv40->fragprog.active_fp = fp;
+}
+
diff --git a/src/mesa/pipe/nv40/nv40_miptree.c b/src/mesa/pipe/nv40/nv40_miptree.c
new file mode 100644
index 0000000000..6b85823d8c
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_miptree.c
@@ -0,0 +1,60 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_util.h"
+
+#include "nv40_context.h"
+
+/* Compute the per-level layout of a mipmap tree.
+ *
+ * For every level from first_level to last_level this records the level's
+ * dimensions and byte offset, allocates its image_offset[] array and fills
+ * it in, and accumulates mt->total_height.  mt->pitch is set to width0.
+ *
+ * Returns TRUE on success.  Returns FALSE if an image_offset[] allocation
+ * fails; in that case every array allocated by this call has been freed
+ * and its pointer reset to NULL.
+ */
+boolean
+nv40_miptree_layout(struct pipe_context *pipe, struct pipe_mipmap_tree *mt)
+{
+	uint width, height, depth, offset;
+	boolean swizzled = FALSE;
+	int l;
+
+	(void)pipe;
+
+	mt->pitch = mt->width0;
+	mt->total_height = 0;
+
+	width = mt->width0;
+	height = mt->height0;
+	depth = mt->depth0;
+	offset = 0;
+	for (l = mt->first_level; l <= mt->last_level; l++) {
+		uint pitch, f;
+
+		mt->level[l].width = width;
+		mt->level[l].height = height;
+		mt->level[l].depth = depth;
+		mt->level[l].level_offset = offset;
+
+		/* Non-swizzled layouts use the base level's width as the
+		 * pitch for every level.
+		 */
+		if (!swizzled)
+			pitch = mt->width0;
+		else
+			pitch = width;
+
+		/* NOTE(review): 3D textures get a fixed count of 3 images
+		 * per level here; confirm whether 'depth' was intended.
+		 */
+		if (mt->target == PIPE_TEXTURE_CUBE)
+			mt->level[l].nr_images = 6;
+		else
+		if (mt->target == PIPE_TEXTURE_3D)
+			mt->level[l].nr_images = 3;
+		else
+			mt->level[l].nr_images = 1;
+
+		mt->level[l].image_offset =
+			malloc(mt->level[l].nr_images * sizeof(unsigned));
+		if (!mt->level[l].image_offset) {
+			int done;
+
+			/* Release what this call allocated so far. */
+			for (done = mt->first_level; done < l; done++) {
+				free(mt->level[done].image_offset);
+				mt->level[done].image_offset = NULL;
+			}
+			return FALSE;
+		}
+
+		for (f = 0; f < mt->level[l].nr_images; f++) {
+			/* Stored relative to the level start, divided by cpp. */
+			mt->level[l].image_offset[f] =
+				(offset - mt->level[l].level_offset) / mt->cpp;
+			mt->total_height += height;
+
+			offset += (pitch * mt->cpp * height);
+		}
+
+		width = MAX2(1, width >> 1);
+		height = MAX2(1, height >> 1);
+		depth = MAX2(1, depth >> 1);
+	}
+
+	return TRUE;
+}
+
diff --git a/src/mesa/pipe/nv40/nv40_query.c b/src/mesa/pipe/nv40/nv40_query.c
new file mode 100644
index 0000000000..efd81e6640
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_query.c
@@ -0,0 +1,98 @@
+#include "pipe/p_context.h"
+
+#include "nv40_context.h"
+#include "nv40_dma.h"
+
+/* Return the index of 'q' in nv40->query_objects[], or -1 if it is not
+ * present.  Passing q == NULL finds the first unused entry.
+ *
+ * The return type is signed: callers store the result in an int and
+ * assert that it is >= 0, which an unsigned return type cannot express
+ * cleanly for the -1 "not found" value.
+ */
+static int
+nv40_query_object_find(struct nv40_context *nv40, struct pipe_query_object *q)
+{
+	int id;
+
+	for (id = 0; id < nv40->num_query_objects; id++) {
+		if (nv40->query_objects[id] == q)
+			return id;
+	}
+
+	return -1;
+}
+
+/* Start an occlusion query: claim a free entry in query_objects[],
+ * reset its notifier and emit the query reset methods.
+ */
+void
+nv40_query_begin(struct pipe_context *pipe, struct pipe_query_object *q)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+	int slot;
+
+	assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+	/* Looking up NULL yields the first unused entry. */
+	slot = nv40_query_object_find(nv40, NULL);
+	assert(slot >= 0);
+	nv40->query_objects[slot] = q;
+
+	nv40->nvws->notifier_reset(nv40->query, slot);
+	q->ready = 0;
+
+	BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1);
+	OUT_RING  (1);
+	BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1);
+	OUT_RING  (1);
+}
+
+/* Poll the notifier belonging to 'q'.  When its status reads 0 the query
+ * is marked ready, its count is read back and its query_objects[] entry
+ * is released.  Otherwise 'q' is left untouched.
+ */
+static void
+nv40_query_update(struct pipe_context *pipe, struct pipe_query_object *q)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+	int slot = nv40_query_object_find(nv40, q);
+
+	assert(slot >= 0);
+
+	if (nv40->nvws->notifier_status(nv40->query, slot) != 0)
+		return;
+
+	q->ready = 1;
+	q->count = nv40->nvws->notifier_retval(nv40->query, slot);
+	nv40->query_objects[slot] = NULL;
+}
+
+/* Finish a query: emit QUERY_GET for the query's notifier entry (32 bytes
+ * per entry), fire the ring, then block until the result is available.
+ */
+void
+nv40_query_end(struct pipe_context *pipe, struct pipe_query_object *q)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+	int slot = nv40_query_object_find(nv40, q);
+
+	assert(slot >= 0);
+
+	BEGIN_RING(curie, NV40TCL_QUERY_GET, 1);
+	OUT_RING  ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) |
+		   ((slot * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT));
+	FIRE_RING ();
+
+	/*XXX: Some apps spin waiting for GL_QUERY_RESULT_AVAILABLE_ARB.
+	 *     Core mesa won't ask the driver to update the query object's
+	 *     status in that case, so the app would wait forever.  Until
+	 *     that is fixed, wait for the result here instead of polling.
+	 */
+#if 0
+	nv40_query_update(pipe, q);
+#else
+	nv40_query_wait(pipe, q);
+#endif
+}
+
+/* Block until the result of 'q' is available.  The query is polled once
+ * first; only if it is not ready yet do we wait on its notifier and poll
+ * again.
+ */
+void
+nv40_query_wait(struct pipe_context *pipe, struct pipe_query_object *q)
+{
+	struct nv40_context *nv40;
+	int slot;
+
+	nv40_query_update(pipe, q);
+	if (q->ready)
+		return;
+
+	nv40 = (struct nv40_context *)pipe;
+	slot = nv40_query_object_find(nv40, q);
+	assert(slot >= 0);
+
+	nv40->nvws->notifier_wait(nv40->query, slot, 0, 0);
+	nv40_query_update(pipe, q);
+	assert(q->ready);
+}
+
diff --git a/src/mesa/pipe/nv40/nv40_region.c b/src/mesa/pipe/nv40/nv40_region.c
new file mode 100644
index 0000000000..f62bf89d18
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_region.c
@@ -0,0 +1,85 @@
+#include "pipe/p_defines.h"
+#include "pipe/p_winsys.h"
+
+#include "nv40_context.h"
+#include "nv40_dma.h"
+
+/* Map a region for CPU access.  Mappings are reference counted: the
+ * buffer is only mapped (read+write) on the 0 -> 1 transition, later
+ * calls return the cached pointer.
+ */
+static ubyte *
+nv40_region_map(struct pipe_context *pipe, struct pipe_region *region)
+{
+	struct pipe_winsys *ws = ((struct nv40_context *)pipe)->pipe.winsys;
+
+	if (region->map_refcount++ == 0) {
+		region->map = ws->buffer_map(ws, region->buffer,
+					     PIPE_BUFFER_FLAG_WRITE |
+					     PIPE_BUFFER_FLAG_READ);
+	}
+
+	return region->map;
+}
+
+/* Drop one mapping reference; the buffer is unmapped and the cached
+ * pointer cleared when the count reaches zero.
+ */
+static void
+nv40_region_unmap(struct pipe_context *pipe, struct pipe_region *region)
+{
+	struct pipe_winsys *ws = ((struct nv40_context *)pipe)->pipe.winsys;
+
+	if (--region->map_refcount != 0)
+		return;
+
+	ws->buffer_unmap(ws, region->buffer);
+	region->map = NULL;
+}
+
+/* Forward a region upload to the nouveau winsys' region_data hook;
+ * all arguments are passed through unchanged.
+ */
+static void
+nv40_region_data(struct pipe_context *pipe,
+		 struct pipe_region *dst,
+		 unsigned dst_offset,
+		 unsigned dstx, unsigned dsty,
+		 const void *src, unsigned src_pitch,
+		 unsigned srcx, unsigned srcy, unsigned width, unsigned height)
+{
+	struct nouveau_winsys *nvws = ((struct nv40_context *)pipe)->nvws;
+
+	nvws->region_data(nvws->nv, dst, dst_offset, dstx, dsty,
+			  src, src_pitch, srcx, srcy, width, height);
+}
+
+
+/* Forward a region-to-region copy to the nouveau winsys' region_copy
+ * hook; all arguments are passed through unchanged.
+ */
+static void
+nv40_region_copy(struct pipe_context *pipe, struct pipe_region *dst,
+		 unsigned dst_offset, unsigned dstx, unsigned dsty,
+		 struct pipe_region *src, unsigned src_offset,
+		 unsigned srcx, unsigned srcy, unsigned width, unsigned height)
+{
+	struct nouveau_winsys *nvws = ((struct nv40_context *)pipe)->nvws;
+
+	nvws->region_copy(nvws->nv, dst, dst_offset, dstx, dsty,
+			  src, src_offset, srcx, srcy, width, height);
+}
+
+/* Forward a region fill to the nouveau winsys' region_fill hook;
+ * all arguments are passed through unchanged.
+ */
+static void
+nv40_region_fill(struct pipe_context *pipe,
+		 struct pipe_region *dst, unsigned dst_offset,
+		 unsigned dstx, unsigned dsty,
+		 unsigned width, unsigned height, unsigned value)
+{
+	struct nouveau_winsys *nvws = ((struct nv40_context *)pipe)->nvws;
+
+	nvws->region_fill(nvws->nv, dst, dst_offset, dstx, dsty,
+			  width, height, value);
+}
+
+/* Install this file's region callbacks into the context's pipe vtable. */
+void
+nv40_init_region_functions(struct nv40_context *nv40)
+{
+	/* CPU access */
+	nv40->pipe.region_map	= nv40_region_map;
+	nv40->pipe.region_unmap	= nv40_region_unmap;
+
+	/* transfers, delegated to the nouveau winsys */
+	nv40->pipe.region_data	= nv40_region_data;
+	nv40->pipe.region_copy	= nv40_region_copy;
+	nv40->pipe.region_fill	= nv40_region_fill;
+}
+
diff --git a/src/mesa/pipe/nv40/nv40_shader.h b/src/mesa/pipe/nv40/nv40_shader.h
new file mode 100644
index 0000000000..5b2cf3e293
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_shader.h
@@ -0,0 +1,546 @@
+#ifndef __NV40_SHADER_H__
+#define __NV40_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * The NV40 instruction set is very similar to NV30. Most fields are in
+ * a slightly different position in the instruction however.
+ *
+ * Merged instructions
+ * In some cases it is possible to put two instructions into one opcode
+ * slot. The rules for when this is OK are not entirely clear to me yet.
+ *
+ * There are separate writemasks and dest temp register fields for each
+ * grouping of instructions. There is however only one field with the
+ * ID of a result register. Writing to temp/result regs is selected by
+ * setting VEC_RESULT/SCA_RESULT.
+ *
+ * Temporary registers
+ * The source/dest temp register fields have been extended by 1 bit, to
+ * give a total of 32 temporary registers.
+ *
+ * Relative Addressing
+ * NV40 can use an address register to index into vertex attribute regs.
+ * This is done by putting the offset value into INPUT_SRC and setting
+ * the INDEX_INPUT flag.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details)
+ * There is a second condition code register on NV40; its use is enabled
+ * by setting the COND_REG_SELECT_1 flag.
+ *
+ * Texture lookup
+ * TODO
+ */
+
+/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */
+#define NV40_VP_INST_VEC_RESULT (1 << 30)
+/* uncertain.. */
+#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29)
+/* use address reg as index into attribs */
+#define NV40_VP_INST_INDEX_INPUT (1 << 27)
+#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25)
+#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
+#define NV40_VP_INST_SRC2_ABS (1 << 23)
+#define NV40_VP_INST_SRC1_ABS (1 << 22)
+#define NV40_VP_INST_SRC0_ABS (1 << 21)
+#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15
+#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15)
+#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13)
+#define NV40_VP_INST_COND_SHIFT 10
+#define NV40_VP_INST_COND_MASK (0x7 << 10)
+# define NV40_VP_INST_COND_FL 0
+# define NV40_VP_INST_COND_LT 1
+# define NV40_VP_INST_COND_EQ 2
+# define NV40_VP_INST_COND_LE 3
+# define NV40_VP_INST_COND_GT 4
+# define NV40_VP_INST_COND_NE 5
+# define NV40_VP_INST_COND_GE 6
+# define NV40_VP_INST_COND_TR 7
+#define NV40_VP_INST_COND_SWZ_X_SHIFT 8
+#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8)
+#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6
+#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6)
+#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4
+#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4)
+#define NV40_VP_INST_COND_SWZ_W_SHIFT 2
+#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2)
+#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2
+#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2)
+#define NV40_VP_INST_ADDR_SWZ_SHIFT 0
+#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0)
+#define NV40_VP_INST0_KNOWN ( \
+ NV40_VP_INST_INDEX_INPUT | \
+ NV40_VP_INST_COND_REG_SELECT_1 | \
+ NV40_VP_INST_ADDR_REG_SELECT_1 | \
+ NV40_VP_INST_SRC2_ABS | \
+ NV40_VP_INST_SRC1_ABS | \
+ NV40_VP_INST_SRC0_ABS | \
+ NV40_VP_INST_VEC_DEST_TEMP_MASK | \
+ NV40_VP_INST_COND_TEST_ENABLE | \
+ NV40_VP_INST_COND_MASK | \
+ NV40_VP_INST_COND_SWZ_ALL_MASK | \
+ NV40_VP_INST_ADDR_SWZ_MASK)
+
+/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */
+#define NV40_VP_INST_VEC_OPCODE_SHIFT 22
+#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22)
+# define NV40_VP_INST_OP_NOP 0x00
+# define NV40_VP_INST_OP_MOV 0x01
+# define NV40_VP_INST_OP_MUL 0x02
+# define NV40_VP_INST_OP_ADD 0x03
+# define NV40_VP_INST_OP_MAD 0x04
+# define NV40_VP_INST_OP_DP3 0x05
+# define NV40_VP_INST_OP_DP4 0x07
+# define NV40_VP_INST_OP_DPH 0x06
+# define NV40_VP_INST_OP_DST 0x08
+# define NV40_VP_INST_OP_MIN 0x09
+# define NV40_VP_INST_OP_MAX 0x0A
+# define NV40_VP_INST_OP_SLT 0x0B
+# define NV40_VP_INST_OP_SGE 0x0C
+# define NV40_VP_INST_OP_ARL 0x0D
+# define NV40_VP_INST_OP_FRC 0x0E
+# define NV40_VP_INST_OP_FLR 0x0F
+# define NV40_VP_INST_OP_SEQ 0x10
+# define NV40_VP_INST_OP_SFL 0x11
+# define NV40_VP_INST_OP_SGT 0x12
+# define NV40_VP_INST_OP_SLE 0x13
+# define NV40_VP_INST_OP_SNE 0x14
+# define NV40_VP_INST_OP_STR 0x15
+# define NV40_VP_INST_OP_SSG 0x16
+# define NV40_VP_INST_OP_ARR 0x17
+# define NV40_VP_INST_OP_ARA 0x18
+# define NV40_VP_INST_OP_TXWHAT 0x19
+#define NV40_VP_INST_SCA_OPCODE_SHIFT 27
+#define NV40_VP_INST_SCA_OPCODE_MASK (0x1Fu << 27) /* unsigned: 0x1F << 27 does not fit in a signed int */
+# define NV40_VP_INST_OP_RCP 0x02
+# define NV40_VP_INST_OP_RCC 0x03
+# define NV40_VP_INST_OP_RSQ 0x04
+# define NV40_VP_INST_OP_EXP 0x05
+# define NV40_VP_INST_OP_LOG 0x06
+# define NV40_VP_INST_OP_LIT 0x07
+# define NV40_VP_INST_OP_BRA 0x09
+# define NV40_VP_INST_OP_CAL 0x0B
+# define NV40_VP_INST_OP_RET 0x0C
+# define NV40_VP_INST_OP_LG2 0x0D
+# define NV40_VP_INST_OP_EX2 0x0E
+# define NV40_VP_INST_OP_SIN 0x0F
+# define NV40_VP_INST_OP_COS 0x10
+# define NV40_VP_INST_OP_PUSHA 0x13
+# define NV40_VP_INST_OP_POPA 0x14
+#define NV40_VP_INST_CONST_SRC_SHIFT 12
+#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12)
+#define NV40_VP_INST_INPUT_SRC_SHIFT 8
+#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8)
+# define NV40_VP_INST_IN_POS 0
+# define NV40_VP_INST_IN_WEIGHT 1
+# define NV40_VP_INST_IN_NORMAL 2
+# define NV40_VP_INST_IN_COL0 3
+# define NV40_VP_INST_IN_COL1 4
+# define NV40_VP_INST_IN_FOGC 5
+# define NV40_VP_INST_IN_TC0 8
+# define NV40_VP_INST_IN_TC(n) (8+(n)) /* argument parenthesized so expressions such as (i << 1) expand correctly */
+#define NV40_VP_INST_SRC0H_SHIFT 0
+#define NV40_VP_INST_SRC0H_MASK (0xFF << 0)
+#define NV40_VP_INST1_KNOWN ( \
+ NV40_VP_INST_VEC_OPCODE_MASK | \
+ NV40_VP_INST_SCA_OPCODE_MASK | \
+ NV40_VP_INST_CONST_SRC_MASK | \
+ NV40_VP_INST_INPUT_SRC_MASK | \
+ NV40_VP_INST_SRC0H_MASK \
+ )
+
+/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */
+#define NV40_VP_INST_SRC0L_SHIFT 23
+#define NV40_VP_INST_SRC0L_MASK (0x1FFu << 23) /* unsigned: 0x1FF << 23 does not fit in a signed int */
+#define NV40_VP_INST_SRC1_SHIFT 6
+#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6)
+#define NV40_VP_INST_SRC2H_SHIFT 0
+#define NV40_VP_INST_SRC2H_MASK (0x3F << 0)
+#define NV40_VP_INST_IADDRH_SHIFT 0
+#define NV40_VP_INST_IADDRH_MASK (0x1F << 0)
+
+/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */
+#define NV40_VP_INST_IADDRL_SHIFT 29
+#define NV40_VP_INST_IADDRL_MASK (7u << 29) /* unsigned: 7 << 29 does not fit in a signed int */
+#define NV40_VP_INST_SRC2L_SHIFT 21
+#define NV40_VP_INST_SRC2L_MASK (0x7FFu << 21) /* unsigned: 0x7FF << 21 does not fit in a signed int */
+#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17
+#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17)
+# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20)
+# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19)
+# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18)
+# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17)
+#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13
+#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13)
+# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16)
+# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15)
+# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14)
+# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13)
+#define NV40_VP_INST_SCA_RESULT (1 << 12)
+#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7
+#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7)
+#define NV40_VP_INST_DEST_SHIFT 2
+#define NV40_VP_INST_DEST_MASK (31 << 2)
+# define NV40_VP_INST_DEST_POS 0
+# define NV40_VP_INST_DEST_COL0 1
+# define NV40_VP_INST_DEST_COL1 2
+# define NV40_VP_INST_DEST_BFC0 3
+# define NV40_VP_INST_DEST_BFC1 4
+# define NV40_VP_INST_DEST_FOGC 5
+# define NV40_VP_INST_DEST_PSZ 6
+# define NV40_VP_INST_DEST_TC0 7
+# define NV40_VP_INST_DEST_TC(n) (7+(n)) /* argument parenthesized so expressions such as (i << 1) expand correctly */
+# define NV40_VP_INST_DEST_TEMP 0x1F
+#define NV40_VP_INST_INDEX_CONST (1 << 1)
+#define NV40_VP_INST_LAST (1 << 0)
+#define NV40_VP_INST3_KNOWN ( \
+ NV40_VP_INST_SRC2L_MASK |\
+ NV40_VP_INST_SCA_WRITEMASK_MASK |\
+ NV40_VP_INST_VEC_WRITEMASK_MASK |\
+ NV40_VP_INST_SCA_DEST_TEMP_MASK |\
+ NV40_VP_INST_DEST_MASK |\
+ NV40_VP_INST_INDEX_CONST)
+
+/* Useful to split the source selection regs into their pieces */
+#define NV40_VP_SRC0_HIGH_SHIFT 9
+#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00
+#define NV40_VP_SRC0_LOW_MASK 0x000001FF
+#define NV40_VP_SRC2_HIGH_SHIFT 11
+#define NV40_VP_SRC2_HIGH_MASK 0x0001F800
+#define NV40_VP_SRC2_LOW_MASK 0x000007FF
+
+/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */
+#define NV40_VP_SRC_NEGATE (1 << 16)
+#define NV40_VP_SRC_SWZ_X_SHIFT 14
+#define NV40_VP_SRC_SWZ_X_MASK (3 << 14)
+#define NV40_VP_SRC_SWZ_Y_SHIFT 12
+#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12)
+#define NV40_VP_SRC_SWZ_Z_SHIFT 10
+#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10)
+#define NV40_VP_SRC_SWZ_W_SHIFT 8
+#define NV40_VP_SRC_SWZ_W_MASK (3 << 8)
+#define NV40_VP_SRC_SWZ_ALL_SHIFT 8
+#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8)
+#define NV40_VP_SRC_TEMP_SRC_SHIFT 2
+#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2)
+#define NV40_VP_SRC_REG_TYPE_SHIFT 0
+#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0)
+# define NV40_VP_SRC_REG_TYPE_UNK0 0
+# define NV40_VP_SRC_REG_TYPE_TEMP 1
+# define NV40_VP_SRC_REG_TYPE_INPUT 2
+# define NV40_VP_SRC_REG_TYPE_CONST 3
+
+
+/*
+ * Each fragment program opcode appears to be comprised of 4 32-bit values.
+ *
+ * 0 - Opcode, output reg/mask, ATTRIB source
+ * 1 - Source 0
+ * 2 - Source 1
+ * 3 - Source 2
+ *
+ * There appears to be no special difference between result regs and temp regs.
+ * result.color == R0.xyzw
+ * result.depth == R1.z
+ * When the fragprog contains instructions to write depth,
+ * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1.
+ *
+ * Constants are inserted directly after the instruction that uses them.
+ *
+ * It appears that it's not possible to use two input registers in one
+ * instruction as the input sourcing is done in the instruction dword
+ * and not the source selection dwords. As such instructions such as:
+ *
+ * ADD result.color, fragment.color, fragment.texcoord[0];
+ *
+ * must be split into two MOV's and then an ADD (nvidia does this) but
+ * I'm not sure why it's not just one MOV and then source the second input
+ * in the ADD instruction..
+ *
+ * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary
+ * negation requires multiplication with a const.
+ *
+ * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and
+ * SWIZZLE_ONE.
+ *
+ * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as
+ * SWIZZLE_ZERO is implemented simply by not writing to the relevant components
+ * of the destination.
+ *
+ * Looping
+ * Loops appear to be fairly expensive on NV40 at least, the proprietary
+ * driver goes to a lot of effort to avoid using the native looping
+ * instructions. If the total number of *executed* instructions between
+ * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop.
+ * The maximum loop count is 255.
+ *
+ * Conditional execution
+ * TODO
+ *
+ * Non-native instructions:
+ * LIT
+ * LRP - MAD+MAD
+ * SUB - ADD, negate second source
+ * RSQ - LG2 + EX2
+ * POW - LG2 + MUL + EX2
+ * SCS - COS + SIN
+ * XPD
+ * DP2 - MUL + ADD
+ * NRM
+ */
+
+//== Opcode / Destination selection ==
+#define NV40_FP_OP_PROGRAM_END (1 << 0)
+#define NV40_FP_OP_OUT_REG_SHIFT 1
+#define NV40_FP_OP_OUT_REG_MASK (31 << 1)
+/* Needs to be set when writing outputs to get expected result.. */
+#define NV40_FP_OP_UNK0_7 (1 << 7)
+#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8)
+#define NV40_FP_OP_OUTMASK_SHIFT 9
+#define NV40_FP_OP_OUTMASK_MASK (0xF << 9)
+# define NV40_FP_OP_OUT_X (1 << 9)
+# define NV40_FP_OP_OUT_Y (1 <<10)
+# define NV40_FP_OP_OUT_Z (1 <<11)
+# define NV40_FP_OP_OUT_W (1 <<12)
+/* Uncertain about these, especially the input_src values.. it's possible that
+ * they can be dynamically changed.
+ */
+#define NV40_FP_OP_INPUT_SRC_SHIFT 13
+#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13)
+# define NV40_FP_OP_INPUT_SRC_POSITION 0x0
+# define NV40_FP_OP_INPUT_SRC_COL0 0x1
+# define NV40_FP_OP_INPUT_SRC_COL1 0x2
+# define NV40_FP_OP_INPUT_SRC_FOGC 0x3
+# define NV40_FP_OP_INPUT_SRC_TC0 0x4
+# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + (n)) /* argument parenthesized so expressions such as (i << 1) expand correctly */
+# define NV40_FP_OP_INPUT_SRC_FACING 0xE
+#define NV40_FP_OP_TEX_UNIT_SHIFT 17
+#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17)
+#define NV40_FP_OP_PRECISION_SHIFT 22
+#define NV40_FP_OP_PRECISION_MASK (3 << 22)
+# define NV40_FP_PRECISION_FP32 0
+# define NV40_FP_PRECISION_FP16 1
+# define NV40_FP_PRECISION_FX12 2
+#define NV40_FP_OP_OPCODE_SHIFT 24
+#define NV40_FP_OP_OPCODE_MASK (0x3F << 24)
+# define NV40_FP_OP_OPCODE_NOP 0x00
+# define NV40_FP_OP_OPCODE_MOV 0x01
+# define NV40_FP_OP_OPCODE_MUL 0x02
+# define NV40_FP_OP_OPCODE_ADD 0x03
+# define NV40_FP_OP_OPCODE_MAD 0x04
+# define NV40_FP_OP_OPCODE_DP3 0x05
+# define NV40_FP_OP_OPCODE_DP4 0x06
+# define NV40_FP_OP_OPCODE_DST 0x07
+# define NV40_FP_OP_OPCODE_MIN 0x08
+# define NV40_FP_OP_OPCODE_MAX 0x09
+# define NV40_FP_OP_OPCODE_SLT 0x0A
+# define NV40_FP_OP_OPCODE_SGE 0x0B
+# define NV40_FP_OP_OPCODE_SLE 0x0C
+# define NV40_FP_OP_OPCODE_SGT 0x0D
+# define NV40_FP_OP_OPCODE_SNE 0x0E
+# define NV40_FP_OP_OPCODE_SEQ 0x0F
+# define NV40_FP_OP_OPCODE_FRC 0x10
+# define NV40_FP_OP_OPCODE_FLR 0x11
+# define NV40_FP_OP_OPCODE_KIL 0x12
+# define NV40_FP_OP_OPCODE_PK4B 0x13
+# define NV40_FP_OP_OPCODE_UP4B 0x14
+/* DDX/DDY can only write to XY */
+# define NV40_FP_OP_OPCODE_DDX 0x15
+# define NV40_FP_OP_OPCODE_DDY 0x16
+# define NV40_FP_OP_OPCODE_TEX 0x17
+# define NV40_FP_OP_OPCODE_TXP 0x18
+# define NV40_FP_OP_OPCODE_TXD 0x19
+# define NV40_FP_OP_OPCODE_RCP 0x1A
+# define NV40_FP_OP_OPCODE_EX2 0x1C
+# define NV40_FP_OP_OPCODE_LG2 0x1D
+# define NV40_FP_OP_OPCODE_COS 0x22
+# define NV40_FP_OP_OPCODE_SIN 0x23
+# define NV40_FP_OP_OPCODE_PK2H 0x24
+# define NV40_FP_OP_OPCODE_UP2H 0x25
+# define NV40_FP_OP_OPCODE_PK4UB 0x27
+# define NV40_FP_OP_OPCODE_UP4UB 0x28
+# define NV40_FP_OP_OPCODE_PK2US 0x29
+# define NV40_FP_OP_OPCODE_UP2US 0x2A
+# define NV40_FP_OP_OPCODE_DP2A 0x2E
+# define NV40_FP_OP_OPCODE_TXL 0x2F
+# define NV40_FP_OP_OPCODE_TXB 0x31
+# define NV40_FP_OP_OPCODE_DIV 0x3A
+# define NV40_FP_OP_OPCODE_UNK_LIT 0x3C
+/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/
+# define NV40_FP_OP_BRA_OPCODE_BRK 0x0
+# define NV40_FP_OP_BRA_OPCODE_CAL 0x1
+# define NV40_FP_OP_BRA_OPCODE_IF 0x2
+# define NV40_FP_OP_BRA_OPCODE_LOOP 0x3
+# define NV40_FP_OP_BRA_OPCODE_REP 0x4
+# define NV40_FP_OP_BRA_OPCODE_RET 0x5
+#define NV40_FP_OP_OUT_SAT (1u << 31) /* unsigned: shifting 1 into the sign bit of an int is undefined */
+
+/* high order bits of SRC0 */
+#define NV40_FP_OP_OUT_ABS (1 << 29)
+#define NV40_FP_OP_COND_SWZ_W_SHIFT 27
+#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27)
+#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25
+#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25)
+#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23
+#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23)
+#define NV40_FP_OP_COND_SWZ_X_SHIFT 21
+#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21)
+#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21
+#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21)
+#define NV40_FP_OP_COND_SHIFT 18
+#define NV40_FP_OP_COND_MASK (0x07 << 18)
+# define NV40_FP_OP_COND_FL 0
+# define NV40_FP_OP_COND_LT 1
+# define NV40_FP_OP_COND_EQ 2
+# define NV40_FP_OP_COND_LE 3
+# define NV40_FP_OP_COND_GT 4
+# define NV40_FP_OP_COND_NE 5
+# define NV40_FP_OP_COND_GE 6
+# define NV40_FP_OP_COND_TR 7
+
+/* high order bits of SRC1 */
+#define NV40_FP_OP_OPCODE_IS_BRANCH (1u<<31) /* unsigned: shifting 1 into the sign bit of an int is undefined */
+#define NV40_FP_OP_DST_SCALE_SHIFT 28
+#define NV40_FP_OP_DST_SCALE_MASK (3 << 28)
+#define NV40_FP_OP_DST_SCALE_1X 0
+#define NV40_FP_OP_DST_SCALE_2X 1
+#define NV40_FP_OP_DST_SCALE_4X 2
+#define NV40_FP_OP_DST_SCALE_8X 3
+#define NV40_FP_OP_DST_SCALE_INV_2X 5
+#define NV40_FP_OP_DST_SCALE_INV_4X 6
+#define NV40_FP_OP_DST_SCALE_INV_8X 7
+
+/* SRC1 LOOP */
+#define NV40_FP_OP_LOOP_INCR_SHIFT 19
+#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19)
+#define NV40_FP_OP_LOOP_INDEX_SHIFT 10
+#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10)
+#define NV40_FP_OP_LOOP_COUNT_SHIFT 2
+#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2)
+
+/* SRC1 IF */
+#define NV40_FP_OP_ELSE_ID_SHIFT 2
+#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2)
+
+/* SRC1 CAL */
+#define NV40_FP_OP_IADDR_SHIFT 2
+#define NV40_FP_OP_IADDR_MASK (0xFF << 2)
+
+/* SRC1 REP
+ * I have no idea why there are 3 count values here.. but they
+ * have always been filled with the same value in my tests so
+ * far..
+ */
+#define NV40_FP_OP_REP_COUNT1_SHIFT 2
+#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2)
+#define NV40_FP_OP_REP_COUNT2_SHIFT 10
+#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10)
+#define NV40_FP_OP_REP_COUNT3_SHIFT 19
+#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19)
+
+/* SRC2 REP/IF */
+#define NV40_FP_OP_END_ID_SHIFT 2
+#define NV40_FP_OP_END_ID_MASK (0xFF << 2)
+
+// SRC2 high-order
+#define NV40_FP_OP_INDEX_INPUT (1 << 30)
+#define NV40_FP_OP_ADDR_INDEX_SHIFT 19
+#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19)
+
+//== Register selection ==
+#define NV40_FP_REG_TYPE_SHIFT 0
+#define NV40_FP_REG_TYPE_MASK (3 << 0)
+# define NV40_FP_REG_TYPE_TEMP 0
+# define NV40_FP_REG_TYPE_INPUT 1
+# define NV40_FP_REG_TYPE_CONST 2
+#define NV40_FP_REG_SRC_SHIFT 2
+#define NV40_FP_REG_SRC_MASK (31 << 2)
+#define NV40_FP_REG_UNK_0 (1 << 8)
+#define NV40_FP_REG_SWZ_ALL_SHIFT 9
+#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9)
+#define NV40_FP_REG_SWZ_X_SHIFT 9
+#define NV40_FP_REG_SWZ_X_MASK (3 << 9)
+#define NV40_FP_REG_SWZ_Y_SHIFT 11
+#define NV40_FP_REG_SWZ_Y_MASK (3 << 11)
+#define NV40_FP_REG_SWZ_Z_SHIFT 13
+#define NV40_FP_REG_SWZ_Z_MASK (3 << 13)
+#define NV40_FP_REG_SWZ_W_SHIFT 15
+#define NV40_FP_REG_SWZ_W_MASK (3 << 15)
+# define NV40_FP_SWIZZLE_X 0
+# define NV40_FP_SWIZZLE_Y 1
+# define NV40_FP_SWIZZLE_Z 2
+# define NV40_FP_SWIZZLE_W 3
+#define NV40_FP_REG_NEGATE (1 << 17)
+
+struct nv40_sreg { /* shader register reference; built by nv40_sr() and adjusted by the nv40_sr_*() helpers */
+ int output; /* set from the 'out' argument of nv40_sr() */
+ int type; /* set from the 'type' argument of nv40_sr() */
+ int index; /* set from the 'index' argument of nv40_sr() */
+
+ int dst_scale; /* defaults to DEF_SCALE; overridden by nv40_sr_scale() */
+
+ int negate; /* toggled by nv40_sr_neg() */
+ int abs; /* set to 1 by nv40_sr_abs() */
+ int swz[4]; /* per-component swizzle; identity { 0, 1, 2, 3 } by default, remapped by nv40_sr_swz() */
+
+ int cc_update; /* presumably: instruction updates a condition code - TODO confirm against the emitters */
+ int cc_update_reg; /* presumably: which condition register is updated - TODO confirm */
+ int cc_test; /* defaults to DEF_CTEST in nv40_sr() */
+ int cc_test_reg; /* presumably: which condition register is tested - TODO confirm */
+ int cc_swz[4]; /* identity { 0, 1, 2, 3 } by default; presumably the condition code swizzle - TODO confirm */
+};
+
+/* Build a register reference with default modifiers: DEF_SCALE, no
+ * negate/abs, identity swizzles, no condition code update and the
+ * DEF_CTEST condition test.
+ */
+static INLINE struct nv40_sreg
+nv40_sr(int out, int type, int index)
+{
+	struct nv40_sreg reg;
+	int c;
+
+	reg.output = out;
+	reg.type = type;
+	reg.index = index;
+
+	reg.dst_scale = DEF_SCALE;
+	reg.negate = 0;
+	reg.abs = 0;
+
+	reg.cc_update = 0;
+	reg.cc_update_reg = 0;
+	reg.cc_test = DEF_CTEST;
+	reg.cc_test_reg = 0;
+
+	/* identity swizzle for both the source and the condition code */
+	for (c = 0; c < 4; c++) {
+		reg.swz[c] = c;
+		reg.cc_swz[c] = c;
+	}
+
+	return reg;
+}
+
+/* Return a copy of 'src' whose swizzle is composed with (x, y, z, w):
+ * each new component selects an entry of the existing src.swz[].
+ */
+static INLINE struct nv40_sreg
+nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w)
+{
+	/* Read all four selections before overwriting any of them. */
+	const int sel_x = src.swz[x];
+	const int sel_y = src.swz[y];
+	const int sel_z = src.swz[z];
+	const int sel_w = src.swz[w];
+
+	src.swz[SWZ_X] = sel_x;
+	src.swz[SWZ_Y] = sel_y;
+	src.swz[SWZ_Z] = sel_z;
+	src.swz[SWZ_W] = sel_w;
+	return src;
+}
+
+/* Return a copy of 'src' with its negate flag flipped. */
+static INLINE struct nv40_sreg
+nv40_sr_neg(struct nv40_sreg src)
+{
+	struct nv40_sreg flipped = src;
+
+	flipped.negate = src.negate ? 0 : 1;
+	return flipped;
+}
+
+/* Return a copy of 'src' with the absolute-value flag set. */
+static INLINE struct nv40_sreg
+nv40_sr_abs(struct nv40_sreg src)
+{
+	struct nv40_sreg result = src;
+
+	result.abs = 1;
+	return result;
+}
+
+/* Return a copy of 'src' with dst_scale replaced by 'scale'. */
+static INLINE struct nv40_sreg
+nv40_sr_scale(struct nv40_sreg src, int scale)
+{
+	struct nv40_sreg result = src;
+
+	result.dst_scale = scale;
+	return result;
+}
+
+#endif
diff --git a/src/mesa/pipe/nv40/nv40_state.c b/src/mesa/pipe/nv40/nv40_state.c
new file mode 100644
index 0000000000..e38a5ea534
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_state.c
@@ -0,0 +1,674 @@
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_util.h"
+
+#include "nv40_context.h"
+#include "nv40_dma.h"
+#include "nv40_state.h"
+
+#include "nvgl_pipe.h"
+
+/* Translate a pipe alpha-test CSO into its hardware form.
+ *
+ * Returns a heap object to be released with
+ * nv40_alpha_test_state_delete(), or NULL if allocation fails.  The
+ * object is zero-initialised so 'func' and 'ref' hold defined values
+ * even when the test is disabled and they are not filled in.
+ */
+static void *
+nv40_alpha_test_state_create(struct pipe_context *pipe,
+			     const struct pipe_alpha_test_state *cso)
+{
+	struct nv40_alpha_test_state *at;
+
+	at = calloc(1, sizeof(*at));
+	if (!at)
+		return NULL;
+
+	at->enabled = cso->enabled ? 1 : 0;
+	if (at->enabled) {
+		at->func = nvgl_comparison_op(cso->func);
+		at->ref  = float_to_ubyte(cso->ref);
+	}
+
+	return (void *)at;
+}
+
+/* Emit the alpha test state: enable/func/ref when the test is enabled,
+ * otherwise only a zero enable word.
+ */
+static void
+nv40_alpha_test_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+	struct nv40_alpha_test_state *alpha = hwcso;
+
+	if (!alpha->enabled) {
+		BEGIN_RING(curie, NV40TCL_ALPHA_TEST_ENABLE, 1);
+		OUT_RING  (0);
+		return;
+	}
+
+	BEGIN_RING(curie, NV40TCL_ALPHA_TEST_ENABLE, 3);
+	OUT_RING  (alpha->enabled);
+	OUT_RING  (alpha->func);
+	OUT_RING  (alpha->ref);
+}
+
+/* Release an object returned by nv40_alpha_test_state_create(). */
+static void
+nv40_alpha_test_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	(void)pipe;
+
+	free(hwcso);
+}
+
+/* Translate a pipe blend CSO (blending, logic op, colour mask, dither)
+ * into its hardware form.
+ *
+ * Returns a heap object to be released with nv40_blend_state_delete(),
+ * or NULL if allocation fails.  The object is zero-initialised so the
+ * blend and logic-op words hold defined values even when the matching
+ * feature is disabled and they are not filled in.
+ */
+static void *
+nv40_blend_state_create(struct pipe_context *pipe,
+			const struct pipe_blend_state *cso)
+{
+	struct nv40_blend_state *cb;
+
+	cb = calloc(1, sizeof(*cb));
+	if (!cb)
+		return NULL;
+
+	/* Blend words pack the alpha term in the high 16 bits and the
+	 * RGB term in the low 16 bits.
+	 */
+	cb->b_enable = cso->blend_enable ? 1 : 0;
+	if (cb->b_enable) {
+		cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) |
+				 (nvgl_blend_func(cso->rgb_src_factor)));
+		cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) |
+				 (nvgl_blend_func(cso->rgb_dst_factor)));
+		cb->b_eqn = ((nvgl_blend_eqn(cso->alpha_func) << 16) |
+			     (nvgl_blend_eqn(cso->rgb_func)));
+	}
+
+	cb->l_enable = cso->logicop_enable ? 1 : 0;
+	if (cb->l_enable) {
+		cb->l_op = nvgl_logicop_func(cso->logicop_func);
+	}
+
+	/* One enable bit per channel: A at bit 24, R at 16, G at 8, B at 0. */
+	cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) |
+		      ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) |
+		      ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) |
+		      ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0));
+
+	cb->d_enable = cso->dither ? 1 : 0;
+
+	return (void *)cb;
+}
+
+/* Emit dither, blend and logic-op state from a blend CSO. */
+static void
+nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+	struct nv40_blend_state *blend = hwcso;
+
+	BEGIN_RING(curie, NV40TCL_DITHER_ENABLE, 1);
+	OUT_RING  (blend->d_enable);
+
+	/* NOTE(review): c_mask is only emitted on the blend-enabled path,
+	 * so the colour mask is not sent when blending is off - confirm
+	 * whether that is intended.
+	 */
+	if (!blend->b_enable) {
+		BEGIN_RING(curie, NV40TCL_BLEND_ENABLE, 1);
+		OUT_RING  (0);
+	} else {
+		BEGIN_RING(curie, NV40TCL_BLEND_ENABLE, 3);
+		OUT_RING  (blend->b_enable);
+		OUT_RING  (blend->b_srcfunc);
+		OUT_RING  (blend->b_dstfunc);
+		BEGIN_RING(curie, NV40TCL_BLEND_EQUATION, 2);
+		OUT_RING  (blend->b_eqn);
+		OUT_RING  (blend->c_mask);
+	}
+
+	if (!blend->l_enable) {
+		BEGIN_RING(curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 1);
+		OUT_RING  (0);
+	} else {
+		BEGIN_RING(curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2);
+		OUT_RING  (blend->l_enable);
+		OUT_RING  (blend->l_op);
+	}
+}
+
+/* Release an object returned by nv40_blend_state_create(). */
+static void
+nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	(void)pipe;
+
+	free(hwcso);
+}
+
+/* Translate a pipe sampler CSO into packed wrap, filter and border
+ * colour words.
+ *
+ * Returns a heap object to be released with
+ * nv40_sampler_state_delete(), or NULL if allocation fails.
+ */
+static void *
+nv40_sampler_state_create(struct pipe_context *pipe,
+			  const struct pipe_sampler_state *cso)
+{
+	struct nv40_sampler_state *ps;
+
+	ps = malloc(sizeof(*ps));
+	if (!ps)
+		return NULL;
+
+	/* wrap: r at bit 16, t at bit 8, s at bit 0 */
+	ps->wrap = ((nv40_tex_wrap_mode(cso->wrap_r) << 16) |
+		    (nv40_tex_wrap_mode(cso->wrap_t) <<  8) |
+		    (nv40_tex_wrap_mode(cso->wrap_s) <<  0));
+	/* filt: minification at bit 16, magnification (no mip filter)
+	 * at bit 24
+	 */
+	ps->filt = ((nv40_tex_filter(cso->min_img_filter,
+				     cso->min_mip_filter) << 16) |
+		    (nv40_tex_filter(cso->mag_img_filter,
+				     PIPE_TEX_MIPFILTER_NONE) << 24));
+	/* bcol: border_color[3], [0], [1], [2] packed from the top byte
+	 * down
+	 */
+	ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) |
+		    (float_to_ubyte(cso->border_color[0]) << 16) |
+		    (float_to_ubyte(cso->border_color[1]) <<  8) |
+		    (float_to_ubyte(cso->border_color[2]) <<  0));
+
+	return (void *)ps;
+}
+
+/* Record the sampler for texture unit 'unit' and flag the unit and the
+ * texture state as dirty; nothing is emitted to the ring here.
+ */
+static void
+nv40_sampler_state_bind(struct pipe_context *pipe, unsigned unit,
+			void *hwcso)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+	struct nv40_sampler_state *sampler = hwcso;
+
+	nv40->dirty |= NV40_NEW_TEXTURE;
+	nv40->tex_dirty |= (1 << unit);
+	nv40->tex_sampler[unit] = sampler;
+}
+
+/* Release an object returned by nv40_sampler_state_create(). */
+static void
+nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	(void)pipe;
+
+	free(hwcso);
+}
+
+/* Translate a pipe rasterizer CSO into its hardware form.
+ *
+ * Returns a heap object to be released with
+ * nv40_rasterizer_state_delete(), or NULL if allocation fails.
+ */
+static void *
+nv40_rasterizer_state_create(struct pipe_context *pipe,
+			     const struct pipe_rasterizer_state *cso)
+{
+	struct nv40_rasterizer_state *rs;
+	/* Used to obtain the raw bits of the point size without an
+	 * aliasing-violating pointer cast.  Assumes cso->point_size is a
+	 * float, as the original cast did.
+	 */
+	union {
+		float f;
+		uint32_t u;
+	} point_size;
+
+	/*XXX: ignored:
+	 * 	light_twoside
+	 * 	offset_cw/ccw -nohw
+	 * 	scissor
+	 * 	point_smooth -nohw
+	 * 	multisample
+	 * 	offset_units / offset_scale
+	 */
+	rs = malloc(sizeof(*rs));
+	if (!rs)
+		return NULL;
+
+	rs->shade_model = cso->flatshade ? 0x1d00 : 0x1d01;
+
+	/* The width is scaled by 8 and truncated to a byte. */
+	rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff;
+	rs->line_smooth_en = cso->line_smooth ? 1 : 0;
+	rs->line_stipple_en = cso->line_stipple_enable ? 1 : 0;
+	rs->line_stipple = (cso->line_stipple_pattern << 16) |
+			    cso->line_stipple_factor;
+
+	point_size.f = cso->point_size;
+	rs->point_size = point_size.u;
+
+	rs->poly_smooth_en = cso->poly_smooth ? 1 : 0;
+	rs->poly_stipple_en = cso->poly_stipple_enable ? 1 : 0;
+
+	/* The fill modes are given per winding; map them onto front/back
+	 * according to which winding is front-facing.
+	 */
+	if (cso->front_winding == PIPE_WINDING_CCW) {
+		rs->front_face = 0x0901;
+		rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw);
+		rs->poly_mode_back  = nvgl_polygon_mode(cso->fill_cw);
+	} else {
+		rs->front_face = 0x0900;
+		rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw);
+		rs->poly_mode_back  = nvgl_polygon_mode(cso->fill_ccw);
+	}
+
+	rs->cull_face_en = 0;
+	rs->cull_face    = 0x0900;
+	switch (cso->cull_mode) {
+	case PIPE_WINDING_CCW:
+		rs->cull_face = 0x0901;
+		/* fall-through */
+	case PIPE_WINDING_CW:
+		rs->cull_face_en = 1;
+		break;
+	case PIPE_WINDING_NONE:
+	default:
+		break;
+	}
+
+	return (void *)rs;
+}
+
+/* Emit every word of a rasterizer CSO to the ring. */
+static void
+nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+	struct nv40_rasterizer_state *rast = hwcso;
+
+	/* shading */
+	BEGIN_RING(curie, NV40TCL_SHADE_MODEL, 1);
+	OUT_RING  (rast->shade_model);
+
+	/* lines */
+	BEGIN_RING(curie, NV40TCL_LINE_WIDTH, 2);
+	OUT_RING  (rast->line_width);
+	OUT_RING  (rast->line_smooth_en);
+	BEGIN_RING(curie, NV40TCL_LINE_STIPPLE_ENABLE, 2);
+	OUT_RING  (rast->line_stipple_en);
+	OUT_RING  (rast->line_stipple);
+
+	/* points */
+	BEGIN_RING(curie, NV40TCL_POINT_SIZE, 1);
+	OUT_RING  (rast->point_size);
+
+	/* polygons: six consecutive words starting at POLYGON_MODE_FRONT */
+	BEGIN_RING(curie, NV40TCL_POLYGON_MODE_FRONT, 6);
+	OUT_RING  (rast->poly_mode_front);
+	OUT_RING  (rast->poly_mode_back);
+	OUT_RING  (rast->cull_face);
+	OUT_RING  (rast->front_face);
+	OUT_RING  (rast->poly_smooth_en);
+	OUT_RING  (rast->cull_face_en);
+
+	BEGIN_RING(curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
+	OUT_RING  (rast->poly_stipple_en);
+}
+
+/* Release an object returned by nv40_rasterizer_state_create(). */
+static void
+nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	(void)pipe;
+
+	free(hwcso);
+}
+
+/*
+ * Translate a gallium depth/stencil CSO into the dword values that
+ * nv40_depth_stencil_state_bind() pushes to the hardware.
+ *
+ * Returns a heap-allocated nv40_depth_stencil_state (released with free()
+ * by the delete hook), or NULL if the allocation fails.
+ */
+static void *
+nv40_depth_stencil_state_create(struct pipe_context *pipe,
+				const struct pipe_depth_stencil_state *cso)
+{
+	struct nv40_depth_stencil_state *zs;
+
+	/*XXX: ignored:
+	 * 	depth.occlusion_count
+	 * 	depth.clear
+	 * 	stencil.clear_value
+	 */
+	zs = malloc(sizeof(*zs));
+	if (!zs)
+		return NULL;	/* previously dereferenced unchecked */
+
+	zs->depth.func		= nvgl_comparison_op(cso->depth.func);
+	zs->depth.write_enable	= cso->depth.writemask ? 1 : 0;
+	zs->depth.test_enable	= cso->depth.enabled ? 1 : 0;
+
+	/* index 1 of the per-face arrays is the back face */
+	zs->stencil.back.enable	= cso->stencil.back_enabled ? 1 : 0;
+	zs->stencil.back.wmask	= cso->stencil.write_mask[1];
+	zs->stencil.back.func	=
+		nvgl_comparison_op(cso->stencil.back_func);
+	zs->stencil.back.ref	= cso->stencil.ref_value[1];
+	zs->stencil.back.vmask	= cso->stencil.value_mask[1];
+	zs->stencil.back.fail	= nvgl_stencil_op(cso->stencil.back_fail_op);
+	zs->stencil.back.zfail	= nvgl_stencil_op(cso->stencil.back_zfail_op);
+	zs->stencil.back.zpass	= nvgl_stencil_op(cso->stencil.back_zpass_op);
+
+	/* index 0 of the per-face arrays is the front face */
+	zs->stencil.front.enable= cso->stencil.front_enabled ? 1 : 0;
+	zs->stencil.front.wmask	= cso->stencil.write_mask[0];
+	zs->stencil.front.func	=
+		nvgl_comparison_op(cso->stencil.front_func);
+	zs->stencil.front.ref	= cso->stencil.ref_value[0];
+	zs->stencil.front.vmask	= cso->stencil.value_mask[0];
+	zs->stencil.front.fail	= nvgl_stencil_op(cso->stencil.front_fail_op);
+	zs->stencil.front.zfail	= nvgl_stencil_op(cso->stencil.front_zfail_op);
+	zs->stencil.front.zpass	= nvgl_stencil_op(cso->stencil.front_zpass_op);
+
+	return (void *)zs;
+}
+
+/*
+ * Bind a depth/stencil CSO by pushing its contents straight to the ring:
+ * 3 dwords read from zs->depth, then a 16-word packet made of 8 dwords
+ * read from zs->stencil.back followed by 8 dwords read from
+ * zs->stencil.front.  The structs are pushed as raw uint32_t arrays, so
+ * their field order is the order the words are emitted in.
+ */
+static void
+nv40_depth_stencil_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nv40_context *nv40 = (struct nv40_context *)pipe;
+ struct nv40_depth_stencil_state *zs = hwcso;
+
+ BEGIN_RING(curie, NV40TCL_DEPTH_FUNC, 3);
+ OUT_RINGp ((uint32_t *)&zs->depth, 3);
+ BEGIN_RING(curie, NV40TCL_STENCIL_BACK_ENABLE, 16);
+ OUT_RINGp ((uint32_t *)&zs->stencil.back, 8);
+ OUT_RINGp ((uint32_t *)&zs->stencil.front, 8);
+}
+
+/* Release the memory backing a depth/stencil CSO. */
+static void
+nv40_depth_stencil_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	(void)pipe;	/* the context is not needed to drop the object */
+
+	free(hwcso);
+}
+
+/*
+ * Create the driver-side wrapper for a vertex shader.  Only the pointer
+ * to the gallium shader state is recorded here; all other fields start
+ * out zeroed.
+ *
+ * Returns the new nv40_vertex_program, or NULL if the allocation fails.
+ */
+static void *
+nv40_vp_state_create(struct pipe_context *pipe,
+		     const struct pipe_shader_state *cso)
+{
+	struct nv40_vertex_program *vp;
+
+	vp = calloc(1, sizeof(*vp));
+	if (!vp)
+		return NULL;	/* previously dereferenced unchecked */
+
+	vp->pipe = cso;
+
+	return (void *)vp;
+}
+
+/* Select `hwcso` as the current vertex program and flag it for
+ * revalidation through the NV40_NEW_VERTPROG dirty bit. */
+static void
+nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv40_context *ctx = (struct nv40_context *)pipe;
+
+	ctx->vertprog.vp = (struct nv40_vertex_program *)hwcso;
+	ctx->dirty |= NV40_NEW_VERTPROG;
+}
+
+/* Release the wrapper struct of a vertex program. */
+static void
+nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	(void)pipe;	/* the context is not needed to drop the object */
+
+	free(hwcso);
+}
+
+/*
+ * Create the driver-side wrapper for a fragment shader.  Only the pointer
+ * to the gallium shader state is recorded here; all other fields start
+ * out zeroed.
+ *
+ * Returns the new nv40_fragment_program, or NULL if the allocation fails.
+ */
+static void *
+nv40_fp_state_create(struct pipe_context *pipe,
+		     const struct pipe_shader_state *cso)
+{
+	struct nv40_fragment_program *fp;
+
+	fp = calloc(1, sizeof(*fp));
+	if (!fp)
+		return NULL;	/* previously dereferenced unchecked */
+
+	fp->pipe = cso;
+
+	return (void *)fp;
+}
+
+/* Select `hwcso` as the current fragment program and flag it for
+ * revalidation through the NV40_NEW_FRAGPROG dirty bit. */
+static void
+nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+	struct nv40_context *ctx = (struct nv40_context *)pipe;
+
+	ctx->fragprog.fp = (struct nv40_fragment_program *)hwcso;
+	ctx->dirty |= NV40_NEW_FRAGPROG;
+}
+
+/* Release the wrapper struct of a fragment program. */
+static void
+nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+	(void)pipe;	/* the context is not needed to drop the object */
+
+	free(hwcso);
+}
+
+/*
+ * Push the constant blend colour.  The four float channels are converted
+ * to bytes and packed with color[3] in bits 24-31, color[0] in bits
+ * 16-23, color[1] in bits 8-15 and color[2] in bits 0-7.
+ */
+static void
+nv40_set_blend_color(struct pipe_context *pipe,
+		     const struct pipe_blend_color *bcol)
+{
+	/* keep the name: the ring macros do not take the context explicitly */
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+	const float *c = bcol->color;
+
+	BEGIN_RING(curie, NV40TCL_BLEND_COLOR, 1);
+	OUT_RING  ((float_to_ubyte(c[3]) << 24) |
+		   (float_to_ubyte(c[0]) << 16) |
+		   (float_to_ubyte(c[1]) <<  8) |
+		   (float_to_ubyte(c[2]) <<  0));
+}
+
+/* The clip state itself is not stored; a change only marks the vertex
+ * program state dirty. */
+static void
+nv40_set_clip_state(struct pipe_context *pipe,
+		    const struct pipe_clip_state *clip)
+{
+	struct nv40_context *ctx = (struct nv40_context *)pipe;
+
+	(void)clip;	/* not consulted here */
+
+	ctx->dirty |= NV40_NEW_VERTPROG;
+}
+
+/*
+ * Push the colour used by clears.  The four float channels are converted
+ * to bytes and packed with color[3] in bits 24-31, color[0] in bits
+ * 16-23, color[1] in bits 8-15 and color[2] in bits 0-7.
+ */
+static void
+nv40_set_clear_color_state(struct pipe_context *pipe,
+			   const struct pipe_clear_color_state *ccol)
+{
+	/* keep the name: the ring macros do not take the context explicitly */
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+	const float *c = ccol->color;
+
+	BEGIN_RING(curie, NV40TCL_CLEAR_VALUE_COLOR, 1);
+	OUT_RING  ((float_to_ubyte(c[3]) << 24) |
+		   (float_to_ubyte(c[0]) << 16) |
+		   (float_to_ubyte(c[1]) <<  8) |
+		   (float_to_ubyte(c[2]) <<  0));
+}
+
+/*
+ * Remember the constant buffer of the vertex or fragment stage and mark
+ * that stage's program dirty.  Any other `shader` value is ignored, and
+ * `index` is not used.
+ */
+static void
+nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+			 const struct pipe_constant_buffer *buf )
+{
+	struct nv40_context *ctx = (struct nv40_context *)pipe;
+
+	switch (shader) {
+	case PIPE_SHADER_VERTEX:
+		ctx->vertprog.constant_buf = buf->buffer;
+		ctx->dirty |= NV40_NEW_VERTPROG;
+		break;
+	case PIPE_SHADER_FRAGMENT:
+		ctx->fragprog.constant_buf = buf->buffer;
+		ctx->dirty |= NV40_NEW_FRAGPROG;
+		break;
+	default:
+		break;
+	}
+}
+
+/* Feedback state is not implemented: the call is only reported through
+ * NOUVEAU_ERR. */
+static void
+nv40_set_feedback_state(struct pipe_context *pipe,
+			const struct pipe_feedback_state *feedback)
+{
+	(void)pipe;
+	(void)feedback;
+
+	NOUVEAU_ERR("\n");
+}
+
+/* Region behind a surface pointer, or NULL when no surface is bound. */
+#define get_region(surf) ((surf) ? (surf)->region : NULL)
+
+/*
+ * Program the render targets described by `fb`: DMA object, offset and
+ * pitch of up to four colour buffers and the depth buffer, the RT enable
+ * mask, the RT format word and the RT/viewport dimensions.
+ *
+ * Dimensions come from colour buffer 0 and fall back to the depth buffer
+ * when no colour buffer 0 is bound; with neither bound only the (empty)
+ * RT enable mask is emitted.  The original dereferenced fb->cbufs[0]
+ * unconditionally although it is treated as optional further up.
+ *
+ * NOTE(review): the depth-buffer fallback assumes fb->zbuf has the same
+ * surface type (width/height fields) as fb->cbufs[] - confirm.
+ */
+static void
+nv40_set_framebuffer_state(struct pipe_context *pipe,
+			   const struct pipe_framebuffer_state *fb)
+{
+	/* `nv40` and `nvws` must keep these names: the ring/reloc macros
+	 * are not handed them explicitly. */
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+	struct nouveau_winsys *nvws = nv40->nvws;
+	struct pipe_region *region;
+	uint32_t rt_enable = 0, rt_format = 0;
+	unsigned width, height;
+
+	if ((region = get_region(fb->cbufs[0]))) {
+		rt_enable |= NV40TCL_RT_ENABLE_COLOR0;
+
+		BEGIN_RING(curie, NV40TCL_DMA_COLOR0, 1);
+		OUT_RELOCo(region->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+		/* colour 0: pitch first, then offset */
+		BEGIN_RING(curie, NV40TCL_COLOR0_PITCH, 2);
+		OUT_RING  (region->pitch * region->cpp);
+		OUT_RELOCl(region->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	}
+
+	if ((region = get_region(fb->cbufs[1]))) {
+		rt_enable |= NV40TCL_RT_ENABLE_COLOR1;
+
+		BEGIN_RING(curie, NV40TCL_DMA_COLOR1, 1);
+		OUT_RELOCo(region->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+		/* colour 1: offset first, then pitch */
+		BEGIN_RING(curie, NV40TCL_COLOR1_OFFSET, 2);
+		OUT_RELOCl(region->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+		OUT_RING  (region->pitch * region->cpp);
+	}
+
+	if ((region = get_region(fb->cbufs[2]))) {
+		rt_enable |= NV40TCL_RT_ENABLE_COLOR2;
+
+		BEGIN_RING(curie, NV40TCL_DMA_COLOR2, 1);
+		OUT_RELOCo(region->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+		BEGIN_RING(curie, NV40TCL_COLOR2_OFFSET, 1);
+		OUT_RELOCl(region->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+		BEGIN_RING(curie, NV40TCL_COLOR2_PITCH, 1);
+		OUT_RING  (region->pitch * region->cpp);
+	}
+
+	if ((region = get_region(fb->cbufs[3]))) {
+		rt_enable |= NV40TCL_RT_ENABLE_COLOR3;
+
+		BEGIN_RING(curie, NV40TCL_DMA_COLOR3, 1);
+		OUT_RELOCo(region->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+		BEGIN_RING(curie, NV40TCL_COLOR3_OFFSET, 1);
+		OUT_RELOCl(region->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+		BEGIN_RING(curie, NV40TCL_COLOR3_PITCH, 1);
+		OUT_RING  (region->pitch * region->cpp);
+	}
+
+	if ((region = get_region(fb->zbuf))) {
+		BEGIN_RING(curie, NV40TCL_DMA_ZETA, 1);
+		OUT_RELOCo(region->buffer,
+			   NOUVEAU_BO_VRAM | NOUVEAU_BO_WR | NOUVEAU_BO_RD);
+		BEGIN_RING(curie, NV40TCL_ZETA_OFFSET, 1);
+		OUT_RELOCl(region->buffer, 0,
+			   NOUVEAU_BO_VRAM | NOUVEAU_BO_WR | NOUVEAU_BO_RD);
+		BEGIN_RING(curie, NV40TCL_ZETA_PITCH, 1);
+		OUT_RING  (region->pitch * region->cpp);
+	}
+
+	/* any colour target beyond 0 additionally needs the MRT bit */
+	if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 |
+			 NV40TCL_RT_ENABLE_COLOR3))
+		rt_enable |= NV40TCL_RT_ENABLE_MRT;
+	BEGIN_RING(curie, NV40TCL_RT_ENABLE, 1);
+	OUT_RING  (rt_enable);
+
+	/* swizzled targets are not wired up yet; always linear */
+	if (0) {
+#if 0
+		rt_format |= (log2width <<
+			      NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT);
+		rt_format |= (log2height <<
+			      NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
+#endif
+		rt_format |= (NV40TCL_RT_FORMAT_TYPE_SWIZZLED <<
+			      NV40TCL_RT_FORMAT_TYPE_SHIFT);
+	} else {
+		rt_format |= (NV40TCL_RT_FORMAT_TYPE_LINEAR <<
+			      NV40TCL_RT_FORMAT_TYPE_SHIFT);
+	}
+
+	if (fb->cbufs[0] && fb->cbufs[0]->format == PIPE_FORMAT_U_R5_G6_B5) {
+		rt_format |= (NV40TCL_RT_FORMAT_COLOR_R5G6B5 <<
+			      NV40TCL_RT_FORMAT_COLOR_SHIFT);
+	} else {
+		rt_format |= (NV40TCL_RT_FORMAT_COLOR_A8R8G8B8 <<
+			      NV40TCL_RT_FORMAT_COLOR_SHIFT);
+	}
+
+	if (fb->zbuf && fb->zbuf->format == PIPE_FORMAT_U_Z16) {
+		rt_format |= (NV40TCL_RT_FORMAT_DEPTH_Z16 <<
+			      NV40TCL_RT_FORMAT_DEPTH_SHIFT);
+	} else {
+		rt_format |= (NV40TCL_RT_FORMAT_DEPTH_Z24S8 <<
+			      NV40TCL_RT_FORMAT_DEPTH_SHIFT);
+	}
+
+	/* pick the surface that defines the render area */
+	if (fb->cbufs[0]) {
+		width  = fb->cbufs[0]->width;
+		height = fb->cbufs[0]->height;
+	} else if (fb->zbuf) {
+		width  = fb->zbuf->width;
+		height = fb->zbuf->height;
+	} else {
+		return;	/* nothing bound: no dimensions to program */
+	}
+
+	BEGIN_RING(curie, NV40TCL_RT_HORIZ, 3);
+	OUT_RING  ((width << 16) | 0);
+	OUT_RING  ((height << 16) | 0);
+	OUT_RING  (rt_format);
+	BEGIN_RING(curie, NV40TCL_VIEWPORT_HORIZ, 2);
+	OUT_RING  ((width << 16) | 0);
+	OUT_RING  ((height << 16) | 0);
+	BEGIN_RING(curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+	OUT_RING  (((width - 1) << 16) | 0);
+	OUT_RING  (((height - 1) << 16) | 0);
+}
+
+/*
+ * Upload the polygon stipple pattern: the 32 words of stipple->stipple
+ * are pushed unchanged as one 32-word packet starting at
+ * NV40TCL_POLYGON_STIPPLE_PATTERN(0).
+ */
+static void
+nv40_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct nv40_context *nv40 = (struct nv40_context *)pipe;
+
+ BEGIN_RING(curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+ OUT_RINGp ((uint32_t *)stipple->stipple, 32);
+}
+
+/* Intentionally a no-op: the sampler-to-unit mapping is not used by this
+ * driver yet. */
+static void
+nv40_set_sampler_units(struct pipe_context *pipe,
+		       uint num_samplers, const uint *units)
+{
+	(void)pipe;
+	(void)num_samplers;
+	(void)units;
+}
+
+/*
+ * Push the scissor rectangle.  Each of the two words carries the extent
+ * (max - min) in its upper 16 bits and the minimum coordinate in the
+ * lower bits: first the horizontal word, then the vertical one.
+ */
+static void
+nv40_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *s)
+{
+ struct nv40_context *nv40 = (struct nv40_context *)pipe;
+
+ BEGIN_RING(curie, NV40TCL_SCISSOR_HORIZ, 2);
+ OUT_RING (((s->maxx - s->minx) << 16) | s->minx);
+ OUT_RING (((s->maxy - s->miny) << 16) | s->miny);
+}
+
+/*
+ * Attach `miptree` (may be NULL to unbind) to texture unit `unit` and
+ * flag that unit for revalidation.
+ *
+ * tex_dirty is a bitmask with one bit per unit: nv40_state_tex_update()
+ * walks it with ffs() and clears (1 << unit).  The original OR-ed in the
+ * raw unit number, which never flagged unit 0 and flagged the wrong
+ * units for every other index.
+ */
+static void
+nv40_set_texture_state(struct pipe_context *pipe, unsigned unit,
+		       struct pipe_mipmap_tree *miptree)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+
+	nv40->tex_miptree[unit] = miptree;
+	nv40->tex_dirty |= (1 << unit);
+
+	nv40->dirty |= NV40_NEW_TEXTURE;
+}
+
+/*
+ * Push the viewport transform as one 8-float packet: the four translate
+ * components followed by the four scale components.
+ */
+static void
+nv40_set_viewport_state(struct pipe_context *pipe,
+			const struct pipe_viewport_state *vpt)
+{
+	/* keep the name: the ring macros do not take the context explicitly */
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+	int c;
+
+	BEGIN_RING(curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8);
+	for (c = 0; c < 4; c++) {
+		OUT_RINGf (vpt->translate[c]);
+	}
+	for (c = 0; c < 4; c++) {
+		OUT_RINGf (vpt->scale[c]);
+	}
+}
+
+/* Store a copy of the vertex buffer description in slot `index` and mark
+ * the vertex arrays dirty. */
+static void
+nv40_set_vertex_buffer(struct pipe_context *pipe, unsigned index,
+		       const struct pipe_vertex_buffer *vb)
+{
+	struct nv40_context *ctx = (struct nv40_context *)pipe;
+
+	ctx->dirty |= NV40_NEW_ARRAYS;
+	ctx->vtxbuf[index] = *vb;
+}
+
+/* Store a copy of the vertex element description in slot `index` and
+ * mark the vertex arrays dirty. */
+static void
+nv40_set_vertex_element(struct pipe_context *pipe, unsigned index,
+			const struct pipe_vertex_element *ve)
+{
+	struct nv40_context *ctx = (struct nv40_context *)pipe;
+
+	ctx->dirty |= NV40_NEW_ARRAYS;
+	ctx->vtxelt[index] = *ve;
+}
+
+/* Feedback buffers are not implemented: the call is only reported
+ * through NOUVEAU_ERR. */
+static void
+nv40_set_feedback_buffer(struct pipe_context *pipe, unsigned index,
+			 const struct pipe_feedback_buffer *fbb)
+{
+	(void)pipe;
+	(void)index;
+	(void)fbb;
+
+	NOUVEAU_ERR("\n");
+}
+
+/*
+ * Install this file's state hooks into the pipe_context embedded in
+ * `nv40`.  The two feedback hooks are deliberately left unset.
+ */
+void
+nv40_init_state_functions(struct nv40_context *nv40)
+{
+	struct pipe_context *pipe = &nv40->pipe;
+
+	/* constant state objects: create / bind / delete triples */
+	pipe->create_alpha_test_state = nv40_alpha_test_state_create;
+	pipe->bind_alpha_test_state = nv40_alpha_test_state_bind;
+	pipe->delete_alpha_test_state = nv40_alpha_test_state_delete;
+
+	pipe->create_blend_state = nv40_blend_state_create;
+	pipe->bind_blend_state = nv40_blend_state_bind;
+	pipe->delete_blend_state = nv40_blend_state_delete;
+
+	pipe->create_sampler_state = nv40_sampler_state_create;
+	pipe->bind_sampler_state = nv40_sampler_state_bind;
+	pipe->delete_sampler_state = nv40_sampler_state_delete;
+
+	pipe->create_rasterizer_state = nv40_rasterizer_state_create;
+	pipe->bind_rasterizer_state = nv40_rasterizer_state_bind;
+	pipe->delete_rasterizer_state = nv40_rasterizer_state_delete;
+
+	pipe->create_depth_stencil_state = nv40_depth_stencil_state_create;
+	pipe->bind_depth_stencil_state = nv40_depth_stencil_state_bind;
+	pipe->delete_depth_stencil_state = nv40_depth_stencil_state_delete;
+
+	pipe->create_vs_state = nv40_vp_state_create;
+	pipe->bind_vs_state = nv40_vp_state_bind;
+	pipe->delete_vs_state = nv40_vp_state_delete;
+
+	pipe->create_fs_state = nv40_fp_state_create;
+	pipe->bind_fs_state = nv40_fp_state_bind;
+	pipe->delete_fs_state = nv40_fp_state_delete;
+
+	/* immediate "set" state */
+	pipe->set_blend_color = nv40_set_blend_color;
+	pipe->set_clip_state = nv40_set_clip_state;
+	pipe->set_clear_color_state = nv40_set_clear_color_state;
+	pipe->set_constant_buffer = nv40_set_constant_buffer;
+//	pipe->set_feedback_state = nv40_set_feedback_state;
+	pipe->set_framebuffer_state = nv40_set_framebuffer_state;
+	pipe->set_polygon_stipple = nv40_set_polygon_stipple;
+	pipe->set_sampler_units = nv40_set_sampler_units;
+	pipe->set_scissor_state = nv40_set_scissor_state;
+	pipe->set_texture_state = nv40_set_texture_state;
+	pipe->set_viewport_state = nv40_set_viewport_state;
+
+	/* vertex fetch setup */
+	pipe->set_vertex_buffer = nv40_set_vertex_buffer;
+	pipe->set_vertex_element = nv40_set_vertex_element;
+
+//	pipe->set_feedback_buffer = nv40_set_feedback_buffer;
+}
+
diff --git a/src/mesa/pipe/nv40/nv40_state.h b/src/mesa/pipe/nv40/nv40_state.h
new file mode 100644
index 0000000000..1535037f63
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_state.h
@@ -0,0 +1,173 @@
+#ifndef __NV40_STATE_H__
+#define __NV40_STATE_H__
+
+#include "pipe/p_state.h"
+
+/*
+ * Driver-side alpha test CSO.
+ * NOTE(review): the code filling and emitting these fields is outside
+ * this part of the file; presumably they hold ready-to-push hardware
+ * values like the other state structs here - confirm.
+ */
+struct nv40_alpha_test_state {
+ uint32_t enabled;
+ uint32_t func;
+ uint32_t ref;
+};
+
+/*
+ * Driver-side blend CSO.
+ * NOTE(review): the producer/consumer code is not visible here; going by
+ * the prefixes, b_* looks like blending, l_* logic op, c_* colour mask
+ * and d_* dither - confirm against nv40_state.c.
+ */
+struct nv40_blend_state {
+ uint32_t b_enable;
+ uint32_t b_srcfunc;
+ uint32_t b_dstfunc;
+ uint32_t b_eqn;
+
+ uint32_t l_enable;
+ uint32_t l_op;
+
+ uint32_t c_mask;
+
+ uint32_t d_enable;
+};
+
+/*
+ * Driver-side sampler CSO.  nv40_tex_unit_enable() (nv40_state_tex.c)
+ * pushes `wrap` and `bcol` unchanged and `filt` OR-ed with a constant as
+ * part of the per-unit texture packet.
+ */
+struct nv40_sampler_state {
+ uint32_t wrap;
+ uint32_t filt;
+ uint32_t bcol;
+};
+
+/*
+ * Driver-side rasterizer CSO.  Every field holds the exact word that
+ * nv40_rasterizer_state_bind() (nv40_state.c) pushes to the ring; the
+ * fields are emitted one by one, not as a block.
+ */
+struct nv40_rasterizer_state {
+ uint32_t shade_model;
+
+ /* line_width is the gallium width times 8, truncated to 8 bits */
+ uint32_t line_width;
+ uint32_t line_smooth_en;
+ uint32_t line_stipple_en;
+ /* (stipple pattern << 16) | stipple factor */
+ uint32_t line_stipple;
+
+ /* raw bit pattern of the float point size */
+ uint32_t point_size;
+
+ uint32_t poly_smooth_en;
+ uint32_t poly_stipple_en;
+
+ uint32_t poly_mode_front;
+ uint32_t poly_mode_back;
+
+ /* 0x0900 or 0x0901, selected from the winding / cull mode */
+ uint32_t front_face;
+ uint32_t cull_face;
+ uint32_t cull_face_en;
+
+};
+
+/*
+ * Driver-side vertex program: wraps the gallium shader state together
+ * with its translated form.
+ */
+struct nv40_vertex_program {
+ /* gallium shader this program was created from */
+ const struct pipe_shader_state *pipe;
+
+ /* set once translated; checked by nv40_state_update_vertprog() */
+ boolean translated;
+ boolean on_hw;
+ int start_ip;
+
+ uint32_t *insn;
+ int insn_len;
+
+ /*
+  * Constants used by the program.  pipe_id indexes vec4 slots of the
+  * bound constant buffer, hw_id is the id pushed with each upload and
+  * value caches the last uploaded vec4 (see nv40_state_emit.c).
+  */
+ struct {
+ int pipe_id;
+ int hw_id;
+ float value[4];
+ } consts[256];
+ int num_consts;
+
+ /* NOTE(review): presumably input/output register masks - confirm */
+ uint32_t ir;
+ uint32_t or;
+};
+
+/*
+ * Driver-side fragment program: wraps the gallium shader state together
+ * with its translated form.
+ */
+struct nv40_fragment_program {
+ /* gallium shader this program was created from */
+ const struct pipe_shader_state *pipe;
+
+ /* set once translated; checked by nv40_state_update_fragprog() */
+ boolean translated;
+ /* cleared whenever a constant inside insn[] is rewritten */
+ boolean on_hw;
+
+ uint32_t *insn;
+ int insn_len;
+
+ /*
+  * Constants embedded in the instruction stream: pipe_id indexes vec4
+  * slots of the bound constant buffer (-1 entries are skipped) and
+  * hw_id is the index into insn[] where the vec4 is stored.
+  */
+ struct {
+ int pipe_id;
+ int hw_id;
+ } consts[256];
+ int num_consts;
+
+ struct pipe_buffer_handle *buffer;
+
+ boolean uses_kil;
+ boolean writes_depth;
+ int num_regs;
+};
+
+/*
+ * Depth state exactly as pushed to the hardware:
+ * nv40_depth_stencil_state_bind() emits this struct as 3 consecutive
+ * dwords starting at NV40TCL_DEPTH_FUNC, so the field order and the
+ * all-uint32_t layout must not change.
+ */
+struct nv40_depth_push {
+ uint32_t func;
+ uint32_t write_enable;
+ uint32_t test_enable;
+};
+
+/*
+ * Stencil state of one face exactly as pushed to the hardware:
+ * nv40_depth_stencil_state_bind() emits this struct as 8 consecutive
+ * dwords, so the field order and the all-uint32_t layout must not
+ * change.
+ */
+struct nv40_stencil_push {
+ uint32_t enable;
+ uint32_t wmask;
+ uint32_t func;
+ uint32_t ref;
+ uint32_t vmask;
+ uint32_t fail;
+ uint32_t zfail;
+ uint32_t zpass;
+};
+
+/*
+ * Driver-side depth/stencil CSO: the depth words followed by the back
+ * and front stencil words, each kept in push-ready layout.
+ *
+ * `stencil` has to be a struct, not a union: the create hook fills back
+ * and front with different values and the bind hook pushes 8 dwords from
+ * each.  As a union the two faces shared storage, so the front values
+ * overwrote the back ones and were emitted twice.
+ */
+struct nv40_depth_stencil_state {
+	struct nv40_depth_push depth;
+	struct {
+		struct nv40_stencil_push back;
+		struct nv40_stencil_push front;
+	} stencil;
+};
+
+/*
+ * Map a PIPE_TEX_WRAP_* mode to the matching NV40TCL_TEX_WRAP_S_* value.
+ * Unknown modes map to REPEAT.
+ */
+static INLINE unsigned
+nv40_tex_wrap_mode(unsigned wrap) {
+	unsigned hw = NV40TCL_TEX_WRAP_S_REPEAT;	/* also the fallback */
+
+	switch (wrap) {
+	case PIPE_TEX_WRAP_MIRROR_REPEAT:
+		hw = NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT;
+		break;
+	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+		hw = NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE;
+		break;
+	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+		hw = NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER;
+		break;
+	case PIPE_TEX_WRAP_CLAMP:
+		hw = NV40TCL_TEX_WRAP_S_CLAMP;
+		break;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+		hw = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE;
+		break;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+		hw = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER;
+		break;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP:
+		hw = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP;
+		break;
+	case PIPE_TEX_WRAP_REPEAT:
+	default:
+		break;
+	}
+
+	return hw;
+}
+
+/*
+ * Combine an image filter (f0, PIPE_TEX_FILTER_*) and a mip filter
+ * (f1, PIPE_TEX_MIPFILTER_*) into one NV40TCL_TEX_FILTER_MIN_* value.
+ * Any f0 other than NEAREST is treated as LINEAR, and any f1 other than
+ * NEAREST/LINEAR as "no mipmapping".
+ */
+static INLINE unsigned
+nv40_tex_filter(unsigned f0, unsigned f1) {
+	const int nearest = (f0 == PIPE_TEX_FILTER_NEAREST);
+
+	switch (f1) {
+	case PIPE_TEX_MIPFILTER_NEAREST:
+		if (nearest)
+			return NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST;
+		return NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST;
+	case PIPE_TEX_MIPFILTER_LINEAR:
+		if (nearest)
+			return NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR;
+		return NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR;
+	case PIPE_TEX_MIPFILTER_NONE:
+	default:
+		if (nearest)
+			return NV40TCL_TEX_FILTER_MIN_NEAREST;
+		return NV40TCL_TEX_FILTER_MIN_LINEAR;
+	}
+}
+
+#endif
diff --git a/src/mesa/pipe/nv40/nv40_state_emit.c b/src/mesa/pipe/nv40/nv40_state_emit.c
new file mode 100644
index 0000000000..a29c70538f
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_state_emit.c
@@ -0,0 +1,112 @@
+#include "nv40_context.h"
+#include "nv40_dma.h"
+#include "nv40_state.h"
+
+/*
+ * Bring the current fragment program up to date: translate it on first
+ * use, then refresh every constant embedded in its instruction stream
+ * from the bound constant buffer.  Whenever a constant actually changes,
+ * on_hw is cleared.
+ */
+static INLINE void
+nv40_state_update_fragprog(struct nv40_context *nv40)
+{
+	struct pipe_context *pipe = (struct pipe_context *)nv40;
+	struct nv40_fragment_program *fp = nv40->fragprog.fp;
+	float *map;
+	int i;
+
+	if (!fp->translated)
+		nv40_fragprog_translate(nv40, fp);
+
+	if (!fp->num_consts)
+		return;		/* no constants to refresh */
+
+	map = pipe->winsys->buffer_map(pipe->winsys,
+				       nv40->fragprog.constant_buf,
+				       PIPE_BUFFER_FLAG_READ);
+	for (i = 0; i < fp->num_consts; i++) {
+		uint pid = fp->consts[i].pipe_id;
+
+		/* -1 entries have no constant-buffer source */
+		if (pid != -1 &&
+		    memcmp(&fp->insn[fp->consts[i].hw_id], &map[pid*4],
+			   4 * sizeof(float)) != 0) {
+			memcpy(&fp->insn[fp->consts[i].hw_id], &map[pid*4],
+			       4 * sizeof(float));
+			fp->on_hw = 0;
+		}
+	}
+	pipe->winsys->buffer_unmap(pipe->winsys,
+				   nv40->fragprog.constant_buf);
+}
+
+/*
+ * Bring the current vertex program up to date: translate it on first use,
+ * then upload its constants.
+ *
+ * Constants with pipe_id >= 0 come from the bound constant buffer and are
+ * uploaded only when their value differs from the cached copy.  Constants
+ * with a negative pipe_id have no buffer slot; their stored value is
+ * uploaded as-is on every call.
+ *
+ * pid must be signed: as `uint` the `pid >= 0` test was always true, so a
+ * pipe_id of -1 indexed the mapped buffer far out of bounds.
+ */
+static INLINE void
+nv40_state_update_vertprog(struct nv40_context *nv40)
+{
+	struct pipe_context *pipe = (struct pipe_context *)nv40;
+	struct nv40_vertex_program *vp = nv40->vertprog.vp;
+	float *map;
+	int i;
+
+	if (!nv40->vertprog.vp->translated)
+		nv40_vertprog_translate(nv40, nv40->vertprog.vp);
+
+	if (vp->num_consts) {
+		map = pipe->winsys->buffer_map(pipe->winsys,
+					       nv40->vertprog.constant_buf,
+					       PIPE_BUFFER_FLAG_READ);
+		for (i = 0; i < vp->num_consts; i++) {
+			int pid = vp->consts[i].pipe_id;
+
+			if (pid >= 0) {
+				/* unchanged since the last upload: skip */
+				if (!memcmp(vp->consts[i].value, &map[pid*4],
+					    4 * sizeof(float)))
+					continue;
+				memcpy(vp->consts[i].value, &map[pid*4],
+				       4 * sizeof(float));
+			}
+
+			/* constant id followed by its four components */
+			BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
+			OUT_RING  (vp->consts[i].hw_id);
+			OUT_RINGp ((uint32_t *)vp->consts[i].value, 4);
+		}
+		pipe->winsys->buffer_unmap(pipe->winsys,
+					   nv40->vertprog.constant_buf);
+	}
+}
+
+/*
+ * Validate all dirty state before a draw.  Each section handles one
+ * dirty bit; the order matters because the texture-cache section looks
+ * at bits the fragment program section may already have cleared.
+ */
+void
+nv40_emit_hw_state(struct nv40_context *nv40)
+{
+ if (nv40->dirty & NV40_NEW_FRAGPROG) {
+ struct nv40_fragment_program *cur = nv40->fragprog.fp;
+
+ nv40_state_update_fragprog(nv40);
+
+ /* still on the hardware after the update: drop the dirty bit now,
+  * which also keeps it from triggering the cache packets below */
+ if (cur->on_hw)
+ nv40->dirty &= ~NV40_NEW_FRAGPROG;
+
+ /* (re)bind when the program changed or is not the active one */
+ if (!cur->on_hw || cur != nv40->fragprog.active_fp)
+ nv40_fragprog_bind(nv40, cur);
+ }
+
+ if (nv40->dirty & NV40_NEW_TEXTURE)
+ nv40_state_tex_update(nv40);
+
+ /* NOTE(review): presumably a texture cache flush (values 2 then 1) -
+  * confirm against the hardware documentation */
+ if (nv40->dirty & (NV40_NEW_TEXTURE | NV40_NEW_FRAGPROG)) {
+ BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (2);
+ BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (1);
+ nv40->dirty &= ~(NV40_NEW_TEXTURE | NV40_NEW_FRAGPROG);
+ }
+
+ if (nv40->dirty & NV40_NEW_VERTPROG) {
+ nv40_state_update_vertprog(nv40);
+ /* bind only when a different program becomes current */
+ if (nv40->vertprog.vp != nv40->vertprog.active_vp)
+ nv40_vertprog_bind(nv40, nv40->vertprog.vp);
+ nv40->dirty &= ~NV40_NEW_VERTPROG;
+ }
+
+ if (nv40->dirty & NV40_NEW_ARRAYS) {
+ nv40_vbo_arrays_update(nv40);
+ nv40->dirty &= ~NV40_NEW_ARRAYS;
+ }
+}
+
diff --git a/src/mesa/pipe/nv40/nv40_state_tex.c b/src/mesa/pipe/nv40/nv40_state_tex.c
new file mode 100644
index 0000000000..a92d6250a2
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_state_tex.c
@@ -0,0 +1,140 @@
+#include "nv40_context.h"
+#include "nv40_dma.h"
+
+/*
+ * Build one nv40_texture_format initializer: m is the PIPE_FORMAT_
+ * suffix, tf the NV40TCL_TEX_FORMAT_FORMAT_ suffix, ts0x..ts0w the
+ * S0 swizzle selections and ts1x..ts1w the S1 selections for X/Y/Z/W.
+ * The resulting entry always has `defined` set to TRUE.
+ *
+ * NOTE(review): every component uses the ..._S0_X_<value> / ..._S1_X_<value>
+ * names with a per-component shift; presumably the value encodings are
+ * identical for all four components - confirm.
+ */
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \
+{ \
+ TRUE, \
+ PIPE_FORMAT_##m, \
+ NV40TCL_TEX_FORMAT_FORMAT_##tf, \
+ (NV40TCL_TEX_SWIZZLE_S0_X_##ts0x << NV40TCL_TEX_SWIZZLE_S0_X_SHIFT) | \
+ (NV40TCL_TEX_SWIZZLE_S0_X_##ts0y << NV40TCL_TEX_SWIZZLE_S0_Y_SHIFT) | \
+ (NV40TCL_TEX_SWIZZLE_S0_X_##ts0z << NV40TCL_TEX_SWIZZLE_S0_Z_SHIFT) | \
+ (NV40TCL_TEX_SWIZZLE_S0_X_##ts0w << NV40TCL_TEX_SWIZZLE_S0_W_SHIFT) | \
+ (NV40TCL_TEX_SWIZZLE_S1_X_##ts1x << NV40TCL_TEX_SWIZZLE_S1_X_SHIFT) | \
+ (NV40TCL_TEX_SWIZZLE_S1_X_##ts1y << NV40TCL_TEX_SWIZZLE_S1_Y_SHIFT) | \
+ (NV40TCL_TEX_SWIZZLE_S1_X_##ts1z << NV40TCL_TEX_SWIZZLE_S1_Z_SHIFT) | \
+ (NV40TCL_TEX_SWIZZLE_S1_X_##ts1w << NV40TCL_TEX_SWIZZLE_S1_W_SHIFT), \
+}
+
+/* One row of the gallium-format to hardware-format translation table. */
+struct nv40_texture_format {
+ /* TRUE for real entries; nv40_tex_format() stops at the first
+  * entry where this is false */
+ boolean defined;
+ /* PIPE_FORMAT_* value this entry matches */
+ uint pipe;
+ /* NV40TCL_TEX_FORMAT_FORMAT_* value */
+ int format;
+ /* pre-shifted S0/S1 swizzle bits */
+ int swizzle;
+};
+
+/*
+ * Supported texture formats.  nv40_tex_format() scans this table until
+ * it reaches an entry whose `defined` flag is clear, so the table MUST
+ * end with the all-zero sentinel; without it the scan ran past the end
+ * of the array for every unsupported format.
+ */
+static struct nv40_texture_format
+nv40_texture_formats[] = {
+	_(U_A8_R8_G8_B8, A8R8G8B8,   S1,   S1,   S1,   S1, X, Y, Z, W),
+	_(U_A1_R5_G5_B5, A1R5G5B5,   S1,   S1,   S1,   S1, X, Y, Z, W),
+	_(U_A4_R4_G4_B4, A4R4G4B4,   S1,   S1,   S1,   S1, X, Y, Z, W),
+	_(U_R5_G6_B5   , R5G6B5  ,   S1,   S1,   S1,  ONE, X, Y, Z, W),
+	_(U_L8         , L8      ,   S1,   S1,   S1,  ONE, X, X, X, X),
+	_(U_A8         , L8      , ZERO, ZERO, ZERO,   S1, X, X, X, X),
+	_(U_I8         , L8      ,   S1,   S1,   S1,   S1, X, X, X, X),
+	_(U_A8_L8      , A8L8    ,   S1,   S1,   S1,   S1, Z, W, X, Y),
+//	_(RGB_DXT1     , 0x86,   S1,   S1,   S1,  ONE, X, Y, Z, W, 0x00, 0x00),
+//	_(RGBA_DXT1    , 0x86,   S1,   S1,   S1,   S1, X, Y, Z, W, 0x00, 0x00),
+//	_(RGBA_DXT3    , 0x87,   S1,   S1,   S1,   S1, X, Y, Z, W, 0x00, 0x00),
+//	_(RGBA_DXT5    , 0x88,   S1,   S1,   S1,   S1, X, Y, Z, W, 0x00, 0x00),
+	{ 0, 0, 0, 0 }	/* sentinel: defined == 0 terminates the scan */
+};
+
+/*
+ * Look up the table entry for a PIPE_FORMAT_* value.  The table is
+ * scanned until an entry with a clear `defined` flag is reached.
+ * Returns the matching entry, or NULL when the format is not listed.
+ */
+static struct nv40_texture_format *
+nv40_tex_format(uint pipe_format)
+{
+	struct nv40_texture_format *entry;
+
+	for (entry = nv40_texture_formats; entry->defined; entry++) {
+		if (entry->pipe == pipe_format)
+			return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Number of dimensions to program for a PIPE_TEXTURE_* target: 1 for 1D,
+ * 3 for 3D, 2 for 2D and cube maps.  Unknown targets are reported and
+ * treated as 2D.
+ */
+static INLINE int
+nv40_tex_dims(uint pipe_target)
+{
+	int dims = 2;
+
+	switch (pipe_target) {
+	case PIPE_TEXTURE_1D:
+		dims = 1;
+		break;
+	case PIPE_TEXTURE_3D:
+		dims = 3;
+		break;
+	case PIPE_TEXTURE_2D:
+	case PIPE_TEXTURE_CUBE:
+		break;
+	default:
+		NOUVEAU_ERR("AII unknown pipe target: %d\n", pipe_target);
+		break;
+	}
+
+	return dims;
+}
+
+/*
+ * Emit the full hardware setup for texture unit `unit` from its bound
+ * miptree and sampler state: offset, format, wrap, enable, swizzle,
+ * filter, size and border colour, followed by depth/pitch.
+ *
+ * Unsupported formats are reported and the unit is left untouched.
+ * nv40_tex_format() returns NULL for those, which the original
+ * dereferenced.
+ */
+static void
+nv40_tex_unit_enable(struct nv40_context *nv40, int unit)
+{
+	/* `nvws` is not referenced directly; presumably the OUT_RELOC*
+	 * macros need it in scope under this name */
+	struct nouveau_winsys *nvws = nv40->nvws;
+	struct nv40_sampler_state *ps = nv40->tex_sampler[unit];
+	struct pipe_mipmap_tree *mt = nv40->tex_miptree[unit];
+	struct nv40_texture_format *tf;
+	uint32_t txf, txs, txp;
+	int swizzled = 0; /*XXX: implement in region code? */
+
+	tf = nv40_tex_format(mt->format);
+	if (!tf || !tf->defined) {
+		NOUVEAU_ERR("Unsupported texture format: 0x%x\n", mt->format);
+		return;
+	}
+
+	txf  = (tf->format | 0x80) << NV40TCL_TEX_FORMAT_FORMAT_SHIFT;
+	txf |= ((mt->last_level - mt->first_level + 1) <<
+		NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT);
+	if (1) /* XXX */
+		txf |= NV40TCL_TEX_FORMAT_NO_BORDER;
+
+	txf |= (nv40_tex_dims(mt->target) << NV40TCL_TEX_FORMAT_DIMS_SHIFT);
+	if (0) /*XXX*/
+		txf |= NV40TCL_TEX_FORMAT_RECT;
+
+	/* linear textures carry their pitch in bytes; swizzled ones none */
+	if (swizzled) {
+		txp = 0;
+	} else {
+		txp  = mt->pitch * mt->cpp;
+		txf |= NV40TCL_TEX_FORMAT_LINEAR;
+	}
+
+	txs = tf->swizzle;
+	if (mt->format == PIPE_FORMAT_U_A8_L8)
+		txs |= (1<<16); /*nfi*/
+
+	BEGIN_RING(curie, NV40TCL_TEX_OFFSET(unit), 8);
+	OUT_RELOCl(mt->region->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+		   NOUVEAU_BO_RD);
+	OUT_RELOCd(mt->region->buffer, txf, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+		   NOUVEAU_BO_OR | NOUVEAU_BO_RD, NV40TCL_TEX_FORMAT_DMA0,
+		   NV40TCL_TEX_FORMAT_DMA1);
+	OUT_RING  (ps->wrap);
+	OUT_RING  (NV40TCL_TEX_ENABLE_ENABLE |
+		   (0x00078000) /* mipmap related? */);
+	OUT_RING  (txs);
+	OUT_RING  (ps->filt | 0x3fd6 /*voodoo*/);
+	OUT_RING  ((mt->width0 << NV40TCL_TEX_SIZE0_W_SHIFT) | mt->height0);
+	OUT_RING  (ps->bcol);
+	BEGIN_RING(curie, NV40TCL_TEX_SIZE1(unit), 1);
+	OUT_RING  ((mt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);
+}
+
+/*
+ * Process every texture unit flagged in nv40->tex_dirty, lowest set bit
+ * first: units with a miptree bound are fully programmed, units without
+ * one get a 0 written to their TEX_ENABLE method.  Each bit is cleared
+ * once its unit has been handled.
+ */
+void
+nv40_state_tex_update(struct nv40_context *nv40)
+{
+ while (nv40->tex_dirty) {
+ /* ffs() is 1-based, so bit n corresponds to unit n */
+ int unit = ffs(nv40->tex_dirty) - 1;
+
+ if (nv40->tex_miptree[unit]) {
+ nv40_tex_unit_enable(nv40, unit);
+ } else {
+ BEGIN_RING(curie, NV40TCL_TEX_ENABLE(unit), 1);
+ OUT_RING (0);
+ }
+
+ nv40->tex_dirty &= ~(1 << unit);
+ }
+}
+
diff --git a/src/mesa/pipe/nv40/nv40_surface.c b/src/mesa/pipe/nv40/nv40_surface.c
new file mode 100644
index 0000000000..84e0d79268
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_surface.c
@@ -0,0 +1,229 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv40_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_util.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_inlines.h"
+
+
+/*
+ * Clip the tile (x, y, w, h) against the surface `ps`.  Uses those five
+ * names from the enclosing function directly.  A tile starting outside
+ * the surface makes the *calling function* return; otherwise w and h are
+ * reduced in place so that the tile ends at the surface edge.
+ */
+#define CLIP_TILE \
+ do { \
+ if (x >= ps->width) \
+ return; \
+ if (y >= ps->height) \
+ return; \
+ if (x + w > ps->width) \
+ w = ps->width - x; \
+ if (y + h > ps->height) \
+ h = ps->height -y; \
+ } while(0)
+
+
+/**
+ * Read a tile of a mapped surface as float RGBA.
+ *
+ * Handles 32-bit A8R8G8B8 colour surfaces and S8_Z24 depth surfaces (the
+ * 24-bit depth value is scaled to [0,1] and replicated into all four
+ * channels).  The tile is clipped to the surface, but the destination
+ * row stride stays 4 * the requested width.
+ *
+ * Note: this is exactly like a8r8g8b8_get_tile() in sp_surface.c
+ * Share it someday.
+ */
+static void
+nv40_get_tile_rgba(struct pipe_context *pipe,
+                   struct pipe_surface *ps,
+                   uint x, uint y, uint w, uint h, float *p)
+{
+   const unsigned *texel_row
+      = ((const unsigned *) (ps->region->map + ps->offset))
+      + y * ps->region->pitch + x;
+   const unsigned dst_row_floats = w * 4;   /* taken before clipping */
+   unsigned row, col;
+
+   CLIP_TILE;
+
+   switch (ps->format) {
+   case PIPE_FORMAT_U_A8_R8_G8_B8:
+      for (row = 0; row < h; row++) {
+         for (col = 0; col < w; col++) {
+            const unsigned pixel = texel_row[col];
+            float *rgba = p + 4 * col;
+
+            rgba[0] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff);
+            rgba[1] = UBYTE_TO_FLOAT((pixel >>  8) & 0xff);
+            rgba[2] = UBYTE_TO_FLOAT((pixel >>  0) & 0xff);
+            rgba[3] = UBYTE_TO_FLOAT((pixel >> 24) & 0xff);
+         }
+         texel_row += ps->region->pitch;
+         p += dst_row_floats;
+      }
+      break;
+   case PIPE_FORMAT_S8_Z24:
+      {
+         const float scale = 1.0 / (float) 0xffffff;
+
+         for (row = 0; row < h; row++) {
+            for (col = 0; col < w; col++) {
+               const unsigned pixel = texel_row[col];
+               float *rgba = p + 4 * col;
+
+               rgba[0] =
+               rgba[1] =
+               rgba[2] =
+               rgba[3] = (pixel & 0xffffff) * scale;
+            }
+            texel_row += ps->region->pitch;
+            p += dst_row_floats;
+         }
+      }
+      break;
+   default:
+      assert(0);
+   }
+}
+
+
+/* Writing float RGBA tiles is not implemented; reaching this trips the
+ * assertion. */
+static void
+nv40_put_tile_rgba(struct pipe_context *pipe,
+                   struct pipe_surface *ps,
+                   uint x, uint y, uint w, uint h, const float *p)
+{
+   (void) pipe;
+   (void) ps;
+   (void) x;
+   (void) y;
+   (void) w;
+   (void) h;
+   (void) p;
+
+   /* TODO */
+   assert(0);
+}
+
+
+/*
+ * Copy a tile of raw pixels out of a mapped surface into `p`.
+ *
+ * The tile is clipped to the surface.  dst_stride is the byte distance
+ * between destination rows; 0 means "tightly packed for the requested
+ * (unclipped) width".  Only the clipped width is copied per row - the
+ * original copied the unclipped width and so read past the surface when
+ * the tile was clipped.
+ *
+ * XXX note: same as code in sp_surface.c
+ */
+static void
+nv40_get_tile(struct pipe_context *pipe,
+              struct pipe_surface *ps,
+              uint x, uint y, uint w, uint h,
+              void *p, int dst_stride)
+{
+   const uint cpp = ps->region->cpp;
+   const uint w0 = w;   /* requested width, before clipping */
+   const ubyte *pSrc;
+   ubyte *pDest;
+   uint i;
+
+   assert(ps->region->map);
+
+   CLIP_TILE;
+
+   if (dst_stride == 0) {
+      dst_stride = w0 * cpp;
+   }
+
+   pSrc = ps->region->map + ps->offset + (y * ps->region->pitch + x) * cpp;
+   pDest = (ubyte *) p;
+
+   for (i = 0; i < h; i++) {
+      memcpy(pDest, pSrc, w * cpp);
+      pDest += dst_stride;
+      pSrc += ps->region->pitch * cpp;
+   }
+}
+
+
+/*
+ * Copy a tile of raw pixels from `p` into a mapped surface.
+ *
+ * The tile is clipped to the surface.  src_stride is the byte distance
+ * between source rows; 0 means "tightly packed for the requested
+ * (unclipped) width".  Only the clipped width is copied per row - the
+ * original copied the unclipped width and so wrote past the end of the
+ * surface row when the tile was clipped.
+ *
+ * XXX note: same as code in sp_surface.c
+ */
+static void
+nv40_put_tile(struct pipe_context *pipe,
+              struct pipe_surface *ps,
+              uint x, uint y, uint w, uint h,
+              const void *p, int src_stride)
+{
+   const uint cpp = ps->region->cpp;
+   const uint w0 = w;   /* requested width, before clipping */
+   const ubyte *pSrc;
+   ubyte *pDest;
+   uint i;
+
+   assert(ps->region->map);
+
+   CLIP_TILE;
+
+   if (src_stride == 0) {
+      src_stride = w0 * cpp;
+   }
+
+   pSrc = (const ubyte *) p;
+   pDest = ps->region->map + ps->offset + (y * ps->region->pitch + x) * cpp;
+
+   for (i = 0; i < h; i++) {
+      memcpy(pDest, pSrc, w * cpp);
+      pDest += ps->region->pitch * cpp;
+      pSrc += src_stride;
+   }
+}
+
+
+/*
+ * Create a surface that views one image of a mipmap tree: the given
+ * level, plus a cube face or a 3D slice where the target has them.
+ * Returns the surface from the winsys allocator, or NULL if that fails.
+ *
+ * XXX note: same as code in sp_surface.c
+ */
+static struct pipe_surface *
+nv40_get_tex_surface(struct pipe_context *pipe,
+                     struct pipe_mipmap_tree *mt,
+                     unsigned face, unsigned level, unsigned zslice)
+{
+   struct pipe_surface *surf;
+   unsigned byte_offset = mt->level[level].level_offset;
+
+   switch (mt->target) {
+   case PIPE_TEXTURE_CUBE:
+      byte_offset += mt->level[level].image_offset[face] * mt->cpp;
+      break;
+   case PIPE_TEXTURE_3D:
+      byte_offset += mt->level[level].image_offset[zslice] * mt->cpp;
+      break;
+   default:
+      /* targets with a single image per level */
+      assert(face == 0);
+      assert(zslice == 0);
+      break;
+   }
+
+   surf = pipe->winsys->surface_alloc(pipe->winsys, mt->format);
+   if (!surf)
+      return surf;
+
+   assert(surf->format);
+   assert(surf->refcount);
+   pipe_region_reference(&surf->region, mt->region);
+   surf->width = mt->level[level].width;
+   surf->height = mt->level[level].height;
+   surf->offset = byte_offset;
+
+   return surf;
+}
+
+
+/* Install this file's surface hooks into the pipe_context embedded in
+ * `nv40`. */
+void
+nv40_init_surface_functions(struct nv40_context *nv40)
+{
+   struct pipe_context *pipe = &nv40->pipe;
+
+   pipe->get_tex_surface = nv40_get_tex_surface;
+
+   /* raw tile access */
+   pipe->get_tile = nv40_get_tile;
+   pipe->put_tile = nv40_put_tile;
+
+   /* float RGBA tile access */
+   pipe->get_tile_rgba = nv40_get_tile_rgba;
+   pipe->put_tile_rgba = nv40_put_tile_rgba;
+}
diff --git a/src/mesa/pipe/nv40/nv40_vbo.c b/src/mesa/pipe/nv40/nv40_vbo.c
new file mode 100644
index 0000000000..aa930476b6
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_vbo.c
@@ -0,0 +1,222 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_util.h"
+
+#include "nv40_context.h"
+#include "nv40_dma.h"
+#include "nv40_state.h"
+#include "nvgl_pipe.h"
+
+/*
+ * Draw 'count' consecutive vertices starting at 'start' as primitive
+ * 'mode'.
+ *
+ * Pending state is emitted first.  Vertices are submitted in batches:
+ * each VB_VERTEX_BATCH dword encodes (batch_size - 1) << 24 | first_vertex.
+ * One partial batch covers count & 0xff vertices, the rest goes out as
+ * full 256-vertex batches.  The context is flushed (and waited on)
+ * before returning.  Always returns TRUE.
+ */
+boolean
+nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
+		 unsigned count)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+	unsigned nr;
+
+	if (nv40->dirty)
+		nv40_emit_hw_state(nv40);
+
+	BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+	OUT_RING  (nvgl_primitive(mode));
+
+	nr = (count & 0xff);
+	if (nr) {
+		BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1);
+		OUT_RING  (((nr - 1) << 24) | start);
+		start += nr;
+	}
+
+	/*
+	 * A single packet is limited to 2047 dwords (the limit the original
+	 * code asserted on), so split the full batches over as many packets
+	 * as needed instead of blowing up on large arrays.
+	 */
+	nr = count >> 8;
+	while (nr) {
+		unsigned push = nr > 2047 ? 2047 : nr;
+
+		nr -= push;
+
+		BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push);
+		while (push--) {
+			OUT_RING(((0x100 - 1) << 24) | start);
+			start += 0x100;
+		}
+	}
+
+	/* Writing 0 to BEGIN_END closes the primitive. */
+	BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+	OUT_RING  (0);
+
+	pipe->flush(pipe, PIPE_FLUSH_WAIT);
+	return TRUE;
+}
+
+/*
+ * Emit 'count' 8-bit indices, starting at element 'start' of the mapped
+ * index buffer 'ib'.
+ *
+ * An odd leading index is sent on its own through VB_ELEMENT_U32; the
+ * remaining indices are sent two per dword through VB_ELEMENT_U16
+ * (second index in the upper 16 bits).
+ */
+static INLINE void
+nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
+		       unsigned start, unsigned count)
+{
+	uint8_t *elts = (uint8_t *)ib + start;
+	int push, i;
+
+	if (count & 1) {
+		BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
+		OUT_RING  (elts[0]);
+		elts++; count--;
+	}
+
+	while (count) {
+		/* count is even here, so push is always even as well. */
+		push = MIN2(count, 2046);
+
+		/*
+		 * The loop below writes one dword per index pair, so the
+		 * packet holds push / 2 dwords.  Announcing 'push' dwords
+		 * left the packet half filled and corrupted the stream.
+		 */
+		BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
+		for (i = 0; i < push; i+=2)
+			OUT_RING((elts[i+1] << 16) | elts[i]);
+
+		count -= push;
+		elts  += push;
+	}
+}
+
+/*
+ * Emit 'count' 16-bit indices, starting at element 'start' of the mapped
+ * index buffer 'ib'.
+ *
+ * An odd leading index is sent on its own through VB_ELEMENT_U32; the
+ * remaining indices are sent two per dword through VB_ELEMENT_U16
+ * (second index in the upper 16 bits).
+ */
+static INLINE void
+nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
+		       unsigned start, unsigned count)
+{
+	uint16_t *elts = (uint16_t *)ib + start;
+	int push, i;
+
+	if (count & 1) {
+		BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
+		OUT_RING  (elts[0]);
+		elts++; count--;
+	}
+
+	while (count) {
+		/* count is even here, so push is always even as well. */
+		push = MIN2(count, 2046);
+
+		/*
+		 * The loop below writes one dword per index pair, so the
+		 * packet holds push / 2 dwords.  Announcing 'push' dwords
+		 * left the packet half filled and corrupted the stream.
+		 */
+		BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
+		for (i = 0; i < push; i+=2)
+			OUT_RING((elts[i+1] << 16) | elts[i]);
+
+		count -= push;
+		elts  += push;
+	}
+}
+
+/*
+ * Emit 'count' 32-bit indices, starting at element 'start' of the mapped
+ * index buffer 'ib', one index per dword in packets of at most 2047.
+ */
+static INLINE void
+nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
+		       unsigned start, unsigned count)
+{
+	uint32_t *cur = (uint32_t *)ib + start;
+	int chunk;
+
+	for (; count; count -= chunk, cur += chunk) {
+		chunk = MIN2(count, 2047);
+
+		BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, chunk);
+		OUT_RINGp    (cur, chunk);
+	}
+}
+
+/*
+ * Draw 'count' indexed vertices as primitive 'mode'.
+ *
+ * indexSize is the size of one index in bytes (1, 2 or 4); 'start' is
+ * the first index to use.  The index buffer is mapped for reading and
+ * its contents are pushed inline through the ring.  Returns FALSE if
+ * the buffer cannot be mapped, TRUE otherwise.  An unsupported index
+ * size is only logged and draws nothing.
+ */
+boolean
+nv40_draw_elements(struct pipe_context *pipe,
+		   struct pipe_buffer_handle *indexBuffer, unsigned indexSize,
+		   unsigned mode, unsigned start, unsigned count)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+	void *ib;
+
+	if (nv40->dirty)
+		nv40_emit_hw_state(nv40);
+
+	ib = pipe->winsys->buffer_map(pipe->winsys, indexBuffer,
+				      PIPE_BUFFER_FLAG_READ);
+	if (!ib) {
+		NOUVEAU_ERR("Couldn't map index buffer!!\n");
+		return FALSE;
+	}
+
+	BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+	OUT_RING  (nvgl_primitive(mode));
+
+	switch (indexSize) {
+	case 1:
+		nv40_draw_elements_u08(nv40, ib, start, count);
+		break;
+	case 2:
+		nv40_draw_elements_u16(nv40, ib, start, count);
+		break;
+	case 4:
+		nv40_draw_elements_u32(nv40, ib, start, count);
+		break;
+	default:
+		NOUVEAU_ERR("unsupported elt size %d\n", indexSize);
+		break;
+	}
+
+	/* Writing 0 to BEGIN_END closes the primitive. */
+	BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+	OUT_RING  (0);
+
+	/*
+	 * Unmap through the buffer handle that was mapped above; the
+	 * original passed the CPU pointer returned by buffer_map() instead.
+	 */
+	pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer);
+	pipe->flush(pipe, PIPE_FLUSH_WAIT);
+	return TRUE;
+}
+
+/*
+ * Return the number of float components (1-4) in a vertex element
+ * format.  Unknown formats are logged and treated as one component.
+ */
+static INLINE int
+nv40_vbo_format_to_ncomp(uint format)
+{
+	int ncomp;
+
+	switch (format) {
+	case PIPE_FORMAT_R32G32B32A32_FLOAT:
+		ncomp = 4;
+		break;
+	case PIPE_FORMAT_R32G32B32_FLOAT:
+		ncomp = 3;
+		break;
+	case PIPE_FORMAT_R32G32_FLOAT:
+		ncomp = 2;
+		break;
+	case PIPE_FORMAT_R32_FLOAT:
+		ncomp = 1;
+		break;
+	default:
+		NOUVEAU_ERR("AII, unknown vbo format %d\n", format);
+		ncomp = 1;
+		break;
+	}
+
+	return ncomp;
+}
+
+/*
+ * Program the vertex buffer addresses and formats for every input the
+ * current vertex program reads (vp->ir is a bitmask of inputs).
+ *
+ * Slots 0 .. highest enabled input are written; slots inside that range
+ * that the program does not read get a zero address and a bare FLOAT
+ * format.  The vertex cache flush is emitted between the address and
+ * the format writes.
+ */
+void
+nv40_vbo_arrays_update(struct nv40_context *nv40)
+{
+	struct nouveau_winsys *nvws = nv40->nvws;
+	struct nv40_vertex_program *vp = nv40->vertprog.vp;
+	uint32_t inputs, vtxfmt[16];
+	/*
+	 * Start at 0: the original left num_hw uninitialised and then
+	 * incremented it, which is undefined when no input bit is set.
+	 */
+	int hw, num_hw = 0;
+
+	/* num_hw = index of the highest enabled input (of the low 16) + 1. */
+	inputs = vp->ir;
+	for (hw = 0; hw < 16 && inputs; hw++) {
+		if (inputs & (1 << hw)) {
+			num_hw = hw + 1;
+			inputs &= ~(1 << hw);
+		}
+	}
+
+	inputs = vp->ir;
+	if (num_hw)
+		BEGIN_RING(curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw);
+	for (hw = 0; hw < num_hw; hw++) {
+		struct pipe_vertex_element *ve;
+		struct pipe_vertex_buffer *vb;
+
+		if (!(inputs & (1 << hw))) {
+			/* Unused slot below the highest enabled input. */
+			OUT_RING(0);
+			vtxfmt[hw] = NV40TCL_VTXFMT_TYPE_FLOAT;
+			continue;
+		}
+
+		ve = &nv40->vtxelt[hw];
+		vb = &nv40->vtxbuf[ve->vertex_buffer_index];
+
+		OUT_RELOC(vb->buffer, vb->buffer_offset + ve->src_offset,
+			  NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
+			  NOUVEAU_BO_OR | NOUVEAU_BO_RD, 0,
+			  NV40TCL_VTXBUF_ADDRESS_DMA1);
+		vtxfmt[hw] = ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) |
+			      (nv40_vbo_format_to_ncomp(ve->src_format) <<
+			       NV40TCL_VTXFMT_SIZE_SHIFT) |
+			      NV40TCL_VTXFMT_TYPE_FLOAT);
+	}
+
+	BEGIN_RING(curie, 0x1710, 1);
+	OUT_RING  (0); /* vtx cache flush */
+	if (num_hw) {
+		BEGIN_RING(curie, NV40TCL_VTXFMT(0), num_hw);
+		OUT_RINGp (vtxfmt, num_hw);
+	}
+}
+
diff --git a/src/mesa/pipe/nv40/nv40_vertprog.c b/src/mesa/pipe/nv40/nv40_vertprog.c
new file mode 100644
index 0000000000..be550e4743
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_vertprog.c
@@ -0,0 +1,594 @@
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "pipe/tgsi/exec/tgsi_token.h"
+#include "pipe/tgsi/exec/tgsi_parse.h"
+
+#include "nv40_context.h"
+#include "nv40_dma.h"
+#include "nv40_state.h"
+
+#define SWZ_X 0
+#define SWZ_Y 1
+#define SWZ_Z 2
+#define SWZ_W 3
+#define MASK_X 8
+#define MASK_Y 4
+#define MASK_Z 2
+#define MASK_W 1
+#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
+#define DEF_SCALE 0
+#define DEF_CTEST 0
+#include "nv40_shader.h"
+
+#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
+#define neg(s) nv40_sr_neg((s))
+#define abs(s) nv40_sr_abs((s))
+
+/*
+ * Pre-encoded fallback vertex program: eight dwords, i.e. two
+ * instructions of four dwords each (the size nv40_vp_arith() emits).
+ */
+static uint32_t
+passthrough_vp_data[] = {
+ 0x40041c6c, 0x0040010d, 0x8106c083, 0x6041ff84,
+ 0x40041c6c, 0x0040000d, 0x8106c083, 0x6041ff81,
+};
+
+/*
+ * Program object wrapping the data above.  nv40_vertprog_bind() uses it
+ * in place of any program whose 'translated' flag is not set.
+ */
+static struct nv40_vertex_program
+passthrough_vp = {
+ .pipe = NULL,
+ .translated = TRUE,
+
+ .insn = passthrough_vp_data,
+ /* length is counted in dwords, not instructions */
+ .insn_len = sizeof(passthrough_vp_data) / sizeof(uint32_t),
+
+ /* input bitmask: inputs 0 and 1 (one bit per input, see emit_src) */
+ .ir = 0x00000003,
+ /* output bitmask: bit 0, which emit_dst sets for DEST_COL0 */
+ .or = 0x00000001,
+};
+
+/* Scratch state used while translating one TGSI vertex program. */
+struct nv40_vpc {
+ /* program being filled in (instructions, constants, ir/or masks) */
+ struct nv40_vertex_program *vp;
+
+ /* TGSI output register index -> NV40_VP_INST_DEST_* hardware output */
+ uint output_map[PIPE_MAX_SHADER_OUTPUTS];
+
+ /* highest TGSI temporary index seen so far; starts at -1 */
+ int high_temp;
+ /* scratch temporaries handed out for the current instruction */
+ int temp_temp_count;
+};
+
+/*
+ * Allocate a scratch temporary register for the instruction currently
+ * being translated.
+ *
+ * Scratch registers are numbered upwards from the first index above
+ * vpc->high_temp, the highest temporary the TGSI program has used so
+ * far.  temp_temp_count is reset at the start of every instruction by
+ * nv40_vertprog_parse_instruction().
+ */
+static INLINE struct nv40_sreg
+nv40_sr_temp(struct nv40_vpc *vpc)
+{
+	int idx;
+
+	idx  = vpc->temp_temp_count++;
+	/*
+	 * +1: high_temp is itself in use (or -1 when no temporary has been
+	 * seen).  Without it the first scratch register aliased the highest
+	 * live temporary, or got index -1.
+	 */
+	idx += vpc->high_temp + 1;
+	return nv40_sr(0, NV40_VP_SRC_REG_TYPE_TEMP, idx);
+}
+
+/*
+ * Return a source register referring to a constant slot of the program.
+ *
+ * 'pipe' is the index of the constant on the pipe (TGSI) side and
+ * x/y/z/w the value recorded for a newly created slot.  A pipe constant
+ * that already has a slot reuses it; otherwise a new slot is appended
+ * to vp->consts with hw_id equal to its position.
+ */
+static INLINE struct nv40_sreg
+nv40_sr_const(struct nv40_vpc *vpc, int pipe,
+	      float x, float y, float z, float w)
+{
+	struct nv40_vertex_program *vp = vpc->vp;
+	int idx;
+
+	/*
+	 * Reuse the slot of a constant that was referenced before.  The
+	 * original allocated a new slot on every reference, so two reads
+	 * of the same constant in one instruction produced two different
+	 * indices that emit_src() then OR'ed into the single constant
+	 * index field.  Negative ids are never shared -- presumably they
+	 * are not real pipe constants; confirm if such callers appear.
+	 */
+	if (pipe >= 0) {
+		for (idx = 0; idx < vp->num_consts; idx++) {
+			if (vp->consts[idx].pipe_id == pipe)
+				return nv40_sr(0, NV40_VP_SRC_REG_TYPE_CONST,
+					       idx);
+		}
+	}
+
+	idx = vp->num_consts;
+	vp->consts[idx].pipe_id = pipe;
+	vp->consts[idx].hw_id = idx;
+	vp->consts[idx].value[0] = x;
+	vp->consts[idx].value[1] = y;
+	vp->consts[idx].value[2] = z;
+	vp->consts[idx].value[3] = w;
+	vp->num_consts++;
+
+	return nv40_sr(0, NV40_VP_SRC_REG_TYPE_CONST, idx);
+}
+
+#define arith(cc,s,o,d,m,s0,s1,s2) \
+ nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2))
+#define temp(vpc) nv40_sr_temp((vpc))
+#define constant(v,p,x,y,z,w) nv40_sr_const((v), (p), (x), (y), (z), (w))
+
+/*
+ * Encode source operand 'pos' (0, 1 or 2) of the instruction at 'hw'.
+ *
+ * The operand descriptor (type, temp index, negate flag and swizzle) is
+ * built in one word and then scattered over the instruction dwords
+ * according to 'pos'.  Input and constant indices live in shared fields
+ * of hw[1]; reading an input also records it in vp->ir.  The abs flag
+ * is a per-operand bit in hw[0].
+ */
+static void
+emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src)
+{
+	struct nv40_vertex_program *prog = vpc->vp;
+	uint32_t desc;
+
+	desc = (src.type << NV40_VP_SRC_REG_TYPE_SHIFT) |
+	       (src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) |
+	       (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) |
+	       (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) |
+	       (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT);
+	if (src.negate)
+		desc |= NV40_VP_SRC_NEGATE;
+
+	if (src.abs)
+		hw[0] |= (1 << (21 + pos));
+
+	if (src.type == NV40_VP_SRC_REG_TYPE_INPUT) {
+		prog->ir |= (1 << src.index);
+		hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT);
+	} else if (src.type == NV40_VP_SRC_REG_TYPE_CONST) {
+		hw[1] |= (src.index << NV40_VP_INST_CONST_SRC_SHIFT);
+	} else {
+		desc |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT);
+	}
+
+	if (pos == 0) {
+		/* operand 0 straddles dwords 1 and 2 */
+		hw[1] |= ((desc & NV40_VP_SRC0_HIGH_MASK) >>
+			  NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT;
+		hw[2] |= (desc & NV40_VP_SRC0_LOW_MASK) <<
+			  NV40_VP_INST_SRC0L_SHIFT;
+	} else if (pos == 1) {
+		hw[2] |= desc << NV40_VP_INST_SRC1_SHIFT;
+	} else if (pos == 2) {
+		/* operand 2 straddles dwords 2 and 3 */
+		hw[2] |= ((desc & NV40_VP_SRC2_HIGH_MASK) >>
+			  NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT;
+		hw[3] |= (desc & NV40_VP_SRC2_LOW_MASK) <<
+			  NV40_VP_INST_SRC2L_SHIFT;
+	} else {
+		assert(0);
+	}
+}
+
+/*
+ * Encode the destination of the instruction at 'hw'.
+ *
+ * slot 0 is the vector unit, any other value the scalar unit.  A
+ * temporary destination (dst.output == 0) stores its index in the
+ * slot's DEST_TEMP field and fills the output field with DEST_MASK.  An
+ * output destination stores the hardware output index, sets the slot's
+ * RESULT flag and DEST_TEMP mask, and records the output in vp->or for
+ * the outputs that have a bit there.
+ */
+static void
+emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst)
+{
+	struct nv40_vertex_program *prog = vpc->vp;
+	int or_bit = -1;
+
+	if (dst.output == 0) {
+		hw[3] |= NV40_VP_INST_DEST_MASK;
+		if (slot == 0)
+			hw[0] |= (dst.index <<
+				  NV40_VP_INST_VEC_DEST_TEMP_SHIFT);
+		else
+			hw[3] |= (dst.index <<
+				  NV40_VP_INST_SCA_DEST_TEMP_SHIFT);
+		return;
+	}
+
+	/* Which bit of the output mask, if any, this output owns. */
+	switch (dst.index) {
+	case NV40_VP_INST_DEST_COL0 : or_bit = 0; break;
+	case NV40_VP_INST_DEST_COL1 : or_bit = 1; break;
+	case NV40_VP_INST_DEST_BFC0 : or_bit = 2; break;
+	case NV40_VP_INST_DEST_BFC1 : or_bit = 3; break;
+	case NV40_VP_INST_DEST_FOGC : or_bit = 4; break;
+	case NV40_VP_INST_DEST_PSZ  : or_bit = 5; break;
+	case NV40_VP_INST_DEST_TC(0): or_bit = 14; break;
+	case NV40_VP_INST_DEST_TC(1): or_bit = 15; break;
+	case NV40_VP_INST_DEST_TC(2): or_bit = 16; break;
+	case NV40_VP_INST_DEST_TC(3): or_bit = 17; break;
+	case NV40_VP_INST_DEST_TC(4): or_bit = 18; break;
+	case NV40_VP_INST_DEST_TC(5): or_bit = 19; break;
+	case NV40_VP_INST_DEST_TC(6): or_bit = 20; break;
+	case NV40_VP_INST_DEST_TC(7): or_bit = 21; break;
+	default:
+		break;
+	}
+	if (or_bit >= 0)
+		prog->or |= (1 << or_bit);
+
+	hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT);
+	if (slot == 0) {
+		hw[0] |= NV40_VP_INST_VEC_RESULT;
+		hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK;
+	} else {
+		hw[3] |= NV40_VP_INST_SCA_RESULT;
+		hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK;
+	}
+}
+
+/*
+ * Append one four-dword instruction to the program.
+ *
+ * slot selects the unit that executes 'op': 0 = vector, otherwise
+ * scalar.  The unit that is not used has its destination fields set to
+ * their all-ones mask.  The condition is always TR with an identity
+ * swizzle.  dst/mask describe the destination and write mask, s0-s2 the
+ * three source operands.  There is no check against the size of
+ * vp->insn; the caller must provide enough room.
+ */
+static void
+nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op,
+	      struct nv40_sreg dst, int mask,
+	      struct nv40_sreg s0, struct nv40_sreg s1,
+	      struct nv40_sreg s2)
+{
+	struct nv40_vertex_program *vp = vpc->vp;
+	uint32_t *hw = &vp->insn[vp->insn_len];
+
+	hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT);
+	hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) |
+		  (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) |
+		  (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) |
+		  (3 << NV40_VP_INST_COND_SWZ_W_SHIFT));
+
+	if (slot == 0) {
+		hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT);
+		hw[3] |= (NV40_VP_INST_SCA_RESULT |
+			  NV40_VP_INST_SCA_DEST_TEMP_MASK);
+		hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT);
+	} else {
+		hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT);
+		/*
+		 * The vector destination field lives in dword 0 (emit_dst()
+		 * writes VEC_DEST_TEMP there).  The original OR'ed the mask
+		 * into dword 1, where it lands on unrelated fields.  Bit 20
+		 * presumably extends the same field -- TODO confirm against
+		 * the hardware documentation.
+		 */
+		hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20));
+		hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT);
+	}
+
+	emit_dst(vpc, hw, slot, dst);
+	emit_src(vpc, hw, 0, s0);
+	emit_src(vpc, hw, 1, s1);
+	emit_src(vpc, hw, 2, s2);
+
+	/* insn_len counts dwords */
+	vp->insn_len += 4;
+}
+
+/*
+ * Convert a TGSI source register into an nv40 source register.
+ *
+ * Inputs and temporaries keep their TGSI index (temporaries also raise
+ * vpc->high_temp); constants are given a slot through constant().  The
+ * abs/negate modifiers and the swizzle are copied over.  An unsupported
+ * register file is logged and yields input 0 with the requested
+ * modifiers.
+ */
+static INLINE struct nv40_sreg
+tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
+	/*
+	 * Defined fallback for the error path; the original returned an
+	 * uninitialised struct there.
+	 */
+	struct nv40_sreg src = nv40_sr(0, NV40_VP_SRC_REG_TYPE_INPUT, 0);
+
+	switch (fsrc->SrcRegister.File) {
+	case TGSI_FILE_INPUT:
+		src = nv40_sr(0, NV40_VP_SRC_REG_TYPE_INPUT,
+			      fsrc->SrcRegister.Index);
+		break;
+	case TGSI_FILE_CONSTANT:
+		src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+		break;
+	case TGSI_FILE_TEMPORARY:
+		if (vpc->high_temp < fsrc->SrcRegister.Index)
+			vpc->high_temp = fsrc->SrcRegister.Index;
+		src = nv40_sr(0, NV40_VP_SRC_REG_TYPE_TEMP,
+			      fsrc->SrcRegister.Index);
+		break;
+	default:
+		NOUVEAU_ERR("bad src file\n");
+		break;
+	}
+
+	src.abs = fsrc->SrcRegisterExtMod.Absolute;
+	src.negate = fsrc->SrcRegister.Negate;
+	src.swz[0] = fsrc->SrcRegister.SwizzleX;
+	src.swz[1] = fsrc->SrcRegister.SwizzleY;
+	src.swz[2] = fsrc->SrcRegister.SwizzleZ;
+	src.swz[3] = fsrc->SrcRegister.SwizzleW;
+	return src;
+}
+
+/*
+ * Convert a TGSI destination register into an nv40 register.
+ *
+ * Outputs are translated through vpc->output_map; temporaries keep
+ * their TGSI index and raise vpc->high_temp.  An unsupported register
+ * file is logged and yields temporary 0.
+ */
+static INLINE struct nv40_sreg
+tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
+	/* Defined fallback; the original left both unset on the error path. */
+	uint out = 0, idx = 0;
+
+	switch (fdst->DstRegister.File) {
+	case TGSI_FILE_OUTPUT:
+		out = 1;
+		idx = vpc->output_map[fdst->DstRegister.Index];
+		break;
+	case TGSI_FILE_TEMPORARY:
+		out = 0;
+		idx = fdst->DstRegister.Index;
+		/*
+		 * Compare as signed: high_temp starts at -1, and in a mixed
+		 * int/uint comparison -1 converts to the largest unsigned
+		 * value, so the test could never succeed.
+		 */
+		if (vpc->high_temp < (int)idx)
+			vpc->high_temp = idx;
+		break;
+	default:
+		NOUVEAU_ERR("bad dst file\n");
+		break;
+	}
+
+	return nv40_sr(out, NV40_VP_SRC_REG_TYPE_TEMP, idx);
+}
+
+/*
+ * Translate a TGSI write mask into the MASK_X/Y/Z/W bits used by this
+ * file (X is the most significant of the four bits).
+ */
+static INLINE int
+tgsi_mask(uint tgsi)
+{
+	return ((tgsi & TGSI_WRITEMASK_X) ? MASK_X : 0) |
+	       ((tgsi & TGSI_WRITEMASK_Y) ? MASK_Y : 0) |
+	       ((tgsi & TGSI_WRITEMASK_Z) ? MASK_Z : 0) |
+	       ((tgsi & TGSI_WRITEMASK_W) ? MASK_W : 0);
+}
+
+/*
+ * Translate one TGSI instruction into hardware instruction(s).
+ *
+ * Sources are converted first, then the first destination register and
+ * its write mask, and finally the opcode is mapped onto one or more
+ * arith() calls.  In those calls the second argument selects the unit
+ * (0 = vector, 1 = scalar, see nv40_vp_arith) and 'none' fills unused
+ * operand positions.
+ *
+ * Returns TRUE on success (RET emits nothing), FALSE for an unsupported
+ * source register file or opcode.
+ */
+static boolean
+nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
+ const struct tgsi_full_instruction *finst)
+{
+ struct nv40_sreg src[3], dst, tmp;
+ struct nv40_sreg none = nv40_sr(0, NV40_VP_SRC_REG_TYPE_INPUT, 0);
+ int mask;
+ /* index of the one input / constant used directly, -1 = none yet */
+ int ai = -1, ci = -1;
+ int i;
+
+ if (finst->Instruction.Opcode == TGSI_OPCODE_RET)
+ return TRUE;
+
+ /* scratch temporaries are per instruction */
+ vpc->temp_temp_count = 0;
+ /*
+ * First pass: temporaries only.  tgsi_src() raises vpc->high_temp for
+ * each of them and temp() derives scratch indices from high_temp, so
+ * this has to run before any scratch register is allocated below.
+ */
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ src[i] = tgsi_src(vpc, fsrc);
+ }
+ }
+
+ /*
+ * Second pass: inputs and constants.  emit_src() stores the input and
+ * the constant index in single per-instruction fields, so only one
+ * distinct input index and one distinct constant index are used
+ * directly; any other is first copied to a scratch temporary by MOV.
+ */
+ for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
+ const struct tgsi_full_src_register *fsrc;
+
+ fsrc = &finst->FullSrcRegisters[i];
+ switch (fsrc->SrcRegister.File) {
+ case TGSI_FILE_INPUT:
+ if (ai == -1 || ai == fsrc->SrcRegister.Index) {
+ ai = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ NOUVEAU_MSG("extra src attr %d\n",
+ fsrc->SrcRegister.Index);
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_CONSTANT:
+ if (ci == -1 || ci == fsrc->SrcRegister.Index) {
+ ci = fsrc->SrcRegister.Index;
+ src[i] = tgsi_src(vpc, fsrc);
+ } else {
+ src[i] = temp(vpc);
+ arith(vpc, 0, OP_MOV, src[i], MASK_ALL,
+ tgsi_src(vpc, fsrc), none, none);
+ }
+ break;
+ case TGSI_FILE_TEMPORARY:
+ /* handled above */
+ break;
+ default:
+ NOUVEAU_ERR("bad src file\n");
+ return FALSE;
+ }
+ }
+
+ /* only the first destination register is used */
+ dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
+ mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+
+ switch (finst->Instruction.Opcode) {
+ case TGSI_OPCODE_ABS:
+ /* MOV with the abs modifier on the source */
+ arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none);
+ break;
+ case TGSI_OPCODE_ADD:
+ /* the second addend goes in operand 2, operand 1 is unused */
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]);
+ break;
+ case TGSI_OPCODE_ARL:
+ arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_DP3:
+ arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DP4:
+ arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DPH:
+ arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_DST:
+ arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_EX2:
+ /* scalar ops take their source in operand 2 */
+ arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_EXP:
+ arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_FLR:
+ arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_FRC:
+ arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_LG2:
+ arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LIT:
+ arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_LOG:
+ arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_MAD:
+ arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]);
+ break;
+ case TGSI_OPCODE_MAX:
+ arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MIN:
+ arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_MOV:
+ arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none);
+ break;
+ case TGSI_OPCODE_MUL:
+ arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_POW:
+ /* pow(a, b) = EX2(b * LG2(a)), computed on the x components */
+ tmp = temp(vpc);
+ arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none,
+ swz(src[0], X, X, X, X));
+ arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
+ swz(src[1], X, X, X, X), none);
+ arith(vpc, 1, OP_EX2, dst, mask, none, none,
+ swz(tmp, X, X, X, X));
+ break;
+ case TGSI_OPCODE_RCP:
+ arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_RSQ:
+ arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]);
+ break;
+ case TGSI_OPCODE_SGE:
+ arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SLT:
+ arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none);
+ break;
+ case TGSI_OPCODE_SUB:
+ /* a - b = ADD(a, -b) */
+ arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1]));
+ break;
+ case TGSI_OPCODE_XPD:
+ /* cross product: tmp = a.zxy * b.yzx, dst.xyz = a.yzx * b.zxy - tmp */
+ tmp = temp(vpc);
+ arith(vpc, 0, OP_MUL, tmp, mask,
+ swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
+ arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W),
+ swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
+ neg(tmp));
+ break;
+ default:
+ NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+/*
+ * Record which hardware output a declared TGSI output register maps to.
+ *
+ * The semantic name and index select an NV40_VP_INST_DEST_* value, which
+ * is stored in vpc->output_map at the first register of the declared
+ * range.  Returns FALSE for a semantic, or semantic index, that has no
+ * hardware output.
+ */
+static boolean
+nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
+				const struct tgsi_full_declaration *fdec)
+{
+	int dest;
+
+	switch (fdec->Semantic.SemanticName) {
+	case TGSI_SEMANTIC_POSITION:
+		dest = NV40_VP_INST_DEST_POS;
+		break;
+	case TGSI_SEMANTIC_COLOR:
+		switch (fdec->Semantic.SemanticIndex) {
+		case 0:
+			dest = NV40_VP_INST_DEST_COL0;
+			break;
+		case 1:
+			dest = NV40_VP_INST_DEST_COL1;
+			break;
+		default:
+			NOUVEAU_ERR("bad colour semantic index\n");
+			return FALSE;
+		}
+		break;
+	case TGSI_SEMANTIC_BCOLOR:
+		switch (fdec->Semantic.SemanticIndex) {
+		case 0:
+			dest = NV40_VP_INST_DEST_BFC0;
+			break;
+		case 1:
+			dest = NV40_VP_INST_DEST_BFC1;
+			break;
+		default:
+			NOUVEAU_ERR("bad bcolour semantic index\n");
+			return FALSE;
+		}
+		break;
+	case TGSI_SEMANTIC_FOG:
+		dest = NV40_VP_INST_DEST_FOGC;
+		break;
+	case TGSI_SEMANTIC_PSIZE:
+		dest = NV40_VP_INST_DEST_PSZ;
+		break;
+	case TGSI_SEMANTIC_GENERIC:
+		/* generic outputs map onto the eight texcoord outputs */
+		if (!(fdec->Semantic.SemanticIndex <= 7)) {
+			NOUVEAU_ERR("bad generic semantic index\n");
+			return FALSE;
+		}
+		dest = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+		break;
+	default:
+		NOUVEAU_ERR("bad output semantic\n");
+		return FALSE;
+	}
+
+	vpc->output_map[fdec->u.DeclarationRange.First] = dest;
+	return TRUE;
+}
+
+/*
+ * Translate the TGSI tokens of 'vp' into hardware instructions.
+ *
+ * A zeroed buffer for 128 instructions (4 dwords each) is allocated for
+ * vp->insn.  Output declarations fill the output map and every
+ * instruction token is translated in order.  On success the last
+ * instruction is flagged NV40_VP_INST_LAST and vp->translated is set.
+ * On any failure (allocation, unsupported token, empty program) the
+ * buffer is released and vp->translated is left untouched, so
+ * nv40_vertprog_bind() falls back to its passthrough program.
+ */
+void
+nv40_vertprog_translate(struct nv40_context *nv40,
+			struct nv40_vertex_program *vp)
+{
+	struct tgsi_parse_context parse;
+	struct nv40_vpc *vpc = NULL;
+
+	vpc = calloc(1, sizeof(struct nv40_vpc));
+	if (!vpc)
+		return;
+
+	vp->insn = calloc(1, 128*4*sizeof(uint32_t));
+	if (!vp->insn) {
+		/* the original went on to write through the NULL pointer */
+		free(vpc);
+		return;
+	}
+	vpc->vp = vp;
+	vpc->high_temp = -1;
+
+	tgsi_parse_init(&parse, vp->pipe->tokens);
+
+	while (!tgsi_parse_end_of_tokens(&parse)) {
+		tgsi_parse_token(&parse);
+
+		switch (parse.FullToken.Token.Type) {
+		case TGSI_TOKEN_TYPE_DECLARATION:
+		{
+			const struct tgsi_full_declaration *fdec;
+			fdec = &parse.FullToken.FullDeclaration;
+			switch (fdec->Declaration.File) {
+			case TGSI_FILE_OUTPUT:
+				if (!nv40_vertprog_parse_decl_output(vpc, fdec))
+					goto out_err;
+				break;
+			default:
+				break;
+			}
+		}
+			break;
+		case TGSI_TOKEN_TYPE_IMMEDIATE:
+			break;
+		case TGSI_TOKEN_TYPE_INSTRUCTION:
+		{
+			const struct tgsi_full_instruction *finst;
+
+			finst = &parse.FullToken.FullInstruction;
+			if (!nv40_vertprog_parse_instruction(vpc, finst))
+				goto out_err;
+		}
+			break;
+		default:
+			break;
+		}
+	}
+
+	/*
+	 * A program that emitted nothing (e.g. only RET) has no last
+	 * instruction to flag; the original wrote to insn[-1] here.
+	 */
+	if (vp->insn_len == 0)
+		goto out_err;
+
+	vp->insn[vp->insn_len - 1] |= NV40_VP_INST_LAST;
+#if 0
+	{
+		int i;
+		for (i = 0; i < vp->insn_len; i++)
+			NOUVEAU_ERR("inst[%d] = 0x%08x\n", i, vp->insn[i]);
+	}
+#endif
+	vp->translated = TRUE;
+	tgsi_parse_free(&parse);
+	free(vpc);
+	return;
+
+out_err:
+	/* do not leave a half-built program behind */
+	free(vp->insn);
+	vp->insn = NULL;
+	vp->insn_len = 0;
+	tgsi_parse_free(&parse);
+	free(vpc);
+}
+
+/*
+ * Make 'vp' the active vertex program.
+ *
+ * A program that has not been translated is replaced by the built-in
+ * passthrough program.  If the chosen program is not on the hardware
+ * yet, the previously active program is marked as evicted and the new
+ * one is uploaded at instruction 0, four dwords per packet.  The start
+ * address and the input/output masks are then programmed.
+ */
+void
+nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp)
+{
+	struct nv40_vertex_program *prog = vp;
+	int dw;
+
+	if (!prog->translated) {
+		NOUVEAU_ERR("vtxprog invalid, using passthrough shader\n");
+		prog = &passthrough_vp;
+	}
+
+	if (!prog->on_hw) {
+		struct nv40_vertex_program *prev = nv40->vertprog.active_vp;
+
+		if (prev)
+			prev->on_hw = FALSE;
+		prog->on_hw = TRUE;
+		prog->start_ip = 0;
+
+		BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
+		OUT_RING  (prog->start_ip);
+		dw = 0;
+		while (dw < prog->insn_len) {
+			BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4);
+			OUT_RINGp (&prog->insn[dw], 4);
+			dw += 4;
+		}
+	}
+
+	BEGIN_RING(curie, NV40TCL_VP_START_FROM_ID, 1);
+	OUT_RING  (prog->start_ip);
+	BEGIN_RING(curie, NV40TCL_VP_ATTRIB_EN, 2);
+	OUT_RING  (prog->ir);
+	OUT_RING  (prog->or);
+
+	nv40->vertprog.active_vp = prog;
+}
diff --git a/src/mesa/pipe/nv40/nvgl_pipe.h b/src/mesa/pipe/nv40/nvgl_pipe.h
new file mode 100644
index 0000000000..15ff318023
--- /dev/null
+++ b/src/mesa/pipe/nv40/nvgl_pipe.h
@@ -0,0 +1,198 @@
+#ifndef __NVGL_PIPE_H__
+#define __NVGL_PIPE_H__
+
+#include <GL/gl.h>
+
+/*
+ * Map a PIPE_BLENDFACTOR_* value to the matching GL blend factor enum.
+ * Unknown values map to GL_ONE.
+ */
+static INLINE unsigned
+nvgl_blend_func(unsigned factor)
+{
+	switch (factor) {
+	case PIPE_BLENDFACTOR_ZERO:               return GL_ZERO;
+	case PIPE_BLENDFACTOR_ONE:                return GL_ONE;
+	case PIPE_BLENDFACTOR_SRC_COLOR:          return GL_SRC_COLOR;
+	case PIPE_BLENDFACTOR_SRC_ALPHA:          return GL_SRC_ALPHA;
+	case PIPE_BLENDFACTOR_DST_ALPHA:          return GL_DST_ALPHA;
+	case PIPE_BLENDFACTOR_DST_COLOR:          return GL_DST_COLOR;
+	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return GL_SRC_ALPHA_SATURATE;
+	case PIPE_BLENDFACTOR_CONST_COLOR:        return GL_CONSTANT_COLOR;
+	case PIPE_BLENDFACTOR_CONST_ALPHA:        return GL_CONSTANT_ALPHA;
+	case PIPE_BLENDFACTOR_INV_SRC_COLOR:      return GL_ONE_MINUS_SRC_COLOR;
+	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:      return GL_ONE_MINUS_SRC_ALPHA;
+	case PIPE_BLENDFACTOR_INV_DST_ALPHA:      return GL_ONE_MINUS_DST_ALPHA;
+	case PIPE_BLENDFACTOR_INV_DST_COLOR:      return GL_ONE_MINUS_DST_COLOR;
+	case PIPE_BLENDFACTOR_INV_CONST_COLOR:    return GL_ONE_MINUS_CONSTANT_COLOR;
+	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:    return GL_ONE_MINUS_CONSTANT_ALPHA;
+	default:
+		break;
+	}
+
+	return GL_ONE;
+}
+
+/*
+ * Map a PIPE_BLEND_* equation to the matching GL blend equation enum.
+ * Unknown values map to GL_FUNC_ADD.
+ */
+static INLINE unsigned
+nvgl_blend_eqn(unsigned func)
+{
+	switch (func) {
+	case PIPE_BLEND_ADD:              return GL_FUNC_ADD;
+	case PIPE_BLEND_SUBTRACT:         return GL_FUNC_SUBTRACT;
+	case PIPE_BLEND_REVERSE_SUBTRACT: return GL_FUNC_REVERSE_SUBTRACT;
+	case PIPE_BLEND_MIN:              return GL_MIN;
+	case PIPE_BLEND_MAX:              return GL_MAX;
+	default:
+		break;
+	}
+
+	return GL_FUNC_ADD;
+}
+
+/*
+ * Map a PIPE_LOGICOP_* value to the matching GL logic op enum.
+ * Unknown values map to GL_CLEAR.
+ */
+static INLINE unsigned
+nvgl_logicop_func(unsigned func)
+{
+	switch (func) {
+	case PIPE_LOGICOP_CLEAR:         return GL_CLEAR;
+	case PIPE_LOGICOP_NOR:           return GL_NOR;
+	case PIPE_LOGICOP_AND_INVERTED:  return GL_AND_INVERTED;
+	case PIPE_LOGICOP_COPY_INVERTED: return GL_COPY_INVERTED;
+	case PIPE_LOGICOP_AND_REVERSE:   return GL_AND_REVERSE;
+	case PIPE_LOGICOP_INVERT:        return GL_INVERT;
+	case PIPE_LOGICOP_XOR:           return GL_XOR;
+	case PIPE_LOGICOP_NAND:          return GL_NAND;
+	case PIPE_LOGICOP_AND:           return GL_AND;
+	case PIPE_LOGICOP_EQUIV:         return GL_EQUIV;
+	case PIPE_LOGICOP_NOOP:          return GL_NOOP;
+	case PIPE_LOGICOP_OR_INVERTED:   return GL_OR_INVERTED;
+	case PIPE_LOGICOP_COPY:          return GL_COPY;
+	case PIPE_LOGICOP_OR_REVERSE:    return GL_OR_REVERSE;
+	case PIPE_LOGICOP_OR:            return GL_OR;
+	case PIPE_LOGICOP_SET:           return GL_SET;
+	default:
+		break;
+	}
+
+	return GL_CLEAR;
+}
+
+/*
+ * Map a PIPE_FUNC_* comparison to the matching GL comparison enum.
+ * Unknown values map to GL_NEVER.
+ */
+static INLINE unsigned
+nvgl_comparison_op(unsigned op)
+{
+	switch (op) {
+	case PIPE_FUNC_NEVER:    return GL_NEVER;
+	case PIPE_FUNC_LESS:     return GL_LESS;
+	case PIPE_FUNC_EQUAL:    return GL_EQUAL;
+	case PIPE_FUNC_LEQUAL:   return GL_LEQUAL;
+	case PIPE_FUNC_GREATER:  return GL_GREATER;
+	case PIPE_FUNC_NOTEQUAL: return GL_NOTEQUAL;
+	case PIPE_FUNC_GEQUAL:   return GL_GEQUAL;
+	case PIPE_FUNC_ALWAYS:   return GL_ALWAYS;
+	default:
+		break;
+	}
+
+	return GL_NEVER;
+}
+
+/*
+ * Map a PIPE_POLYGON_MODE_* value to the matching GL polygon mode enum.
+ * Unknown values map to GL_FILL.
+ */
+static INLINE unsigned
+nvgl_polygon_mode(unsigned mode)
+{
+	switch (mode) {
+	case PIPE_POLYGON_MODE_FILL:  return GL_FILL;
+	case PIPE_POLYGON_MODE_LINE:  return GL_LINE;
+	case PIPE_POLYGON_MODE_POINT: return GL_POINT;
+	default:
+		break;
+	}
+
+	return GL_FILL;
+}
+
+/*
+ * Map a PIPE_STENCIL_OP_* value to the matching GL stencil op enum.
+ * Unknown values map to GL_KEEP.
+ */
+static INLINE unsigned
+nvgl_stencil_op(unsigned op)
+{
+	switch (op) {
+	case PIPE_STENCIL_OP_KEEP:      return GL_KEEP;
+	case PIPE_STENCIL_OP_ZERO:      return GL_ZERO;
+	case PIPE_STENCIL_OP_REPLACE:   return GL_REPLACE;
+	case PIPE_STENCIL_OP_INCR:      return GL_INCR;
+	case PIPE_STENCIL_OP_DECR:      return GL_DECR;
+	case PIPE_STENCIL_OP_INCR_WRAP: return GL_INCR_WRAP;
+	case PIPE_STENCIL_OP_DECR_WRAP: return GL_DECR_WRAP;
+	case PIPE_STENCIL_OP_INVERT:    return GL_INVERT;
+	default:
+		break;
+	}
+
+	return GL_KEEP;
+}
+
+/*
+ * Map a PIPE_PRIM_* value to the primitive code written to BEGIN_END:
+ * the matching GL primitive enum plus one.  Unknown values map to the
+ * code for points.
+ */
+static INLINE unsigned
+nvgl_primitive(unsigned prim) {
+	unsigned gl_prim;
+
+	switch (prim) {
+	case PIPE_PRIM_POINTS:         gl_prim = GL_POINTS;         break;
+	case PIPE_PRIM_LINES:          gl_prim = GL_LINES;          break;
+	case PIPE_PRIM_LINE_LOOP:      gl_prim = GL_LINE_LOOP;      break;
+	case PIPE_PRIM_LINE_STRIP:     gl_prim = GL_LINE_STRIP;     break;
+	case PIPE_PRIM_TRIANGLES:      gl_prim = GL_TRIANGLES;      break;
+	case PIPE_PRIM_TRIANGLE_STRIP: gl_prim = GL_TRIANGLE_STRIP; break;
+	case PIPE_PRIM_TRIANGLE_FAN:   gl_prim = GL_TRIANGLE_FAN;   break;
+	case PIPE_PRIM_QUADS:          gl_prim = GL_QUADS;          break;
+	case PIPE_PRIM_QUAD_STRIP:     gl_prim = GL_QUAD_STRIP;     break;
+	case PIPE_PRIM_POLYGON:        gl_prim = GL_POLYGON;        break;
+	default:                       gl_prim = GL_POINTS;         break;
+	}
+
+	return gl_prim + 1;
+}
+
+#endif