From f9d09a2e7859a2cf025d71b7c3cb189edb6688c4 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Sun, 21 Feb 2010 15:13:12 +0100
Subject: nv30, nv40: move last files to nvfx/ and rm -rf nv30 nv40

This is the last nvfx unification patch.

nv[34]0_fragtex.c are moved to the common nvfx/ directory.
nv[34]0_shader.h are renamed to nv[34]0_vertprog.h and moved to
the common nvfx/ directory.

The separate nv30 and nv40 directories are removed from the build
system.
---
 src/gallium/drivers/nvfx/Makefile        |   2 +
 src/gallium/drivers/nvfx/nv30_fragtex.c  | 147 +++++++++++++++++++++++++
 src/gallium/drivers/nvfx/nv30_vertprog.h | 169 +++++++++++++++++++++++++++++
 src/gallium/drivers/nvfx/nv40_fragtex.c  | 174 ++++++++++++++++++++++++++++++
 src/gallium/drivers/nvfx/nv40_vertprog.h | 177 +++++++++++++++++++++++++++++++
 src/gallium/drivers/nvfx/nvfx_draw.c     |   4 +-
 src/gallium/drivers/nvfx/nvfx_vertprog.c |   4 +-
 7 files changed, 673 insertions(+), 4 deletions(-)
 create mode 100644 src/gallium/drivers/nvfx/nv30_fragtex.c
 create mode 100644 src/gallium/drivers/nvfx/nv30_vertprog.h
 create mode 100644 src/gallium/drivers/nvfx/nv40_fragtex.c
 create mode 100644 src/gallium/drivers/nvfx/nv40_vertprog.h

(limited to 'src/gallium/drivers/nvfx')

diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile
index 51fa34cfad..e912177b21 100644
--- a/src/gallium/drivers/nvfx/Makefile
+++ b/src/gallium/drivers/nvfx/Makefile
@@ -9,6 +9,8 @@ C_SOURCES = \
 	nvfx_draw.c \
 	nvfx_fragprog.c \
 	nvfx_fragtex.c \
+	nv30_fragtex.c \
+	nv40_fragtex.c \
 	nvfx_miptree.c \
 	nvfx_query.c \
 	nvfx_screen.c \
diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c
new file mode 100644
index 0000000000..2b56f45492
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv30_fragtex.c
@@ -0,0 +1,147 @@
+#include "util/u_format.h"
+
+#include "nvfx_context.h"
+#include "nouveau/nouveau_util.h"
+#include "nvfx_tex.h"
+
+/* Translate anisotropy and LOD settings from cso into ps->en / ps->filt. */
+void
+nv30_sampler_state_init(struct pipe_context *pipe,
+			  struct nvfx_sampler_state *ps,
+			  const struct pipe_sampler_state *cso)
+{
+	if (cso->max_anisotropy >= 8) {
+		ps->en |= NV34TCL_TX_ENABLE_ANISO_8X;
+	} else if (cso->max_anisotropy >= 4) {
+		ps->en |= NV34TCL_TX_ENABLE_ANISO_4X;
+	} else if (cso->max_anisotropy >= 2) {
+		ps->en |= NV34TCL_TX_ENABLE_ANISO_2X;
+	}
+
+	{
+		float limit;
+
+		/* encode the clamped bias so the 13-bit field cannot wrap */
+		limit = CLAMP(cso->lod_bias, -16.0, 15.0);
+		ps->filt |= (int)(limit * 256.0) & 0x1fff;
+
+		limit = CLAMP(cso->max_lod, 0.0, 15.0);
+		ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/;
+
+		limit = CLAMP(cso->min_lod, 0.0, 15.0);
+		ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/;
+	}
+}
+
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w)                        \
+{                                                                              \
+  TRUE,                                                                        \
+  PIPE_FORMAT_##m,                                                             \
+  NV34TCL_TX_FORMAT_FORMAT_##tf,                                               \
+  (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y |           \
+   NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w |           \
+   NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y |           \
+   NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w)            \
+}
+/* One table row; the _() initializer above lists values in this field order. */
+struct nv30_texture_format {
+	boolean defined;
+	uint	pipe;
+	int     format;
+	int     swizzle;
+};
+/* Rows keyed by PIPE_FORMAT_*; the empty last row (defined == 0) ends lookups. */
+static struct nv30_texture_format
+nv30_texture_formats[] = {
+	_(B8G8R8X8_UNORM, A8R8G8B8,   S1,   S1,   S1,  ONE, X, Y, Z, W),
+	_(B8G8R8A8_UNORM, A8R8G8B8,   S1,   S1,   S1,   S1, X, Y, Z, W),
+	_(B5G5R5A1_UNORM, A1R5G5B5,   S1,   S1,   S1,   S1, X, Y, Z, W),
+	_(B4G4R4A4_UNORM, A4R4G4B4,   S1,   S1,   S1,   S1, X, Y, Z, W),
+	_(B5G6R5_UNORM  , R5G6B5  ,   S1,   S1,   S1,  ONE, X, Y, Z, W),
+	_(L8_UNORM      , L8      ,   S1,   S1,   S1,  ONE, X, X, X, X),
+	_(A8_UNORM      , L8      , ZERO, ZERO, ZERO,   S1, X, X, X, X),
+	_(I8_UNORM      , L8      ,   S1,   S1,   S1,   S1, X, X, X, X),
+	_(L8A8_UNORM    , A8L8    ,   S1,   S1,   S1,   S1, X, X, X, Y),
+	_(Z16_UNORM     , R5G6B5  ,   S1,   S1,   S1,  ONE, X, X, X, X),
+	_(S8Z24_UNORM   , A8R8G8B8,   S1,   S1,   S1,  ONE, X, X, X, X),
+	_(DXT1_RGB      , DXT1    ,   S1,   S1,   S1,  ONE, X, Y, Z, W),
+	_(DXT1_RGBA     , DXT1    ,   S1,   S1,   S1,   S1, X, Y, Z, W),
+	_(DXT3_RGBA     , DXT3    ,   S1,   S1,   S1,   S1, X, Y, Z, W),
+	_(DXT5_RGBA     , DXT5    ,   S1,   S1,   S1,   S1, X, Y, Z, W),
+	{},
+};
+
+/* Linear search of nv30_texture_formats; logs and returns NULL on a miss. */
+static struct nv30_texture_format *
+nv30_fragtex_format(uint pipe_format)
+{
+	struct nv30_texture_format *entry;
+
+	/* the zeroed terminator entry has defined == 0 and stops the walk */
+	for (entry = nv30_texture_formats; entry->defined; entry++)
+		if (entry->pipe == pipe_format)
+			return entry;
+
+	NOUVEAU_ERR("unknown texture format %s\n", util_format_name(pipe_format));
+	return NULL;
+}
+
+/* Build the TX state object for texture unit "unit"; NULL if unsupported. */
+struct nouveau_stateobj *
+nv30_fragtex_build(struct nvfx_context *nvfx, int unit)
+{
+	struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
+	struct nvfx_miptree *nv30mt = nvfx->tex_miptree[unit];
+	struct pipe_texture *pt = &nv30mt->base;
+	struct nouveau_bo *bo = nouveau_bo(nv30mt->buffer);
+	struct nv30_texture_format *tf;
+	struct nouveau_stateobj *so;
+	uint32_t txf, txs;
+	unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+	tf = nv30_fragtex_format(pt->format);
+	if (!tf)
+		return NULL;
+
+	txf  = tf->format;
+	txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0);
+	txf |= log2i(pt->width0) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
+	txf |= log2i(pt->height0) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
+	txf |= log2i(pt->depth0) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
+	txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000;
+
+	switch (pt->target) {
+	case PIPE_TEXTURE_CUBE:
+		txf |= NV34TCL_TX_FORMAT_CUBIC;
+		/* fall-through */
+	case PIPE_TEXTURE_2D:
+		txf |= NV34TCL_TX_FORMAT_DIMS_2D;
+		break;
+	case PIPE_TEXTURE_3D:
+		txf |= NV34TCL_TX_FORMAT_DIMS_3D;
+		break;
+	case PIPE_TEXTURE_1D:
+		txf |= NV34TCL_TX_FORMAT_DIMS_1D;
+		break;
+	default:
+		NOUVEAU_ERR("Unknown target %d\n", pt->target);
+		return NULL;
+	}
+
+	txs = tf->swizzle;
+	/* one method carrying 8 words: two relocs followed by six data words */
+	so = so_new(1, 8, 2);
+	so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8);
+	so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
+	so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR,
+		      NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
+	so_data  (so, ps->wrap);
+	so_data  (so, NV34TCL_TX_ENABLE_ENABLE | ps->en);
+	so_data  (so, txs);
+	so_data  (so, ps->filt | 0x2000 /*voodoo*/);
+	so_data  (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
+		       pt->height0);
+	so_data  (so, ps->bcol);
+
+	return so;
+}
diff --git a/src/gallium/drivers/nvfx/nv30_vertprog.h b/src/gallium/drivers/nvfx/nv30_vertprog.h
new file mode 100644
index 0000000000..ec0444c07f
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv30_vertprog.h
@@ -0,0 +1,169 @@
+#ifndef __NV30_SHADER_H__
+#define __NV30_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * 128bit opcodes, split into 4 32-bit ones for ease of use.
+ *
+ * Non-native instructions
+ *   ABS - MOV + NV40_VP_INST0_DEST_ABS
+ *   POW - EX2 + MUL + LG2
+ *   SUB - ADD, second source negated
+ *   SWZ - MOV
+ *   XPD -
+ *
+ * Register access
+ *   - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
+ *   - Only one CONST can be accessed per-instruction (move extras into TEMPs)
+ *
+ * Relative Addressing
+ *   According to the value returned for
+ *   MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB
+ *
+ *   there are only two address registers available.  The destination in the
+ *   ARL instruction is set to TEMP <n> (The temp isn't actually written).
+ *
+ *   When using vanilla ARB_v_p, the proprietary driver will squish both the
+ *   available ADDRESS regs into the first hardware reg in the X and Y
+ *   components.
+ *
+ *   To use an address reg as an index into consts, the CONST_SRC is set to
+ *   (const_base + offset) and INDEX_CONST is set.
+ *
+ *   To access the second address reg use ADDR_REG_SELECT_1. A particular
+ *   component of the address regs is selected with ADDR_SWZ.
+ *
+ *   Only one address register can be accessed per instruction.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details) Conditional
+ * execution of an instruction is enabled by setting COND_TEST_ENABLE, and
+ * selecting the condition which will allow the test to pass with
+ * COND_{FL,LT,...}.  It is possible to swizzle the values in the condition
+ * register, which allows for testing against an individual component.
+ *
+ * Branching:
+ *
+ *   The BRA/CAL instructions seem to follow a slightly different opcode
+ *   layout.  The destination instruction ID (IADDR) overlaps a source field.
+ *   Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO
+ *   command, and is incremented automatically on each UPLOAD_INST FIFO
+ *   command.
+ *
+ *   Conditional branching is achieved by using the condition tests described
+ *   above.  There doesn't appear to be dedicated looping instructions, but
+ *   this can be done using a temp reg + conditional branching.
+ *
+ *   Subroutines may be uploaded before the main program itself, but the first
+ *   executed instruction is determined by the PROGRAM_START_ID FIFO command.
+ *
+ */
+
+/* DWORD 0 */
+
+#define NV30_VP_INST_ADDR_REG_SELECT_1        (1 << 24)
+#define NV30_VP_INST_SRC2_ABS           (1 << 23) /* guess */
+#define NV30_VP_INST_SRC1_ABS           (1 << 22) /* guess */
+#define NV30_VP_INST_SRC0_ABS           (1 << 21) /* guess */
+#define NV30_VP_INST_VEC_RESULT         (1 << 20)
+#define NV30_VP_INST_DEST_TEMP_ID_SHIFT        16
+#define NV30_VP_INST_DEST_TEMP_ID_MASK        (0x0F << 16)
+#define NV30_VP_INST_COND_UPDATE_ENABLE        (1<<15)
+#define NV30_VP_INST_VEC_DEST_TEMP_MASK      (0xF << 16)
+#define NV30_VP_INST_COND_TEST_ENABLE        (1<<14)
+#define NV30_VP_INST_COND_SHIFT          11
+#define NV30_VP_INST_COND_MASK          (0x07 << 11)
+#define NV30_VP_INST_COND_SWZ_X_SHIFT        9
+#define NV30_VP_INST_COND_SWZ_X_MASK        (0x03 <<  9)
+#define NV30_VP_INST_COND_SWZ_Y_SHIFT        7
+#define NV30_VP_INST_COND_SWZ_Y_MASK        (0x03 <<  7)
+#define NV30_VP_INST_COND_SWZ_Z_SHIFT        5
+#define NV30_VP_INST_COND_SWZ_Z_MASK        (0x03 <<  5)
+#define NV30_VP_INST_COND_SWZ_W_SHIFT        3
+#define NV30_VP_INST_COND_SWZ_W_MASK        (0x03 <<  3)
+#define NV30_VP_INST_COND_SWZ_ALL_SHIFT        3
+#define NV30_VP_INST_COND_SWZ_ALL_MASK        (0xFF <<  3)
+#define NV30_VP_INST_ADDR_SWZ_SHIFT        1
+#define NV30_VP_INST_ADDR_SWZ_MASK        (0x03 <<  1)
+#define NV30_VP_INST_SCA_OPCODEH_SHIFT        0
+#define NV30_VP_INST_SCA_OPCODEH_MASK        (0x01 <<  0)
+
+/* DWORD 1 */
+#define NV30_VP_INST_SCA_OPCODEL_SHIFT        28
+#define NV30_VP_INST_SCA_OPCODEL_MASK        (0x0F << 28)
+#define NV30_VP_INST_VEC_OPCODE_SHIFT        23
+#define NV30_VP_INST_VEC_OPCODE_MASK        (0x1F << 23)
+#define NV30_VP_INST_CONST_SRC_SHIFT        14
+#define NV30_VP_INST_CONST_SRC_MASK        (0xFF << 14)
+#define NV30_VP_INST_INPUT_SRC_SHIFT        9    /*NV20*/
+#define NV30_VP_INST_INPUT_SRC_MASK        (0x0F <<  9)  /*NV20*/
+#define NV30_VP_INST_SRC0H_SHIFT        0    /*NV20*/
+#define NV30_VP_INST_SRC0H_MASK          (0x1FF << 0)  /*NV20*/
+
+/* Please note: the IADDR fields overlap other fields because they are used
+ * only for branch instructions.  See Branching: label above
+ *
+ * DWORD 2
+ */
+#define NV30_VP_INST_SRC0L_SHIFT        26    /*NV20*/
+#define NV30_VP_INST_SRC0L_MASK         (0x3F  <<26)  /* NV30_VP_SRC0_LOW_MASK << 26 */
+#define NV30_VP_INST_SRC1_SHIFT         11    /*NV20*/
+#define NV30_VP_INST_SRC1_MASK          (0x7FFF<<11)  /*NV20*/
+#define NV30_VP_INST_SRC2H_SHIFT        0    /*NV20*/
+#define NV30_VP_INST_SRC2H_MASK          (0x7FF << 0)  /* NV30_VP_SRC2_HIGH_MASK >> 4*/
+#define NV30_VP_INST_IADDR_SHIFT        2 /* NOTE(review): IADDR_MASK below starts at bit 28, not 2 - confirm */
+#define NV30_VP_INST_IADDR_MASK          (0xF <<  28)   /* NV30_VP_SRC2_LOW_MASK << 28 */
+
+/* DWORD 3 */
+#define NV30_VP_INST_SRC2L_SHIFT        28    /*NV20*/
+#define NV30_VP_INST_SRC2L_MASK          (0x0F  <<28)  /*NV20*/
+#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT      24
+#define NV30_VP_INST_STEMP_WRITEMASK_MASK      (0x0F << 24)
+#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT      20
+#define NV30_VP_INST_VTEMP_WRITEMASK_MASK      (0x0F << 20)
+#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT      16
+#define NV30_VP_INST_SDEST_WRITEMASK_MASK      (0x0F << 16)
+#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT      12    /*NV20*/
+#define NV30_VP_INST_VDEST_WRITEMASK_MASK      (0x0F << 12)  /*NV20*/
+#define NV30_VP_INST_DEST_SHIFT        2
+#define NV30_VP_INST_DEST_MASK        (0x0F <<  2)
+#  define NV30_VP_INST_DEST_POS  0
+#  define NV30_VP_INST_DEST_BFC0  1
+#  define NV30_VP_INST_DEST_BFC1  2
+#  define NV30_VP_INST_DEST_COL0  3
+#  define NV30_VP_INST_DEST_COL1  4
+#  define NV30_VP_INST_DEST_FOGC  5
+#  define NV30_VP_INST_DEST_PSZ   6
+#  define NV30_VP_INST_DEST_TC(n)  (8+n)
+
+/* Useful to split the source selection regs into their pieces */
+#define NV30_VP_SRC0_HIGH_SHIFT                                                6
+#define NV30_VP_SRC0_HIGH_MASK                                        0x00007FC0
+#define NV30_VP_SRC0_LOW_MASK                                         0x0000003F
+#define NV30_VP_SRC2_HIGH_SHIFT                                                4
+#define NV30_VP_SRC2_HIGH_MASK                                        0x00007FF0
+#define NV30_VP_SRC2_LOW_MASK                                         0x0000000F
+
+
+/* Source-register definition - matches NV20 exactly */
+#define NV30_VP_SRC_NEGATE          (1<<14)
+#define NV30_VP_SRC_SWZ_X_SHIFT        12
+#define NV30_VP_SRC_REG_SWZ_X_MASK        (0x03  <<12)
+#define NV30_VP_SRC_SWZ_Y_SHIFT        10
+#define NV30_VP_SRC_REG_SWZ_Y_MASK        (0x03  <<10)
+#define NV30_VP_SRC_SWZ_Z_SHIFT        8
+#define NV30_VP_SRC_REG_SWZ_Z_MASK        (0x03  << 8)
+#define NV30_VP_SRC_SWZ_W_SHIFT        6
+#define NV30_VP_SRC_REG_SWZ_W_MASK        (0x03  << 6)
+#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT        6
+#define NV30_VP_SRC_REG_SWZ_ALL_MASK        (0xFF  << 6)
+#define NV30_VP_SRC_TEMP_SRC_SHIFT        2
+#define NV30_VP_SRC_REG_TEMP_ID_MASK        (0x0F  << 0) /* NOTE(review): shift above is 2 but this mask starts at bit 0, overlapping REG_TYPE_MASK - confirm */
+#define NV30_VP_SRC_REG_TYPE_SHIFT        0
+#define NV30_VP_SRC_REG_TYPE_MASK        (0x03  << 0)
+#define NV30_VP_SRC_REG_TYPE_TEMP  1
+#define NV30_VP_SRC_REG_TYPE_INPUT  2
+#define NV30_VP_SRC_REG_TYPE_CONST  3 /* guess */
+
+#include "nvfx_shader.h"
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c
new file mode 100644
index 0000000000..5889b5e40d
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv40_fragtex.c
@@ -0,0 +1,174 @@
+#include "util/u_format.h"
+#include "nvfx_context.h"
+#include "nvfx_tex.h"
+
+/* Fill ps->wrap, ps->en and ps->filt from the anisotropy and LOD fields
+ * of cso.  The pipe argument is not used.
+ */
+void
+nv40_sampler_state_init(struct pipe_context *pipe,
+			  struct nvfx_sampler_state *ps,
+			  const struct pipe_sampler_state *cso)
+{
+	if (cso->max_anisotropy >= 2) {
+		/* no idea, binary driver sets it, works without it.. meh.. */
+		ps->wrap |= (1 << 5);
+
+		if (cso->max_anisotropy >= 16) {
+			ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X;
+		} else if (cso->max_anisotropy >= 12) {
+			ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X;
+		} else if (cso->max_anisotropy >= 10) {
+			ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X;
+		} else if (cso->max_anisotropy >= 8) {
+			ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X;
+		} else if (cso->max_anisotropy >= 6) {
+			ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X;
+		} else if (cso->max_anisotropy >= 4) {
+			ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X;
+		} else {
+			ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X;
+		}
+	}
+
+	{
+		float limit;
+
+		/* Encode the clamped bias: an out-of-range request would
+		 * otherwise wrap around inside the 13-bit field. */
+		limit = CLAMP(cso->lod_bias, -16.0, 15.0);
+		ps->filt |= (int)(limit * 256.0) & 0x1fff;
+
+		limit = CLAMP(cso->max_lod, 0.0, 15.0);
+		ps->en |= (int)(limit * 256.0) << 7;
+
+		limit = CLAMP(cso->min_lod, 0.0, 15.0);
+		ps->en |= (int)(limit * 256.0) << 19;
+	}
+}
+
+#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw)            \
+{                                                                              \
+  TRUE,                                                                        \
+  PIPE_FORMAT_##m,                                                             \
+  NV40TCL_TEX_FORMAT_FORMAT_##tf,                                              \
+  (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y |         \
+   NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w |         \
+   NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y |         \
+   NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w),         \
+  ((NV34TCL_TX_FILTER_SIGNED_RED*sx) | (NV34TCL_TX_FILTER_SIGNED_GREEN*sy) |       \
+   (NV34TCL_TX_FILTER_SIGNED_BLUE*sz) | (NV34TCL_TX_FILTER_SIGNED_ALPHA*sw))       \
+}
+/* One table row; the _() initializer above lists values in this field order. */
+struct nv40_texture_format {
+	boolean defined;
+	uint	pipe;
+	int     format;
+	int     swizzle;
+	int     sign;
+};
+/* Rows keyed by PIPE_FORMAT_*; the empty last row (defined == 0) ends lookups. */
+static struct nv40_texture_format
+nv40_texture_formats[] = {
+	_(B8G8R8X8_UNORM, A8R8G8B8,   S1,   S1,   S1,  ONE, X, Y, Z, W, 0, 0, 0, 0),
+	_(B8G8R8A8_UNORM, A8R8G8B8,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0),
+	_(B5G5R5A1_UNORM, A1R5G5B5,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0),
+	_(B4G4R4A4_UNORM, A4R4G4B4,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0),
+	_(B5G6R5_UNORM  , R5G6B5  ,   S1,   S1,   S1,  ONE, X, Y, Z, W, 0, 0, 0, 0),
+	_(L8_UNORM      , L8      ,   S1,   S1,   S1,  ONE, X, X, X, X, 0, 0, 0, 0),
+	_(A8_UNORM      , L8      , ZERO, ZERO, ZERO,   S1, X, X, X, X, 0, 0, 0, 0),
+	_(R16_SNORM     , A16     , ZERO, ZERO,   S1,  ONE, X, X, X, Y, 1, 1, 1, 1),
+	_(I8_UNORM      , L8      ,   S1,   S1,   S1,   S1, X, X, X, X, 0, 0, 0, 0),
+	_(L8A8_UNORM    , A8L8    ,   S1,   S1,   S1,   S1, X, X, X, Y, 0, 0, 0, 0),
+	_(Z16_UNORM     , Z16     ,   S1,   S1,   S1,  ONE, X, X, X, X, 0, 0, 0, 0),
+	_(S8Z24_UNORM   , Z24     ,   S1,   S1,   S1,  ONE, X, X, X, X, 0, 0, 0, 0),
+	_(DXT1_RGB      , DXT1    ,   S1,   S1,   S1,  ONE, X, Y, Z, W, 0, 0, 0, 0),
+	_(DXT1_RGBA     , DXT1    ,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0),
+	_(DXT3_RGBA     , DXT3    ,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0),
+	_(DXT5_RGBA     , DXT5    ,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0),
+	{},
+};
+
+/* Linear search of nv40_texture_formats; logs and returns NULL on a miss. */
+static struct nv40_texture_format *
+nv40_fragtex_format(uint pipe_format)
+{
+	struct nv40_texture_format *entry;
+
+	/* the zeroed terminator entry has defined == 0 and stops the walk */
+	for (entry = nv40_texture_formats; entry->defined; entry++)
+		if (entry->pipe == pipe_format)
+			return entry;
+
+	NOUVEAU_ERR("unknown texture format %s\n", util_format_name(pipe_format));
+	return NULL;
+}
+
+/* Build the TX state object for texture unit "unit"; NULL if unsupported. */
+struct nouveau_stateobj *
+nv40_fragtex_build(struct nvfx_context *nvfx, int unit)
+{
+	struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
+	struct nvfx_miptree *nv40mt = nvfx->tex_miptree[unit];
+	struct nouveau_bo *bo = nouveau_bo(nv40mt->buffer);
+	struct pipe_texture *pt = &nv40mt->base;
+	struct nv40_texture_format *tf;
+	struct nouveau_stateobj *so;
+	uint32_t txf, txs, txp;
+	unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+
+	tf = nv40_fragtex_format(pt->format);
+	if (!tf)
+		return NULL; /* assert(0) vanishes under NDEBUG, leaving tf NULL */
+
+	txf  = ps->fmt;
+	txf |= tf->format | 0x8000;
+	txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT);
+
+	if (1) /* XXX */
+		txf |= NV34TCL_TX_FORMAT_NO_BORDER;
+
+	switch (pt->target) {
+	case PIPE_TEXTURE_CUBE:
+		txf |= NV34TCL_TX_FORMAT_CUBIC;
+		/* fall-through */
+	case PIPE_TEXTURE_2D:
+		txf |= NV34TCL_TX_FORMAT_DIMS_2D;
+		break;
+	case PIPE_TEXTURE_3D:
+		txf |= NV34TCL_TX_FORMAT_DIMS_3D;
+		break;
+	case PIPE_TEXTURE_1D:
+		txf |= NV34TCL_TX_FORMAT_DIMS_1D;
+		break;
+	default:
+		NOUVEAU_ERR("Unknown target %d\n", pt->target);
+		return NULL;
+	}
+
+	if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+		txp = 0;
+	} else {
+		txp  = nv40mt->level[0].pitch;
+		txf |= NV40TCL_TEX_FORMAT_LINEAR;
+	}
+
+	txs = tf->swizzle;
+
+	so = so_new(2, 9, 2);
+	so_method(so, nvfx->screen->eng3d, NV34TCL_TX_OFFSET(unit), 8);
+	so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
+	so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR,
+		      NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
+	so_data  (so, ps->wrap);
+	so_data  (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en);
+	so_data  (so, txs);
+	so_data  (so, ps->filt | tf->sign | 0x2000 /*voodoo*/);
+	so_data  (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
+		       pt->height0);
+	so_data  (so, ps->bcol);
+	so_method(so, nvfx->screen->eng3d, NV40TCL_TEX_SIZE1(unit), 1);
+	so_data  (so, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);
+
+	return so;
+}
diff --git a/src/gallium/drivers/nvfx/nv40_vertprog.h b/src/gallium/drivers/nvfx/nv40_vertprog.h
new file mode 100644
index 0000000000..7337293bab
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nv40_vertprog.h
@@ -0,0 +1,177 @@
+#ifndef __NV40_SHADER_H__
+#define __NV40_SHADER_H__
+
+/* Vertex programs instruction set
+ *
+ * The NV40 instruction set is very similar to NV30.  Most fields are in
+ * a slightly different position in the instruction however.
+ *
+ * Merged instructions
+ *     In some cases it is possible to put two instructions into one opcode
+ *     slot.  The rules for when this is OK are not entirely clear to me yet.
+ *
+ *     There are separate writemasks and dest temp register fields for each
+ *     grouping of instructions.  There is however only one field with the
+ *     ID of a result register.  Writing to temp/result regs is selected by
+ *     setting VEC_RESULT/SCA_RESULT.
+ *
+ * Temporary registers
+ *     The source/dest temp register fields have been extended by 1 bit, to
+ *     give a total of 32 temporary registers.
+ *
+ * Relative Addressing
+ *     NV40 can use an address register to index into vertex attribute regs.
+ *     This is done by putting the offset value into INPUT_SRC and setting
+ *     the INDEX_INPUT flag.
+ *
+ * Conditional execution (see NV_vertex_program{2,3} for details)
+ *     There is a second condition code register on NV40; its use is enabled
+ *     by setting the COND_REG_SELECT_1 flag.
+ *
+ * Texture lookup
+ *     TODO
+ */
+
+/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */
+#define NV40_VP_INST_VEC_RESULT                                        (1 << 30)
+/* uncertain.. */
+#define NV40_VP_INST_COND_UPDATE_ENABLE                        ((1 << 14)|1<<29)
+/* use address reg as index into attribs */
+#define NV40_VP_INST_INDEX_INPUT                                       (1 << 27)
+#define NV40_VP_INST_COND_REG_SELECT_1                                 (1 << 25)
+#define NV40_VP_INST_ADDR_REG_SELECT_1                                 (1 << 24)
+#define NV40_VP_INST_SRC2_ABS                                          (1 << 23)
+#define NV40_VP_INST_SRC1_ABS                                          (1 << 22)
+#define NV40_VP_INST_SRC0_ABS                                          (1 << 21)
+#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT                                      15
+#define NV40_VP_INST_VEC_DEST_TEMP_MASK                             (0x1F << 15)
+#define NV40_VP_INST_COND_TEST_ENABLE                                  (1 << 13)
+#define NV40_VP_INST_COND_SHIFT                                               10
+#define NV40_VP_INST_COND_MASK                                       (0x7 << 10)
+#define NV40_VP_INST_COND_SWZ_X_SHIFT                                          8
+#define NV40_VP_INST_COND_SWZ_X_MASK                                    (3 << 8)
+#define NV40_VP_INST_COND_SWZ_Y_SHIFT                                          6
+#define NV40_VP_INST_COND_SWZ_Y_MASK                                    (3 << 6)
+#define NV40_VP_INST_COND_SWZ_Z_SHIFT                                          4
+#define NV40_VP_INST_COND_SWZ_Z_MASK                                    (3 << 4)
+#define NV40_VP_INST_COND_SWZ_W_SHIFT                                          2
+#define NV40_VP_INST_COND_SWZ_W_MASK                                    (3 << 2)
+#define NV40_VP_INST_COND_SWZ_ALL_SHIFT                                        2
+#define NV40_VP_INST_COND_SWZ_ALL_MASK                               (0xFF << 2)
+#define NV40_VP_INST_ADDR_SWZ_SHIFT                                            0
+#define NV40_VP_INST_ADDR_SWZ_MASK                                   (0x03 << 0)
+#define NV40_VP_INST0_KNOWN ( \
+                NV40_VP_INST_INDEX_INPUT | \
+                NV40_VP_INST_COND_REG_SELECT_1 | \
+                NV40_VP_INST_ADDR_REG_SELECT_1 | \
+                NV40_VP_INST_SRC2_ABS | \
+                NV40_VP_INST_SRC1_ABS | \
+                NV40_VP_INST_SRC0_ABS | \
+                NV40_VP_INST_VEC_DEST_TEMP_MASK | \
+                NV40_VP_INST_COND_TEST_ENABLE | \
+                NV40_VP_INST_COND_MASK | \
+                NV40_VP_INST_COND_SWZ_ALL_MASK | \
+                NV40_VP_INST_ADDR_SWZ_MASK)
+
+/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */
+#define NV40_VP_INST_VEC_OPCODE_SHIFT                                         22
+#define NV40_VP_INST_VEC_OPCODE_MASK                                (0x1F << 22)
+#define NV40_VP_INST_SCA_OPCODE_SHIFT                                         27
+#define NV40_VP_INST_SCA_OPCODE_MASK                                (0x1F << 27)
+#define NV40_VP_INST_CONST_SRC_SHIFT                                          12
+#define NV40_VP_INST_CONST_SRC_MASK                                 (0xFF << 12)
+#define NV40_VP_INST_INPUT_SRC_SHIFT                                           8
+#define NV40_VP_INST_INPUT_SRC_MASK                                  (0x0F << 8)
+#define NV40_VP_INST_SRC0H_SHIFT                                               0
+#define NV40_VP_INST_SRC0H_MASK                                      (0xFF << 0)
+#define NV40_VP_INST1_KNOWN ( \
+                NV40_VP_INST_VEC_OPCODE_MASK | \
+                NV40_VP_INST_SCA_OPCODE_MASK | \
+                NV40_VP_INST_CONST_SRC_MASK  | \
+                NV40_VP_INST_INPUT_SRC_MASK  | \
+                NV40_VP_INST_SRC0H_MASK \
+                )
+
+/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */
+#define NV40_VP_INST_SRC0L_SHIFT                                              23
+#define NV40_VP_INST_SRC0L_MASK                                    (0x1FF << 23)
+#define NV40_VP_INST_SRC1_SHIFT                                                6
+#define NV40_VP_INST_SRC1_MASK                                    (0x1FFFF << 6)
+#define NV40_VP_INST_SRC2H_SHIFT                                               0
+#define NV40_VP_INST_SRC2H_MASK                                      (0x3F << 0)
+#define NV40_VP_INST_IADDRH_SHIFT                                              0
+#define NV40_VP_INST_IADDRH_MASK                                     (0x1F << 0)
+
+/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */
+#define NV40_VP_INST_IADDRL_SHIFT                                             29
+#define NV40_VP_INST_IADDRL_MASK                                       (7 << 29)
+#define NV40_VP_INST_SRC2L_SHIFT                                              21
+#define NV40_VP_INST_SRC2L_MASK                                    (0x7FF << 21)
+#define NV40_VP_INST_SCA_WRITEMASK_SHIFT                                      17
+#define NV40_VP_INST_SCA_WRITEMASK_MASK                              (0xF << 17)
+#    define NV40_VP_INST_SCA_WRITEMASK_X                               (1 << 20)
+#    define NV40_VP_INST_SCA_WRITEMASK_Y                               (1 << 19)
+#    define NV40_VP_INST_SCA_WRITEMASK_Z                               (1 << 18)
+#    define NV40_VP_INST_SCA_WRITEMASK_W                               (1 << 17)
+#define NV40_VP_INST_VEC_WRITEMASK_SHIFT                                      13
+#define NV40_VP_INST_VEC_WRITEMASK_MASK                              (0xF << 13)
+#    define NV40_VP_INST_VEC_WRITEMASK_X                               (1 << 16)
+#    define NV40_VP_INST_VEC_WRITEMASK_Y                               (1 << 15)
+#    define NV40_VP_INST_VEC_WRITEMASK_Z                               (1 << 14)
+#    define NV40_VP_INST_VEC_WRITEMASK_W                               (1 << 13)
+#define NV40_VP_INST_SCA_RESULT                                        (1 << 12)
+#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT                                       7
+#define NV40_VP_INST_SCA_DEST_TEMP_MASK                              (0x1F << 7)
+#define NV40_VP_INST_DEST_SHIFT                                                2
+#define NV40_VP_INST_DEST_MASK                                         (31 << 2)
+#    define NV40_VP_INST_DEST_POS                                              0
+#    define NV40_VP_INST_DEST_COL0                                             1
+#    define NV40_VP_INST_DEST_COL1                                             2
+#    define NV40_VP_INST_DEST_BFC0                                             3
+#    define NV40_VP_INST_DEST_BFC1                                             4
+#    define NV40_VP_INST_DEST_FOGC                                             5
+#    define NV40_VP_INST_DEST_PSZ                                              6
+#    define NV40_VP_INST_DEST_TC0                                              7
+#    define NV40_VP_INST_DEST_TC(n)                                        (7+n)
+#    define NV40_VP_INST_DEST_TEMP                                          0x1F
+#define NV40_VP_INST_INDEX_CONST                                        (1 << 1)
+#define NV40_VP_INST3_KNOWN ( \
+                NV40_VP_INST_SRC2L_MASK |\
+                NV40_VP_INST_SCA_WRITEMASK_MASK |\
+                NV40_VP_INST_VEC_WRITEMASK_MASK |\
+                NV40_VP_INST_SCA_DEST_TEMP_MASK |\
+                NV40_VP_INST_DEST_MASK |\
+                NV40_VP_INST_INDEX_CONST)
+
+/* Useful to split the source selection regs into their pieces */
+#define NV40_VP_SRC0_HIGH_SHIFT                                                9
+#define NV40_VP_SRC0_HIGH_MASK                                        0x0001FE00
+#define NV40_VP_SRC0_LOW_MASK                                         0x000001FF
+#define NV40_VP_SRC2_HIGH_SHIFT                                               11
+#define NV40_VP_SRC2_HIGH_MASK                                        0x0001F800
+#define NV40_VP_SRC2_LOW_MASK                                         0x000007FF
+
+/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */
+#define NV40_VP_SRC_NEGATE                                             (1 << 16)
+#define NV40_VP_SRC_SWZ_X_SHIFT                                               14
+#define NV40_VP_SRC_SWZ_X_MASK                                         (3 << 14)
+#define NV40_VP_SRC_SWZ_Y_SHIFT                                               12
+#define NV40_VP_SRC_SWZ_Y_MASK                                         (3 << 12)
+#define NV40_VP_SRC_SWZ_Z_SHIFT                                               10
+#define NV40_VP_SRC_SWZ_Z_MASK                                         (3 << 10)
+#define NV40_VP_SRC_SWZ_W_SHIFT                                                8
+#define NV40_VP_SRC_SWZ_W_MASK                                          (3 << 8)
+#define NV40_VP_SRC_SWZ_ALL_SHIFT                                              8
+#define NV40_VP_SRC_SWZ_ALL_MASK                                     (0xFF << 8)
+#define NV40_VP_SRC_TEMP_SRC_SHIFT                                             2
+#define NV40_VP_SRC_TEMP_SRC_MASK                                    (0x1F << 2)
+#define NV40_VP_SRC_REG_TYPE_SHIFT                                             0
+#define NV40_VP_SRC_REG_TYPE_MASK                                       (3 << 0)
+#    define NV40_VP_SRC_REG_TYPE_UNK0                                          0
+#    define NV40_VP_SRC_REG_TYPE_TEMP                                          1
+#    define NV40_VP_SRC_REG_TYPE_INPUT                                         2
+#    define NV40_VP_SRC_REG_TYPE_CONST                                         3
+
+#include "nvfx_shader.h"
+
+#endif
diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c
index 8700e14329..7308f0667c 100644
--- a/src/gallium/drivers/nvfx/nvfx_draw.c
+++ b/src/gallium/drivers/nvfx/nvfx_draw.c
@@ -9,8 +9,8 @@
 
 #include "nvfx_context.h"
 #define NVFX_SHADER_NO_FUCKEDNESS
-#include "nv30/nv30_shader.h"
-#include "nv40/nv40_shader.h"
+#include "nv30_vertprog.h"
+#include "nv40_vertprog.h"
 
 /* Simple, but crappy, swtnl path, hopefully we wont need to hit this very
  * often at all.  Uses "quadro style" vertex submission + a fixed vertex
diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c
index 730361a982..3d0e8c23a1 100644
--- a/src/gallium/drivers/nvfx/nvfx_vertprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
@@ -33,8 +33,8 @@
 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W)
 #define DEF_SCALE 0
 #define DEF_CTEST 0
-#include "nv30/nv30_shader.h"
-#include "nv40/nv40_shader.h"
+#include "nv30_vertprog.h"
+#include "nv40_vertprog.h"
 
 #define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
 #define neg(s) nvfx_sr_neg((s))
-- 
cgit v1.2.3