diff options
author | Ben Skeggs <skeggsb@gmail.com> | 2007-11-18 17:08:06 +1100 |
---|---|---|
committer | Ben Skeggs <skeggsb@gmail.com> | 2007-11-18 17:34:06 +1100 |
commit | 2f33b5b56e9221f2613b34cd1a1a9d82d5ed4303 (patch) | |
tree | 9bcdd27b60eaf4c3d608b4dd2f582fcee7c39f11 /src/mesa/pipe/nv40 | |
parent | 193c85ec7a1aec44eebc67c6224fb6ecbb4607a5 (diff) |
nouveau: Very rough cut at gallium winsys + nv40 pipe driver.
Diffstat (limited to 'src/mesa/pipe/nv40')
-rw-r--r-- | src/mesa/pipe/nv40/Makefile | 30 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_clear.c | 21 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_context.c | 277 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_context.h | 111 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_dma.h | 62 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_draw.c | 63 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_fragprog.c | 642 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_miptree.c | 60 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_query.c | 98 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_region.c | 85 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_shader.h | 546 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_state.c | 674 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_state.h | 173 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_state_emit.c | 112 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_state_tex.c | 140 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_surface.c | 229 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_vbo.c | 222 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nv40_vertprog.c | 594 | ||||
-rw-r--r-- | src/mesa/pipe/nv40/nvgl_pipe.h | 198 |
19 files changed, 4337 insertions, 0 deletions
diff --git a/src/mesa/pipe/nv40/Makefile b/src/mesa/pipe/nv40/Makefile new file mode 100644 index 0000000000..90c8542da4 --- /dev/null +++ b/src/mesa/pipe/nv40/Makefile @@ -0,0 +1,30 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv40 + +DRIVER_SOURCES = \ + nv40_clear.c \ + nv40_context.c \ + nv40_draw.c \ + nv40_fragprog.c \ + nv40_miptree.c \ + nv40_query.c \ + nv40_region.c \ + nv40_state.c \ + nv40_state_emit.c \ + nv40_state_tex.c \ + nv40_surface.c \ + nv40_vbo.c \ + nv40_vertprog.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../Makefile.template + +symlinks: + diff --git a/src/mesa/pipe/nv40/nv40_clear.c b/src/mesa/pipe/nv40/nv40_clear.c new file mode 100644 index 0000000000..f3b7a23689 --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_clear.c @@ -0,0 +1,21 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv40_context.h" +#include "nv40_dma.h" + + +void +nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + /*XXX: We're actually Z24_S8... 
*/ + if (ps->format == PIPE_FORMAT_S8_Z24) { + clearValue = (((clearValue & 0xff000000) >> 24) | + ((clearValue & 0x00ffffff) << 8)); + } + + pipe->region_fill(pipe, ps->region, 0, 0, 0, ps->width, ps->height, + clearValue); +} diff --git a/src/mesa/pipe/nv40/nv40_context.c b/src/mesa/pipe/nv40/nv40_context.c new file mode 100644 index 0000000000..ff66095c5f --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_context.c @@ -0,0 +1,277 @@ +#include "pipe/draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + +#include "nv40_context.h" +#include "nv40_dma.h" + +static boolean +nv40_is_format_supported(struct pipe_context *pipe, uint format) +{ + switch (format) { + case PIPE_FORMAT_U_A8_R8_G8_B8: + case PIPE_FORMAT_U_R5_G6_B5: + case PIPE_FORMAT_S8_Z24: + return TRUE; + default: + break; + }; + + return FALSE; +} + +static const char * +nv40_get_name(struct pipe_context *pipe) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", nv40->chipset); + return buffer; +} + +static const char * +nv40_get_vendor(struct pipe_context *pipe) +{ + return "nouveau"; +} + +static int +nv40_get_param(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 4; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float 
+nv40_get_paramf(struct pipe_context *pipe, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static void +nv40_flush(struct pipe_context *pipe, unsigned flags) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + struct nouveau_winsys *nvws = nv40->nvws; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(curie, 0x1fd8, 1); + OUT_RING (2); + BEGIN_RING(curie, 0x1fd8, 1); + OUT_RING (1); + } + + if (flags & PIPE_FLUSH_WAIT) { + nvws->notifier_reset(nv40->sync, 0); + BEGIN_RING(curie, 0x104, 1); + OUT_RING (0); + BEGIN_RING(curie, 0x100, 1); + OUT_RING (0); + } + + FIRE_RING(); + + if (flags & PIPE_FLUSH_WAIT) + nvws->notifier_wait(nv40->sync, 0, 0, 2000); +} + +static void +nv40_destroy(struct pipe_context *pipe) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + + draw_destroy(nv40->draw); + free(nv40); +} + +static boolean +nv40_init_hwctx(struct nv40_context *nv40, int curie_class) +{ + struct nouveau_winsys *nvws = nv40->nvws; + int ret; + + if ((ret = nvws->notifier_alloc(nvws, nv40->num_query_objects, + &nv40->query))) { + NOUVEAU_ERR("Error creating query notifier objects: %d\n", ret); + return FALSE; + } + + if ((ret = nvws->grobj_alloc(nvws, curie_class, + &nv40->curie))) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + BEGIN_RING(curie, NV40TCL_DMA_NOTIFY, 1); + OUT_RING (nv40->sync->handle); + BEGIN_RING(curie, NV40TCL_DMA_TEXTURE0, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->gart->handle); + BEGIN_RING(curie, NV40TCL_DMA_COLOR1, 1); + OUT_RING (nvws->channel->vram->handle); + BEGIN_RING(curie, NV40TCL_DMA_COLOR0, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING 
(nvws->channel->vram->handle); + BEGIN_RING(curie, NV40TCL_DMA_VTXBUF0, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->gart->handle); + BEGIN_RING(curie, NV40TCL_DMA_FENCE, 2); + OUT_RING (0); + OUT_RING (nv40->query->handle); + BEGIN_RING(curie, NV40TCL_DMA_UNK01AC, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->vram->handle); + BEGIN_RING(curie, NV40TCL_DMA_COLOR2, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->vram->handle); + + BEGIN_RING(curie, 0x1ea4, 3); + OUT_RING (0x00000010); + OUT_RING (0x01000100); + OUT_RING (0xff800006); + + /* vtxprog output routing */ + BEGIN_RING(curie, 0x1fc4, 1); + OUT_RING (0x06144321); + BEGIN_RING(curie, 0x1fc8, 2); + OUT_RING (0xedcba987); + OUT_RING (0x00000021); + BEGIN_RING(curie, 0x1fd0, 1); + OUT_RING (0x00171615); + BEGIN_RING(curie, 0x1fd4, 1); + OUT_RING (0x001b1a19); + + BEGIN_RING(curie, 0x1ef8, 1); + OUT_RING (0x0020ffff); + BEGIN_RING(curie, 0x1d64, 1); + OUT_RING (0x00d30000); + BEGIN_RING(curie, 0x1e94, 1); + OUT_RING (0x00000001); + + FIRE_RING (); + return TRUE; +} + +#define GRCLASS4097_CHIPSETS 0x00000baf +#define GRCLASS4497_CHIPSETS 0x00005450 +struct pipe_context * +nv40_create(struct pipe_winsys *pipe_winsys, struct nouveau_winsys *nvws, + unsigned chipset) +{ + struct nv40_context *nv40; + int curie_class, ret; + + if ((chipset & 0xf0) != 0x40) { + NOUVEAU_ERR("Not a NV4X chipset\n"); + return NULL; + } + + if (GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f))) { + curie_class = 0x4097; + } else + if (GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) { + curie_class = 0x4497; + } else { + NOUVEAU_ERR("Unknown NV4X chipset: NV%02x\n", chipset); + return NULL; + } + + nv40 = CALLOC_STRUCT(nv40_context); + if (!nv40) + return NULL; + nv40->chipset = chipset; + nv40->nvws = nvws; + + if ((ret = nvws->notifier_alloc(nvws, 1, &nv40->sync))) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + free(nv40); + return NULL; + } + + 
nv40->num_query_objects = 32; + nv40->query_objects = calloc(nv40->num_query_objects, + sizeof(struct pipe_query_object *)); + if (!nv40->query_objects) { + free(nv40); + return NULL; + } + + if (!nv40_init_hwctx(nv40, curie_class)) { + free(nv40); + return NULL; + } + + nv40->pipe.winsys = pipe_winsys; + + nv40->pipe.destroy = nv40_destroy; + nv40->pipe.is_format_supported = nv40_is_format_supported; + nv40->pipe.get_name = nv40_get_name; + nv40->pipe.get_vendor = nv40_get_vendor; + nv40->pipe.get_param = nv40_get_param; + nv40->pipe.get_paramf = nv40_get_paramf; + + nv40->pipe.draw_arrays = nv40_draw_arrays; + nv40->pipe.draw_elements = nv40_draw_elements; + nv40->pipe.clear = nv40_clear; + + nv40->pipe.begin_query = nv40_query_begin; + nv40->pipe.end_query = nv40_query_end; + nv40->pipe.wait_query = nv40_query_wait; + + nv40->pipe.mipmap_tree_layout = nv40_miptree_layout; + + nv40->pipe.flush = nv40_flush; + + nv40_init_region_functions(nv40); + nv40_init_surface_functions(nv40); + nv40_init_state_functions(nv40); + + nv40->draw = draw_create(); + assert(nv40->draw); + draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40)); + + return &nv40->pipe; +} + + diff --git a/src/mesa/pipe/nv40/nv40_context.h b/src/mesa/pipe/nv40/nv40_context.h new file mode 100644 index 0000000000..63be38299f --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_context.h @@ -0,0 +1,111 @@ +#ifndef __NV40_CONTEXT_H__ +#define __NV40_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/draw/draw_vertex.h" + +#include "pipe/nouveau/nouveau_winsys.h" + +#include "nv40_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) 
\ + fprintf(stderr, "nouveau: "fmt, ##args); + +#define NV40_NEW_TEXTURE (1 << 0) +#define NV40_NEW_VERTPROG (1 << 1) +#define NV40_NEW_FRAGPROG (1 << 2) +#define NV40_NEW_ARRAYS (1 << 3) + +struct nv40_context { + struct pipe_context pipe; + struct nouveau_winsys *nvws; + + struct draw_context *draw; + + int chipset; + struct nouveau_grobj *curie; + struct nouveau_notifier *sync; + uint32_t *pushbuf; + + /* query objects */ + struct nouveau_notifier *query; + struct pipe_query_object **query_objects; + uint num_query_objects; + + uint32_t dirty; + + struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct pipe_mipmap_tree *tex_miptree[PIPE_MAX_SAMPLERS]; + uint32_t tex_dirty; + + struct { + struct nv40_vertex_program *vp; + struct nv40_vertex_program *active_vp; + + struct pipe_buffer_handle *constant_buf; + } vertprog; + + struct { + struct nv40_fragment_program *fp; + struct nv40_fragment_program *active_fp; + + struct pipe_buffer_handle *constant_buf; + } fragprog; + + struct pipe_vertex_buffer vtxbuf[PIPE_ATTRIB_MAX]; + struct pipe_vertex_element vtxelt[PIPE_ATTRIB_MAX]; +}; + + +extern void nv40_init_region_functions(struct nv40_context *nv40); +extern void nv40_init_surface_functions(struct nv40_context *nv40); +extern void nv40_init_state_functions(struct nv40_context *nv40); + +/* nv40_draw.c */ +extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); + +/* nv40_miptree.c */ +extern boolean nv40_miptree_layout(struct pipe_context *, + struct pipe_mipmap_tree *); + +/* nv40_vertprog.c */ +extern void nv40_vertprog_translate(struct nv40_context *, + struct nv40_vertex_program *); +extern void nv40_vertprog_bind(struct nv40_context *, + struct nv40_vertex_program *); + +/* nv40_fragprog.c */ +extern void nv40_fragprog_translate(struct nv40_context *, + struct nv40_fragment_program *); +extern void nv40_fragprog_bind(struct nv40_context *, + struct nv40_fragment_program *); + +/* nv40_state.c and friends */ +extern void 
nv40_emit_hw_state(struct nv40_context *nv40); +extern void nv40_state_tex_update(struct nv40_context *nv40); + +/* nv40_vbo.c */ +extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv40_draw_elements(struct pipe_context *pipe, + struct pipe_buffer_handle *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); +extern void nv40_vbo_arrays_update(struct nv40_context *nv40); + +/* nv40_clear.c */ +extern void nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +/* nv40_query.c */ +extern void nv40_query_begin(struct pipe_context *, struct pipe_query_object *); +extern void nv40_query_end(struct pipe_context *, struct pipe_query_object *); +extern void nv40_query_wait(struct pipe_context *, struct pipe_query_object *); + +#endif diff --git a/src/mesa/pipe/nv40/nv40_dma.h b/src/mesa/pipe/nv40/nv40_dma.h new file mode 100644 index 0000000000..3775ce6e72 --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_dma.h @@ -0,0 +1,62 @@ +#ifndef __NV40_DMA_H__ +#define __NV40_DMA_H__ + +#include "pipe/nouveau/nouveau_winsys.h" + +#define BEGIN_RING(obj,mthd,size) do { \ + nv40->pushbuf = nv40->nvws->begin_ring(nv40->obj, (mthd), (size)); \ +} while(0) + +#define BEGIN_RING_NI(obj,mthd,size) do { \ + BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \ +} while(0) + +#define OUT_RING(data) do { \ + (*nv40->pushbuf++) = (data); \ +} while(0) + +#define OUT_RINGp(src,size) do { \ + memcpy(nv40->pushbuf, (src), (size) * 4); \ + nv40->pushbuf += (size); \ +} while(0) + +#define OUT_RINGf(data) do { \ + union { float v; uint32_t u; } c; \ + c.v = (data); \ + OUT_RING(c.u); \ +} while(0) + +#define FIRE_RING() do { \ + nv40->nvws->fire_ring(nv40->nvws->channel); \ +} while(0) + +#define OUT_RELOC(bo,data,flags,vor,tor) do { \ + nv40->nvws->out_reloc(nv40->nvws->channel, nv40->pushbuf, \ + (struct nouveau_bo *)(bo), \ + (data), (flags), (vor), (tor)); \ + 
OUT_RING(0); \ +} while(0) + +/* Raw data + flags depending on FB/TT buffer */ +#define OUT_RELOCd(bo,data,flags,vor,tor) do { \ + OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \ +} while(0) + +/* FB/TT object handle */ +#define OUT_RELOCo(bo,flags) do { \ + OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \ + nv40->nvws->channel->vram->handle, \ + nv40->nvws->channel->gart->handle); \ +} while(0) + +/* Low 32-bits of offset */ +#define OUT_RELOCl(bo,delta,flags) do { \ + OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \ +} while(0) + +/* High 32-bits of offset */ +#define OUT_RELOCh(bo,delta,flags) do { \ + OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \ +} while(0) + +#endif diff --git a/src/mesa/pipe/nv40/nv40_draw.c b/src/mesa/pipe/nv40/nv40_draw.c new file mode 100644 index 0000000000..52ce493ea2 --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_draw.c @@ -0,0 +1,63 @@ +#include "pipe/draw/draw_private.h" +#include "pipe/p_util.h" + +#include "nv40_context.h" + +struct nv40_draw_stage { + struct draw_stage draw; + struct nv40_context *nv40; +}; + +static void +nv40_draw_begin(struct draw_stage *draw) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv40_draw_end(struct draw_stage *draw) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv40_draw_point(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv40_draw_line(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv40_draw_tri(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv40_draw_reset_stipple_counter(struct draw_stage *draw) +{ + NOUVEAU_ERR("\n"); +} + +struct draw_stage * +nv40_draw_render_stage(struct nv40_context *nv40) +{ + struct nv40_draw_stage *nv40draw = CALLOC_STRUCT(nv40_draw_stage); + + nv40draw->nv40 = nv40; + nv40draw->draw.draw = nv40->draw; + nv40draw->draw.begin = nv40_draw_begin; + nv40draw->draw.point = nv40_draw_point; + 
nv40draw->draw.line = nv40_draw_line; + nv40draw->draw.tri = nv40_draw_tri; + nv40draw->draw.end = nv40_draw_end; + nv40draw->draw.reset_stipple_counter = nv40_draw_reset_stipple_counter; + + return &nv40draw->draw; +} + diff --git a/src/mesa/pipe/nv40/nv40_fragprog.c b/src/mesa/pipe/nv40/nv40_fragprog.c new file mode 100644 index 0000000000..48b783eebe --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_fragprog.c @@ -0,0 +1,642 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/tgsi/exec/tgsi_token.h" +#include "pipe/tgsi/exec/tgsi_parse.h" + +#include "nv40_context.h" +#include "nv40_dma.h" + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 1 +#define MASK_Y 2 +#define MASK_Z 4 +#define MASK_W 8 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X +#define DEF_CTEST NV40_FP_OP_COND_TR +#include "nv40_shader.h" + +#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv40_sr_neg((s)) +#define abs(s) nv40_sr_abs((s)) +#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v) + +static uint32_t +passthrough_fp_data[] = { + 0x01403e81, 0x1c9dc801, 0x0001c800, 0x3fe1c800 +}; + +static struct nv40_fragment_program +passthrough_fp = { + .pipe = NULL, + .translated = TRUE, + .insn = passthrough_fp_data, + .insn_len = sizeof(passthrough_fp_data) / sizeof(uint32_t), + .buffer = NULL, + .uses_kil = 0, + .num_regs = 2, +}; + +struct nv40_fpc { + struct nv40_fragment_program *fp; + + uint attrib_map[PIPE_MAX_SHADER_INPUTS]; + + int high_temp; + int temp_temp_count; + + uint depth_id; + uint colour_id; + + boolean inst_has_const; + int inst_const_id; +}; + +static INLINE struct nv40_sreg +nv40_sr_temp(struct nv40_fpc *fpc) +{ + int idx; + + idx = fpc->temp_temp_count++; + idx += fpc->high_temp + 1; + return nv40_sr(0, NV40_FP_REG_TYPE_TEMP, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv40_fp_arith((cc), (s), 
NV40_FP_OP_OPCODE_##o, \ + (d), (m), (s0), (s1), (s2)) +#define tex(cc,s,o,u,d,m,s0,s1,s2) \ + nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \ + (d), (m), (s0), none, none) +#define temp(fpc) nv40_sr_temp((fpc)) + +static void +emit_src(struct nv40_fpc *fpc, uint32_t *hw, int pos, struct nv40_sreg src) +{ + uint32_t sr = 0; + + sr |= (src.type << NV40_FP_REG_TYPE_SHIFT); + if (src.type == NV40_FP_REG_TYPE_INPUT) { + hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT); + } else + if (src.type == NV40_FP_REG_TYPE_CONST) { + fpc->inst_has_const = TRUE; + } else + if (src.type == NV40_FP_REG_TYPE_TEMP) { + sr |= (src.index << NV40_FP_REG_SRC_SHIFT); + } + + if (src.negate) + sr |= NV40_FP_REG_NEGATE; + + if (src.abs) + hw[1] |= (1 << (29 + pos)); + + sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT)); + + hw[pos + 1] |= sr; +} + +static void +emit_dst(struct nv40_fpc *fpc, uint32_t *hw, struct nv40_sreg dst) +{ + struct nv40_fragment_program *fp = fpc->fp; + + if (dst.output) { + if (dst.index == 1) { + fp->writes_depth = 1; + } else { + hw[0] |= NV40_FP_OP_UNK0_7; + } + } else { + if (fp->num_regs < (dst.index + 1)) + fp->num_regs = dst.index + 1; + } + hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT); +} + +static void +nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +{ + struct nv40_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fp->insn_len]; + + fpc->inst_has_const = FALSE; + + if (op == NV40_FP_OP_OPCODE_KIL) + fp->uses_kil = TRUE; + hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT); + hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT); + hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT); + + if (sat) + hw[0] |= NV40_FP_OP_OUT_SAT; + + if (dst.cc_update) + hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (dst.cc_test << 
NV40_FP_OP_COND_SHIFT); + hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) | + (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) | + (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) | + (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT)); + + emit_dst(fpc, hw, dst); + emit_src(fpc, hw, 0, s0); + emit_src(fpc, hw, 1, s1); + emit_src(fpc, hw, 2, s2); + + fp->insn_len += 4; + if (fpc->inst_has_const) { + fp->consts[fp->num_consts].pipe_id = fpc->inst_const_id; + fp->consts[fp->num_consts].hw_id = fp->insn_len; + fp->num_consts++; + fp->insn_len += 4; + } +} + +static void +nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +{ + struct nv40_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fp->insn_len]; + + nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + hw[0] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT); +} + +static INLINE struct nv40_sreg +tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) +{ + struct nv40_sreg src; + uint type, index; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + type = NV40_FP_REG_TYPE_INPUT; + index = fpc->attrib_map[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_CONSTANT: + type = NV40_FP_REG_TYPE_CONST; + index = fsrc->SrcRegister.Index; + break; + case TGSI_FILE_TEMPORARY: + type = NV40_FP_REG_TYPE_TEMP; + index = fsrc->SrcRegister.Index + 1; + if (fpc->high_temp < index) + fpc->high_temp = index; + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src = nv40_sr(0, type, index); + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv40_sreg +tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { + int 
out, idx; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + out = 1; + if (fdst->DstRegister.Index == fpc->colour_id) + idx = 0; + else + idx = 1; + break; + case TGSI_FILE_TEMPORARY: + out = 0; + idx = fdst->DstRegister.Index + 1; + if (fpc->high_temp < idx) + fpc->high_temp = idx; + break; + case TGSI_FILE_NULL: + break; + default: + NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); + break; + } + + return nv40_sr(out, NV40_FP_REG_TYPE_TEMP, idx); +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, + const struct tgsi_full_instruction *finst) +{ + struct nv40_sreg src[3], dst, tmp; + struct nv40_sreg none = nv40_sr(0, NV40_FP_REG_TYPE_INPUT, 0); + int mask, sat, unit; + int ai = -1, ci = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_RET) + return TRUE; + + fpc->temp_temp_count = 0; + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(fpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + NOUVEAU_MSG("extra src attr %d\n", + fsrc->SrcRegister.Index); + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + if (ci == -1 || ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = 
tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + case TGSI_FILE_SAMPLER: + unit = fsrc->SrcRegister.Index; + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_CMP: + tmp = temp(fpc); + arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp.cc_update = 1; + arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV40_VP_INST_COND_LT; + arith(fpc, sat, MOV, dst, mask, src[1], none, none); + break; + case TGSI_OPCODE_COS: + arith(fpc, sat, COS, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); + arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), + swz(src[1], W, W, W, W), none); + break; + case TGSI_OPCODE_DST: + arith(fpc, sat, DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(fpc, sat, EX2, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FLR: + arith(fpc, sat, FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(fpc, sat, FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_KIL: + case TGSI_OPCODE_KILP: + /*XXX: Which is NV, which is ARB kil? ARB implemented here. 
+ *XXX: Don't need temp, can update CC0 without writing dst + */ + tmp = temp(fpc); + tmp.cc_update = 1; + arith(fpc, 0, MOV, tmp, MASK_ALL, src[0], none, none); + dst.cc_test = NV40_FP_OP_COND_LT; + arith(fpc, 0, KIL, dst, 0, none, none, none); + break; + case TGSI_OPCODE_LG2: + arith(fpc, sat, LG2, dst, mask, src[0], none, none); + break; +// case TGSI_OPCODE_LIT: + case TGSI_OPCODE_LRP: + tmp = temp(fpc); + arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); + arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); + break; + case TGSI_OPCODE_MAD: + arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(fpc, sat, MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(fpc); + arith(fpc, 0, LG2, tmp, MASK_X, + swz(src[0], X, X, X, X), none, none); + arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(fpc, sat, EX2, dst, mask, + swz(tmp, X, X, X, X), none, none); + break; + case TGSI_OPCODE_RCP: + arith(fpc, sat, RCP, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_RFL: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); + arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); + arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, + swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); + arith(fpc, sat, MAD, dst, mask, + swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); + break; + case TGSI_OPCODE_RSQ: + tmp = temp(fpc); + arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, + abs(swz(src[0], X, X, X, X)), none, none); + arith(fpc, sat, EX2, dst, mask, + neg(swz(tmp, X, X, X, X)), none, none); + break; + case TGSI_OPCODE_SCS: + if (mask & MASK_X) { + arith(fpc, sat, COS, 
dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + break; + case TGSI_OPCODE_SIN: + arith(fpc, sat, SIN, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_SGE: + arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); + break; + case TGSI_OPCODE_TEX: + tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXB: + tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); + break; +#if 0 /* XXX: reimplement on top of TEX */ + case TGSI_OPCODE_TXP: + tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); + break; +#endif + case TGSI_OPCODE_XPD: + tmp = temp(fpc); + arith(fpc, 0, MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(fpc, sat, MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + +static boolean +nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV40_FP_OP_INPUT_SRC_POSITION; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_FP_OP_INPUT_SRC_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_FP_OP_INPUT_SRC_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV40_FP_OP_INPUT_SRC_FOGC; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. 
+ SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad input semantic\n"); + return FALSE; + } + + fpc->attrib_map[fdec->u.DeclarationRange.First] = hw; + return TRUE; +} + +static boolean +nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + fpc->depth_id = fdec->u.DeclarationRange.First; + break; + case TGSI_SEMANTIC_COLOR: + fpc->colour_id = fdec->u.DeclarationRange.First; + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + return TRUE; +} + +void +nv40_fragprog_translate(struct nv40_context *nv40, + struct nv40_fragment_program *fp) +{ + struct tgsi_parse_context parse; + struct nv40_fpc *fpc = NULL; + int i; + + fpc = calloc(1, sizeof(struct nv40_fpc)); + if (!fpc) + return; + fp->insn = calloc(1, 128*4*sizeof(uint32_t)); + fpc->fp = fp; + fpc->high_temp = -1; + fp->num_regs = 2; + + tgsi_parse_init(&parse, fp->pipe->tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &parse.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_INPUT: + if (!nv40_fragprog_parse_decl_attrib(fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_OUTPUT: + if (!nv40_fragprog_parse_decl_output(fpc, fdec)) + goto out_err; + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + + finst = &parse.FullToken.FullInstruction; + if (!nv40_fragprog_parse_instruction(fpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + if (fpc->inst_has_const == FALSE) + fp->insn[fp->insn_len - 4] |= 0x00000001; + else + fp->insn[fp->insn_len - 8] |= 
0x00000001; + fp->insn[fp->insn_len++] = 0x00000001; + + fp->translated = TRUE; + fp->on_hw = FALSE; +out_err: + tgsi_parse_free(&parse); + free(fpc); +} + +void +nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp) +{ + struct pipe_winsys *ws = nv40->pipe.winsys; + uint32_t fp_control; + + if (!fp->translated) { + NOUVEAU_ERR("fragprog invalid, using passthrough shader\n"); + fp = &passthrough_fp; + } + + if (!fp->on_hw) { + if (!fp->buffer) + fp->buffer = ws->buffer_create(ws, 0x100); + + nv40->pipe.winsys->buffer_data(nv40->pipe.winsys, fp->buffer, + fp->insn_len * sizeof(uint32_t), + fp->insn, + PIPE_BUFFER_USAGE_PIXEL); + fp->on_hw = TRUE; + } + + fp_control = fp->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; + if (fp->uses_kil) + fp_control |= NV40TCL_FP_CONTROL_KIL; + if (fp->writes_depth) + fp_control |= 0xe; + + BEGIN_RING(curie, NV40TCL_FP_ADDRESS, 1); + OUT_RELOC (fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1); + BEGIN_RING(curie, NV40TCL_FP_CONTROL, 1); + OUT_RING (fp_control); + + nv40->fragprog.active_fp = fp; +} + diff --git a/src/mesa/pipe/nv40/nv40_miptree.c b/src/mesa/pipe/nv40/nv40_miptree.c new file mode 100644 index 0000000000..6b85823d8c --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_miptree.c @@ -0,0 +1,60 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" + +#include "nv40_context.h" + +boolean +nv40_miptree_layout(struct pipe_context *pipe, struct pipe_mipmap_tree *mt) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + uint width, height, depth, offset; + boolean swizzled = FALSE; + int l; + + mt->pitch = mt->width0; + mt->total_height = 0; + + width = mt->width0; + height = mt->height0; + depth = mt->depth0; + offset = 0; + for (l = mt->first_level; l <= mt->last_level; l++) { + uint pitch, f; + + mt->level[l].width = width; + mt->level[l].height = height; + 
mt->level[l].depth = depth; + mt->level[l].level_offset = offset; + + if (!swizzled) + pitch = mt->width0; + else + pitch = width; + + if (mt->target == PIPE_TEXTURE_CUBE) + mt->level[l].nr_images = 6; + else + if (mt->target == PIPE_TEXTURE_3D) + mt->level[l].nr_images = 3; + else + mt->level[l].nr_images = 1; + mt->level[l].image_offset = + malloc(mt->level[l].nr_images * sizeof(unsigned)); + + for (f = 0; f < mt->level[l].nr_images; f++) { + mt->level[l].image_offset[f] = + (offset - mt->level[l].level_offset) / mt->cpp; + mt->total_height += height; + + offset += (pitch * mt->cpp * height); + } + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + depth = MAX2(1, depth >> 1); + } + + return TRUE; +} + diff --git a/src/mesa/pipe/nv40/nv40_query.c b/src/mesa/pipe/nv40/nv40_query.c new file mode 100644 index 0000000000..efd81e6640 --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_query.c @@ -0,0 +1,98 @@ +#include "pipe/p_context.h" + +#include "nv40_context.h" +#include "nv40_dma.h" + +static uint +nv40_query_object_find(struct nv40_context *nv40, struct pipe_query_object *q) +{ + int id; + + for (id = 0; id < nv40->num_query_objects; id++) { + if (nv40->query_objects[id] == q) + return id; + } + + return -1; +} + +void +nv40_query_begin(struct pipe_context *pipe, struct pipe_query_object *q) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + int id; + + assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + id = nv40_query_object_find(nv40, NULL); + assert(id >= 0); + nv40->query_objects[id] = q; + + nv40->nvws->notifier_reset(nv40->query, id); + q->ready = 0; + + BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); + OUT_RING (1); + BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1); + OUT_RING (1); +} + +static void +nv40_query_update(struct pipe_context *pipe, struct pipe_query_object *q) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + int id; + + id = nv40_query_object_find(nv40, q); + assert(id >= 0); + + if 
(nv40->nvws->notifier_status(nv40->query, id) == 0) { + q->ready = 1; + q->count = nv40->nvws->notifier_retval(nv40->query, id); + nv40->query_objects[id] = NULL; + } +} + +void +nv40_query_end(struct pipe_context *pipe, struct pipe_query_object *q) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + int id; + + id = nv40_query_object_find(nv40, q); + assert(id >= 0); + + BEGIN_RING(curie, NV40TCL_QUERY_GET, 1); + OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | + ((id * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); + FIRE_RING (); + + /*XXX: Some apps spin waiting for GL_QUERY_RESULT_AVAILABLE_ARB. + * Core mesa won't ask the driver to update the query object's + * status in this case, so the app waits forever.. fix this some + * day. + */ +#if 0 + nv40_query_update(pipe, q); +#else + nv40_query_wait(pipe, q); +#endif +} + +void +nv40_query_wait(struct pipe_context *pipe, struct pipe_query_object *q) +{ + nv40_query_update(pipe, q); + if (!q->ready) { + struct nv40_context *nv40 = (struct nv40_context *)pipe; + int id; + + id = nv40_query_object_find(nv40, q); + assert(id >= 0); + + nv40->nvws->notifier_wait(nv40->query, id, 0, 0); + nv40_query_update(pipe, q); + assert(q->ready); + } +} + diff --git a/src/mesa/pipe/nv40/nv40_region.c b/src/mesa/pipe/nv40/nv40_region.c new file mode 100644 index 0000000000..f62bf89d18 --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_region.c @@ -0,0 +1,85 @@ +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" + +#include "nv40_context.h" +#include "nv40_dma.h" + +static ubyte * +nv40_region_map(struct pipe_context *pipe, struct pipe_region *region) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + struct pipe_winsys *ws = nv40->pipe.winsys; + + if (!region->map_refcount++) { + region->map = ws->buffer_map(ws, region->buffer, + PIPE_BUFFER_FLAG_WRITE | + PIPE_BUFFER_FLAG_READ); + } + + return region->map; +} + +static void +nv40_region_unmap(struct pipe_context *pipe, struct pipe_region *region) +{ + struct 
nv40_context *nv40 = (struct nv40_context *)pipe; + struct pipe_winsys *ws = nv40->pipe.winsys; + + if (!--region->map_refcount) { + ws->buffer_unmap(ws, region->buffer); + region->map = NULL; + } +} + +static void +nv40_region_data(struct pipe_context *pipe, + struct pipe_region *dst, + unsigned dst_offset, + unsigned dstx, unsigned dsty, + const void *src, unsigned src_pitch, + unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + struct nouveau_winsys *nvws = nv40->nvws; + + nvws->region_data(nvws->nv, dst, dst_offset, dstx, dsty, + src, src_pitch, srcx, srcy, width, height); +} + + +static void +nv40_region_copy(struct pipe_context *pipe, struct pipe_region *dst, + unsigned dst_offset, unsigned dstx, unsigned dsty, + struct pipe_region *src, unsigned src_offset, + unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + struct nouveau_winsys *nvws = nv40->nvws; + + nvws->region_copy(nvws->nv, dst, dst_offset, dstx, dsty, + src, src_offset, srcx, srcy, width, height); +} + +static void +nv40_region_fill(struct pipe_context *pipe, + struct pipe_region *dst, unsigned dst_offset, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + struct nouveau_winsys *nvws = nv40->nvws; + + nvws->region_fill(nvws->nv, dst, dst_offset, dstx, dsty, + width, height, value); +} + +void +nv40_init_region_functions(struct nv40_context *nv40) +{ + nv40->pipe.region_map = nv40_region_map; + nv40->pipe.region_unmap = nv40_region_unmap; + nv40->pipe.region_data = nv40_region_data; + nv40->pipe.region_copy = nv40_region_copy; + nv40->pipe.region_fill = nv40_region_fill; +} + diff --git a/src/mesa/pipe/nv40/nv40_shader.h b/src/mesa/pipe/nv40/nv40_shader.h new file mode 100644 index 0000000000..5b2cf3e293 --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_shader.h 
@@ -0,0 +1,546 @@ +#ifndef __NV40_SHADER_H__ +#define __NV40_SHADER_H__ + +/* Vertex programs instruction set + * + * The NV40 instruction set is very similar to NV30. Most fields are in + * a slightly different position in the instruction however. + * + * Merged instructions + * In some cases it is possible to put two instructions into one opcode + * slot. The rules for when this is OK are not entirely clear to me yet. + * + * There are separate writemasks and dest temp register fields for each + * grouping of instructions. There is however only one field with the + * ID of a result register. Writing to temp/result regs is selected by + * setting VEC_RESULT/SCA_RESULT. + * + * Temporary registers + * The source/dest temp register fields have been extended by 1 bit, to + * give a total of 32 temporary registers. + * + * Relative Addressing + * NV40 can use an address register to index into vertex attribute regs. + * This is done by putting the offset value into INPUT_SRC and setting + * the INDEX_INPUT flag. + * + * Conditional execution (see NV_vertex_program{2,3} for details) + * There is a second condition code register on NV40; its use is enabled + * by setting the COND_REG_SELECT_1 flag. + * + * Texture lookup + * TODO + */ + +/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ +#define NV40_VP_INST_VEC_RESULT (1 << 30) +/* uncertain.. 
*/ +#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) +/* use address reg as index into attribs */ +#define NV40_VP_INST_INDEX_INPUT (1 << 27) +#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25) +#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV40_VP_INST_SRC2_ABS (1 << 23) +#define NV40_VP_INST_SRC1_ABS (1 << 22) +#define NV40_VP_INST_SRC0_ABS (1 << 21) +#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15 +#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15) +#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) +#define NV40_VP_INST_COND_SHIFT 10 +#define NV40_VP_INST_COND_MASK (0x7 << 10) +# define NV40_VP_INST_COND_FL 0 +# define NV40_VP_INST_COND_LT 1 +# define NV40_VP_INST_COND_EQ 2 +# define NV40_VP_INST_COND_LE 3 +# define NV40_VP_INST_COND_GT 4 +# define NV40_VP_INST_COND_NE 5 +# define NV40_VP_INST_COND_GE 6 +# define NV40_VP_INST_COND_TR 7 +#define NV40_VP_INST_COND_SWZ_X_SHIFT 8 +#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8) +#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6 +#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6) +#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4 +#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4) +#define NV40_VP_INST_COND_SWZ_W_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2) +#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2) +#define NV40_VP_INST_ADDR_SWZ_SHIFT 0 +#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0) +#define NV40_VP_INST0_KNOWN ( \ + NV40_VP_INST_INDEX_INPUT | \ + NV40_VP_INST_COND_REG_SELECT_1 | \ + NV40_VP_INST_ADDR_REG_SELECT_1 | \ + NV40_VP_INST_SRC2_ABS | \ + NV40_VP_INST_SRC1_ABS | \ + NV40_VP_INST_SRC0_ABS | \ + NV40_VP_INST_VEC_DEST_TEMP_MASK | \ + NV40_VP_INST_COND_TEST_ENABLE | \ + NV40_VP_INST_COND_MASK | \ + NV40_VP_INST_COND_SWZ_ALL_MASK | \ + NV40_VP_INST_ADDR_SWZ_MASK) + +/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ +#define NV40_VP_INST_VEC_OPCODE_SHIFT 22 +#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22) +# define NV40_VP_INST_OP_NOP 0x00 +# define 
NV40_VP_INST_OP_MOV 0x01 +# define NV40_VP_INST_OP_MUL 0x02 +# define NV40_VP_INST_OP_ADD 0x03 +# define NV40_VP_INST_OP_MAD 0x04 +# define NV40_VP_INST_OP_DP3 0x05 +# define NV40_VP_INST_OP_DP4 0x07 +# define NV40_VP_INST_OP_DPH 0x06 +# define NV40_VP_INST_OP_DST 0x08 +# define NV40_VP_INST_OP_MIN 0x09 +# define NV40_VP_INST_OP_MAX 0x0A +# define NV40_VP_INST_OP_SLT 0x0B +# define NV40_VP_INST_OP_SGE 0x0C +# define NV40_VP_INST_OP_ARL 0x0D +# define NV40_VP_INST_OP_FRC 0x0E +# define NV40_VP_INST_OP_FLR 0x0F +# define NV40_VP_INST_OP_SEQ 0x10 +# define NV40_VP_INST_OP_SFL 0x11 +# define NV40_VP_INST_OP_SGT 0x12 +# define NV40_VP_INST_OP_SLE 0x13 +# define NV40_VP_INST_OP_SNE 0x14 +# define NV40_VP_INST_OP_STR 0x15 +# define NV40_VP_INST_OP_SSG 0x16 +# define NV40_VP_INST_OP_ARR 0x17 +# define NV40_VP_INST_OP_ARA 0x18 +# define NV40_VP_INST_OP_TXWHAT 0x19 +#define NV40_VP_INST_SCA_OPCODE_SHIFT 27 +#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27) +# define NV40_VP_INST_OP_RCP 0x02 +# define NV40_VP_INST_OP_RCC 0x03 +# define NV40_VP_INST_OP_RSQ 0x04 +# define NV40_VP_INST_OP_EXP 0x05 +# define NV40_VP_INST_OP_LOG 0x06 +# define NV40_VP_INST_OP_LIT 0x07 +# define NV40_VP_INST_OP_BRA 0x09 +# define NV40_VP_INST_OP_CAL 0x0B +# define NV40_VP_INST_OP_RET 0x0C +# define NV40_VP_INST_OP_LG2 0x0D +# define NV40_VP_INST_OP_EX2 0x0E +# define NV40_VP_INST_OP_SIN 0x0F +# define NV40_VP_INST_OP_COS 0x10 +# define NV40_VP_INST_OP_PUSHA 0x13 +# define NV40_VP_INST_OP_POPA 0x14 +#define NV40_VP_INST_CONST_SRC_SHIFT 12 +#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12) +#define NV40_VP_INST_INPUT_SRC_SHIFT 8 +#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8) +# define NV40_VP_INST_IN_POS 0 +# define NV40_VP_INST_IN_WEIGHT 1 +# define NV40_VP_INST_IN_NORMAL 2 +# define NV40_VP_INST_IN_COL0 3 +# define NV40_VP_INST_IN_COL1 4 +# define NV40_VP_INST_IN_FOGC 5 +# define NV40_VP_INST_IN_TC0 8 +# define NV40_VP_INST_IN_TC(n) (8+n) +#define NV40_VP_INST_SRC0H_SHIFT 0 +#define 
NV40_VP_INST_SRC0H_MASK (0xFF << 0) +#define NV40_VP_INST1_KNOWN ( \ + NV40_VP_INST_VEC_OPCODE_MASK | \ + NV40_VP_INST_SCA_OPCODE_MASK | \ + NV40_VP_INST_CONST_SRC_MASK | \ + NV40_VP_INST_INPUT_SRC_MASK | \ + NV40_VP_INST_SRC0H_MASK \ + ) + +/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ +#define NV40_VP_INST_SRC0L_SHIFT 23 +#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23) +#define NV40_VP_INST_SRC1_SHIFT 6 +#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6) +#define NV40_VP_INST_SRC2H_SHIFT 0 +#define NV40_VP_INST_SRC2H_MASK (0x3F << 0) +#define NV40_VP_INST_IADDRH_SHIFT 0 +#define NV40_VP_INST_IADDRH_MASK (0x1F << 0) + +/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ +#define NV40_VP_INST_IADDRL_SHIFT 29 +#define NV40_VP_INST_IADDRL_MASK (7 << 29) +#define NV40_VP_INST_SRC2L_SHIFT 21 +#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21) +#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17 +#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17) +# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20) +# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19) +# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18) +# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17) +#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13 +#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13) +# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16) +# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15) +# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14) +# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13) +#define NV40_VP_INST_SCA_RESULT (1 << 12) +#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7 +#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7) +#define NV40_VP_INST_DEST_SHIFT 2 +#define NV40_VP_INST_DEST_MASK (31 << 2) +# define NV40_VP_INST_DEST_POS 0 +# define NV40_VP_INST_DEST_COL0 1 +# define NV40_VP_INST_DEST_COL1 2 +# define NV40_VP_INST_DEST_BFC0 3 +# define NV40_VP_INST_DEST_BFC1 4 +# define NV40_VP_INST_DEST_FOGC 5 +# define NV40_VP_INST_DEST_PSZ 6 +# define NV40_VP_INST_DEST_TC0 7 +# define NV40_VP_INST_DEST_TC(n) (7+n) +# define NV40_VP_INST_DEST_TEMP 0x1F 
+#define NV40_VP_INST_INDEX_CONST (1 << 1) +#define NV40_VP_INST_LAST (1 << 0) +#define NV40_VP_INST3_KNOWN ( \ + NV40_VP_INST_SRC2L_MASK |\ + NV40_VP_INST_SCA_WRITEMASK_MASK |\ + NV40_VP_INST_VEC_WRITEMASK_MASK |\ + NV40_VP_INST_SCA_DEST_TEMP_MASK |\ + NV40_VP_INST_DEST_MASK |\ + NV40_VP_INST_INDEX_CONST) + +/* Useful to split the source selection regs into their pieces */ +#define NV40_VP_SRC0_HIGH_SHIFT 9 +#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00 +#define NV40_VP_SRC0_LOW_MASK 0x000001FF +#define NV40_VP_SRC2_HIGH_SHIFT 11 +#define NV40_VP_SRC2_HIGH_MASK 0x0001F800 +#define NV40_VP_SRC2_LOW_MASK 0x000007FF + +/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ +#define NV40_VP_SRC_NEGATE (1 << 16) +#define NV40_VP_SRC_SWZ_X_SHIFT 14 +#define NV40_VP_SRC_SWZ_X_MASK (3 << 14) +#define NV40_VP_SRC_SWZ_Y_SHIFT 12 +#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12) +#define NV40_VP_SRC_SWZ_Z_SHIFT 10 +#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10) +#define NV40_VP_SRC_SWZ_W_SHIFT 8 +#define NV40_VP_SRC_SWZ_W_MASK (3 << 8) +#define NV40_VP_SRC_SWZ_ALL_SHIFT 8 +#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8) +#define NV40_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2) +#define NV40_VP_SRC_REG_TYPE_SHIFT 0 +#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0) +# define NV40_VP_SRC_REG_TYPE_UNK0 0 +# define NV40_VP_SRC_REG_TYPE_TEMP 1 +# define NV40_VP_SRC_REG_TYPE_INPUT 2 +# define NV40_VP_SRC_REG_TYPE_CONST 3 + + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + * 0 - Opcode, output reg/mask, ATTRIB source + * 1 - Source 0 + * 2 - Source 1 + * 3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + * result.color == R0.xyzw + * result.depth == R1.z + * When the fragprog contains instructions to write depth, + * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. 
+ * + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords. As such instructions such as: + * + * ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and + * SWIZZLE_ONE. + * + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as + * SWIZZLE_ZERO is implemented simply by not writing to the relevant components + * of the destination. + * + * Looping + * Loops appear to be fairly expensive on NV40 at least, the proprietary + * driver goes to a lot of effort to avoid using the native looping + * instructions. If the total number of *executed* instructions between + * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. + * The maximum loop count is 255. + * + * Conditional execution + * TODO + * + * Non-native instructions: + * LIT + * LRP - MAD+MAD + * SUB - ADD, negate second source + * RSQ - LG2 + EX2 + * POW - LG2 + MUL + EX2 + * SCS - COS + SIN + * XPD + * DP2 - MUL + ADD + * NRM + */ + +//== Opcode / Destination selection == +#define NV40_FP_OP_PROGRAM_END (1 << 0) +#define NV40_FP_OP_OUT_REG_SHIFT 1 +#define NV40_FP_OP_OUT_REG_MASK (31 << 1) +/* Needs to be set when writing outputs to get expected result.. 
*/ +#define NV40_FP_OP_UNK0_7 (1 << 7) +#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8) +#define NV40_FP_OP_OUTMASK_SHIFT 9 +#define NV40_FP_OP_OUTMASK_MASK (0xF << 9) +# define NV40_FP_OP_OUT_X (1 << 9) +# define NV40_FP_OP_OUT_Y (1 <<10) +# define NV40_FP_OP_OUT_Z (1 <<11) +# define NV40_FP_OP_OUT_W (1 <<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. + */ +#define NV40_FP_OP_INPUT_SRC_SHIFT 13 +#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13) +# define NV40_FP_OP_INPUT_SRC_POSITION 0x0 +# define NV40_FP_OP_INPUT_SRC_COL0 0x1 +# define NV40_FP_OP_INPUT_SRC_COL1 0x2 +# define NV40_FP_OP_INPUT_SRC_FOGC 0x3 +# define NV40_FP_OP_INPUT_SRC_TC0 0x4 +# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + n) +# define NV40_FP_OP_INPUT_SRC_FACING 0xE +#define NV40_FP_OP_TEX_UNIT_SHIFT 17 +#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17) +#define NV40_FP_OP_PRECISION_SHIFT 22 +#define NV40_FP_OP_PRECISION_MASK (3 << 22) +# define NV40_FP_PRECISION_FP32 0 +# define NV40_FP_PRECISION_FP16 1 +# define NV40_FP_PRECISION_FX12 2 +#define NV40_FP_OP_OPCODE_SHIFT 24 +#define NV40_FP_OP_OPCODE_MASK (0x3F << 24) +# define NV40_FP_OP_OPCODE_NOP 0x00 +# define NV40_FP_OP_OPCODE_MOV 0x01 +# define NV40_FP_OP_OPCODE_MUL 0x02 +# define NV40_FP_OP_OPCODE_ADD 0x03 +# define NV40_FP_OP_OPCODE_MAD 0x04 +# define NV40_FP_OP_OPCODE_DP3 0x05 +# define NV40_FP_OP_OPCODE_DP4 0x06 +# define NV40_FP_OP_OPCODE_DST 0x07 +# define NV40_FP_OP_OPCODE_MIN 0x08 +# define NV40_FP_OP_OPCODE_MAX 0x09 +# define NV40_FP_OP_OPCODE_SLT 0x0A +# define NV40_FP_OP_OPCODE_SGE 0x0B +# define NV40_FP_OP_OPCODE_SLE 0x0C +# define NV40_FP_OP_OPCODE_SGT 0x0D +# define NV40_FP_OP_OPCODE_SNE 0x0E +# define NV40_FP_OP_OPCODE_SEQ 0x0F +# define NV40_FP_OP_OPCODE_FRC 0x10 +# define NV40_FP_OP_OPCODE_FLR 0x11 +# define NV40_FP_OP_OPCODE_KIL 0x12 +# define NV40_FP_OP_OPCODE_PK4B 0x13 +# define NV40_FP_OP_OPCODE_UP4B 0x14 +/* DDX/DDY can only write to XY */ +# define 
NV40_FP_OP_OPCODE_DDX 0x15 +# define NV40_FP_OP_OPCODE_DDY 0x16 +# define NV40_FP_OP_OPCODE_TEX 0x17 +# define NV40_FP_OP_OPCODE_TXP 0x18 +# define NV40_FP_OP_OPCODE_TXD 0x19 +# define NV40_FP_OP_OPCODE_RCP 0x1A +# define NV40_FP_OP_OPCODE_EX2 0x1C +# define NV40_FP_OP_OPCODE_LG2 0x1D +# define NV40_FP_OP_OPCODE_COS 0x22 +# define NV40_FP_OP_OPCODE_SIN 0x23 +# define NV40_FP_OP_OPCODE_PK2H 0x24 +# define NV40_FP_OP_OPCODE_UP2H 0x25 +# define NV40_FP_OP_OPCODE_PK4UB 0x27 +# define NV40_FP_OP_OPCODE_UP4UB 0x28 +# define NV40_FP_OP_OPCODE_PK2US 0x29 +# define NV40_FP_OP_OPCODE_UP2US 0x2A +# define NV40_FP_OP_OPCODE_DP2A 0x2E +# define NV40_FP_OP_OPCODE_TXL 0x2F +# define NV40_FP_OP_OPCODE_TXB 0x31 +# define NV40_FP_OP_OPCODE_DIV 0x3A +# define NV40_FP_OP_OPCODE_UNK_LIT 0x3C +/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ +# define NV40_FP_OP_BRA_OPCODE_BRK 0x0 +# define NV40_FP_OP_BRA_OPCODE_CAL 0x1 +# define NV40_FP_OP_BRA_OPCODE_IF 0x2 +# define NV40_FP_OP_BRA_OPCODE_LOOP 0x3 +# define NV40_FP_OP_BRA_OPCODE_REP 0x4 +# define NV40_FP_OP_BRA_OPCODE_RET 0x5 +#define NV40_FP_OP_OUT_SAT (1 << 31) + +/* high order bits of SRC0 */ +#define NV40_FP_OP_OUT_ABS (1 << 29) +#define NV40_FP_OP_COND_SWZ_W_SHIFT 27 +#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27) +#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25 +#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25) +#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23 +#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23) +#define NV40_FP_OP_COND_SWZ_X_SHIFT 21 +#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21) +#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21 +#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) +#define NV40_FP_OP_COND_SHIFT 18 +#define NV40_FP_OP_COND_MASK (0x07 << 18) +# define NV40_FP_OP_COND_FL 0 +# define NV40_FP_OP_COND_LT 1 +# define NV40_FP_OP_COND_EQ 2 +# define NV40_FP_OP_COND_LE 3 +# define NV40_FP_OP_COND_GT 4 +# define NV40_FP_OP_COND_NE 5 +# define NV40_FP_OP_COND_GE 6 +# define NV40_FP_OP_COND_TR 7 + +/* high order bits of 
SRC1 */ +#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31) +#define NV40_FP_OP_DST_SCALE_SHIFT 28 +#define NV40_FP_OP_DST_SCALE_MASK (3 << 28) +#define NV40_FP_OP_DST_SCALE_1X 0 +#define NV40_FP_OP_DST_SCALE_2X 1 +#define NV40_FP_OP_DST_SCALE_4X 2 +#define NV40_FP_OP_DST_SCALE_8X 3 +#define NV40_FP_OP_DST_SCALE_INV_2X 5 +#define NV40_FP_OP_DST_SCALE_INV_4X 6 +#define NV40_FP_OP_DST_SCALE_INV_8X 7 + +/* SRC1 LOOP */ +#define NV40_FP_OP_LOOP_INCR_SHIFT 19 +#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19) +#define NV40_FP_OP_LOOP_INDEX_SHIFT 10 +#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10) +#define NV40_FP_OP_LOOP_COUNT_SHIFT 2 +#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2) + +/* SRC1 IF */ +#define NV40_FP_OP_ELSE_ID_SHIFT 2 +#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2) + +/* SRC1 CAL */ +#define NV40_FP_OP_IADDR_SHIFT 2 +#define NV40_FP_OP_IADDR_MASK (0xFF << 2) + +/* SRC1 REP + * I have no idea why there are 3 count values here.. but they + * have always been filled with the same value in my tests so + * far.. 
+ */ +#define NV40_FP_OP_REP_COUNT1_SHIFT 2 +#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2) +#define NV40_FP_OP_REP_COUNT2_SHIFT 10 +#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10) +#define NV40_FP_OP_REP_COUNT3_SHIFT 19 +#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19) + +/* SRC2 REP/IF */ +#define NV40_FP_OP_END_ID_SHIFT 2 +#define NV40_FP_OP_END_ID_MASK (0xFF << 2) + +// SRC2 high-order +#define NV40_FP_OP_INDEX_INPUT (1 << 30) +#define NV40_FP_OP_ADDR_INDEX_SHIFT 19 +#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19) + +//== Register selection == +#define NV40_FP_REG_TYPE_SHIFT 0 +#define NV40_FP_REG_TYPE_MASK (3 << 0) +# define NV40_FP_REG_TYPE_TEMP 0 +# define NV40_FP_REG_TYPE_INPUT 1 +# define NV40_FP_REG_TYPE_CONST 2 +#define NV40_FP_REG_SRC_SHIFT 2 +#define NV40_FP_REG_SRC_MASK (31 << 2) +#define NV40_FP_REG_UNK_0 (1 << 8) +#define NV40_FP_REG_SWZ_ALL_SHIFT 9 +#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9) +#define NV40_FP_REG_SWZ_X_SHIFT 9 +#define NV40_FP_REG_SWZ_X_MASK (3 << 9) +#define NV40_FP_REG_SWZ_Y_SHIFT 11 +#define NV40_FP_REG_SWZ_Y_MASK (3 << 11) +#define NV40_FP_REG_SWZ_Z_SHIFT 13 +#define NV40_FP_REG_SWZ_Z_MASK (3 << 13) +#define NV40_FP_REG_SWZ_W_SHIFT 15 +#define NV40_FP_REG_SWZ_W_MASK (3 << 15) +# define NV40_FP_SWIZZLE_X 0 +# define NV40_FP_SWIZZLE_Y 1 +# define NV40_FP_SWIZZLE_Z 2 +# define NV40_FP_SWIZZLE_W 3 +#define NV40_FP_REG_NEGATE (1 << 17) + +struct nv40_sreg { + int output; + int type; + int index; + + int dst_scale; + + int negate; + int abs; + int swz[4]; + + int cc_update; + int cc_update_reg; + int cc_test; + int cc_test_reg; + int cc_swz[4]; +}; + +static INLINE struct nv40_sreg +nv40_sr(int out, int type, int index) +{ + struct nv40_sreg temp = { + .output = out, + .type = type, + .index = index, + .dst_scale = DEF_SCALE, + .abs = 0, + .negate = 0, + .swz = { 0, 1, 2, 3 }, + .cc_update = 0, + .cc_update_reg = 0, + .cc_test = DEF_CTEST, + .cc_test_reg = 0, + .cc_swz = { 0, 1, 2, 3 }, + }; + return temp; +} + +static INLINE struct 
nv40_sreg +nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w) +{ + struct nv40_sreg dst = src; + + dst.swz[SWZ_X] = src.swz[x]; + dst.swz[SWZ_Y] = src.swz[y]; + dst.swz[SWZ_Z] = src.swz[z]; + dst.swz[SWZ_W] = src.swz[w]; + return dst; +} + +static INLINE struct nv40_sreg +nv40_sr_neg(struct nv40_sreg src) +{ + src.negate = !src.negate; + return src; +} + +static INLINE struct nv40_sreg +nv40_sr_abs(struct nv40_sreg src) +{ + src.abs = 1; + return src; +} + +static INLINE struct nv40_sreg +nv40_sr_scale(struct nv40_sreg src, int scale) +{ + src.dst_scale = scale; + return src; +} + +#endif diff --git a/src/mesa/pipe/nv40/nv40_state.c b/src/mesa/pipe/nv40/nv40_state.c new file mode 100644 index 0000000000..e38a5ea534 --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_state.c @@ -0,0 +1,674 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" + +#include "nv40_context.h" +#include "nv40_dma.h" +#include "nv40_state.h" + +#include "nvgl_pipe.h" + +static void * +nv40_alpha_test_state_create(struct pipe_context *pipe, + const struct pipe_alpha_test_state *cso) +{ + struct nv40_alpha_test_state *at; + + at = malloc(sizeof(struct nv40_alpha_test_state)); + + at->enabled = cso->enabled ? 
1 : 0; + if (at->enabled) { + at->func = nvgl_comparison_op(cso->func); + at->ref = float_to_ubyte(cso->ref); + } + + return (void *)at; +} + +static void +nv40_alpha_test_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + struct nv40_alpha_test_state *at = hwcso; + + if (at->enabled) { + BEGIN_RING(curie, NV40TCL_ALPHA_TEST_ENABLE, 3); + OUT_RING (at->enabled); + OUT_RING (at->func); + OUT_RING (at->ref); + } else { + BEGIN_RING(curie, NV40TCL_ALPHA_TEST_ENABLE, 1); + OUT_RING (0); + } +} + +static void +nv40_alpha_test_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void * +nv40_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv40_blend_state *cb; + + cb = malloc(sizeof(struct nv40_blend_state)); + + cb->b_enable = cso->blend_enable ? 1 : 0; + if (cb->b_enable) { + cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | + (nvgl_blend_func(cso->rgb_src_factor))); + cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | + (nvgl_blend_func(cso->rgb_dst_factor))); + cb->b_eqn = ((nvgl_blend_eqn(cso->alpha_func) << 16) | + (nvgl_blend_eqn(cso->rgb_func))); + } + + cb->l_enable = cso->logicop_enable ? 1 : 0; + if (cb->l_enable) { + cb->l_op = nvgl_logicop_func(cso->logicop_func); + } + + cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) | + ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) | + ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) | + ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0)); + + cb->d_enable = cso->dither ? 
1 : 0; + + return (void *)cb; +} + +static void +nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + struct nv40_blend_state *cb = hwcso; + + BEGIN_RING(curie, NV40TCL_DITHER_ENABLE, 1); + OUT_RING (cb->d_enable); + + if (cb->b_enable) { + BEGIN_RING(curie, NV40TCL_BLEND_ENABLE, 3); + OUT_RING (cb->b_enable); + OUT_RING (cb->b_srcfunc); + OUT_RING (cb->b_dstfunc); + BEGIN_RING(curie, NV40TCL_BLEND_EQUATION, 2); + OUT_RING (cb->b_eqn); + OUT_RING (cb->c_mask); + } else { + BEGIN_RING(curie, NV40TCL_BLEND_ENABLE, 1); + OUT_RING (0); + } + + if (cb->l_enable) { + BEGIN_RING(curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2); + OUT_RING (cb->l_enable); + OUT_RING (cb->l_op); + } else { + BEGIN_RING(curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 1); + OUT_RING (0); + } +} + +static void +nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void * +nv40_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nv40_sampler_state *ps; + + ps = malloc(sizeof(struct nv40_sampler_state)); + + ps->wrap = ((nv40_tex_wrap_mode(cso->wrap_r) << 16) | + (nv40_tex_wrap_mode(cso->wrap_t) << 8) | + (nv40_tex_wrap_mode(cso->wrap_s) << 0)); + ps->filt = ((nv40_tex_filter(cso->min_img_filter, + cso->min_mip_filter) << 16) | + (nv40_tex_filter(cso->mag_img_filter, + PIPE_TEX_MIPFILTER_NONE) << 24)); + ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | + (float_to_ubyte(cso->border_color[0]) << 16) | + (float_to_ubyte(cso->border_color[1]) << 8) | + (float_to_ubyte(cso->border_color[2]) << 0)); + + return (void *)ps; +} + +static void +nv40_sampler_state_bind(struct pipe_context *pipe, unsigned unit, + void *hwcso) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + struct nv40_sampler_state *ps = hwcso; + + nv40->tex_sampler[unit] = ps; + nv40->tex_dirty |= (1 << unit); + + nv40->dirty |= NV40_NEW_TEXTURE; +} + +static void 
+nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void * +nv40_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv40_rasterizer_state *rs; + + /*XXX: ignored: + * light_twoside + * offset_cw/ccw -nohw + * scissor + * point_smooth -nohw + * multisample + * offset_units / offset_scale + */ + rs = malloc(sizeof(struct nv40_rasterizer_state)); + + rs->shade_model = cso->flatshade ? 0x1d00 : 0x1d01; + + rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff; + rs->line_smooth_en = cso->line_smooth ? 1 : 0; + rs->line_stipple_en = cso->line_stipple_enable ? 1 : 0; + rs->line_stipple = (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor; + + rs->point_size = *(uint32_t*)&cso->point_size; + + rs->poly_smooth_en = cso->poly_smooth ? 1 : 0; + rs->poly_stipple_en = cso->poly_stipple_enable ? 1 : 0; + + if (cso->front_winding == PIPE_WINDING_CCW) { + rs->front_face = 0x0901; + rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw); + rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw); + } else { + rs->front_face = 0x0900; + rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw); + rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw); + } + + rs->cull_face_en = 0; + rs->cull_face = 0x0900; + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + rs->cull_face = 0x0901; + /* fall-through */ + case PIPE_WINDING_CW: + rs->cull_face_en = 1; + break; + case PIPE_WINDING_NONE: + default: + break; + } + + return (void *)rs; +} + +static void +nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + struct nv40_rasterizer_state *rs = hwcso; + + BEGIN_RING(curie, NV40TCL_SHADE_MODEL, 1); + OUT_RING (rs->shade_model); + + BEGIN_RING(curie, NV40TCL_LINE_WIDTH, 2); + OUT_RING (rs->line_width); + OUT_RING (rs->line_smooth_en); + BEGIN_RING(curie, NV40TCL_LINE_STIPPLE_ENABLE, 2); + OUT_RING 
(rs->line_stipple_en); + OUT_RING (rs->line_stipple); + + BEGIN_RING(curie, NV40TCL_POINT_SIZE, 1); + OUT_RING (rs->point_size); + + BEGIN_RING(curie, NV40TCL_POLYGON_MODE_FRONT, 6); + OUT_RING (rs->poly_mode_front); + OUT_RING (rs->poly_mode_back); + OUT_RING (rs->cull_face); + OUT_RING (rs->front_face); + OUT_RING (rs->poly_smooth_en); + OUT_RING (rs->cull_face_en); + + BEGIN_RING(curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + OUT_RING (rs->poly_stipple_en); +} + +static void +nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void * +nv40_depth_stencil_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_state *cso) +{ + struct nv40_depth_stencil_state *zs; + + /*XXX: ignored: + * depth.occlusion_count + * depth.clear + * stencil.clear_value + */ + zs = malloc(sizeof(struct nv40_depth_stencil_state)); + + zs->depth.func = nvgl_comparison_op(cso->depth.func); + zs->depth.write_enable = cso->depth.writemask ? 1 : 0; + zs->depth.test_enable = cso->depth.enabled ? 1 : 0; + + zs->stencil.back.enable = cso->stencil.back_enabled ? 1 : 0; + zs->stencil.back.wmask = cso->stencil.write_mask[1]; + zs->stencil.back.func = + nvgl_comparison_op(cso->stencil.back_func); + zs->stencil.back.ref = cso->stencil.ref_value[1]; + zs->stencil.back.vmask = cso->stencil.value_mask[1]; + zs->stencil.back.fail = nvgl_stencil_op(cso->stencil.back_fail_op); + zs->stencil.back.zfail = nvgl_stencil_op(cso->stencil.back_zfail_op); + zs->stencil.back.zpass = nvgl_stencil_op(cso->stencil.back_zpass_op); + + zs->stencil.front.enable= cso->stencil.front_enabled ? 
1 : 0; + zs->stencil.front.wmask = cso->stencil.write_mask[0]; + zs->stencil.front.func = + nvgl_comparison_op(cso->stencil.front_func); + zs->stencil.front.ref = cso->stencil.ref_value[0]; + zs->stencil.front.vmask = cso->stencil.value_mask[0]; + zs->stencil.front.fail = nvgl_stencil_op(cso->stencil.front_fail_op); + zs->stencil.front.zfail = nvgl_stencil_op(cso->stencil.front_zfail_op); + zs->stencil.front.zpass = nvgl_stencil_op(cso->stencil.front_zpass_op); + + return (void *)zs; +} + +static void +nv40_depth_stencil_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + struct nv40_depth_stencil_state *zs = hwcso; + + BEGIN_RING(curie, NV40TCL_DEPTH_FUNC, 3); + OUT_RINGp ((uint32_t *)&zs->depth, 3); + BEGIN_RING(curie, NV40TCL_STENCIL_BACK_ENABLE, 16); + OUT_RINGp ((uint32_t *)&zs->stencil.back, 8); + OUT_RINGp ((uint32_t *)&zs->stencil.front, 8); +} + +static void +nv40_depth_stencil_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void * +nv40_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv40_vertex_program *vp; + + vp = calloc(1, sizeof(struct nv40_vertex_program)); + vp->pipe = cso; + + return (void *)vp; +} + +static void +nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + struct nv40_vertex_program *vp = hwcso; + + nv40->vertprog.vp = vp; + nv40->dirty |= NV40_NEW_VERTPROG; +} + +static void +nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void * +nv40_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv40_fragment_program *fp; + + fp = calloc(1, sizeof(struct nv40_fragment_program)); + fp->pipe = cso; + + return (void *)fp; +} + +static void +nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = (struct nv40_context 
*)pipe; + struct nv40_fragment_program *fp = hwcso; + + nv40->fragprog.fp = fp; + nv40->dirty |= NV40_NEW_FRAGPROG; +} + +static void +nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void +nv40_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + + BEGIN_RING(curie, NV40TCL_BLEND_COLOR, 1); + OUT_RING ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0)); +} + +static void +nv40_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + + nv40->dirty |= NV40_NEW_VERTPROG; +} + +static void +nv40_set_clear_color_state(struct pipe_context *pipe, + const struct pipe_clear_color_state *ccol) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + + BEGIN_RING(curie, NV40TCL_CLEAR_VALUE_COLOR, 1); + OUT_RING ((float_to_ubyte(ccol->color[3]) << 24) | + (float_to_ubyte(ccol->color[0]) << 16) | + (float_to_ubyte(ccol->color[1]) << 8) | + (float_to_ubyte(ccol->color[2]) << 0)); +} + +static void +nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + + if (shader == PIPE_SHADER_VERTEX) { + nv40->vertprog.constant_buf = buf->buffer; + nv40->dirty |= NV40_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv40->fragprog.constant_buf = buf->buffer; + nv40->dirty |= NV40_NEW_FRAGPROG; + } +} + +static void +nv40_set_feedback_state(struct pipe_context *pipe, + const struct pipe_feedback_state *feedback) +{ + NOUVEAU_ERR("\n"); +} + +#define get_region(surf) ((surf) ? 
surf->region : NULL) +static void +nv40_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + struct nouveau_winsys *nvws = nv40->nvws; + struct pipe_region *region; + uint32_t rt_enable = 0, rt_format = 0; + + if ((region = get_region(fb->cbufs[0]))) { + rt_enable |= NV40TCL_RT_ENABLE_COLOR0; + + BEGIN_RING(curie, NV40TCL_DMA_COLOR0, 1); + OUT_RELOCo(region->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(curie, NV40TCL_COLOR0_PITCH, 2); + OUT_RING (region->pitch * region->cpp); + OUT_RELOCl(region->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + } + + if ((region = get_region(fb->cbufs[1]))) { + rt_enable |= NV40TCL_RT_ENABLE_COLOR1; + + BEGIN_RING(curie, NV40TCL_DMA_COLOR1, 1); + OUT_RELOCo(region->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(curie, NV40TCL_COLOR1_OFFSET, 2); + OUT_RELOCl(region->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (region->pitch * region->cpp); + } + + if ((region = get_region(fb->cbufs[2]))) { + rt_enable |= NV40TCL_RT_ENABLE_COLOR2; + + BEGIN_RING(curie, NV40TCL_DMA_COLOR2, 1); + OUT_RELOCo(region->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(curie, NV40TCL_COLOR2_OFFSET, 1); + OUT_RELOCl(region->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(curie, NV40TCL_COLOR2_PITCH, 1); + OUT_RING (region->pitch * region->cpp); + } + + if ((region = get_region(fb->cbufs[3]))) { + rt_enable |= NV40TCL_RT_ENABLE_COLOR3; + + BEGIN_RING(curie, NV40TCL_DMA_COLOR3, 1); + OUT_RELOCo(region->buffer, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(curie, NV40TCL_COLOR3_OFFSET, 1); + OUT_RELOCl(region->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(curie, NV40TCL_COLOR3_PITCH, 1); + OUT_RING (region->pitch * region->cpp); + } + + if ((region = get_region(fb->zbuf))) { + BEGIN_RING(curie, NV40TCL_DMA_ZETA, 1); + OUT_RELOCo(region->buffer, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR | NOUVEAU_BO_RD); + 
BEGIN_RING(curie, NV40TCL_ZETA_OFFSET, 1); + OUT_RELOCl(region->buffer, 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR | NOUVEAU_BO_RD); + BEGIN_RING(curie, NV40TCL_ZETA_PITCH, 1); + OUT_RING (region->pitch * region->cpp); + } + + if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 | + NV40TCL_RT_ENABLE_COLOR3)) + rt_enable |= NV40TCL_RT_ENABLE_MRT; + BEGIN_RING(curie, NV40TCL_RT_ENABLE, 1); + OUT_RING (rt_enable); + + if (0) { +#if 0 + rt_format |= (log2width << + NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT); + rt_format |= (log2height << + NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT); +#endif + rt_format |= (NV40TCL_RT_FORMAT_TYPE_SWIZZLED << + NV40TCL_RT_FORMAT_TYPE_SHIFT); + } else { + rt_format |= (NV40TCL_RT_FORMAT_TYPE_LINEAR << + NV40TCL_RT_FORMAT_TYPE_SHIFT); + } + + if (fb->cbufs[0]->format == PIPE_FORMAT_U_R5_G6_B5) { + rt_format |= (NV40TCL_RT_FORMAT_COLOR_R5G6B5 << + NV40TCL_RT_FORMAT_COLOR_SHIFT); + } else { + rt_format |= (NV40TCL_RT_FORMAT_COLOR_A8R8G8B8 << + NV40TCL_RT_FORMAT_COLOR_SHIFT); + } + + if (fb->zbuf && fb->zbuf->format == PIPE_FORMAT_U_Z16) { + rt_format |= (NV40TCL_RT_FORMAT_DEPTH_Z16 << + NV40TCL_RT_FORMAT_DEPTH_SHIFT); + } else { + rt_format |= (NV40TCL_RT_FORMAT_DEPTH_Z24S8 << + NV40TCL_RT_FORMAT_DEPTH_SHIFT); + } + + BEGIN_RING(curie, NV40TCL_RT_HORIZ, 3); + OUT_RING ((fb->cbufs[0]->width << 16) | 0); + OUT_RING ((fb->cbufs[0]->height << 16) | 0); + OUT_RING (rt_format); + BEGIN_RING(curie, NV40TCL_VIEWPORT_HORIZ, 2); + OUT_RING ((fb->cbufs[0]->width << 16) | 0); + OUT_RING ((fb->cbufs[0]->height << 16) | 0); + BEGIN_RING(curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (((fb->cbufs[0]->width - 1) << 16) | 0); + OUT_RING (((fb->cbufs[0]->height - 1) << 16) | 0); +} + +static void +nv40_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + + BEGIN_RING(curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32); + OUT_RINGp ((uint32_t 
*)stipple->stipple, 32);
+}
+
+static void
+nv40_set_sampler_units(struct pipe_context *pipe,
+		       uint num_samplers, const uint *units)
+{
+}
+
+static void
+nv40_set_scissor_state(struct pipe_context *pipe,
+		       const struct pipe_scissor_state *s)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+
+	BEGIN_RING(curie, NV40TCL_SCISSOR_HORIZ, 2);
+	OUT_RING  (((s->maxx - s->minx) << 16) | s->minx);
+	OUT_RING  (((s->maxy - s->miny) << 16) | s->miny);
+}
+
+static void
+nv40_set_texture_state(struct pipe_context *pipe, unsigned unit,
+		       struct pipe_mipmap_tree *miptree)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+
+	nv40->tex_miptree[unit] = miptree;
+	nv40->tex_dirty |= (1 << unit); /* per-unit bitmask: set this unit's bit, not its number */
+
+	nv40->dirty |= NV40_NEW_TEXTURE; /* texture state is re-emitted by nv40_emit_hw_state */
+}
+
+static void
+nv40_set_viewport_state(struct pipe_context *pipe,
+			const struct pipe_viewport_state *vpt)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+
+	BEGIN_RING(curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8);
+	OUT_RINGf (vpt->translate[0]);
+	OUT_RINGf (vpt->translate[1]);
+	OUT_RINGf (vpt->translate[2]);
+	OUT_RINGf (vpt->translate[3]);
+	OUT_RINGf (vpt->scale[0]);
+	OUT_RINGf (vpt->scale[1]);
+	OUT_RINGf (vpt->scale[2]);
+	OUT_RINGf (vpt->scale[3]);
+}
+
+static void
+nv40_set_vertex_buffer(struct pipe_context *pipe, unsigned index,
+		       const struct pipe_vertex_buffer *vb)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+
+	nv40->vtxbuf[index] = *vb;
+
+	nv40->dirty |= NV40_NEW_ARRAYS;
+}
+
+static void
+nv40_set_vertex_element(struct pipe_context *pipe, unsigned index,
+			const struct pipe_vertex_element *ve)
+{
+	struct nv40_context *nv40 = (struct nv40_context *)pipe;
+
+	nv40->vtxelt[index] = *ve;
+
+	nv40->dirty |= NV40_NEW_ARRAYS;
+}
+
+static void
+nv40_set_feedback_buffer(struct pipe_context *pipe, unsigned index,
+			 const struct pipe_feedback_buffer *fbb)
+{
+	NOUVEAU_ERR("\n");
+}
+
+void
+nv40_init_state_functions(struct nv40_context *nv40)
+{
+	nv40->pipe.create_alpha_test_state = 
nv40_alpha_test_state_create; + nv40->pipe.bind_alpha_test_state = nv40_alpha_test_state_bind; + nv40->pipe.delete_alpha_test_state = nv40_alpha_test_state_delete; + + nv40->pipe.create_blend_state = nv40_blend_state_create; + nv40->pipe.bind_blend_state = nv40_blend_state_bind; + nv40->pipe.delete_blend_state = nv40_blend_state_delete; + + nv40->pipe.create_sampler_state = nv40_sampler_state_create; + nv40->pipe.bind_sampler_state = nv40_sampler_state_bind; + nv40->pipe.delete_sampler_state = nv40_sampler_state_delete; + + nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create; + nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; + nv40->pipe.delete_rasterizer_state = nv40_rasterizer_state_delete; + + nv40->pipe.create_depth_stencil_state = nv40_depth_stencil_state_create; + nv40->pipe.bind_depth_stencil_state = nv40_depth_stencil_state_bind; + nv40->pipe.delete_depth_stencil_state = nv40_depth_stencil_state_delete; + + nv40->pipe.create_vs_state = nv40_vp_state_create; + nv40->pipe.bind_vs_state = nv40_vp_state_bind; + nv40->pipe.delete_vs_state = nv40_vp_state_delete; + + nv40->pipe.create_fs_state = nv40_fp_state_create; + nv40->pipe.bind_fs_state = nv40_fp_state_bind; + nv40->pipe.delete_fs_state = nv40_fp_state_delete; + + nv40->pipe.set_blend_color = nv40_set_blend_color; + nv40->pipe.set_clip_state = nv40_set_clip_state; + nv40->pipe.set_clear_color_state = nv40_set_clear_color_state; + nv40->pipe.set_constant_buffer = nv40_set_constant_buffer; +// nv40->pipe.set_feedback_state = nv40_set_feedback_state; + nv40->pipe.set_framebuffer_state = nv40_set_framebuffer_state; + nv40->pipe.set_polygon_stipple = nv40_set_polygon_stipple; + nv40->pipe.set_sampler_units = nv40_set_sampler_units; + nv40->pipe.set_scissor_state = nv40_set_scissor_state; + nv40->pipe.set_texture_state = nv40_set_texture_state; + nv40->pipe.set_viewport_state = nv40_set_viewport_state; + + nv40->pipe.set_vertex_buffer = nv40_set_vertex_buffer; + 
nv40->pipe.set_vertex_element = nv40_set_vertex_element; + +// nv40->pipe.set_feedback_buffer = nv40_set_feedback_buffer; +} + diff --git a/src/mesa/pipe/nv40/nv40_state.h b/src/mesa/pipe/nv40/nv40_state.h new file mode 100644 index 0000000000..1535037f63 --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_state.h @@ -0,0 +1,173 @@ +#ifndef __NV40_STATE_H__ +#define __NV40_STATE_H__ + +#include "pipe/p_state.h" + +struct nv40_alpha_test_state { + uint32_t enabled; + uint32_t func; + uint32_t ref; +}; + +struct nv40_blend_state { + uint32_t b_enable; + uint32_t b_srcfunc; + uint32_t b_dstfunc; + uint32_t b_eqn; + + uint32_t l_enable; + uint32_t l_op; + + uint32_t c_mask; + + uint32_t d_enable; +}; + +struct nv40_sampler_state { + uint32_t wrap; + uint32_t filt; + uint32_t bcol; +}; + +struct nv40_rasterizer_state { + uint32_t shade_model; + + uint32_t line_width; + uint32_t line_smooth_en; + uint32_t line_stipple_en; + uint32_t line_stipple; + + uint32_t point_size; + + uint32_t poly_smooth_en; + uint32_t poly_stipple_en; + + uint32_t poly_mode_front; + uint32_t poly_mode_back; + + uint32_t front_face; + uint32_t cull_face; + uint32_t cull_face_en; + +}; + +struct nv40_vertex_program { + const struct pipe_shader_state *pipe; + + boolean translated; + boolean on_hw; + int start_ip; + + uint32_t *insn; + int insn_len; + + struct { + int pipe_id; + int hw_id; + float value[4]; + } consts[256]; + int num_consts; + + uint32_t ir; + uint32_t or; +}; + +struct nv40_fragment_program { + const struct pipe_shader_state *pipe; + + boolean translated; + boolean on_hw; + + uint32_t *insn; + int insn_len; + + struct { + int pipe_id; + int hw_id; + } consts[256]; + int num_consts; + + struct pipe_buffer_handle *buffer; + + boolean uses_kil; + boolean writes_depth; + int num_regs; +}; + +struct nv40_depth_push { + uint32_t func; + uint32_t write_enable; + uint32_t test_enable; +}; + +struct nv40_stencil_push { + uint32_t enable; + uint32_t wmask; + uint32_t func; + uint32_t ref; + uint32_t 
vmask;
+	uint32_t fail;
+	uint32_t zfail;
+	uint32_t zpass;
+};
+
+struct nv40_depth_stencil_state {
+	struct nv40_depth_push depth;
+	struct { /* struct, not union: back and front are filled separately and pushed as 8 + 8 words */
+		struct nv40_stencil_push back;
+		struct nv40_stencil_push front;
+	} stencil;
+};
+
+static INLINE unsigned
+nv40_tex_wrap_mode(unsigned wrap) {
+	switch (wrap) {
+	case PIPE_TEX_WRAP_REPEAT:
+		return NV40TCL_TEX_WRAP_S_REPEAT;
+	case PIPE_TEX_WRAP_MIRROR_REPEAT:
+		return NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT;
+	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+		return NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE;
+	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+		return NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER;
+	case PIPE_TEX_WRAP_CLAMP:
+		return NV40TCL_TEX_WRAP_S_CLAMP;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+		return NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+		return NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP:
+		return NV40TCL_TEX_WRAP_S_MIRROR_CLAMP;
+	default:
+		return NV40TCL_TEX_WRAP_S_REPEAT;
+	}
+}
+
+static INLINE unsigned
+nv40_tex_filter(unsigned f0, unsigned f1) {
+	switch (f0) {
+	case PIPE_TEX_FILTER_NEAREST:
+		switch (f1) {
+		case PIPE_TEX_MIPFILTER_NEAREST:
+			return NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST;
+		case PIPE_TEX_MIPFILTER_LINEAR:
+			return NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR;
+		case PIPE_TEX_MIPFILTER_NONE:
+		default:
+			return NV40TCL_TEX_FILTER_MIN_NEAREST;
+		}
+	case PIPE_TEX_FILTER_LINEAR:
+	default:
+		switch (f1) {
+		case PIPE_TEX_MIPFILTER_NEAREST:
+			return NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST;
+		case PIPE_TEX_MIPFILTER_LINEAR:
+			return NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR;
+		case PIPE_TEX_MIPFILTER_NONE:
+		default:
+			return NV40TCL_TEX_FILTER_MIN_LINEAR;
+		}
+	}
+}
+
+#endif
diff --git a/src/mesa/pipe/nv40/nv40_state_emit.c b/src/mesa/pipe/nv40/nv40_state_emit.c
new file mode 100644
index 0000000000..a29c70538f
--- /dev/null
+++ b/src/mesa/pipe/nv40/nv40_state_emit.c
@@ -0,0 +1,112 @@
+#include "nv40_context.h"
+#include "nv40_dma.h"
+#include "nv40_state.h"
+
+static INLINE void
+nv40_state_update_fragprog(struct nv40_context *nv40)
+{
+	struct pipe_context *pipe = (struct pipe_context *)nv40;
+	struct nv40_fragment_program *fp = nv40->fragprog.fp;
+	float *map;
+	int i;
+
+	if (!fp->translated)
+		nv40_fragprog_translate(nv40, fp);
+
+	if (fp->num_consts) {
+		map = pipe->winsys->buffer_map(pipe->winsys,
+					       nv40->fragprog.constant_buf,
+					       PIPE_BUFFER_FLAG_READ);
+		for (i = 0; i < fp->num_consts; i++) {
+			int pid = fp->consts[i].pipe_id; /* signed, matching the consts[].pipe_id field */
+
+			if (pid == -1) /* no slot in the constant buffer to refresh from */
+				continue;
+
+			if (!memcmp(&fp->insn[fp->consts[i].hw_id], &map[pid*4],
+				    4 * sizeof(float)))
+				continue;
+
+			memcpy(&fp->insn[fp->consts[i].hw_id], &map[pid*4],
+			       4 * sizeof(float));
+			fp->on_hw = 0; /* program words changed, so it must be bound again */
+		}
+		pipe->winsys->buffer_unmap(pipe->winsys,
+					   nv40->fragprog.constant_buf);
+	}
+}
+
+static INLINE void
+nv40_state_update_vertprog(struct nv40_context *nv40)
+{
+	struct pipe_context *pipe = (struct pipe_context *)nv40;
+	struct nv40_vertex_program *vp = nv40->vertprog.vp;
+	float *map;
+	int i;
+
+	if (!nv40->vertprog.vp->translated)
+		nv40_vertprog_translate(nv40, nv40->vertprog.vp);
+
+	if (vp->num_consts) {
+		map = pipe->winsys->buffer_map(pipe->winsys,
+					       nv40->vertprog.constant_buf,
+					       PIPE_BUFFER_FLAG_READ);
+		for (i = 0; i < vp->num_consts; i++) {
+			int pid = vp->consts[i].pipe_id; /* must be signed: as uint, "pid >= 0" was always true */
+
+			if (pid >= 0) { /* negative ids never index map[]; their stored value is uploaded as-is */
+				if (!memcmp(vp->consts[i].value, &map[pid*4],
+					    4 * sizeof(float)))
+					continue;
+				memcpy(vp->consts[i].value, &map[pid*4],
+				       4 * sizeof(float));
+			}
+
+			BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
+			OUT_RING  (vp->consts[i].hw_id);
+			OUT_RINGp ((uint32_t *)vp->consts[i].value, 4);
+		}
+		pipe->winsys->buffer_unmap(pipe->winsys,
+					   nv40->vertprog.constant_buf);
+	}
+}
+
+void
+nv40_emit_hw_state(struct nv40_context *nv40)
+{
+	if (nv40->dirty & NV40_NEW_FRAGPROG) {
+		struct nv40_fragment_program *cur = nv40->fragprog.fp;
+
+		nv40_state_update_fragprog(nv40);
+
+		if (cur->on_hw)
+			nv40->dirty &= ~NV40_NEW_FRAGPROG;
+
+		if 
(!cur->on_hw || cur != nv40->fragprog.active_fp) + nv40_fragprog_bind(nv40, cur); + } + + if (nv40->dirty & NV40_NEW_TEXTURE) + nv40_state_tex_update(nv40); + + if (nv40->dirty & (NV40_NEW_TEXTURE | NV40_NEW_FRAGPROG)) { + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (2); + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (1); + nv40->dirty &= ~(NV40_NEW_TEXTURE | NV40_NEW_FRAGPROG); + } + + if (nv40->dirty & NV40_NEW_VERTPROG) { + nv40_state_update_vertprog(nv40); + if (nv40->vertprog.vp != nv40->vertprog.active_vp) + nv40_vertprog_bind(nv40, nv40->vertprog.vp); + nv40->dirty &= ~NV40_NEW_VERTPROG; + } + + if (nv40->dirty & NV40_NEW_ARRAYS) { + nv40_vbo_arrays_update(nv40); + nv40->dirty &= ~NV40_NEW_ARRAYS; + } +} + diff --git a/src/mesa/pipe/nv40/nv40_state_tex.c b/src/mesa/pipe/nv40/nv40_state_tex.c new file mode 100644 index 0000000000..a92d6250a2 --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_state_tex.c @@ -0,0 +1,140 @@ +#include "nv40_context.h" +#include "nv40_dma.h" + +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV40TCL_TEX_FORMAT_FORMAT_##tf, \ + (NV40TCL_TEX_SWIZZLE_S0_X_##ts0x << NV40TCL_TEX_SWIZZLE_S0_X_SHIFT) | \ + (NV40TCL_TEX_SWIZZLE_S0_X_##ts0y << NV40TCL_TEX_SWIZZLE_S0_Y_SHIFT) | \ + (NV40TCL_TEX_SWIZZLE_S0_X_##ts0z << NV40TCL_TEX_SWIZZLE_S0_Z_SHIFT) | \ + (NV40TCL_TEX_SWIZZLE_S0_X_##ts0w << NV40TCL_TEX_SWIZZLE_S0_W_SHIFT) | \ + (NV40TCL_TEX_SWIZZLE_S1_X_##ts1x << NV40TCL_TEX_SWIZZLE_S1_X_SHIFT) | \ + (NV40TCL_TEX_SWIZZLE_S1_X_##ts1y << NV40TCL_TEX_SWIZZLE_S1_Y_SHIFT) | \ + (NV40TCL_TEX_SWIZZLE_S1_X_##ts1z << NV40TCL_TEX_SWIZZLE_S1_Z_SHIFT) | \ + (NV40TCL_TEX_SWIZZLE_S1_X_##ts1w << NV40TCL_TEX_SWIZZLE_S1_W_SHIFT), \ +} + +struct nv40_texture_format { + boolean defined; + uint pipe; + int format; + int swizzle; +}; + +static struct nv40_texture_format +nv40_texture_formats[] = { + _(U_A8_R8_G8_B8, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), + _(U_A1_R5_G5_B5, A1R5G5B5, S1, S1, S1, S1, 
X, Y, Z, W),
+	_(U_A4_R4_G4_B4, A4R4G4B4,   S1,   S1,   S1,   S1, X, Y, Z, W),
+	_(U_R5_G6_B5   , R5G6B5  ,   S1,   S1,   S1,  ONE, X, Y, Z, W),
+	_(U_L8         , L8      ,   S1,   S1,   S1,  ONE, X, X, X, X),
+	_(U_A8         , L8      , ZERO, ZERO, ZERO,   S1, X, X, X, X),
+	_(U_I8         , L8      ,   S1,   S1,   S1,   S1, X, X, X, X),
+	_(U_A8_L8      , A8L8    ,   S1,   S1,   S1,   S1, Z, W, X, Y),
+//	_(RGB_DXT1     , 0x86,   S1,   S1,   S1,  ONE, X, Y, Z, W, 0x00, 0x00),
+//	_(RGBA_DXT1    , 0x86,   S1,   S1,   S1,   S1, X, Y, Z, W, 0x00, 0x00),
+//	_(RGBA_DXT3    , 0x87,   S1,   S1,   S1,   S1, X, Y, Z, W, 0x00, 0x00),
+//	_(RGBA_DXT5    , 0x88,   S1,   S1,   S1,   S1, X, Y, Z, W, 0x00, 0x00),
+};
+
+static struct nv40_texture_format *
+nv40_tex_format(uint pipe_format)
+{
+	const unsigned nr = sizeof(nv40_texture_formats) / /* table has no sentinel: bound the scan */
+			    sizeof(nv40_texture_formats[0]);
+	unsigned i;
+
+	for (i = 0; i < nr; i++)
+		if (nv40_texture_formats[i].pipe == pipe_format)
+			return &nv40_texture_formats[i];
+
+	return NULL; /* unknown format; callers must check */
+}
+
+static INLINE int
+nv40_tex_dims(uint pipe_target)
+{
+	switch (pipe_target) {
+	case PIPE_TEXTURE_1D: return 1;
+	case PIPE_TEXTURE_2D: return 2;
+	case PIPE_TEXTURE_3D: return 3;
+	case PIPE_TEXTURE_CUBE: return 2;
+	default:
+		NOUVEAU_ERR("AII unknown pipe target: %d\n", pipe_target);
+		return 2;
+	}
+}
+
+static void
+nv40_tex_unit_enable(struct nv40_context *nv40, int unit)
+{
+	struct nouveau_winsys *nvws = nv40->nvws;
+	struct nv40_sampler_state *ps = nv40->tex_sampler[unit];
+	struct pipe_mipmap_tree *mt = nv40->tex_miptree[unit];
+	struct nv40_texture_format *tf;
+	uint32_t txf, txs, txp;
+	int swizzled = 0; /*XXX: implement in region code? */
+
+	tf = nv40_tex_format(mt->format);
+	if (!tf || !tf->defined) { /* lookup returns NULL when the format is not in the table */
+		NOUVEAU_ERR("Unsupported texture format: 0x%x\n", mt->format);
+		return;
+	}
+
+	txf  = (tf->format | 0x80) << NV40TCL_TEX_FORMAT_FORMAT_SHIFT;
+	txf |= ((mt->last_level - mt->first_level + 1) <<
+		NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT);
+	if (1) /* XXX */
+		txf |= NV40TCL_TEX_FORMAT_NO_BORDER;
+
+	txf |= (nv40_tex_dims(mt->target) << NV40TCL_TEX_FORMAT_DIMS_SHIFT);
+	if (0) /*XXX*/
+		txf |= NV40TCL_TEX_FORMAT_RECT;
+
+	if (swizzled) {
+		txp = 0;
+	} else {
+		txp  = mt->pitch * mt->cpp;
+		txf |= NV40TCL_TEX_FORMAT_LINEAR;
+	}
+
+	txs = tf->swizzle;
+	if (mt->format == PIPE_FORMAT_U_A8_L8)
+		txs |= (1<<16); /*nfi*/
+
+	BEGIN_RING(curie, NV40TCL_TEX_OFFSET(unit), 8);
+	OUT_RELOCl(mt->region->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+		   NOUVEAU_BO_RD);
+	OUT_RELOCd(mt->region->buffer, txf, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
+		   NOUVEAU_BO_OR | NOUVEAU_BO_RD, NV40TCL_TEX_FORMAT_DMA0,
+		   NV40TCL_TEX_FORMAT_DMA1);
+	OUT_RING  (ps->wrap);
+	OUT_RING  (NV40TCL_TEX_ENABLE_ENABLE |
+		   (0x00078000) /* mipmap related? 
*/); + OUT_RING (txs); + OUT_RING (ps->filt | 0x3fd6 /*voodoo*/); + OUT_RING ((mt->width0 << NV40TCL_TEX_SIZE0_W_SHIFT) | mt->height0); + OUT_RING (ps->bcol); + BEGIN_RING(curie, NV40TCL_TEX_SIZE1(unit), 1); + OUT_RING ((mt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); +} + +void +nv40_state_tex_update(struct nv40_context *nv40) +{ + while (nv40->tex_dirty) { + int unit = ffs(nv40->tex_dirty) - 1; + + if (nv40->tex_miptree[unit]) { + nv40_tex_unit_enable(nv40, unit); + } else { + BEGIN_RING(curie, NV40TCL_TEX_ENABLE(unit), 1); + OUT_RING (0); + } + + nv40->tex_dirty &= ~(1 << unit); + } +} + diff --git a/src/mesa/pipe/nv40/nv40_surface.c b/src/mesa/pipe/nv40/nv40_surface.c new file mode 100644 index 0000000000..84e0d79268 --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_surface.c @@ -0,0 +1,229 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "nv40_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" + + +#define CLIP_TILE \ + do { \ + if (x >= ps->width) \ + return; \ + if (y >= ps->height) \ + return; \ + if (x + w > ps->width) \ + w = ps->width - x; \ + if (y + h > ps->height) \ + h = ps->height -y; \ + } while(0) + + +/** + * Note: this is exactly like a8r8g8b8_get_tile() in sp_surface.c + * Share it someday. + */ +static void +nv40_get_tile_rgba(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, float *p) +{ + const unsigned *src + = ((const unsigned *) (ps->region->map + ps->offset)) + + y * ps->region->pitch + x; + unsigned i, j; + unsigned w0 = w; + + CLIP_TILE; + + switch (ps->format) { + case PIPE_FORMAT_U_A8_R8_G8_B8: + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++) { + const unsigned pixel = src[j]; + pRow[0] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff); + pRow[1] = UBYTE_TO_FLOAT((pixel >> 8) & 0xff); + pRow[2] = UBYTE_TO_FLOAT((pixel >> 0) & 0xff); + pRow[3] = UBYTE_TO_FLOAT((pixel >> 24) & 0xff); + pRow += 4; + } + src += ps->region->pitch; + p += w0 * 4; + } + break; + case PIPE_FORMAT_S8_Z24: + { + const float scale = 1.0 / (float) 0xffffff; + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++) { + const unsigned pixel = src[j]; + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (pixel & 0xffffff) * scale; + pRow += 4; + } + src += ps->region->pitch; + p += w0 * 4; + } + } + break; + default: + assert(0); + } +} + + +static void +nv40_put_tile_rgba(struct pipe_context *pipe, + 
struct pipe_surface *ps, + uint x, uint y, uint w, uint h, const float *p) +{ + /* TODO */ + assert(0); +} + + +/* + * XXX note: same as code in sp_surface.c + */ +static void +nv40_get_tile(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + void *p, int dst_stride) +{ + const uint cpp = ps->region->cpp; + const uint w0 = w; + const ubyte *pSrc; + ubyte *pDest; + uint i; + + assert(ps->region->map); + + CLIP_TILE; + + if (dst_stride == 0) { + dst_stride = w0 * cpp; + } + + pSrc = ps->region->map + ps->offset + (y * ps->region->pitch + x) * cpp; + pDest = (ubyte *) p; + + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, w0 * cpp); + pDest += dst_stride; + pSrc += ps->region->pitch * cpp; + } +} + + +/* + * XXX note: same as code in sp_surface.c + */ +static void +nv40_put_tile(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const void *p, int src_stride) +{ + const uint cpp = ps->region->cpp; + const uint w0 = w; + const ubyte *pSrc; + ubyte *pDest; + uint i; + + assert(ps->region->map); + + CLIP_TILE; + + if (src_stride == 0) { + src_stride = w0 * cpp; + } + + pSrc = (const ubyte *) p; + pDest = ps->region->map + ps->offset + (y * ps->region->pitch + x) * cpp; + + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, w0 * cpp); + pDest += ps->region->pitch * cpp; + pSrc += src_stride; + } +} + + +/* + * XXX note: same as code in sp_surface.c + */ +static struct pipe_surface * +nv40_get_tex_surface(struct pipe_context *pipe, + struct pipe_mipmap_tree *mt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_surface *ps; + unsigned offset; /* in bytes */ + + offset = mt->level[level].level_offset; + + if (mt->target == PIPE_TEXTURE_CUBE) { + offset += mt->level[level].image_offset[face] * mt->cpp; + } + else if (mt->target == PIPE_TEXTURE_3D) { + offset += mt->level[level].image_offset[zslice] * mt->cpp; + } + else { + assert(face == 0); + assert(zslice == 0); + } + + ps = 
pipe->winsys->surface_alloc(pipe->winsys, mt->format); + if (ps) { + assert(ps->format); + assert(ps->refcount); + pipe_region_reference(&ps->region, mt->region); + ps->width = mt->level[level].width; + ps->height = mt->level[level].height; + ps->offset = offset; + } + return ps; +} + + +void +nv40_init_surface_functions(struct nv40_context *nv40) +{ + nv40->pipe.get_tex_surface = nv40_get_tex_surface; + nv40->pipe.get_tile = nv40_get_tile; + nv40->pipe.put_tile = nv40_put_tile; + nv40->pipe.get_tile_rgba = nv40_get_tile_rgba; + nv40->pipe.put_tile_rgba = nv40_put_tile_rgba; +} diff --git a/src/mesa/pipe/nv40/nv40_vbo.c b/src/mesa/pipe/nv40/nv40_vbo.c new file mode 100644 index 0000000000..aa930476b6 --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_vbo.c @@ -0,0 +1,222 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "nv40_context.h" +#include "nv40_dma.h" +#include "nv40_state.h" +#include "nvgl_pipe.h" + +boolean +nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, + unsigned count) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + unsigned nr; + + if (nv40->dirty) + nv40_emit_hw_state(nv40); + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (count & 0xff); + if (nr) { + BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } + + /*XXX: large arrays (nr>2047) will blow up */ + nr = count >> 8; + if (nr) { + assert (nr <= 2047); + + BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, nr); + while (nr--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + pipe->flush(pipe, PIPE_FLUSH_WAIT); + return TRUE; +} + +static INLINE void +nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, + unsigned start, unsigned count) +{ + uint8_t *elts = (uint8_t *)ib + start; + int push, i; + + if (count & 1) { + BEGIN_RING(curie, 
NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; count--; + } + + while (count) { + push = MIN2(count, 2046); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + count -= push; + elts += push; + } +} + +static INLINE void +nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, + unsigned start, unsigned count) +{ + uint16_t *elts = (uint16_t *)ib + start; + int push, i; + + if (count & 1) { + BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; count--; + } + + while (count) { + push = MIN2(count, 2046); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + count -= push; + elts += push; + } +} + +static INLINE void +nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, + unsigned start, unsigned count) +{ + uint32_t *elts = (uint32_t *)ib + start; + int push; + + while (count) { + push = MIN2(count, 2047); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); + OUT_RINGp (elts, push); + + count -= push; + elts += push; + } +} + +boolean +nv40_draw_elements(struct pipe_context *pipe, + struct pipe_buffer_handle *indexBuffer, unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = (struct nv40_context *)pipe; + void *ib; + + if (nv40->dirty) + nv40_emit_hw_state(nv40); + + ib = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + PIPE_BUFFER_FLAG_READ); + if (!ib) { + NOUVEAU_ERR("Couldn't map index buffer!!\n"); + return FALSE; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + switch (indexSize) { + case 1: + nv40_draw_elements_u08(nv40, ib, start, count); + break; + case 2: + nv40_draw_elements_u16(nv40, ib, start, count); + break; + case 4: + nv40_draw_elements_u32(nv40, ib, start, count); + break; + default: + NOUVEAU_ERR("unsupported elt size %d\n", indexSize); + break; + } + 
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + pipe->winsys->buffer_unmap(pipe->winsys, ib); + pipe->flush(pipe, PIPE_FLUSH_WAIT); + return TRUE; +} + +static INLINE int +nv40_vbo_format_to_ncomp(uint format) +{ + switch (format) { + case PIPE_FORMAT_R32G32B32A32_FLOAT: return 4; + case PIPE_FORMAT_R32G32B32_FLOAT: return 3; + case PIPE_FORMAT_R32G32_FLOAT: return 2; + case PIPE_FORMAT_R32_FLOAT: return 1; + default: + NOUVEAU_ERR("AII, unknown vbo format %d\n", format); + return 1; + } +} + +void +nv40_vbo_arrays_update(struct nv40_context *nv40) +{ + struct nouveau_winsys *nvws = nv40->nvws; + struct nv40_vertex_program *vp = nv40->vertprog.vp; + uint32_t inputs, vtxfmt[16]; + int hw, num_hw; + + inputs = vp->ir; + for (hw = 0; hw < 16 && inputs; hw++) { + if (inputs & (1 << hw)) { + num_hw = hw; + inputs &= ~(1 << hw); + } + } + num_hw++; + + inputs = vp->ir; + BEGIN_RING(curie, NV40TCL_VTXBUF_ADDRESS(0), num_hw); + for (hw = 0; hw < num_hw; hw++) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + + if (!(inputs & (1 << hw))) { + OUT_RING(0); + vtxfmt[hw] = NV40TCL_VTXFMT_TYPE_FLOAT; + continue; + } + + ve = &nv40->vtxelt[hw]; + vb = &nv40->vtxbuf[ve->vertex_buffer_index]; + + OUT_RELOC(vb->buffer, vb->buffer_offset + ve->src_offset, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | + NOUVEAU_BO_OR | NOUVEAU_BO_RD, 0, + NV40TCL_VTXBUF_ADDRESS_DMA1); + vtxfmt[hw] = ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) | + (nv40_vbo_format_to_ncomp(ve->src_format) << + NV40TCL_VTXFMT_SIZE_SHIFT) | + NV40TCL_VTXFMT_TYPE_FLOAT); + } + + BEGIN_RING(curie, 0x1710, 1); + OUT_RING (0); /* vtx cache flush */ + BEGIN_RING(curie, NV40TCL_VTXFMT(0), num_hw); + OUT_RINGp (vtxfmt, num_hw); +} + diff --git a/src/mesa/pipe/nv40/nv40_vertprog.c b/src/mesa/pipe/nv40/nv40_vertprog.c new file mode 100644 index 0000000000..be550e4743 --- /dev/null +++ b/src/mesa/pipe/nv40/nv40_vertprog.c @@ -0,0 +1,594 @@ +#include "pipe/p_context.h" +#include 
"pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/tgsi/exec/tgsi_token.h" +#include "pipe/tgsi/exec/tgsi_parse.h" + +#include "nv40_context.h" +#include "nv40_dma.h" +#include "nv40_state.h" + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv40_shader.h" + +#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv40_sr_neg((s)) +#define abs(s) nv40_sr_abs((s)) + +static uint32_t +passthrough_vp_data[] = { + 0x40041c6c, 0x0040010d, 0x8106c083, 0x6041ff84, + 0x40041c6c, 0x0040000d, 0x8106c083, 0x6041ff81, +}; + +static struct nv40_vertex_program +passthrough_vp = { + .pipe = NULL, + .translated = TRUE, + + .insn = passthrough_vp_data, + .insn_len = sizeof(passthrough_vp_data) / sizeof(uint32_t), + + .ir = 0x00000003, + .or = 0x00000001, +}; + +struct nv40_vpc { + struct nv40_vertex_program *vp; + + uint output_map[PIPE_MAX_SHADER_OUTPUTS]; + + int high_temp; + int temp_temp_count; +}; + +static INLINE struct nv40_sreg +nv40_sr_temp(struct nv40_vpc *vpc) +{ + int idx; + + idx = vpc->temp_temp_count++; + idx += vpc->high_temp; + return nv40_sr(0, NV40_VP_SRC_REG_TYPE_TEMP, idx); +} + +static INLINE struct nv40_sreg +nv40_sr_const(struct nv40_vpc *vpc, int pipe, + float x, float y, float z, float w) +{ + struct nv40_vertex_program *vp = vpc->vp; + int idx = vp->num_consts; + + vp->consts[idx].pipe_id = pipe; + vp->consts[idx].hw_id = idx; + vp->consts[idx].value[0] = x; + vp->consts[idx].value[1] = y; + vp->consts[idx].value[2] = z; + vp->consts[idx].value[3] = w; + vp->num_consts++; + + return nv40_sr(0, NV40_VP_SRC_REG_TYPE_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2)) +#define temp(vpc) nv40_sr_temp((vpc)) +#define constant(v,p,x,y,z,w) 
nv40_sr_const((v), (p), (x), (y), (z), (w)) + +static void +emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) +{ + struct nv40_vertex_program *vp = vpc->vp; + uint32_t sr = 0; + + sr |= (src.type << NV40_VP_SRC_REG_TYPE_SHIFT); + if (src.type == NV40_VP_SRC_REG_TYPE_INPUT) { + vp->ir |= (1 << src.index); + hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT); + } else + if (src.type == NV40_VP_SRC_REG_TYPE_CONST) { + hw[1] |= (src.index << NV40_VP_INST_CONST_SRC_SHIFT); + } else { + sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT); + } + + if (src.negate) + sr |= NV40_VP_SRC_NEGATE; + + if (src.abs) + hw[0] |= (1 << (21 + pos)); + + sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) | + (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) | + (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) | + (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT)); + + switch (pos) { + case 0: + hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >> + NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT; + hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) << + NV40_VP_INST_SRC0L_SHIFT; + break; + case 1: + hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT; + break; + case 2: + hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >> + NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT; + hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) << + NV40_VP_INST_SRC2L_SHIFT; + break; + default: + assert(0); + } +} + +static void +emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) +{ + struct nv40_vertex_program *vp = vpc->vp; + + if (dst.output == 0) { + hw[3] |= NV40_VP_INST_DEST_MASK; + if (slot == 0) { + hw[0] |= (dst.index << + NV40_VP_INST_VEC_DEST_TEMP_SHIFT); + } else { + hw[3] |= (dst.index << + NV40_VP_INST_SCA_DEST_TEMP_SHIFT); + } + } else { + switch (dst.index) { + case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV40_VP_INST_DEST_FOGC 
: vp->or |= (1 << 4); break; + case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + default: + break; + } + + hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); + if (slot == 0) { + hw[0] |= NV40_VP_INST_VEC_RESULT; + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK; + } else { + hw[3] |= NV40_VP_INST_SCA_RESULT; + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + } + } +} + +static void +nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, + struct nv40_sreg s2) +{ + struct nv40_vertex_program *vp = vpc->vp; + uint32_t *hw = &vp->insn[vp->insn_len]; + + hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); + hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) | + (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) | + (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) | + (3 << NV40_VP_INST_COND_SWZ_W_SHIFT)); + + if (slot == 0) { + hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); + hw[3] |= (NV40_VP_INST_SCA_RESULT | + NV40_VP_INST_SCA_DEST_TEMP_MASK); + hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); + } else { + hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); + hw[1] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); + hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + } + + emit_dst(vpc, hw, slot, dst); + emit_src(vpc, hw, 0, s0); + emit_src(vpc, hw, 1, s1); + emit_src(vpc, hw, 2, s2); + + vp->insn_len += 4; +} + +static INLINE struct nv40_sreg +tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nv40_sreg src; 
+ + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv40_sr(0, NV40_VP_SRC_REG_TYPE_INPUT, + fsrc->SrcRegister.Index); + break; + case TGSI_FILE_CONSTANT: + src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + break; + case TGSI_FILE_TEMPORARY: + if (vpc->high_temp < fsrc->SrcRegister.Index) + vpc->high_temp = fsrc->SrcRegister.Index; + src = nv40_sr(0, NV40_VP_SRC_REG_TYPE_TEMP, + fsrc->SrcRegister.Index); + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv40_sreg +tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + uint out, idx; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + out = 1; + idx = vpc->output_map[fdst->DstRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + out = 0; + idx = fdst->DstRegister.Index; + if (vpc->high_temp < idx) + vpc->high_temp = idx; + break; + default: + NOUVEAU_ERR("bad dst file\n"); + break; + } + + return nv40_sr(out, NV40_VP_SRC_REG_TYPE_TEMP, idx); +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, + const struct tgsi_full_instruction *finst) +{ + struct nv40_sreg src[3], dst, tmp; + struct nv40_sreg none = nv40_sr(0, NV40_VP_SRC_REG_TYPE_INPUT, 0); + int mask; + int ai = -1, ci = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_RET) + return TRUE; + + vpc->temp_temp_count = 0; + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const 
struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(vpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + NOUVEAU_MSG("extra src attr %d\n", + fsrc->SrcRegister.Index); + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + if (ci == -1 || ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); + break; + case TGSI_OPCODE_ARL: + arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DST: + arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_EXP: + 
arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_FLR: + arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_LG2: + arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LIT: + arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LOG: + arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_MAD: + arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(vpc); + arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, + swz(src[0], X, X, X, X)); + arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(vpc, 1, OP_EX2, dst, mask, none, none, + swz(tmp, X, X, X, X)); + break; + case TGSI_OPCODE_RCP: + arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_RSQ: + arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_SGE: + arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); + break; + case TGSI_OPCODE_XPD: + tmp = temp(vpc); + arith(vpc, 0, OP_MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + 
NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + +static boolean +nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV40_VP_INST_DEST_POS; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_VP_INST_DEST_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_VP_INST_DEST_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_BCOLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_VP_INST_DEST_BFC0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_VP_INST_DEST_BFC1; + } else { + NOUVEAU_ERR("bad bcolour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV40_VP_INST_DEST_FOGC; + break; + case TGSI_SEMANTIC_PSIZE: + hw = NV40_VP_INST_DEST_PSZ; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + vpc->output_map[fdec->u.DeclarationRange.First] = hw; + return TRUE; +} + +void +nv40_vertprog_translate(struct nv40_context *nv40, + struct nv40_vertex_program *vp) +{ + struct tgsi_parse_context parse; + struct nv40_vpc *vpc = NULL; + + vpc = calloc(1, sizeof(struct nv40_vpc)); + if (!vpc) + return; + vp->insn = calloc(1, 128*4*sizeof(uint32_t)); + vpc->vp = vp; + vpc->high_temp = -1; + + tgsi_parse_init(&parse, vp->pipe->tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &parse.FullToken.FullDeclaration; 
+ switch (fdec->Declaration.File) { + case TGSI_FILE_OUTPUT: + if (!nv40_vertprog_parse_decl_output(vpc, fdec)) + goto out_err; + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + + finst = &parse.FullToken.FullInstruction; + if (!nv40_vertprog_parse_instruction(vpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + vp->insn[vp->insn_len - 1] |= NV40_VP_INST_LAST; +#if 0 + { + int i; + for (i = 0; i < vp->insn_len; i++) + NOUVEAU_ERR("inst[%d] = 0x%08x\n", i, vp->insn[i]); + } +#endif + vp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + free(vpc); +} + +void +nv40_vertprog_bind(struct nv40_context *nv40, struct nv40_vertex_program *vp) +{ + int i; + + if (!vp->translated) { + NOUVEAU_ERR("vtxprog invalid, using passthrough shader\n"); + vp = &passthrough_vp; + } + + if (!vp->on_hw) { + if (nv40->vertprog.active_vp) + nv40->vertprog.active_vp->on_hw = FALSE; + vp->on_hw = TRUE; + vp->start_ip = 0; + + BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (vp->start_ip); + for (i = 0; i < vp->insn_len; i += 4) { + BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (&vp->insn[i], 4); + } + } + + BEGIN_RING(curie, NV40TCL_VP_START_FROM_ID, 1); + OUT_RING (vp->start_ip); + BEGIN_RING(curie, NV40TCL_VP_ATTRIB_EN, 2); + OUT_RING (vp->ir); + OUT_RING (vp->or); + + nv40->vertprog.active_vp = vp; +} diff --git a/src/mesa/pipe/nv40/nvgl_pipe.h b/src/mesa/pipe/nv40/nvgl_pipe.h new file mode 100644 index 0000000000..15ff318023 --- /dev/null +++ b/src/mesa/pipe/nv40/nvgl_pipe.h @@ -0,0 +1,198 @@ +#ifndef __NVGL_PIPE_H__ +#define __NVGL_PIPE_H__ + +#include <GL/gl.h> + +static INLINE unsigned +nvgl_blend_func(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + return GL_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return GL_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return GL_SRC_ALPHA; + 
case PIPE_BLENDFACTOR_DST_ALPHA: + return GL_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + return GL_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return GL_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return GL_CONSTANT_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return GL_CONSTANT_ALPHA; + case PIPE_BLENDFACTOR_ZERO: + return GL_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return GL_ONE_MINUS_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return GL_ONE_MINUS_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return GL_ONE_MINUS_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return GL_ONE_MINUS_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return GL_ONE_MINUS_CONSTANT_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return GL_ONE_MINUS_CONSTANT_ALPHA; + default: + return GL_ONE; + } +} + +static INLINE unsigned +nvgl_blend_eqn(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + return GL_FUNC_ADD; + case PIPE_BLEND_SUBTRACT: + return GL_FUNC_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return GL_FUNC_REVERSE_SUBTRACT; + case PIPE_BLEND_MIN: + return GL_MIN; + case PIPE_BLEND_MAX: + return GL_MAX; + default: + return GL_FUNC_ADD; + } +} + +static INLINE unsigned +nvgl_logicop_func(unsigned func) +{ + switch (func) { + case PIPE_LOGICOP_CLEAR: + return GL_CLEAR; + case PIPE_LOGICOP_NOR: + return GL_NOR; + case PIPE_LOGICOP_AND_INVERTED: + return GL_AND_INVERTED; + case PIPE_LOGICOP_COPY_INVERTED: + return GL_COPY_INVERTED; + case PIPE_LOGICOP_AND_REVERSE: + return GL_AND_REVERSE; + case PIPE_LOGICOP_INVERT: + return GL_INVERT; + case PIPE_LOGICOP_XOR: + return GL_XOR; + case PIPE_LOGICOP_NAND: + return GL_NAND; + case PIPE_LOGICOP_AND: + return GL_AND; + case PIPE_LOGICOP_EQUIV: + return GL_EQUIV; + case PIPE_LOGICOP_NOOP: + return GL_NOOP; + case PIPE_LOGICOP_OR_INVERTED: + return GL_OR_INVERTED; + case PIPE_LOGICOP_COPY: + return GL_COPY; + case PIPE_LOGICOP_OR_REVERSE: + return GL_OR_REVERSE; + case 
PIPE_LOGICOP_OR: + return GL_OR; + case PIPE_LOGICOP_SET: + return GL_SET; + default: + return GL_CLEAR; + } +} + +static INLINE unsigned +nvgl_comparison_op(unsigned op) +{ + switch (op) { + case PIPE_FUNC_NEVER: + return GL_NEVER; + case PIPE_FUNC_LESS: + return GL_LESS; + case PIPE_FUNC_EQUAL: + return GL_EQUAL; + case PIPE_FUNC_LEQUAL: + return GL_LEQUAL; + case PIPE_FUNC_GREATER: + return GL_GREATER; + case PIPE_FUNC_NOTEQUAL: + return GL_NOTEQUAL; + case PIPE_FUNC_GEQUAL: + return GL_GEQUAL; + case PIPE_FUNC_ALWAYS: + return GL_ALWAYS; + default: + return GL_NEVER; + } +} + +static INLINE unsigned +nvgl_polygon_mode(unsigned mode) +{ + switch (mode) { + case PIPE_POLYGON_MODE_FILL: + return GL_FILL; + case PIPE_POLYGON_MODE_LINE: + return GL_LINE; + case PIPE_POLYGON_MODE_POINT: + return GL_POINT; + default: + return GL_FILL; + } +} + +static INLINE unsigned +nvgl_stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: + return GL_KEEP; + case PIPE_STENCIL_OP_ZERO: + return GL_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return GL_REPLACE; + case PIPE_STENCIL_OP_INCR: + return GL_INCR; + case PIPE_STENCIL_OP_DECR: + return GL_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: + return GL_INCR_WRAP; + case PIPE_STENCIL_OP_DECR_WRAP: + return GL_DECR_WRAP; + case PIPE_STENCIL_OP_INVERT: + return GL_INVERT; + default: + return GL_KEEP; + } +} + +static INLINE unsigned +nvgl_primitive(unsigned prim) { + switch (prim) { + case PIPE_PRIM_POINTS: + return GL_POINTS + 1; + case PIPE_PRIM_LINES: + return GL_LINES + 1; + case PIPE_PRIM_LINE_LOOP: + return GL_LINE_LOOP + 1; + case PIPE_PRIM_LINE_STRIP: + return GL_LINE_STRIP + 1; + case PIPE_PRIM_TRIANGLES: + return GL_TRIANGLES + 1; + case PIPE_PRIM_TRIANGLE_STRIP: + return GL_TRIANGLE_STRIP + 1; + case PIPE_PRIM_TRIANGLE_FAN: + return GL_TRIANGLE_FAN + 1; + case PIPE_PRIM_QUADS: + return GL_QUADS + 1; + case PIPE_PRIM_QUAD_STRIP: + return GL_QUAD_STRIP + 1; + case PIPE_PRIM_POLYGON: + return GL_POLYGON + 1; + default: 
+ return GL_POINTS + 1; + } +} + +#endif |