From e2b4d9b317104ff3c56a9bf108aa79084d49eba5 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Sun, 5 Nov 2006 13:46:48 +0000 Subject: Architect the DRI : - make use of the autogenerated nouveau_reg.h file - add object creation to the DRI - some work on screen and context creation --- src/mesa/drivers/dri/nouveau/nouveau_object.c | 36 +++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 src/mesa/drivers/dri/nouveau/nouveau_object.c (limited to 'src/mesa/drivers/dri/nouveau/nouveau_object.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.c b/src/mesa/drivers/dri/nouveau/nouveau_object.c new file mode 100644 index 0000000000..9003fb1eae --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.c @@ -0,0 +1,36 @@ + +#include "nouveau_fifo.h" +#include "nouveau_object.h" + + +static GLboolean NVDmaCreateContextObject(nouveauContextPtr nmesa, int handle, int class, uint32_t flags, + uint32_t dma_in, uint32_t dma_out, uint32_t dma_notifier) +{ + drm_nouveau_object_init_t cto; + int ret; + + cto.handle = handle; + cto.class = class; + cto.flags = flags; + cto.dma0= dma_in; + cto.dma1= dma_out; + cto.dma_notifier = dma_notifier; + ret = drmCommandWrite(nmesa->driFd, DRM_NOUVEAU_OBJECT_INIT, &cto, sizeof(cto)); + + return ret == 0; +} + +static void nouveauObjectOnSubchannel(nouveauContextPtr nmesa, int handle, int subchannel) +{ + BEGIN_RING_SIZE(subchannel, 0, 1); + OUT_RING(handle); +} + +void nouveauObjectInit(nouveauContextPtr nmesa) +{ + NVDmaCreateContextObject(nmesa, Nv3D, nmesa->screen->card->class_3d, 0, 0, 0, 0); + nouveauObjectOnSubchannel(nmesa, NvSub3D, Nv3D); +} + + + -- cgit v1.2.3 From a75440bcf04efb7a4840e9b0d1f0903b40b952bf Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 21 Nov 2006 14:03:06 +0000 Subject: nouveauCreateDmaObject --- src/mesa/drivers/dri/nouveau/nouveau_context.c | 1 + src/mesa/drivers/dri/nouveau/nouveau_object.c | 37 +++++++++++++++++++++++--- src/mesa/drivers/dri/nouveau/nouveau_object.h | 2 ++ 3 files changed, 36 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_object.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c index a2ac056010..d3fbdab9f3 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c @@ -45,6 +45,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "nouveau_object.h" #include "nouveau_fifo.h" #include "nouveau_tex.h" +#include "nouveau_msg.h" #include "nv10_swtcl.h" #include "vblank.h" diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.c b/src/mesa/drivers/dri/nouveau/nouveau_object.c index 9003fb1eae..fe3b44df69 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.c @@ -3,8 +3,7 @@ #include "nouveau_object.h" -static GLboolean NVDmaCreateContextObject(nouveauContextPtr nmesa, int handle, int class, uint32_t flags, - uint32_t dma_in, uint32_t dma_out, uint32_t dma_notifier) +static GLboolean nouveauCreateContextObject(nouveauContextPtr nmesa, int handle, int class, uint32_t flags, uint32_t dma_in, uint32_t dma_out, uint32_t dma_notifier) { drm_nouveau_object_init_t cto; int ret; @@ -20,7 +19,27 @@ static GLboolean NVDmaCreateContextObject(nouveauContextPtr nmesa, int handle, i return ret == 0; } -static void nouveauObjectOnSubchannel(nouveauContextPtr nmesa, int handle, int subchannel) +static GLboolean nouveauCreateDmaObject(nouveauContextPtr nmesa, + uint32_t handle, + uint32_t offset, + uint32_t size, + int target, + int access) +{ + drm_nouveau_dma_object_init_t dma; + int ret; + + dma.handle = handle; + dma.target = target; + dma.access = access; + dma.offset = offset; + dma.handle = handle; + ret = drmCommandWriteRead(nmesa->driFd, DRM_NOUVEAU_DMA_OBJECT_INIT, + &dma, sizeof(dma)); + return ret == 0; +} + +void nouveauObjectOnSubchannel(nouveauContextPtr nmesa, int subchannel, int handle) { BEGIN_RING_SIZE(subchannel, 0, 1); OUT_RING(handle); @@ -28,8 +47,18 @@ static void nouveauObjectOnSubchannel(nouveauContextPtr nmesa, int handle, int s void nouveauObjectInit(nouveauContextPtr nmesa) { - NVDmaCreateContextObject(nmesa, Nv3D, nmesa->screen->card->class_3d, 0, 0, 0, 0); +#ifdef NOUVEAU_RING_DEBUG + return; +#endif + + nouveauCreateContextObject(nmesa, Nv3D, nmesa->screen->card->class_3d, 0, 0, 0, 0); nouveauObjectOnSubchannel(nmesa, NvSub3D, Nv3D); +/* We need to know vram size.. */ +#if 0 + nouveauCreateDmaObject( nmesa, NvDmaFB, + 0, (256*1024*1024), + 0 /*NV_DMA_TARGET_FB*/, 0 /*NV_DMA_ACCESS_RW*/); +#endif } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.h b/src/mesa/drivers/dri/nouveau/nouveau_object.h index e9a30d127a..8386f923c3 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.h @@ -7,10 +7,12 @@ void nouveauObjectInit(nouveauContextPtr nmesa); enum DMAObjects { Nv3D = 0x80000019, + NvDmaFB = 0xD0FB0001 }; enum DMASubchannel { NvSub3D = 7, }; +extern void nouveauObjectOnSubchannel(nouveauContextPtr nmesa, int subchannel, int handle); #endif -- cgit v1.2.3 From 9c9e6abbf82fbf591575a9c352f86721bc72aa90 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sat, 25 Nov 2006 09:58:35 +0000 Subject: Incomplete shader stuff, should mostly work for NV40. Other cards, not so much.. --- src/mesa/drivers/dri/nouveau/Makefile | 11 +- src/mesa/drivers/dri/nouveau/nouveau_context.c | 8 + src/mesa/drivers/dri/nouveau/nouveau_context.h | 7 + src/mesa/drivers/dri/nouveau/nouveau_fifo.h | 28 +- src/mesa/drivers/dri/nouveau/nouveau_object.c | 2 +- src/mesa/drivers/dri/nouveau/nouveau_shader.c | 734 +++++++++++++++++++++ src/mesa/drivers/dri/nouveau/nouveau_shader.h | 362 ++++++++++ .../drivers/dri/nouveau/nouveau_shader_0_arb.c | 694 +++++++++++++++++++ src/mesa/drivers/dri/nouveau/nouveau_shader_1.c | 318 +++++++++ src/mesa/drivers/dri/nouveau/nouveau_shader_2.c | 238 +++++++ src/mesa/drivers/dri/nouveau/nouveau_state.h | 1 + src/mesa/drivers/dri/nouveau/nv20_shader.h | 121 ++++ src/mesa/drivers/dri/nouveau/nv20_vertprog.c | 447 +++++++++++++ src/mesa/drivers/dri/nouveau/nv30_fragprog.c | 707 ++++++++++++++++++++ src/mesa/drivers/dri/nouveau/nv30_shader.h | 378 +++++++++++ src/mesa/drivers/dri/nouveau/nv30_state.c | 2 +- src/mesa/drivers/dri/nouveau/nv30_vertprog.c | 356 ++++++++++ src/mesa/drivers/dri/nouveau/nv40_fragprog.c | 152 +++++ src/mesa/drivers/dri/nouveau/nv40_shader.h | 467 +++++++++++++ src/mesa/drivers/dri/nouveau/nv40_vertprog.c | 647 ++++++++++++++++++ 20 files changed, 5667 insertions(+), 13 deletions(-) create mode 100644 src/mesa/drivers/dri/nouveau/nouveau_shader.c create mode 100644 src/mesa/drivers/dri/nouveau/nouveau_shader.h create mode 100644 src/mesa/drivers/dri/nouveau/nouveau_shader_0_arb.c create mode 100644 src/mesa/drivers/dri/nouveau/nouveau_shader_1.c create mode 100644 src/mesa/drivers/dri/nouveau/nouveau_shader_2.c create mode 100644 src/mesa/drivers/dri/nouveau/nv20_shader.h create mode 100644 src/mesa/drivers/dri/nouveau/nv20_vertprog.c create mode 100644 src/mesa/drivers/dri/nouveau/nv30_fragprog.c create mode 100644 src/mesa/drivers/dri/nouveau/nv30_shader.h create mode 100644 src/mesa/drivers/dri/nouveau/nv30_vertprog.c create mode 100644 src/mesa/drivers/dri/nouveau/nv40_fragprog.c create mode 100644 src/mesa/drivers/dri/nouveau/nv40_shader.h create mode 100644 src/mesa/drivers/dri/nouveau/nv40_vertprog.c (limited to 'src/mesa/drivers/dri/nouveau/nouveau_object.c') diff --git a/src/mesa/drivers/dri/nouveau/Makefile b/src/mesa/drivers/dri/nouveau/Makefile index 4d1e3e6c70..384713eeeb 100644 --- a/src/mesa/drivers/dri/nouveau/Makefile +++ b/src/mesa/drivers/dri/nouveau/Makefile @@ -17,13 +17,22 @@ DRIVER_SOURCES = \ nouveau_screen.c \ nouveau_span.c \ nouveau_state.c \ + nouveau_shader.c \ + nouveau_shader_0_arb.c \ + nouveau_shader_1.c \ + nouveau_shader_2.c \ nouveau_tex.c \ nouveau_swtcl.c \ nv10_swtcl.c \ nv10_state.c \ nv20_state.c \ nv30_state.c \ - nouveau_state_cache.c + nouveau_state_cache.c \ + nv20_vertprog.c \ + nv30_fragprog.c \ + nv30_vertprog.c \ + nv40_fragprog.c \ + nv40_vertprog.c C_SOURCES = \ $(COMMON_SOURCES) \ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c index d3fbdab9f3..4ae0c68fa9 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c @@ -35,6 +35,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/tnl.h" #include "tnl/t_pipeline.h" +#include "tnl/t_vp_build.h" #include "drivers/common/driverfuncs.h" @@ -130,6 +131,13 @@ GLboolean nouveauCreateContext( const __GLcontextModes *glVisual, nmesa->current_primitive = -1; + nouveauShaderInitFuncs(ctx); + /* Install Mesa's fixed-function shader support */ + if (nmesa->screen->card->type >= NV_40) { + ctx->_MaintainTnlProgram = GL_TRUE; + ctx->_MaintainTexEnvProgram = GL_TRUE; + } + /* Initialize the swrast */ _swrast_CreateContext( ctx ); _ac_CreateContext( ctx ); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.h b/src/mesa/drivers/dri/nouveau/nouveau_context.h index 8ae7be015d..e488f9d42d 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.h @@ -38,6 +38,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "nouveau_screen.h" #include "nouveau_state_cache.h" +#include "nouveau_shader.h" #include "xmlconfig.h" @@ -119,6 +120,12 @@ typedef struct nouveau_context { GLenum current_primitive; /* the current primitive enum */ DECLARE_RENDERINPUTS(render_inputs_bitset); /* the current render inputs */ + /* Shader state */ + nvsFunc VPfunc; + nvsFunc FPfunc; + nouveauShader *current_fragprog; + nouveauShader *current_vertprog; + nouveauScreenRec *screen; drm_nouveau_sarea_t *sarea; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fifo.h b/src/mesa/drivers/dri/nouveau/nouveau_fifo.h index ce465cdca5..44b9f356d1 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_fifo.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_fifo.h @@ -33,6 +33,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "nouveau_ctrlreg.h" //#define NOUVEAU_RING_DEBUG +//#define NOUVEAU_STATE_CACHE_DISABLE #define NV_READ(reg) *(volatile u_int32_t *)(nmesa->mmio + (reg)) @@ -63,11 +64,11 @@ int i; printf("OUT_RINGp: (size 0x%x dwords)\n",sz); for(i=0;ififo.buffer[nmesa->fifo.current++]=(n); \ #endif +#define BEGIN_RING_SIZE(subchannel,tag,size) do { \ + nouveau_state_cache_flush(nmesa); \ + if (nmesa->fifo.free <= (size)) \ + WAIT_RING(nmesa,(size)); \ + OUT_RING( ((size)<<18) | ((subchannel) << 13) | (tag)); \ + nmesa->fifo.free -= ((size) + 1); \ +}while(0) + extern void WAIT_RING(nouveauContextPtr nmesa,u_int32_t size); extern void nouveau_state_cache_flush(nouveauContextPtr nmesa); extern void nouveau_state_cache_init(nouveauContextPtr nmesa); +#ifdef NOUVEAU_STATE_CACHE_DISABLE +#define BEGIN_RING_CACHE(subc,tag,size) BEGIN_RING_SIZE((subc), (tag), (size)) +#define OUT_RING_CACHE(n) OUT_RING((n)) +#define OUT_RING_CACHEf(n) OUT_RINGf((n)) +#define OUT_RING_CACHEp(ptr, sz) OUT_RINGp((ptr), (sz)) +#else #define BEGIN_RING_CACHE(subchannel,tag,size) do { \ nmesa->state_cache.dirty=1; \ nmesa->state_cache.current_pos=((tag)/4); \ @@ -116,14 +131,7 @@ extern void nouveau_state_cache_init(nouveauContextPtr nmesa); uint32_t* p=(uint32_t*)(ptr); \ int i; for(i=0;ififo.free <= (size)) \ - WAIT_RING(nmesa,(size)); \ - OUT_RING( ((size)<<18) | ((subchannel) << 13) | (tag)); \ - nmesa->fifo.free -= ((size) + 1); \ -}while(0) +#endif #define RING_AVAILABLE() (nmesa->fifo.free-1) diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.c b/src/mesa/drivers/dri/nouveau/nouveau_object.c index fe3b44df69..cd46feff7c 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.c @@ -54,7 +54,7 @@ void nouveauObjectInit(nouveauContextPtr nmesa) nouveauCreateContextObject(nmesa, Nv3D, nmesa->screen->card->class_3d, 0, 0, 0, 0); nouveauObjectOnSubchannel(nmesa, NvSub3D, Nv3D); /* We need to know vram size.. */ -#if 0 +#if 0 nouveauCreateDmaObject( nmesa, NvDmaFB, 0, (256*1024*1024), 0 /*NV_DMA_TARGET_FB*/, 0 /*NV_DMA_ACCESS_RW*/); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.c b/src/mesa/drivers/dri/nouveau/nouveau_shader.c new file mode 100644 index 0000000000..97ea1ee547 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.c @@ -0,0 +1,734 @@ +/* + * Copyright (C) 2006 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "extensions.h" + +#include "program.h" +#include "tnl/tnl.h" + +#include "nouveau_context.h" +#include "nouveau_shader.h" + +/***************************************************************************** + * Mesa entry points + */ +static void +nouveauBindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog) +{ +} + +static struct gl_program * +nouveauNewProgram(GLcontext *ctx, GLenum target, GLuint id) +{ + nouveauShader *nvs; + + nvs = CALLOC_STRUCT(_nouveauShader); + switch (target) { + case GL_VERTEX_PROGRAM_ARB: + return _mesa_init_vertex_program(ctx, &nvs->mesa.vp, target, id); + case GL_FRAGMENT_PROGRAM_ARB: + return _mesa_init_fragment_program(ctx, &nvs->mesa.fp, target, id); + default: + _mesa_problem(ctx, "Unsupported shader target"); + break; + } + + FREE(nvs); + return NULL; +} + +static void +nouveauDeleteProgram(GLcontext *ctx, struct gl_program *prog) +{ + nouveauShader *nvs = (nouveauShader *)prog; + + if (nvs->translated) + FREE(nvs->program); + _mesa_delete_program(ctx, prog); +} + +static void +nouveauProgramStringNotify(GLcontext *ctx, GLenum target, + struct gl_program *prog) +{ + nouveauShader *nvs = (nouveauShader *)prog; + + if (nvs->translated) + FREE(nvs->program); + nvs->translated = 0; + + _tnl_program_string(ctx, target, prog); +} + +static GLboolean +nouveauIsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) +{ + nouveauShader *nvs = (nouveauShader *)prog; + + return nvs->translated; +} + +GLboolean +nvsUpdateShader(GLcontext *ctx, nouveauShader *nvs) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + struct gl_program_parameter_list *plist; + int i; + + /* Translate to HW format now if necessary */ + if (!nvs->translated) { + /* Mesa ASM shader -> nouveauShader */ + if (!nouveau_shader_pass0_arb(ctx, nvs)) + return GL_FALSE; + /* Basic dead code elimination + register usage info */ + if (!nouveau_shader_pass1(nvs)) + return GL_FALSE; + /* nouveauShader -> HW bytecode, HW register alloc */ + if (!nouveau_shader_pass2(nvs)) + return GL_FALSE; + assert(nvs->translated); + assert(nvs->program); + } + + /* Update state parameters */ + plist = nvs->mesa.vp.Base.Parameters; + _mesa_load_state_parameters(ctx, plist); + for (i=0; iNumParameters; i++) { + if (!nvs->on_hardware) { + /* if we've been kicked off the hardware there's no guarantee our + * consts are still there.. reupload them all + */ + nvs->func->UpdateConst(ctx, nvs, i); + } else if (plist->Parameters[i].Type == PROGRAM_STATE_VAR) { + /* update any changed state parameters */ + if (!TEST_EQ_4V(nvs->params[i].val, nvs->params[i].source_val)) + nvs->func->UpdateConst(ctx, nvs, i); + } + } + + /* Upload program to hardware, this must come after state param update + * as >=NV30 fragprogs inline consts into the bytecode. + */ + if (!nvs->on_hardware) { + nouveauShader **current; + + if (nvs->mesa.vp.Base.Target == GL_VERTEX_PROGRAM_ARB) + current = &nmesa->current_vertprog; + else + current = &nmesa->current_fragprog; + if (*current) (*current)->on_hardware = 0; + + nvs->func->UploadToHW(ctx, nvs); + nvs->on_hardware = 1; + + *current = nvs; + } + + return GL_TRUE; +} + +void +nouveauShaderInitFuncs(GLcontext * ctx) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + + switch (nmesa->screen->card->type) { + case NV_20: + NV20VPInitShaderFuncs(&nmesa->VPfunc); + break; + case NV_30: + NV30VPInitShaderFuncs(&nmesa->VPfunc); + NV30FPInitShaderFuncs(&nmesa->FPfunc); + break; + case NV_40: + case G_70: + NV40VPInitShaderFuncs(&nmesa->VPfunc); + NV40FPInitShaderFuncs(&nmesa->FPfunc); + break; + default: + return; + } + + _mesa_enable_extension(ctx, "GL_ARB_vertex_program"); + ctx->Const.VertexProgram.MaxNativeInstructions = nmesa->VPfunc.MaxInst; + ctx->Const.VertexProgram.MaxNativeAluInstructions = nmesa->VPfunc.MaxInst; + ctx->Const.VertexProgram.MaxNativeTexInstructions = nmesa->VPfunc.MaxInst; + ctx->Const.VertexProgram.MaxNativeTexIndirections = + ctx->Const.VertexProgram.MaxNativeTexInstructions; + ctx->Const.VertexProgram.MaxNativeAttribs = nmesa->VPfunc.MaxAttrib; + ctx->Const.VertexProgram.MaxNativeTemps = nmesa->VPfunc.MaxTemp; + ctx->Const.VertexProgram.MaxNativeAddressRegs = nmesa->VPfunc.MaxAddress; + ctx->Const.VertexProgram.MaxNativeParameters = nmesa->VPfunc.MaxConst; + + if (nmesa->screen->card->type >= NV_30) { + _mesa_enable_extension(ctx, "GL_ARB_fragment_program"); + + ctx->Const.FragmentProgram.MaxNativeInstructions = nmesa->FPfunc.MaxInst; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = nmesa->FPfunc.MaxInst; + ctx->Const.FragmentProgram.MaxNativeTexInstructions = nmesa->FPfunc.MaxInst; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = + ctx->Const.FragmentProgram.MaxNativeTexInstructions; + ctx->Const.FragmentProgram.MaxNativeAttribs = nmesa->FPfunc.MaxAttrib; + ctx->Const.FragmentProgram.MaxNativeTemps = nmesa->FPfunc.MaxTemp; + ctx->Const.FragmentProgram.MaxNativeAddressRegs = nmesa->FPfunc.MaxAddress; + ctx->Const.FragmentProgram.MaxNativeParameters = nmesa->FPfunc.MaxConst; + } + + ctx->Driver.NewProgram = nouveauNewProgram; + ctx->Driver.BindProgram = nouveauBindProgram; + ctx->Driver.DeleteProgram = nouveauDeleteProgram; + ctx->Driver.ProgramStringNotify = nouveauProgramStringNotify; + ctx->Driver.IsProgramNative = nouveauIsProgramNative; +} + + +/***************************************************************************** + * Disassembly support structs + */ +#define CHECK_RANGE(idx, arr) ((idx)= (sizeof(ops) / sizeof(struct _opcode_info))) + return NULL; + if (ops[op].name == NULL) + return NULL; + return &ops[op]; +} + +static const char *_SFR_STRING[] = { + [NVS_FR_POSITION] = "position", + [NVS_FR_WEIGHT] = "weight", + [NVS_FR_NORMAL] = "normal", + [NVS_FR_COL0] = "color", + [NVS_FR_COL1] = "color.secondary", + [NVS_FR_BFC0] = "bfc", + [NVS_FR_BFC1] = "bfc.secondary", + [NVS_FR_FOGCOORD] = "fogcoord", + [NVS_FR_POINTSZ] = "pointsize", + [NVS_FR_TEXCOORD0] = "texcoord[0]", + [NVS_FR_TEXCOORD1] = "texcoord[1]", + [NVS_FR_TEXCOORD2] = "texcoord[2]", + [NVS_FR_TEXCOORD3] = "texcoord[3]", + [NVS_FR_TEXCOORD4] = "texcoord[4]", + [NVS_FR_TEXCOORD5] = "texcoord[5]", + [NVS_FR_TEXCOORD6] = "texcoord[6]", + [NVS_FR_TEXCOORD7] = "texcoord[7]", + [NVS_FR_FRAGDATA0] = "data[0]", + [NVS_FR_FRAGDATA1] = "data[1]", + [NVS_FR_FRAGDATA2] = "data[2]", + [NVS_FR_FRAGDATA3] = "data[3]", + [NVS_FR_CLIP0] = "clip_plane[0]", + [NVS_FR_CLIP1] = "clip_plane[1]", + [NVS_FR_CLIP2] = "clip_plane[2]", + [NVS_FR_CLIP3] = "clip_plane[3]", + [NVS_FR_CLIP4] = "clip_plane[4]", + [NVS_FR_CLIP5] = "clip_plane[5]", + [NVS_FR_CLIP6] = "clip_plane[6]", + [NVS_FR_FACING] = "facing", +}; + +#define SFR_STRING(idx) CHECK_RANGE((idx), SFR_STRING) + +static const char *_SWZ_STRING[] = { + [NVS_SWZ_X] = "x", + [NVS_SWZ_Y] = "y", + [NVS_SWZ_Z] = "z", + [NVS_SWZ_W] = "w" +}; + +#define SWZ_STRING(idx) CHECK_RANGE((idx), SWZ_STRING) + +static const char *_NVS_PREC_STRING[] = { + [NVS_PREC_FLOAT32] = "R", + [NVS_PREC_FLOAT16] = "H", + [NVS_PREC_FIXED12] = "X", + [NVS_PREC_UNKNOWN] = "?" +}; + +#define NVS_PREC_STRING(idx) CHECK_RANGE((idx), NVS_PREC_STRING) + +static const char *_NVS_COND_STRING[] = { + [NVS_COND_FL] = "FL", + [NVS_COND_LT] = "LT", + [NVS_COND_EQ] = "EQ", + [NVS_COND_LE] = "LE", + [NVS_COND_GT] = "GT", + [NVS_COND_NE] = "NE", + [NVS_COND_GE] = "GE", + [NVS_COND_TR] = "TR", + [NVS_COND_UNKNOWN] = "??" +}; + +#define NVS_COND_STRING(idx) CHECK_RANGE((idx), NVS_COND_STRING) + +/***************************************************************************** + * ShaderFragment dumping + */ +static void +nvsDumpIndent(int lvl) +{ + while (lvl--) + printf(" "); +} + +static void +nvsDumpSwizzle(nvsSwzComp *swz) +{ + printf(".%s%s%s%s", + SWZ_STRING(swz[0]), + SWZ_STRING(swz[1]), SWZ_STRING(swz[2]), SWZ_STRING(swz[3]) + ); +} + +static void +nvsDumpReg(nvsInstruction * inst, nvsRegister * reg) +{ + if (reg->negate) + printf("-"); + if (reg->abs) + printf("abs("); + + switch (reg->file) { + case NVS_FILE_TEMP: + printf("R%d", reg->index); + nvsDumpSwizzle(reg->swizzle); + break; + case NVS_FILE_ATTRIB: + printf("attrib.%s", SFR_STRING(reg->index)); + nvsDumpSwizzle(reg->swizzle); + break; + case NVS_FILE_ADDRESS: + printf("A%d", reg->index); + break; + case NVS_FILE_CONST: + if (reg->indexed) + printf("const[A%d.%s + %d]", + reg->addr_reg, SWZ_STRING(reg->addr_comp), reg->index); + else + printf("const[%d]", reg->index); + nvsDumpSwizzle(reg->swizzle); + break; + default: + printf("UNKNOWN_FILE"); + break; + } + + if (reg->abs) + printf(")"); +} + +void +nvsDumpInstruction(nvsInstruction * inst, int slot, int lvl) +{ + struct _opcode_info *opr = &ops[inst->op]; + int i; + + nvsDumpIndent(lvl); + printf("%s ", opr->name); + + if (!opr->flags & NODS) { + switch (inst->dest.file) { + case NVS_FILE_RESULT: + printf("result.%s", SFR_STRING(inst->dest.index)); + break; + case NVS_FILE_TEMP: + printf("R%d", inst->dest.index); + break; + case NVS_FILE_ADDRESS: + printf("A%d", inst->dest.index); + break; + default: + printf("UNKNOWN_DST_FILE"); + break; + } + + if (inst->mask != SMASK_ALL) { + printf("."); + if (inst->mask & SMASK_X) + printf("x"); + if (inst->mask & SMASK_Y) + printf("y"); + if (inst->mask & SMASK_Z) + printf("z"); + if (inst->mask & SMASK_W) + printf("w"); + } + + if (opr->numsrc) + printf(", "); + } + + for (i = 0; i < opr->numsrc; i++) { + nvsDumpReg(inst, &inst->src[i]); + if (i != opr->numsrc - 1) + printf(", "); + } + if (opr->flags & TI_UNIT) + printf(", texture[%d]", inst->tex_unit); + + printf("\n"); +} + +void +nvsDumpFragmentList(nvsFragmentList *f, int lvl) +{ + while (f) { + switch (f->fragment->type) { + case NVS_INSTRUCTION: + nvsDumpInstruction((nvsInstruction*)f->fragment, 0, lvl); + break; + default: + fprintf(stderr, "%s: Only NVS_INSTRUCTION fragments can be in" + "nvsFragmentList!\n", __func__); + return; + } + f = f->next; + } +} + +/***************************************************************************** + * HW shader disassembly + */ +static void +nvsDisasmHWShaderOp(nvsFunc * shader, int merged) +{ + struct _opcode_info *opi; + nvsOpcode op; + nvsRegFile file; + nvsSwzComp swz[4]; + int i; + + op = shader->GetOpcode(shader, merged); + opi = _get_op_info(op); + if (!opi) { + printf("NO OPINFO!"); + return; + } + + printf("%s", opi->name); + if (shader->GetPrecision && + (!(opi->flags & BRANCH_ALL)) && (!(opi->flags * NODS)) && + (op != NVS_OP_NOP)) + printf("%s", NVS_PREC_STRING(shader->GetPrecision(shader))); + if (shader->SupportsConditional && shader->SupportsConditional(shader)) { + if (shader->GetConditionUpdate(shader)) { + printf("C%d", shader->GetCondRegID(shader)); + } + } + if (shader->GetSaturate && shader->GetSaturate(shader)) + printf("_SAT"); + + if (!(opi->flags & NODS)) { + int mask = shader->GetDestMask(shader, merged); + + switch (shader->GetDestFile(shader, merged)) { + case NVS_FILE_ADDRESS: + printf(" A%d", shader->GetDestID(shader, merged)); + break; + case NVS_FILE_TEMP: + printf(" R%d", shader->GetDestID(shader, merged)); + break; + case NVS_FILE_RESULT: + printf(" result.%s", (SFR_STRING(shader->GetDestID(shader, merged)))); + break; + default: + printf(" BAD_RESULT_FILE"); + break; + } + + if (mask != SMASK_ALL) { + printf("."); + if (mask & SMASK_X) printf("x"); + if (mask & SMASK_Y) printf("y"); + if (mask & SMASK_Z) printf("z"); + if (mask & SMASK_W) printf("w"); + } + } + + if (shader->SupportsConditional && shader->SupportsConditional(shader) && + shader->GetConditionTest(shader)) { + shader->GetCondRegSwizzle(shader, swz); + + printf(" (%s%d.%s%s%s%s)", + NVS_COND_STRING(shader->GetCondition(shader)), + shader->GetCondRegID(shader), + SWZ_STRING(swz[NVS_SWZ_X]), + SWZ_STRING(swz[NVS_SWZ_Y]), + SWZ_STRING(swz[NVS_SWZ_Z]), + SWZ_STRING(swz[NVS_SWZ_W]) + ); + } + + /* looping */ + if (opi->flags & COUNT_ALL) { + printf(" { "); + if (opi->flags & COUNT_NUM) { + printf("%d", shader->GetLoopCount(shader)); + } + if (opi->flags & COUNT_IND) { + printf(", %d", shader->GetLoopInitial(shader)); + } + if (opi->flags & COUNT_INC) { + printf(", %d", shader->GetLoopIncrement(shader)); + } + printf(" }"); + } + + /* branching */ + if (opi->flags & BRANCH_TR) + printf(" %d", shader->GetBranch(shader)); + if (opi->flags & BRANCH_EL) + printf(" ELSE %d", shader->GetBranchElse(shader)); + if (opi->flags & BRANCH_EN) + printf(" END %d", shader->GetBranchEnd(shader)); + + if (!(opi->flags & NODS) && opi->numsrc) + printf(","); + printf(" "); + + for (i = 0; i < opi->numsrc; i++) { + if (shader->GetSourceAbs(shader, merged, i)) + printf("abs("); + if (shader->GetSourceNegate(shader, merged, i)) + printf("-"); + + file = shader->GetSourceFile(shader, merged, i); + switch (file) { + case NVS_FILE_TEMP: + printf("R%d", shader->GetSourceID(shader, merged, i)); + break; + case NVS_FILE_CONST: + if (shader->GetSourceIndexed(shader, merged, i)) { + printf("c[A%d.%s + 0x%x]", + shader->GetRelAddressRegID(shader), + SWZ_STRING(shader->GetRelAddressSwizzle(shader)), + shader->GetSourceID(shader, merged, i) + ); + } else { + float val[4]; + + if (shader->GetSourceConstVal) { + shader->GetSourceConstVal(shader, merged, i, val); + printf("{ %.02f, %.02f, %.02f, %.02f }", + val[0], val[1], val[2], val[3]); + } else { + printf("c[0x%x]", shader->GetSourceID(shader, merged, i)); + } + } + break; + case NVS_FILE_ATTRIB: + if (shader->GetSourceIndexed(shader, merged, i)) { + printf("attrib[A%d.%s + %d]", + shader->GetRelAddressRegID(shader), + SWZ_STRING(shader->GetRelAddressSwizzle(shader)), + shader->GetSourceID(shader, merged, i) + ); + } + else { + printf("attrib.%s", + SFR_STRING(shader->GetSourceID(shader, merged, i)) + ); + } + break; + case NVS_FILE_ADDRESS: + printf("A%d", shader->GetRelAddressRegID(shader)); + break; + default: + printf("UNKNOWN_SRC_FILE"); + break; + } + + shader->GetSourceSwizzle(shader, merged, i, swz); + if (file != NVS_FILE_ADDRESS && + (swz[NVS_SWZ_X] != NVS_SWZ_X || swz[NVS_SWZ_Y] != NVS_SWZ_Y || + swz[NVS_SWZ_Z] != NVS_SWZ_Z || swz[NVS_SWZ_W] != NVS_SWZ_W)) { + printf(".%s%s%s%s", SWZ_STRING(swz[NVS_SWZ_X]), + SWZ_STRING(swz[NVS_SWZ_Y]), + SWZ_STRING(swz[NVS_SWZ_Z]), + SWZ_STRING(swz[NVS_SWZ_W])); + } + + if (shader->GetSourceAbs(shader, merged, i)) + printf(")"); + if (shader->GetSourceScale) { + int scale = shader->GetSourceScale(shader, merged, i); + if (scale > 1) + printf("{scaled %dx}", scale); + } + if (i < (opi->numsrc - 1)) + printf(", "); + } + + if (shader->IsLastInst(shader)) + printf(" + END"); +} + +void +nvsDisasmHWShader(nvsPtr nvs) +{ + nvsFunc *shader = nvs->func; + unsigned int iaddr = 0; + + if (!nvs->program) { + fprintf(stderr, "No HW program present"); + return; + } + + shader->inst = nvs->program; + while (1) { + if (shader->inst >= (nvs->program + nvs->program_size)) { + fprintf(stderr, "Reached end of program, but HW inst has no END"); + break; + } + + printf("\t0x%08x:\n", shader->inst[0]); + printf("\t0x%08x:\n", shader->inst[1]); + printf("\t0x%08x:\n", shader->inst[2]); + printf("\t0x%08x:", shader->inst[3]); + + printf("\n\t\tINST %d.0: ", iaddr); + nvsDisasmHWShaderOp(shader, 0); + if (shader->HasMergedInst(shader)) { + printf("\n\t\tINST %d.1: ", iaddr); + nvsDisasmHWShaderOp(shader, 1); + } + printf("\n"); + + if (shader->IsLastInst(shader)) + break; + + shader->inst += shader->GetOffsetNext(shader); + iaddr++; + } + + printf("\n"); +} diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.h b/src/mesa/drivers/dri/nouveau/nouveau_shader.h new file mode 100644 index 0000000000..baf59d0259 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.h @@ -0,0 +1,362 @@ +#ifndef __SHADER_COMMON_H__ +#define __SHADER_COMMON_H__ + +#include "mtypes.h" + +typedef struct _nvsFunc nvsFunc; + +#define NVS_MAX_TEMPS 32 +#define NVS_MAX_ATTRIBS 16 +#define NVS_MAX_CONSTS 256 +#define NVS_MAX_ADDRESS 2 +#define NVS_MAX_INSNS 4096 + +typedef struct { + enum { + NVS_INSTRUCTION, + } type; + int position; +} nvsFragmentHeader; + +typedef struct _nvs_fragment_list { + struct _nvs_fragment_list *prev; + struct _nvs_fragment_list *next; + nvsFragmentHeader *fragment; +} nvsFragmentList; + +typedef struct _nouveauShader { + union { + struct gl_vertex_program vp; + struct gl_fragment_program fp; + } mesa; + GLcontext *ctx; + nvsFunc *func; + + /* State of the final program */ + GLboolean translated; + GLboolean on_hardware; + unsigned int *program; + unsigned int program_size; + unsigned int program_alloc_size; + unsigned int program_start_id; + unsigned int program_current; + unsigned int inputs_read; + unsigned int outputs_written; + int inst_count; + + struct { + GLfloat *source_val; /* NULL if invariant */ + float val[4]; + int hw_index; /* hw-specific value */ + } params[NVS_MAX_CONSTS]; + + struct { + int last_use; + } temps[NVS_MAX_TEMPS]; + + /* Pass-private data */ + void *pass_rec; + + nvsFragmentList *list_head; + nvsFragmentList *list_tail; +} nouveauShader, *nvsPtr; + +typedef enum { + NVS_FILE_NONE, + NVS_FILE_TEMP, + NVS_FILE_ATTRIB, + NVS_FILE_CONST, + NVS_FILE_RESULT, + NVS_FILE_ADDRESS, + NVS_FILE_UNKNOWN +} nvsRegFile; + +typedef enum { + NVS_OP_UNKNOWN = 0, + NVS_OP_NOP, + NVS_OP_ABS, NVS_OP_ADD, NVS_OP_ARA, NVS_OP_ARL, NVS_OP_ARR, + NVS_OP_BRA, NVS_OP_BRK, + NVS_OP_CAL, NVS_OP_CMP, NVS_OP_COS, + NVS_OP_DDX, NVS_OP_DDY, NVS_OP_DIV, NVS_OP_DP2, NVS_OP_DP2A, NVS_OP_DP3, + NVS_OP_DP4, NVS_OP_DPH, NVS_OP_DST, + NVS_OP_EX2, NVS_OP_EXP, + NVS_OP_FLR, NVS_OP_FRC, + NVS_OP_IF, + NVS_OP_KIL, + NVS_OP_LG2, NVS_OP_LIT, NVS_OP_LOG, NVS_OP_LOOP, NVS_OP_LRP, + NVS_OP_MAD, NVS_OP_MAX, NVS_OP_MIN, NVS_OP_MOV, NVS_OP_MUL, + NVS_OP_NRM, + NVS_OP_PK2H, NVS_OP_PK2US, NVS_OP_PK4B, NVS_OP_PK4UB, NVS_OP_POW, + NVS_OP_POPA, NVS_OP_PUSHA, + NVS_OP_RCC, NVS_OP_RCP, NVS_OP_REP, NVS_OP_RET, NVS_OP_RFL, NVS_OP_RSQ, + NVS_OP_SCS, NVS_OP_SEQ, NVS_OP_SFL, NVS_OP_SGE, NVS_OP_SGT, NVS_OP_SIN, + NVS_OP_SLE, NVS_OP_SLT, NVS_OP_SNE, NVS_OP_SSG, NVS_OP_STR, NVS_OP_SUB, + NVS_OP_SWZ, + NVS_OP_TEX, NVS_OP_TXB, NVS_OP_TXD, NVS_OP_TXL, NVS_OP_TXP, + NVS_OP_UP2H, NVS_OP_UP2US, NVS_OP_UP4B, NVS_OP_UP4UB, + NVS_OP_X2D, NVS_OP_XPD, + NVS_OP_EMUL +} nvsOpcode; + +typedef enum { + NVS_PREC_FLOAT32, + NVS_PREC_FLOAT16, + NVS_PREC_FIXED12, + NVS_PREC_UNKNOWN +} nvsPrecision; + +typedef enum { + NVS_SWZ_X = 0, + NVS_SWZ_Y = 1, + NVS_SWZ_Z = 2, + NVS_SWZ_W = 3 +} nvsSwzComp; + +typedef enum { + NVS_FR_POSITION, + NVS_FR_WEIGHT, + NVS_FR_NORMAL, + NVS_FR_COL0, + NVS_FR_COL1, + NVS_FR_BFC0, + NVS_FR_BFC1, + NVS_FR_FOGCOORD, + NVS_FR_POINTSZ, + NVS_FR_TEXCOORD0, + NVS_FR_TEXCOORD1, + NVS_FR_TEXCOORD2, + NVS_FR_TEXCOORD3, + NVS_FR_TEXCOORD4, + NVS_FR_TEXCOORD5, + NVS_FR_TEXCOORD6, + NVS_FR_TEXCOORD7, + NVS_FR_FRAGDATA0, + NVS_FR_FRAGDATA1, + NVS_FR_FRAGDATA2, + NVS_FR_FRAGDATA3, + NVS_FR_CLIP0, + NVS_FR_CLIP1, + NVS_FR_CLIP2, + NVS_FR_CLIP3, + NVS_FR_CLIP4, + NVS_FR_CLIP5, + NVS_FR_CLIP6, + NVS_FR_FACING, + NVS_FR_UNKNOWN +} nvsFixedReg; + +typedef enum { + NVS_COND_FL, NVS_COND_LT, NVS_COND_EQ, NVS_COND_LE, NVS_COND_GT, + NVS_COND_NE, NVS_COND_GE, NVS_COND_TR, NVS_COND_UN, + NVS_COND_UNKNOWN +} nvsCond; + +typedef struct { + nvsRegFile file; + unsigned int index; + + unsigned int indexed; + unsigned int addr_reg; + nvsSwzComp addr_comp; + + nvsSwzComp swizzle[4]; + int negate; + int abs; +} nvsRegister; + +static const nvsRegister nvr_unused = { + .file = NVS_FILE_ATTRIB, + .index = 0, + .indexed = 0, + .addr_reg = 0, + .addr_comp = NVS_SWZ_X, + .swizzle = {NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W}, + .negate = 0, + .abs = 0, +}; + +typedef enum { + NVS_TEX_TARGET_1D, + NVS_TEX_TARGET_2D, + NVS_TEX_TARGET_3D, + NVS_TEX_TARGET_CUBE, + NVS_TEX_TARGET_RECT, + NVS_TEX_TARGET_UNKNOWN = 0 +} nvsTexTarget; + +typedef struct { + nvsFragmentHeader header; + + nvsOpcode op; + unsigned int saturate; + + nvsRegister dest; + unsigned int mask; + + nvsRegister src[3]; + + unsigned int tex_unit; + nvsTexTarget tex_target; + + nvsCond cond; + nvsSwzComp cond_swizzle[4]; + int cond_reg; + int cond_test; + int cond_update; +} nvsInstruction; + +#define SMASK_X (1<<0) +#define SMASK_Y (1<<1) +#define SMASK_Z (1<<2) +#define SMASK_W (1<<3) +#define SMASK_ALL (SMASK_X|SMASK_Y|SMASK_Z|SMASK_W) + +#define SPOS_ADDRESS 3 +struct _op_xlat { + unsigned int NV; + nvsOpcode SOP; + int srcpos[3]; +}; +#define MOD_OPCODE(t,hw,sop,s0,s1,s2) do { \ + t[hw].NV = hw; \ + t[hw].SOP = sop; \ + t[hw].srcpos[0] = s0; \ + t[hw].srcpos[1] = s1; \ + t[hw].srcpos[2] = s2; \ +} while(0) + +extern unsigned int NVVP_TX_VOP_COUNT; +extern unsigned int NVVP_TX_NVS_OP_COUNT; +extern struct _op_xlat NVVP_TX_VOP[]; +extern struct _op_xlat NVVP_TX_SOP[]; + +extern unsigned int NVFP_TX_AOP_COUNT; +extern unsigned int NVFP_TX_BOP_COUNT; +extern struct _op_xlat NVFP_TX_AOP[]; +extern struct _op_xlat NVFP_TX_BOP[]; + +#define SCAP_SRC_ABS (1<<0) + +struct _nvsFunc { + unsigned int MaxInst; + unsigned int MaxAttrib; + unsigned int MaxTemp; + unsigned int MaxAddress; + unsigned int MaxConst; + unsigned int caps; + + unsigned int *inst; + void (*UploadToHW) (GLcontext *, nouveauShader *); + void (*UpdateConst) (GLcontext *, nouveauShader *, int); + + struct _op_xlat*(*GetOPTXRec) (nvsFunc *, int merged); + struct _op_xlat*(*GetOPTXFromSOP) (nvsOpcode, int *id); + + int (*SupportsOpcode) (nvsFunc *, nvsOpcode); + void (*SetOpcode) (nvsFunc *, unsigned int opcode, + int slot); + void (*SetCCUpdate) (nvsFunc *); + void (*SetCondition) (nvsFunc *, int on, nvsCond, int reg, + nvsSwzComp *swizzle); + void (*SetResult) (nvsFunc *, nvsRegister *, + unsigned int mask, int slot); + void (*SetSource) (nvsFunc *, nvsRegister *, int pos); + void (*SetUnusedSource) (nvsFunc *, int pos); + void (*SetTexImageUnit) (nvsFunc *, int unit); + void (*SetSaturate) (nvsFunc *); + void (*SetLastInst) (nvsFunc *); + + int (*HasMergedInst) (nvsFunc *); + int (*IsLastInst) (nvsFunc *); + int (*GetOffsetNext) (nvsFunc *); + + int (*GetOpcodeSlot) (nvsFunc *, int merged); + unsigned int (*GetOpcodeHW) (nvsFunc *, int slot); + nvsOpcode (*GetOpcode) (nvsFunc *, int merged); + + nvsPrecision (*GetPrecision) (nvsFunc *); + int (*GetSaturate) (nvsFunc *); + + nvsRegFile (*GetDestFile) (nvsFunc *, int merged); + unsigned int (*GetDestID) (nvsFunc *, int merged); + unsigned int (*GetDestMask) (nvsFunc *, int merged); + + unsigned int (*GetSourceHW) (nvsFunc *, int merged, int pos); + nvsRegFile (*GetSourceFile) (nvsFunc *, int merged, int pos); + int (*GetSourceID) (nvsFunc *, int merged, int pos); + int (*GetTexImageUnit) (nvsFunc *); + int (*GetSourceNegate) (nvsFunc *, int merged, int pos); + int (*GetSourceAbs) (nvsFunc *, int merged, int pos); + void (*GetSourceSwizzle) (nvsFunc *, int merged, int pos, + nvsSwzComp *swz); + int (*GetSourceIndexed) (nvsFunc *, int merged, int pos); + void (*GetSourceConstVal) (nvsFunc *, int merged, int pos, + float *val); + int (*GetSourceScale) (nvsFunc *, int merged, int pos); + + int (*GetRelAddressRegID) (nvsFunc *); + nvsSwzComp (*GetRelAddressSwizzle) (nvsFunc *); + + int (*SupportsConditional) (nvsFunc *); + int (*GetConditionUpdate) (nvsFunc *); + int (*GetConditionTest) (nvsFunc *); + nvsCond (*GetCondition) (nvsFunc *); + void (*GetCondRegSwizzle) (nvsFunc *, nvsSwzComp *swz); + int (*GetCondRegID) (nvsFunc *); + int (*GetBranch) (nvsFunc *); + int (*GetBranchElse) (nvsFunc *); + int (*GetBranchEnd) (nvsFunc *); + + int (*GetLoopCount) (nvsFunc *); + int (*GetLoopInitial) (nvsFunc *); + int (*GetLoopIncrement) (nvsFunc *); +}; + +static inline nvsRegister +nvsNegate(nvsRegister reg) +{ + reg.negate = !reg.negate; + return reg; +} + +static inline nvsRegister +nvsAbs(nvsRegister reg) +{ + reg.abs = 1; + return reg; +} + +static inline nvsRegister +nvsSwizzle(nvsRegister reg, nvsSwzComp x, nvsSwzComp y, + nvsSwzComp z, nvsSwzComp w) +{ + nvsSwzComp sc[4] = { x, y, z, w }; + nvsSwzComp oc[4]; + int i; + + for (i=0;i<4;i++) + oc[i] = reg.swizzle[i]; + for (i=0;i<4;i++) + reg.swizzle[i] = oc[sc[i]]; + return reg; +} + +extern GLboolean nvsUpdateShader(GLcontext *ctx, nouveauShader *nvs); +extern void nvsDisasmHWShader(nvsPtr); + +extern void NV20VPInitShaderFuncs(nvsFunc *); +extern void NV30VPInitShaderFuncs(nvsFunc *); +extern void NV40VPInitShaderFuncs(nvsFunc *); + +extern void NV30FPInitShaderFuncs(nvsFunc *); +extern void NV40FPInitShaderFuncs(nvsFunc *); + +extern void nouveauShaderInitFuncs(GLcontext *ctx); + +extern GLboolean nouveau_shader_pass0_arb(GLcontext *ctx, nouveauShader *nvs); +extern GLboolean nouveau_shader_pass0_slang(GLcontext *ctx, nouveauShader *nvs); +extern GLboolean nouveau_shader_pass1(nvsPtr nvs); +extern GLboolean nouveau_shader_pass2(nvsPtr nvs); + +#endif + diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0_arb.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0_arb.c new file mode 100644 index 0000000000..8b5222d069 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0_arb.c @@ -0,0 +1,694 @@ +/* + * Copyright (C) 2006 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "program.h" +#include "programopt.h" +#include "program_instruction.h" + +#include "nouveau_context.h" +#include "nouveau_shader.h" + +static nvsFixedReg _tx_mesa_vp_dst_reg[VERT_RESULT_MAX] = { + NVS_FR_POSITION, NVS_FR_COL0, NVS_FR_COL1, NVS_FR_FOGCOORD, + NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3, + NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7, + NVS_FR_POINTSZ, NVS_FR_BFC0, NVS_FR_BFC1, NVS_FR_UNKNOWN /* EDGE */ +}; + +static nvsFixedReg _tx_mesa_fp_dst_reg[FRAG_RESULT_MAX] = { + NVS_FR_FRAGDATA0 /* COLR */, NVS_FR_FRAGDATA0 /* COLH */, + NVS_FR_UNKNOWN /* DEPR */ +}; + +static nvsFixedReg _tx_mesa_vp_src_reg[VERT_ATTRIB_MAX] = { + NVS_FR_POSITION, NVS_FR_WEIGHT, NVS_FR_NORMAL, NVS_FR_COL0, NVS_FR_COL1, + NVS_FR_FOGCOORD, NVS_FR_UNKNOWN /* COLOR_INDEX */, NVS_FR_UNKNOWN, + NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3, + NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7, +/* Generic attribs 0-15, aliased to the above */ + NVS_FR_POSITION, NVS_FR_WEIGHT, NVS_FR_NORMAL, NVS_FR_COL0, NVS_FR_COL1, + NVS_FR_FOGCOORD, NVS_FR_UNKNOWN /* COLOR_INDEX */, NVS_FR_UNKNOWN, + NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3, + NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7 +}; + +static nvsFixedReg _tx_mesa_fp_src_reg[FRAG_ATTRIB_MAX] = { + NVS_FR_POSITION, NVS_FR_COL0, NVS_FR_COL1, NVS_FR_FOGCOORD, + NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3, + NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7 +}; + +static nvsSwzComp _tx_mesa_swizzle[4] = { + NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W +}; + +static nvsOpcode _tx_mesa_opcode[] = { + [OPCODE_ABS] = NVS_OP_ABS, [OPCODE_ADD] = NVS_OP_ADD, + [OPCODE_ARA] = NVS_OP_ARA, [OPCODE_ARL] = NVS_OP_ARL, + [OPCODE_ARL_NV] = NVS_OP_ARL, [OPCODE_ARR] = NVS_OP_ARR, + [OPCODE_CMP] = NVS_OP_CMP, [OPCODE_COS] = NVS_OP_COS, + [OPCODE_DDX] = NVS_OP_DDX, [OPCODE_DDY] = NVS_OP_DDY, + [OPCODE_DP3] = NVS_OP_DP3, [OPCODE_DP4] = NVS_OP_DP4, + [OPCODE_DPH] = NVS_OP_DPH, [OPCODE_DST] = NVS_OP_DST, + [OPCODE_EX2] = NVS_OP_EX2, [OPCODE_EXP] = NVS_OP_EXP, + [OPCODE_FLR] = NVS_OP_FLR, [OPCODE_FRC] = NVS_OP_FRC, + [OPCODE_KIL] = NVS_OP_EMUL, [OPCODE_KIL_NV] = NVS_OP_KIL, + [OPCODE_LG2] = NVS_OP_LG2, [OPCODE_LIT] = NVS_OP_LIT, + [OPCODE_LOG] = NVS_OP_LOG, + [OPCODE_LRP] = NVS_OP_LRP, + [OPCODE_MAD] = NVS_OP_MAD, [OPCODE_MAX] = NVS_OP_MAX, + [OPCODE_MIN] = NVS_OP_MIN, [OPCODE_MOV] = NVS_OP_MOV, + [OPCODE_MUL] = NVS_OP_MUL, + [OPCODE_PK2H] = NVS_OP_PK2H, [OPCODE_PK2US] = NVS_OP_PK2US, + [OPCODE_PK4B] = NVS_OP_PK4B, [OPCODE_PK4UB] = NVS_OP_PK4UB, + [OPCODE_POW] = NVS_OP_POW, [OPCODE_POPA] = NVS_OP_POPA, + [OPCODE_PUSHA] = NVS_OP_PUSHA, + [OPCODE_RCC] = NVS_OP_RCC, [OPCODE_RCP] = NVS_OP_RCP, + [OPCODE_RFL] = NVS_OP_RFL, [OPCODE_RSQ] = NVS_OP_RSQ, + [OPCODE_SCS] = NVS_OP_SCS, [OPCODE_SEQ] = NVS_OP_SEQ, + [OPCODE_SFL] = NVS_OP_SFL, [OPCODE_SGE] = NVS_OP_SGE, + [OPCODE_SGT] = NVS_OP_SGT, [OPCODE_SIN] = NVS_OP_SIN, + [OPCODE_SLE] = NVS_OP_SLE, [OPCODE_SLT] = NVS_OP_SLT, + [OPCODE_SNE] = NVS_OP_SNE, [OPCODE_SSG] = NVS_OP_SSG, + [OPCODE_STR] = NVS_OP_STR, [OPCODE_SUB] = NVS_OP_SUB, + [OPCODE_SWZ] = NVS_OP_MOV, + [OPCODE_TEX] = NVS_OP_TEX, [OPCODE_TXB] = NVS_OP_TXB, + [OPCODE_TXD] = NVS_OP_TXD, + [OPCODE_TXL] = NVS_OP_TXL, [OPCODE_TXP] = NVS_OP_TXP, + [OPCODE_TXP_NV] = NVS_OP_TXP, + [OPCODE_UP2H] = NVS_OP_UP2H, [OPCODE_UP2US] = NVS_OP_UP2US, + [OPCODE_UP4B] = NVS_OP_UP4B, [OPCODE_UP4UB] = NVS_OP_UP4UB, + [OPCODE_X2D] = NVS_OP_X2D, + [OPCODE_XPD] = NVS_OP_XPD +}; + +static nvsCond _tx_mesa_condmask[] = { + NVS_COND_UNKNOWN, NVS_COND_GT, NVS_COND_LT, NVS_COND_UN, NVS_COND_GE, + NVS_COND_LE, NVS_COND_NE, NVS_COND_NE, NVS_COND_TR, NVS_COND_FL +}; + +struct pass0_rec { + int nvs_ipos; + int next_temp; + int swzconst_done; + int swzconst_id; +}; + +#define X NVS_SWZ_X +#define Y NVS_SWZ_Y +#define Z NVS_SWZ_Z +#define W NVS_SWZ_W + +static void +pass0_append_fragment(nouveauShader *nvs, nvsFragmentHeader *fragment) +{ + nvsFragmentList *list = calloc(1, sizeof(nvsFragmentList)); + if (!list) + return; + + list->fragment = fragment; + list->prev = nvs->list_tail; + if ( nvs->list_tail) + nvs->list_tail->next = list; + if (!nvs->list_head) + nvs->list_head = list; + nvs->list_tail = list; + + nvs->inst_count++; +} + +static void +pass0_make_reg(nouveauShader *nvs, nvsRegister *reg, + nvsRegFile file, unsigned int index) +{ + struct pass0_rec *rec = nvs->pass_rec; + + /* defaults */ + *reg = nvr_unused; + /* -1 == quick-and-dirty temp alloc */ + if (file == NVS_FILE_TEMP && index == -1) { + index = rec->next_temp++; + assert(index < NVS_MAX_TEMPS); + } + reg->file = file; + reg->index = index; +} + +static void +pass0_make_swizzle(nvsSwzComp *swz, unsigned int mesa) +{ + int i; + + for (i=0;i<4;i++) + swz[i] = _tx_mesa_swizzle[GET_SWZ(mesa, i)]; +} + +static nvsOpcode +pass0_make_opcode(enum prog_opcode op) +{ + if (op > MAX_OPCODE) + return NVS_OP_UNKNOWN; + return _tx_mesa_opcode[op]; +} + +static nvsCond +pass0_make_condmask(GLuint mesa) +{ + if (mesa > COND_FL) + return NVS_COND_UNKNOWN; + return _tx_mesa_condmask[mesa]; +} + +static unsigned int +pass0_make_mask(GLuint mesa_mask) +{ + unsigned int mask = 0; + + if (mesa_mask & WRITEMASK_X) mask |= SMASK_X; + if (mesa_mask & WRITEMASK_Y) mask |= SMASK_Y; + if (mesa_mask & WRITEMASK_Z) mask |= SMASK_Z; + if (mesa_mask & WRITEMASK_W) mask |= SMASK_W; + + return mask; +} + +static nvsTexTarget +pass0_make_tex_target(GLuint mesa) +{ + switch (mesa) { + case TEXTURE_1D_INDEX: return NVS_TEX_TARGET_1D; + case TEXTURE_2D_INDEX: return NVS_TEX_TARGET_2D; + case TEXTURE_3D_INDEX: return NVS_TEX_TARGET_3D; + case TEXTURE_CUBE_INDEX: return NVS_TEX_TARGET_CUBE; + case TEXTURE_RECT_INDEX: return NVS_TEX_TARGET_RECT; + default: + return NVS_TEX_TARGET_UNKNOWN; + } +} + +static void +pass0_make_dst_reg(nvsPtr nvs, nvsRegister *reg, + struct prog_dst_register *dst) +{ + struct gl_program *mesa = (struct gl_program*)&nvs->mesa.vp; + nvsFixedReg sfr; + + switch (dst->File) { + case PROGRAM_OUTPUT: + if (mesa->Target == GL_VERTEX_PROGRAM_ARB) { + sfr = (dst->Index < VERT_RESULT_MAX) ? + _tx_mesa_vp_dst_reg[dst->Index] : NVS_FR_UNKNOWN; + } else { + sfr = (dst->Index < FRAG_RESULT_MAX) ? + _tx_mesa_fp_dst_reg[dst->Index] : NVS_FR_UNKNOWN; + } + pass0_make_reg(nvs, reg, NVS_FILE_RESULT, sfr); + break; + case PROGRAM_TEMPORARY: + pass0_make_reg(nvs, reg, NVS_FILE_TEMP, dst->Index); + break; + case PROGRAM_ADDRESS: + pass0_make_reg(nvs, reg, NVS_FILE_ADDRESS, dst->Index); + break; + default: + fprintf(stderr, "Unknown dest file %d\n", dst->File); + assert(0); + } +} + +static void +pass0_make_src_reg(nvsPtr nvs, nvsRegister *reg, struct prog_src_register *src) +{ + struct gl_program *mesa = (struct gl_program *)&nvs->mesa.vp.Base; + struct gl_program_parameter_list *p = mesa->Parameters; + + *reg = nvr_unused; + + switch (src->File) { + case PROGRAM_INPUT: + reg->file = NVS_FILE_ATTRIB; + if (mesa->Target == GL_VERTEX_PROGRAM_ARB) { + reg->index = (src->Index < VERT_ATTRIB_MAX) ? + _tx_mesa_vp_src_reg[src->Index] : NVS_FR_UNKNOWN; + } else { + reg->index = (src->Index < FRAG_ATTRIB_MAX) ? + _tx_mesa_fp_src_reg[src->Index] : NVS_FR_UNKNOWN; + } + break; + /* All const types seem to get shoved into here, not really sure why */ + case PROGRAM_STATE_VAR: + switch (p->Parameters[src->Index].Type) { + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + nvs->params[src->Index].source_val = NULL; + COPY_4V(nvs->params[src->Index].val, p->ParameterValues[src->Index]); + break; + case PROGRAM_STATE_VAR: + nvs->params[src->Index].source_val = p->ParameterValues[src->Index]; + break; + default: + fprintf(stderr, "Unknown parameter type %d\n", + p->Parameters[src->Index].Type); + assert(0); + break; + } + + if (src->RelAddr) { + reg->indexed = 1; + reg->addr_reg = 0; + reg->addr_comp = NVS_SWZ_X; + } else + reg->indexed = 0; + reg->file = NVS_FILE_CONST; + reg->index = src->Index; + break; + case PROGRAM_TEMPORARY: + reg->file = NVS_FILE_TEMP; + reg->index = src->Index; + break; + default: + fprintf(stderr, "Unknown source type %d\n", src->File); + assert(0); + } + + /* per-component negate handled elsewhere */ + reg->negate = src->NegateBase != 0; + reg->abs = src->Abs; + pass0_make_swizzle(reg->swizzle, src->Swizzle); +} + +static nvsInstruction * +pass0_emit(nouveauShader *nvs, nvsOpcode op, nvsRegister dst, + unsigned int mask, int saturate, + nvsRegister src0, nvsRegister src1, nvsRegister src2) +{ + struct pass0_rec *rec = nvs->pass_rec; + nvsInstruction *sif = NULL; + + /* Seems mesa doesn't explicitly 0 this.. */ + if (nvs->mesa.vp.Base.Target == GL_VERTEX_PROGRAM_ARB) + saturate = 0; + + sif = calloc(1, sizeof(nvsInstruction)); + if (sif) { + sif->header.type = NVS_INSTRUCTION; + sif->header.position = rec->nvs_ipos++; + sif->op = op; + sif->saturate = saturate; + sif->dest = dst; + sif->mask = mask; + sif->src[0] = src0; + sif->src[1] = src1; + sif->src[2] = src2; + sif->cond = COND_TR; + sif->cond_reg = 0; + sif->cond_test = 0; + sif->cond_update = 0; + pass0_make_swizzle(sif->cond_swizzle, SWIZZLE_NOOP); + pass0_append_fragment(nvs, (nvsFragmentHeader *)sif); + } + + return sif; +} + +static void +pass0_fixup_swizzle(nvsPtr nvs, + struct prog_src_register *src, + unsigned int sm1, + unsigned int sm2) +{ + static const float sc[4] = { 1.0, 0.0, -1.0, 0.0 }; + struct pass0_rec *rec = nvs->pass_rec; + int fixup_1, fixup_2; + nvsRegister sr, dr = nvr_unused; + nvsRegister sm1const, sm2const; + + if (!rec->swzconst_done) { + struct gl_program *prog = &nvs->mesa.vp.Base; + rec->swzconst_id = _mesa_add_unnamed_constant(prog->Parameters, sc, 4); + rec->swzconst_done = 1; + COPY_4V(nvs->params[rec->swzconst_id].val, sc); + } + + fixup_1 = (sm1 != MAKE_SWIZZLE4(0,0,0,0) && sm2 != MAKE_SWIZZLE4(2,2,2,2)); + fixup_2 = (sm2 != MAKE_SWIZZLE4(2,2,2,2)); + + if (src->File != PROGRAM_TEMPORARY && src->File != PROGRAM_INPUT) { + /* We can't use more than one const in an instruction, so move the const + * into a temp, and swizzle from there. + *TODO: should just emit the swizzled const, instead of swizzling it + * in the shader.. would need to reswizzle any state params when they + * change however.. + */ + pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); + pass0_make_src_reg(nvs, &sr, src); + pass0_emit(nvs, NVS_OP_MOV, dr, SMASK_ALL, 0, sr, nvr_unused, nvr_unused); + pass0_make_reg(nvs, &sr, NVS_FILE_TEMP, dr.index); + } else { + if (fixup_1) + src->NegateBase = 0; + pass0_make_src_reg(nvs, &sr, src); + pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); + } + + pass0_make_reg(nvs, &sm1const, NVS_FILE_CONST, rec->swzconst_id); + pass0_make_swizzle(sm1const.swizzle, sm1); + if (fixup_1 && fixup_2) { + /* Any combination with SWIZZLE_ONE */ + pass0_make_reg(nvs, &sm2const, NVS_FILE_CONST, rec->swzconst_id); + pass0_make_swizzle(sm2const.swizzle, sm2); + pass0_emit(nvs, NVS_OP_MAD, dr, SMASK_ALL, 0, sr, sm1const, sm2const); + } else { + /* SWIZZLE_ZERO || arbitrary negate */ + pass0_emit(nvs, NVS_OP_MUL, dr, SMASK_ALL, 0, sr, sm1const, nvr_unused); + } + + src->File = PROGRAM_TEMPORARY; + src->Index = dr.index; + src->Swizzle = SWIZZLE_NOOP; +} + +#define SET_SWZ(fs, cp, c) fs = (fs & ~(0x7<<(cp*3))) | (c<<(cp*3)) +static void +pass0_check_sources(nvsPtr nvs, struct prog_instruction *inst) +{ + unsigned int insrc = -1, constsrc = -1; + int i; + + for (i=0;i<_mesa_num_inst_src_regs(inst->Opcode);i++) { + struct prog_src_register *src = &inst->SrcReg[i]; + unsigned int sm_1 = 0, sm_2 = 0; + nvsRegister sr, dr; + int do_mov = 0, c; + + /* Build up swizzle masks as if we were going to use + * "MAD new, src, const1, const2" to support arbitrary negation + * and SWIZZLE_ZERO/SWIZZLE_ONE. + */ + for (c=0;c<4;c++) { + if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ZERO) { + SET_SWZ(sm_1, c, SWIZZLE_Y); /* 0.0 */ + SET_SWZ(sm_2, c, SWIZZLE_Y); + SET_SWZ(src->Swizzle, c, SWIZZLE_X); + } else if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ONE) { + SET_SWZ(sm_1, c, SWIZZLE_Y); + if (src->NegateBase & (1<Swizzle, c, SWIZZLE_X); + } else { + if (src->NegateBase & (1<File) { + case PROGRAM_INPUT: + if (insrc != -1 && insrc != src->Index) + do_mov = 1; + else insrc = src->Index; + break; + case PROGRAM_STATE_VAR: + if (constsrc != -1 && constsrc != src->Index) + do_mov = 1; + else constsrc = src->Index; + break; + default: + break; + } + + /* Emit any extra ATTRIB/CONST to a temp, and modify the Mesa instruction + * to point at the temp. + */ + if (do_mov) { + pass0_make_src_reg(nvs, &sr, src); + pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1); + pass0_emit(nvs, NVS_OP_MOV, dr, SMASK_ALL, 0, + sr, nvr_unused, nvr_unused); + + src->File = PROGRAM_TEMPORARY; + src->Index = dr.index; + src->Swizzle= SWIZZLE_NOOP; + } + } +} + +static GLboolean +pass0_emulate_instruction(nouveauShader *nvs, struct prog_instruction *inst) +{ + nvsFunc *shader = nvs->func; + nvsRegister src[3], dest, temp; + nvsInstruction *nvsinst; + unsigned int mask = pass0_make_mask(inst->DstReg.WriteMask); + int i, sat; + + sat = (inst->SaturateMode == SATURATE_ZERO_ONE); + + /* Build all the "real" regs for the instruction */ + for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++) + pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]); + if (inst->Opcode != OPCODE_KIL) + pass0_make_dst_reg(nvs, &dest, &inst->DstReg); + + switch (inst->Opcode) { + case OPCODE_ABS: + if (shader->caps & SCAP_SRC_ABS) + pass0_emit(nvs, NVS_OP_MOV, dest, mask, sat, + nvsAbs(src[0]), nvr_unused, nvr_unused); + else + pass0_emit(nvs, NVS_OP_MAX, dest, mask, sat, + src[0], nvsNegate(src[0]), nvr_unused); + break; + case OPCODE_KIL: + /* This is only in ARB shaders, so we don't have to worry + * about clobbering a CC reg as they aren't supported anyway. + */ + /* MOVC0 temp, src */ + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + nvsinst = pass0_emit(nvs, NVS_OP_MOV, temp, SMASK_ALL, 0, + src[0], nvr_unused, nvr_unused); + nvsinst->cond_update = 1; + nvsinst->cond_reg = 0; + /* KIL_NV (LT0.xyzw) temp */ + nvsinst = pass0_emit(nvs, NVS_OP_KIL, nvr_unused, 0, 0, + nvr_unused, nvr_unused, nvr_unused); + nvsinst->cond = COND_LT; + nvsinst->cond_reg = 0; + nvsinst->cond_test = 1; + pass0_make_swizzle(nvsinst->cond_swizzle, MAKE_SWIZZLE4(0,1,2,3)); + break; + case OPCODE_LIT: + break; + case OPCODE_LRP: + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + pass0_emit(nvs, NVS_OP_MAD, temp, mask, 0, + nvsNegate(src[0]), src[2], src[2]); + pass0_emit(nvs, NVS_OP_MAD, dest, mask, sat, + src[0], src[1], temp); + break; + case OPCODE_POW: + if (shader->SupportsOpcode(shader, NVS_OP_LG2) && + shader->SupportsOpcode(shader, NVS_OP_EX2)) { + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + /* LG2 temp.x, src0.c */ + pass0_emit(nvs, NVS_OP_LG2, temp, SMASK_X, 0, + nvsSwizzle(src[0], X, X, X, X), + nvr_unused, + nvr_unused); + /* MUL temp.x, temp.x, src1.c */ + pass0_emit(nvs, NVS_OP_MUL, temp, SMASK_X, 0, + nvsSwizzle(temp, X, X, X, X), + nvsSwizzle(src[1], X, X, X, X), + nvr_unused); + /* EX2 dest, temp.x */ + pass0_emit(nvs, NVS_OP_EX2, dest, mask, sat, + nvsSwizzle(temp, X, X, X, X), + nvr_unused, + nvr_unused); + } else { + /* can we use EXP/LOG instead of EX2/LG2?? */ + fprintf(stderr, "Implement POW for NV20 vtxprog!\n"); + return GL_FALSE; + } + break; + case OPCODE_RSQ: + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + pass0_emit(nvs, NVS_OP_LG2, temp, SMASK_X, 0, + nvsAbs(nvsSwizzle(src[0], X, X, X, X)), nvr_unused, nvr_unused); + pass0_emit(nvs, NVS_OP_EX2, dest, mask, sat, + nvsSwizzle(temp, X, X, X, X), nvr_unused, nvr_unused); + break; + case OPCODE_SCS: + if (mask & SMASK_X) + pass0_emit(nvs, NVS_OP_COS, dest, SMASK_X, sat, + nvsSwizzle(src[0], X, X, X, X), + nvr_unused, + nvr_unused); + if (mask & SMASK_Y) + pass0_emit(nvs, NVS_OP_SIN, dest, SMASK_Y, sat, + nvsSwizzle(src[0], X, X, X, X), + nvr_unused, + nvr_unused); + break; + case OPCODE_SUB: + pass0_emit(nvs, NVS_OP_ADD, dest, mask, sat, + src[0], nvsNegate(src[1]), nvr_unused); + break; + case OPCODE_XPD: + pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1); + pass0_emit(nvs, NVS_OP_MUL, temp, SMASK_ALL, 0, + nvsSwizzle(src[0], Z, X, Y, Y), + nvsSwizzle(src[1], Y, Z, X, X), + nvr_unused); + pass0_emit(nvs, NVS_OP_MAD, dest, (mask & ~SMASK_W), sat, + nvsSwizzle(src[0], Y, Z, X, X), + nvsSwizzle(src[1], Z, X, Y, Y), + nvsNegate(temp)); + break; + default: + fprintf(stderr, "hw doesn't support opcode \"%s\", and no emulation found\n", + _mesa_opcode_string(inst->Opcode)); + return GL_FALSE; + } + + return GL_TRUE; +} + +static GLboolean +pass0_translate_instructions(nouveauShader *nvs) +{ + struct gl_program *prog = (struct gl_program *)&nvs->mesa.vp; + struct pass0_rec *rec = nvs->pass_rec; + nvsFunc *shader = nvs->func; + int ipos; + + for (ipos=0; iposNumInstructions; ipos++) { + struct prog_instruction *inst = &prog->Instructions[ipos]; + + if (inst->Opcode == OPCODE_END) + break; + + /* Deal with multiple ATTRIB/PARAM in a single instruction */ + pass0_check_sources(nvs, inst); + + /* Now it's safe to do the prog_instruction->nvsInstruction conversion */ + if (shader->SupportsOpcode(shader, pass0_make_opcode(inst->Opcode))) { + nvsInstruction *nvsinst; + nvsRegister src[3], dest; + int i; + + for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++) + pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]); + pass0_make_dst_reg(nvs, &dest, &inst->DstReg); + + nvsinst = pass0_emit(nvs, + pass0_make_opcode(inst->Opcode), + dest, + pass0_make_mask(inst->DstReg.WriteMask), + (inst->SaturateMode != SATURATE_OFF), + src[0], src[1], src[2]); + nvsinst->tex_unit = inst->TexSrcUnit; + nvsinst->tex_target = pass0_make_tex_target(inst->TexSrcTarget); + /* TODO when NV_fp/vp is implemented */ + nvsinst->cond = COND_TR; + } else { + if (!pass0_emulate_instruction(nvs, inst)) + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean +nouveau_shader_pass0_arb(GLcontext *ctx, nouveauShader *nvs) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + struct gl_program *prog = (struct gl_program*)nvs; + struct gl_vertex_program *vp = (struct gl_vertex_program *)prog; + struct gl_fragment_program *fp = (struct gl_fragment_program *)prog; + struct pass0_rec *rec; + int ret; + + switch (prog->Target) { + case GL_VERTEX_PROGRAM_ARB: + nvs->func = &nmesa->VPfunc; + if (vp->IsPositionInvariant) + _mesa_insert_mvp_code(ctx, vp); +#if 0 + if (IS_FIXEDFUNCTION_PROG && CLIP_PLANES_USED) + pass0_insert_ff_clip_planes(); +#endif + break; + case GL_FRAGMENT_PROGRAM_ARB: + nvs->func = &nmesa->FPfunc; + if (fp->FogOption != GL_NONE) + _mesa_append_fog_code(ctx, fp); + break; + default: + fprintf(stderr, "Unknown program type %d", prog->Target); + return GL_FALSE; + } + + rec = calloc(1, sizeof(struct pass0_rec)); + rec->next_temp = prog->NumTemporaries; + nvs->pass_rec = rec; + + ret = pass0_translate_instructions(nvs); + if (!ret) { + /* DESTROY list */ + } + + free(nvs->pass_rec); + return ret; +} + diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c new file mode 100644 index 0000000000..5de9017f58 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_1.c @@ -0,0 +1,318 @@ +/* + * Copyright (C) 2006 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "nouveau_shader.h" + +#define PASS1_OK 0 +#define PASS1_KILL 1 +#define PASS1_FAIL 2 + +struct pass1_rec { + unsigned int temp[NVS_MAX_TEMPS]; + unsigned int result[NVS_MAX_ATTRIBS]; + unsigned int address[NVS_MAX_ADDRESS]; + unsigned int cc[2]; +}; + +static void +pass1_remove_fragment(nvsPtr nvs, nvsFragmentList *item) +{ + if (item->prev) item->prev->next = item->next; + if (item->next) item->next->prev = item->prev; + if (nvs->list_head == item) nvs->list_head = item->next; + if (nvs->list_tail == item) nvs->list_tail = item->prev; + + nvs->inst_count--; +} + +static int +pass1_result_needed(struct pass1_rec *rec, nvsInstruction *inst) +{ + if (inst->cond_update && rec->cc[inst->cond_reg]) + return 1; + /* Only write components that are read later */ + if (inst->dest.file == NVS_FILE_TEMP) + return (inst->mask & rec->temp[inst->dest.index]); + if (inst->dest.file == NVS_FILE_ADDRESS) + return (inst->mask & rec->address[inst->dest.index]); + /* No point writing result components that are written later */ + if (inst->dest.file == NVS_FILE_RESULT) + return (inst->mask & ~rec->result[inst->dest.index]); + assert(0); +} + +static void +pass1_track_result(struct pass1_rec *rec, nvsInstruction *inst) +{ + if (inst->cond_test) + rec->cc[inst->cond_reg] = 1; + if (inst->dest.file == NVS_FILE_TEMP) { + inst->mask &= rec->temp[inst->dest.index]; + } else if (inst->dest.file == NVS_FILE_RESULT) { + inst->mask &= ~rec->result[inst->dest.index]; + rec->result[inst->dest.index] |= inst->mask; + } else if (inst->dest.file == NVS_FILE_ADDRESS) { + inst->mask &= rec->address[inst->dest.index]; + } +} + +static void +pass1_track_source(nouveauShader *nvs, nvsInstruction *inst, int pos, + unsigned int read) +{ + struct pass1_rec *rec = nvs->pass_rec; + nvsRegister *src = &inst->src[pos]; + unsigned int really_read = 0; + int i,sc; + + /* Account for swizzling */ + for (i=0; i<4; i++) + if (read & (1<swizzle[i]); + + /* Track register reads */ + if (src->file == NVS_FILE_TEMP) { + if (nvs->temps[src->index].last_use == -1) + nvs->temps[src->index].last_use = inst->header.position; + rec->temp [src->index] |= really_read; + } else if (src->indexed) { + rec->address[src->addr_reg] |= (1<addr_comp); + } + + /* Modify swizzle to only access read components */ + /* Find a component that is used.. */ + for (sc=0;sc<4;sc++) + if (really_read & (1<swizzle[i] = sc; +} + +static int +pass1_check_instruction(nouveauShader *nvs, nvsInstruction *inst) +{ + struct pass1_rec *rec = nvs->pass_rec; + unsigned int read0, read1, read2; + + if (inst->op != NVS_OP_KIL) { + if (!pass1_result_needed(rec, inst)) + return PASS1_KILL; + } + pass1_track_result(rec, inst); + + read0 = read1 = read2 = 0; + + switch (inst->op) { + case NVS_OP_FLR: + case NVS_OP_FRC: + case NVS_OP_MOV: + case NVS_OP_SSG: + case NVS_OP_ARL: + read0 = inst->mask; + break; + case NVS_OP_ADD: + case NVS_OP_MAX: + case NVS_OP_MIN: + case NVS_OP_MUL: + case NVS_OP_SEQ: + case NVS_OP_SFL: + case NVS_OP_SGE: + case NVS_OP_SGT: + case NVS_OP_SLE: + case NVS_OP_SLT: + case NVS_OP_SNE: + case NVS_OP_STR: + case NVS_OP_SUB: + read0 = inst->mask; + read1 = inst->mask; + break; + case NVS_OP_CMP: + case NVS_OP_LRP: + case NVS_OP_MAD: + read0 = inst->mask; + read1 = inst->mask; + read2 = inst->mask; + break; + case NVS_OP_XPD: + if (inst->mask & SMASK_X) read0 |= SMASK_Y|SMASK_Z; + if (inst->mask & SMASK_Y) read0 |= SMASK_X|SMASK_Z; + if (inst->mask & SMASK_Z) read0 |= SMASK_X|SMASK_Y; + read1 = read0; + break; + case NVS_OP_COS: + case NVS_OP_EX2: + case NVS_OP_EXP: + case NVS_OP_LG2: + case NVS_OP_LOG: + case NVS_OP_RCC: + case NVS_OP_RCP: + case NVS_OP_RSQ: + case NVS_OP_SCS: + case NVS_OP_SIN: + read0 = SMASK_X; + break; + case NVS_OP_POW: + read0 = SMASK_X; + read1 = SMASK_X; + break; + case NVS_OP_DIV: + read0 = inst->mask; + read1 = SMASK_X; + break; + case NVS_OP_DP2: + read0 = SMASK_X|SMASK_Y; + read1 = SMASK_X|SMASK_Y; + break; + case NVS_OP_DP3: + case NVS_OP_RFL: + read0 = SMASK_X|SMASK_Y|SMASK_Z; + read1 = SMASK_X|SMASK_Y|SMASK_Z; + break; + case NVS_OP_DP4: + read0 = SMASK_ALL; + read1 = SMASK_ALL; + break; + case NVS_OP_DPH: + read0 = SMASK_X|SMASK_Y|SMASK_Z; + read1 = SMASK_ALL; + break; + case NVS_OP_DST: + if (inst->mask & SMASK_Y) read0 = read1 = SMASK_Y; + if (inst->mask & SMASK_Z) read0 |= SMASK_Z; + if (inst->mask & SMASK_W) read1 |= SMASK_W; + break; + case NVS_OP_NRM: + read0 = SMASK_X|SMASK_Y|SMASK_Z; + break; + case NVS_OP_PK2H: + case NVS_OP_PK2US: + read0 = SMASK_X|SMASK_Y; + break; + case NVS_OP_DDX: + case NVS_OP_DDY: + case NVS_OP_UP2H: + case NVS_OP_UP2US: + case NVS_OP_PK4B: + case NVS_OP_PK4UB: + case NVS_OP_UP4B: + case NVS_OP_UP4UB: + read0 = SMASK_ALL; + break; + case NVS_OP_X2D: + read1 = SMASK_X|SMASK_Y; + if (inst->mask & (SMASK_X|SMASK_Z)) { + read0 |= SMASK_X; + read2 |= SMASK_X|SMASK_Y; + } + if (inst->mask & (SMASK_Y|SMASK_W)) { + read0 |= SMASK_Y; + read2 |= SMASK_Z|SMASK_W; + } + break; + case NVS_OP_LIT: + read0 |= SMASK_X|SMASK_Y|SMASK_W; + break; + case NVS_OP_TEX: + case NVS_OP_TXP: + case NVS_OP_TXL: + case NVS_OP_TXB: + read0 = SMASK_ALL; + break; + case NVS_OP_TXD: + read0 = SMASK_ALL; + read1 = SMASK_ALL; + read2 = SMASK_ALL; + break; + case NVS_OP_KIL: + break; + default: + fprintf(stderr, "Unknown sop=%d", inst->op); + return PASS1_FAIL; + } + + /* Any values that are written by this inst can't have been read further up */ + if (inst->dest.file == NVS_FILE_TEMP) + rec->temp[inst->dest.index] &= ~inst->mask; + + if (read0) pass1_track_source(nvs, inst, 0, read0); + if (read1) pass1_track_source(nvs, inst, 1, read1); + if (read2) pass1_track_source(nvs, inst, 2, read2); + + return PASS1_OK; +} + +/* Some basic dead code elimination + * - Remove unused instructions + * - Don't write unused register components + * - Modify swizzles to not reference unneeded components. + */ +GLboolean +nouveau_shader_pass1(nvsPtr nvs) +{ + nvsFragmentList *list = nvs->list_tail; + int i; + + for (i=0; itemps[i].last_use = -1; + + nvs->pass_rec = calloc(1, sizeof(struct pass1_rec)); + + while (list) { + assert(list->fragment->type == NVS_INSTRUCTION); + + switch(pass1_check_instruction(nvs, (nvsInstruction *)list->fragment)) { + case PASS1_OK: + break; + case PASS1_KILL: + pass1_remove_fragment(nvs, list); + break; + case PASS1_FAIL: + default: + free(nvs->pass_rec); + nvs->pass_rec = NULL; + return GL_FALSE; + } + + list = list->prev; + } + + free(nvs->pass_rec); + nvs->pass_rec = NULL; + + return GL_TRUE; +} + + diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c new file mode 100644 index 0000000000..1f09b6d453 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c @@ -0,0 +1,238 @@ +/* + * Copyright (C) 2006 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" + +#include "nouveau_shader.h" + +struct pass2_rec { + /* Map nvsRegister temp ID onto hw temp ID */ + unsigned int temps[NVS_MAX_TEMPS]; + /* Track free hw registers */ + unsigned int hw_temps[NVS_MAX_TEMPS]; +}; + +static int +pass2_alloc_hw_temp(nvsPtr nvs) +{ + struct pass2_rec *rec = nvs->pass_rec; + int i; + + for (i=0; ifunc->MaxTemp; i++) { + /* This is a *horrible* hack.. R0 is both temp0 and result.color + * in NV30/40 fragprogs, we can use R0 as a temp before result is + * written however.. + */ + if (nvs->mesa.vp.Base.Target == GL_FRAGMENT_PROGRAM_ARB && i==0) + continue; + + if (rec->hw_temps[i] == 0) { + rec->hw_temps[i] = 1; + return i; + } + } + return -1; +} + +static void +pass2_free_hw_temp(nvsPtr nvs, int reg) +{ + struct pass2_rec *rec = nvs->pass_rec; + rec->hw_temps[reg] = 0; +} + +static nvsRegister +pass2_mangle_reg(nvsPtr nvs, nvsInstruction *inst, nvsRegister reg) +{ + struct pass2_rec *rec = nvs->pass_rec; + + if (reg.file == NVS_FILE_TEMP) { + int hwidx; + + if (rec->temps[reg.index] == -1) + rec->temps[reg.index] = pass2_alloc_hw_temp(nvs); + hwidx = rec->temps[reg.index]; + + if (nvs->temps[reg.index].last_use <= inst->header.position) + pass2_free_hw_temp(nvs, hwidx); + + reg.index = hwidx; + } + + return reg; +} + +static void +pass2_add_instruction(nvsPtr nvs, nvsInstruction *inst, + struct _op_xlat *op, int slot) +{ + nvsSwzComp default_swz[4] = { NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W }; + nvsFunc *shader = nvs->func; + nvsRegister reg; + int i, srcpos_used = ~7; + + shader->SetOpcode(shader, op->NV, slot); + if (inst->saturate ) shader->SetSaturate(shader); + if (inst->cond_update) shader->SetCCUpdate(shader); + if (inst->cond_test ) shader->SetCondition(shader, 1, inst->cond, + inst->cond_reg, + inst->cond_swizzle); + else shader->SetCondition(shader, 0, NVS_COND_TR, + 0, + default_swz); + switch (inst->op) { + case NVS_OP_TEX: + case NVS_OP_TXB: + case NVS_OP_TXL: + case NVS_OP_TXP: + case NVS_OP_TXD: + shader->SetTexImageUnit(shader, inst->tex_unit); + break; + default: + break; + } + + for (i = 0; i < 3; i++) { + if (op->srcpos[i] != -1) { + reg = pass2_mangle_reg(nvs, inst, inst->src[i]); + if (reg.file == NVS_FILE_ATTRIB) + nvs->inputs_read |= (1 << reg.index); + shader->SetSource(shader, ®, op->srcpos[i]); + srcpos_used |= (1<srcpos[i]); + if (reg.file == NVS_FILE_CONST && shader->GetSourceConstVal) + nvs->params[reg.index].hw_index = nvs->program_current + 4; + } + } + for (i = 0; i < 3; i++) { + if (!(srcpos_used & (1<SetUnusedSource(shader, i); + } + + reg = pass2_mangle_reg(nvs, inst, inst->dest); + if (reg.file == NVS_FILE_RESULT) + nvs->outputs_written |= (1 << reg.index); + shader->SetResult(shader, ®, inst->mask, slot); +} + +static int +pass2_assemble_instruction(nvsPtr nvs, nvsInstruction *inst, int last) +{ + nvsFunc *shader = nvs->func; + struct _op_xlat *op, *op2; + unsigned int hw_inst[8] = {0,0,0,0,0,0,0,0,0}; + int slot, slot2; + int instsz; + int i; + + shader->inst = hw_inst; + + /* Assemble this instruction */ + if (!(op = shader->GetOPTXFromSOP(inst->op, &slot))) + return 0; + pass2_add_instruction(nvs, inst, op, slot); + if (last) + shader->SetLastInst(shader); + + instsz = shader->GetOffsetNext(nvs->func); + if (nvs->program_size + instsz >= nvs->program_alloc_size) { + nvs->program_alloc_size *= 2; + nvs->program = realloc(nvs->program, + nvs->program_alloc_size * sizeof(uint32_t)); + } + + for (i=0; iprogram[nvs->program_current++] = hw_inst[i]; + nvs->program_size = nvs->program_current; + return 1; +} + +/* Translate program into hardware format */ +GLboolean +nouveau_shader_pass2(nvsPtr nvs) +{ + nvsFragmentList *list = nvs->list_head; + struct pass2_rec *rec; + int i; + + rec = calloc(1, sizeof(struct pass2_rec)); + for (i=0; itemps[i] = -1; + nvs->pass_rec = rec; + + /* Start off with allocating 4 uint32_t's for each inst, will be grown + * if necessary.. + */ + nvs->program_alloc_size = nvs->inst_count * 4; + nvs->program = calloc(nvs->program_alloc_size, sizeof(uint32_t)); + nvs->program_size = 0; + nvs->program_current = 0; + + while (list) { + assert(list->fragment->type == NVS_INSTRUCTION); + + if (!pass2_assemble_instruction(nvs, (nvsInstruction *)list->fragment, list->next ? 0 : 1)) { + free(nvs->program); + nvs->program = NULL; + return GL_FALSE; + } + + list = list->next; + } + + /* Shrink allocated memory to only what we need */ + nvs->program = realloc(nvs->program, nvs->program_size * sizeof(uint32_t)); + nvs->program_alloc_size = nvs->program_size; + + nvs->translated = 1; + nvs->on_hardware = 0; + +#if 1 + fflush(stdout); fflush(stderr); + fprintf(stderr, "----------------MESA PROGRAM\n"); + fflush(stdout); fflush(stderr); + _mesa_print_program(&nvs->mesa.vp.Base); + fflush(stdout); fflush(stderr); + fprintf(stderr, "^^^^^^^^^^^^^^^^MESA PROGRAM\n"); + fflush(stdout); fflush(stderr); + fprintf(stderr, "----------------NV40 PROGRAM\n"); + fflush(stdout); fflush(stderr); + nvsDisasmHWShader(nvs); + fflush(stdout); fflush(stderr); + fprintf(stderr, "^^^^^^^^^^^^^^^^NV40 PROGRAM\n"); + fflush(stdout); fflush(stderr); +#endif + + return GL_TRUE; +} + diff --git a/src/mesa/drivers/dri/nouveau/nouveau_state.h b/src/mesa/drivers/dri/nouveau/nouveau_state.h index f8fd0cea50..37f04f41bd 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_state.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_state.h @@ -37,6 +37,7 @@ extern void nv20InitStateFuncs(struct dd_function_table *func); extern void nv30InitStateFuncs(struct dd_function_table *func); extern void nouveauInitState(GLcontext *ctx); + /* extern void nouveauDDUpdateState(GLcontext *ctx); extern void nouveauDDUpdateHWState(GLcontext *ctx); diff --git a/src/mesa/drivers/dri/nouveau/nv20_shader.h b/src/mesa/drivers/dri/nouveau/nv20_shader.h new file mode 100644 index 0000000000..7d2e29db66 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nv20_shader.h @@ -0,0 +1,121 @@ +/* NV20_TCL_PRIMITIVE_3D_0x0B00 */ +#define NV20_VP_INST_0B00 0x00000000 /* always 0? */ +#define NV20_VP_INST0_KNOWN 0 + +/* NV20_TCL_PRIMITIVE_3D_0x0B04 */ +#define NV20_VP_INST_SCA_OPCODE_SHIFT 25 +#define NV20_VP_INST_SCA_OPCODE_MASK (0x0F << 25) +#define NV20_VP_INST_OPCODE_RCP 0x2 +#define NV20_VP_INST_OPCODE_RCC 0x3 +#define NV20_VP_INST_OPCODE_RSQ 0x4 +#define NV20_VP_INST_OPCODE_EXP 0x5 +#define NV20_VP_INST_OPCODE_LOG 0x6 +#define NV20_VP_INST_OPCODE_LIT 0x7 +#define NV20_VP_INST_VEC_OPCODE_SHIFT 21 +#define NV20_VP_INST_VEC_OPCODE_MASK (0x0F << 21) +#define NV20_VP_INST_OPCODE_NOP 0x0 /* guess */ +#define NV20_VP_INST_OPCODE_MOV 0x1 +#define NV20_VP_INST_OPCODE_MUL 0x2 +#define NV20_VP_INST_OPCODE_ADD 0x3 +#define NV20_VP_INST_OPCODE_MAD 0x4 +#define NV20_VP_INST_OPCODE_DP3 0x5 +#define NV20_VP_INST_OPCODE_DPH 0x6 +#define NV20_VP_INST_OPCODE_DP4 0x7 +#define NV20_VP_INST_OPCODE_DST 0x8 +#define NV20_VP_INST_OPCODE_MIN 0x9 +#define NV20_VP_INST_OPCODE_MAX 0xA +#define NV20_VP_INST_OPCODE_SLT 0xB +#define NV20_VP_INST_OPCODE_SGE 0xC +#define NV20_VP_INST_OPCODE_ARL 0xD +#define NV20_VP_INST_CONST_SRC_SHIFT 13 +#define NV20_VP_INST_CONST_SRC_MASK (0xFF << 13) +#define NV20_VP_INST_INPUT_SRC_SHIFT 9 +#define NV20_VP_INST_INPUT_SRC_MASK (0xF << 9) /* guess */ +#define NV20_VP_INST_INPUT_SRC_POS 0 +#define NV20_VP_INST_INPUT_SRC_COL0 3 +#define NV20_VP_INST_INPUT_SRC_COL1 4 +#define NV20_VP_INST_INPUT_SRC_TC(n) (9+n) +#define NV20_VP_INST_SRC0H_SHIFT 0 +#define NV20_VP_INST_SRC0H_MASK (0x1FF << 0) +#define NV20_VP_INST1_KNOWN ( \ + NV20_VP_INST_OPCODE_MASK | \ + NV20_VP_INST_CONST_SRC_MASK | \ + NV20_VP_INST_INPUT_SRC_MASK | \ + NV20_VP_INST_SRC0H_MASK \ + ) + +/* NV20_TCL_PRIMITIVE_3D_0x0B08 */ +#define NV20_VP_INST_SRC0L_SHIFT 26 +#define NV20_VP_INST_SRC0L_MASK (0x3F <<26) +#define NV20_VP_INST_SRC1_SHIFT 11 +#define NV20_VP_INST_SRC1_MASK (0x7FFF<<11) +#define NV20_VP_INST_SRC2H_SHIFT 0 +#define NV20_VP_INST_SRC2H_MASK (0x7FF << 0) + +/* NV20_TCL_PRIMITIVE_3D_0x0B0C */ +#define NV20_VP_INST_SRC2L_SHIFT 28 +#define NV20_VP_INST_SRC2L_MASK (0x0F <<28) +#define NV20_VP_INST_VTEMP_WRITEMASK_SHIFT 24 +#define NV20_VP_INST_VTEMP_WRITEMASK_MASK (0x0F <<24) +# define NV20_VP_INST_TEMP_WRITEMASK_X (1<<27) +# define NV20_VP_INST_TEMP_WRITEMASK_Y (1<<26) +# define NV20_VP_INST_TEMP_WRITEMASK_Z (1<<25) +# define NV20_VP_INST_TEMP_WRITEMASK_W (1<<24) +#define NV20_VP_INST_DEST_TEMP_ID_SHIFT 20 +#define NV20_VP_INST_DEST_TEMP_ID_MASK (0x0F <<20) +#define NV20_VP_INST_STEMP_WRITEMASK_SHIFT 16 +#define NV20_VP_INST_STEMP_WRITEMASK_MASK (0x0F <<16) +# define NV20_VP_INST_STEMP_WRITEMASK_X (1<<19) +# define NV20_VP_INST_STEMP_WRITEMASK_Y (1<<18) +# define NV20_VP_INST_STEMP_WRITEMASK_Z (1<<17) +# define NV20_VP_INST_STEMP_WRITEMASK_W (1<<16) +#define NV20_VP_INST_DEST_WRITEMASK_SHIFT 12 +#define NV20_VP_INST_DEST_WRITEMASK_MASK (0x0F <<12) +# define NV20_VP_INST_DEST_WRITEMASK_X (1<<15) +# define NV20_VP_INST_DEST_WRITEMASK_Y (1<<14) +# define NV20_VP_INST_DEST_WRITEMASK_Z (1<<13) +# define NV20_VP_INST_DEST_WRITEMASK_W (1<<12) +#define NV20_VP_INST_DEST_SHIFT 3 +#define NV20_VP_INST_DEST_MASK (0xF << 3) /* guess */ +#define NV20_VP_INST_DEST_POS 0 +#define NV20_VP_INST_DEST_COL0 3 +#define NV20_VP_INST_DEST_COL1 4 +#define NV20_VP_INST_DEST_TC(n) (9+n) +#define NV20_VP_INST_INDEX_CONST (1<<1) +#define NV20_VP_INST3_KNOWN ( \ + NV20_VP_INST_SRC2L_MASK | \ + NV20_VP_INST_TEMP_WRITEMASK_MASK | \ + NV20_VP_INST_DEST_TEMP_ID_MASK | \ + NV20_VP_INST_STEMP_WRITEMASK_MASK | \ + NV20_VP_INST_DEST_WRITEMASK_MASK | \ + NV20_VP_INST_DEST_MASK | \ + NV20_VP_INST_INDEX_CONST \ + ) + +/* Useful to split the source selection regs into their pieces */ +#define NV20_VP_SRC0_HIGH_SHIFT 6 +#define NV20_VP_SRC0_HIGH_MASK 0x00007FC0 +#define NV20_VP_SRC0_LOW_MASK 0x0000003F +#define NV20_VP_SRC2_HIGH_SHIFT 4 +#define NV20_VP_SRC2_HIGH_MASK 0x00007FF0 +#define NV20_VP_SRC2_LOW_MASK 0x0000000F + +#define NV20_VP_SRC_REG_NEGATE (1<<14) +#define NV20_VP_SRC_REG_SWZ_X_SHIFT 12 +#define NV20_VP_SRC_REG_SWZ_X_MASK (0x03 <<12) +#define NV20_VP_SRC_REG_SWZ_Y_SHIFT 10 +#define NV20_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10) +#define NV20_VP_SRC_REG_SWZ_Z_SHIFT 8 +#define NV20_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8) +#define NV20_VP_SRC_REG_SWZ_W_SHIFT 6 +#define NV20_VP_SRC_REG_SWZ_W_MASK (0x03 << 6) +#define NV20_VP_SRC_REG_SWZ_ALL_SHIFT 6 +#define NV20_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6) +#define NV20_VP_SRC_REG_TEMP_ID_SHIFT 2 +#define NV20_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0) +#define NV20_VP_SRC_REG_TYPE_SHIFT 0 +#define NV20_VP_SRC_REG_TYPE_MASK (0x03 << 0) +#define NV20_VP_SRC_REG_TYPE_TEMP 1 +#define NV20_VP_SRC_REG_TYPE_INPUT 2 +#define NV20_VP_SRC_REG_TYPE_CONST 3 /* guess */ + diff --git a/src/mesa/drivers/dri/nouveau/nv20_vertprog.c b/src/mesa/drivers/dri/nouveau/nv20_vertprog.c new file mode 100644 index 0000000000..60cfcd7056 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nv20_vertprog.c @@ -0,0 +1,447 @@ +#include "nouveau_context.h" +#include "nouveau_object.h" +#include "nouveau_fifo.h" +#include "nouveau_reg.h" + +#include "nouveau_shader.h" +#include "nv20_shader.h" + +unsigned int NVVP_TX_VOP_COUNT = 16; +unsigned int NVVP_TX_NVS_OP_COUNT = 16; +struct _op_xlat NVVP_TX_VOP[32]; +struct _op_xlat NVVP_TX_SOP[32]; + +nvsSwzComp NV20VP_TX_SWIZZLE[4] = { NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W }; + +/***************************************************************************** + * Support routines + */ +static void +NV20VPUploadToHW(GLcontext *ctx, nouveauShader *nvs) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + int i; + + /* XXX: missing a way to say what insn we're uploading from, and possible + * the program start position (if NV20 has one) */ + for (i=0; iprogram_size; i+=4) { + BEGIN_RING_SIZE(NvSub3D, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0, 4); + OUT_RING(nvs->program[i + 0]); + OUT_RING(nvs->program[i + 1]); + OUT_RING(nvs->program[i + 2]); + OUT_RING(nvs->program[i + 3]); + } +} + +static void +NV20VPUpdateConst(GLcontext *ctx, nouveauShader *nvs, int id) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + + /* Worth checking if the value *actually* changed? Mesa doesn't tell us this + * as far as I know.. + */ + BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_ID, 1); + OUT_RING (id); + BEGIN_RING_SIZE(NvSub3D, NV20_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_X, 4); + OUT_RINGf(nvs->params[id].source_val[0]); + OUT_RINGf(nvs->params[id].source_val[1]); + OUT_RINGf(nvs->params[id].source_val[2]); + OUT_RINGf(nvs->params[id].source_val[3]); +} + +/***************************************************************************** + * Assembly routines + */ + +/***************************************************************************** + * Disassembly routines + */ +void +NV20VPTXSwizzle(int hwswz, nvsSwzComp *swz) +{ + swz[NVS_SWZ_X] = NV20VP_TX_SWIZZLE[(hwswz & 0xC0) >> 6]; + swz[NVS_SWZ_Y] = NV20VP_TX_SWIZZLE[(hwswz & 0x30) >> 4]; + swz[NVS_SWZ_Z] = NV20VP_TX_SWIZZLE[(hwswz & 0x0C) >> 2]; + swz[NVS_SWZ_W] = NV20VP_TX_SWIZZLE[(hwswz & 0x03) >> 0]; +} + +static int +NV20VPHasMergedInst(nvsFunc * shader) +{ + if (shader->GetOpcodeHW(shader, 0) != NV20_VP_INST_OPCODE_NOP && + shader->GetOpcodeHW(shader, 1) != NV20_VP_INST_OPCODE_NOP) + printf + ("\n\n*****both opcode fields have values - PLEASE REPORT*****\n"); + return 0; +} + +static int +NV20VPIsLastInst(nvsFunc * shader) +{ + return ((shader->inst[3] & (1 << 0)) ? 1 : 0); +} + +static int +NV20VPGetOffsetNext(nvsFunc * shader) +{ + return 4; +} + +static struct _op_xlat * +NV20VPGetOPTXRec(nvsFunc * shader, int merged) +{ + struct _op_xlat *opr; + int op; + + if (shader->GetOpcodeSlot(shader, merged)) { + opr = NVVP_TX_SOP; + op = shader->GetOpcodeHW(shader, 1); + if (op >= NVVP_TX_NVS_OP_COUNT) + return NULL; + } + else { + opr = NVVP_TX_VOP; + op = shader->GetOpcodeHW(shader, 0); + if (op >= NVVP_TX_VOP_COUNT) + return NULL; + } + + if (opr[op].SOP == NVS_OP_UNKNOWN) + return NULL; + return &opr[op]; +} + +static struct _op_xlat * +NV20VPGetOPTXFromSOP(nvsOpcode sop, int *id) +{ + int i; + + for (i=0;iHasMergedInst(shader)) + return merged; + if (shader->GetOpcodeHW(shader, 0) == NV20_VP_INST_OPCODE_NOP) + return 1; + return 0; +} + +static nvsOpcode +NV20VPGetOpcode(nvsFunc * shader, int merged) +{ + struct _op_xlat *opr; + + opr = shader->GetOPTXRec(shader, merged); + if (!opr) + return NVS_OP_UNKNOWN; + + return opr->SOP; +} + +static nvsOpcode +NV20VPGetOpcodeHW(nvsFunc * shader, int slot) +{ + if (slot) + return (shader->inst[1] & NV20_VP_INST_SCA_OPCODE_MASK) + >> NV20_VP_INST_SCA_OPCODE_SHIFT; + return (shader->inst[1] & NV20_VP_INST_VEC_OPCODE_MASK) + >> NV20_VP_INST_VEC_OPCODE_SHIFT; +} + +static nvsRegFile +NV20VPGetDestFile(nvsFunc * shader, int merged) +{ + switch (shader->GetOpcode(shader, merged)) { + case NVS_OP_ARL: + return NVS_FILE_ADDRESS; + default: + /*FIXME: This probably isn't correct.. */ + if ((shader->inst[3] & NV20_VP_INST_DEST_WRITEMASK_MASK) == 0) + return NVS_FILE_TEMP; + return NVS_FILE_RESULT; + } +} + +static unsigned int +NV20VPGetDestID(nvsFunc * shader, int merged) +{ + int id; + + switch (shader->GetDestFile(shader, merged)) { + case NVS_FILE_RESULT: + id = ((shader->inst[3] & NV20_VP_INST_DEST_MASK) + >> NV20_VP_INST_DEST_SHIFT); + switch (id) { + case NV20_VP_INST_DEST_POS : return NVS_FR_POSITION; + case NV20_VP_INST_DEST_COL0 : return NVS_FR_COL0; + case NV20_VP_INST_DEST_COL1 : return NVS_FR_COL1; + case NV20_VP_INST_DEST_TC(0): return NVS_FR_TEXCOORD0; + case NV20_VP_INST_DEST_TC(1): return NVS_FR_TEXCOORD1; + case NV20_VP_INST_DEST_TC(2): return NVS_FR_TEXCOORD2; + case NV20_VP_INST_DEST_TC(3): return NVS_FR_TEXCOORD3; + default: + return -1; + } + case NVS_FILE_ADDRESS: + return 0; + case NVS_FILE_TEMP: + id = ((shader->inst[3] & NV20_VP_INST_DEST_TEMP_ID_MASK) + >> NV20_VP_INST_DEST_TEMP_ID_SHIFT); + return id; + default: + return -1; + } +} + +static unsigned int +NV20VPGetDestMask(nvsFunc * shader, int merged) +{ + int hwmask, mask = 0; + + /* Special handling for ARL - hardware only supports a + * 1-component address reg + */ + if (shader->GetOpcode(shader, merged) == NVS_OP_ARL) + return SMASK_X; + + if (shader->GetDestFile(shader, merged) == NVS_FILE_RESULT) + hwmask = (shader->inst[3] & NV20_VP_INST_DEST_WRITEMASK_MASK) + >> NV20_VP_INST_DEST_WRITEMASK_SHIFT; + else if (shader->GetOpcodeSlot(shader, merged)) + hwmask = (shader->inst[3] & NV20_VP_INST_STEMP_WRITEMASK_MASK) + >> NV20_VP_INST_STEMP_WRITEMASK_SHIFT; + else + hwmask = (shader->inst[3] & NV20_VP_INST_VTEMP_WRITEMASK_MASK) + >> NV20_VP_INST_VTEMP_WRITEMASK_SHIFT; + + if (hwmask & (1 << 3)) mask |= SMASK_X; + if (hwmask & (1 << 2)) mask |= SMASK_Y; + if (hwmask & (1 << 1)) mask |= SMASK_Z; + if (hwmask & (1 << 0)) mask |= SMASK_W; + + return mask; +} + +static unsigned int +NV20VPGetSourceHW(nvsFunc * shader, int merged, int pos) +{ + struct _op_xlat *opr; + unsigned int src; + + opr = shader->GetOPTXRec(shader, merged); + if (!opr) + return -1; + + switch (opr->srcpos[pos]) { + case 0: + src = ((shader->inst[1] & NV20_VP_INST_SRC0H_MASK) + >> NV20_VP_INST_SRC0H_SHIFT) + << NV20_VP_SRC0_HIGH_SHIFT; + src |= ((shader->inst[2] & NV20_VP_INST_SRC0L_MASK) + >> NV20_VP_INST_SRC0L_SHIFT); + break; + case 1: + src = ((shader->inst[2] & NV20_VP_INST_SRC1_MASK) + >> NV20_VP_INST_SRC1_SHIFT); + break; + case 2: + src = ((shader->inst[2] & NV20_VP_INST_SRC2H_MASK) + >> NV20_VP_INST_SRC2H_SHIFT) + << NV20_VP_SRC2_HIGH_SHIFT; + src |= ((shader->inst[3] & NV20_VP_INST_SRC2L_MASK) + >> NV20_VP_INST_SRC2L_SHIFT); + break; + default: + src = -1; + } + + return src; +} + +static nvsRegFile +NV20VPGetSourceFile(nvsFunc * shader, int merged, int pos) +{ + unsigned int src; + struct _op_xlat *opr; + int file; + + opr = shader->GetOPTXRec(shader, merged); + if (!opr || opr->srcpos[pos] == -1) + return -1; + + switch (opr->srcpos[pos]) { + case SPOS_ADDRESS: + return NVS_FILE_ADDRESS; + default: + src = NV20VPGetSourceHW(shader, merged, pos); + file = (src & NV20_VP_SRC_REG_TYPE_MASK) >> NV20_VP_SRC_REG_TYPE_SHIFT; + + switch (file) { + case NV20_VP_SRC_REG_TYPE_TEMP : return NVS_FILE_TEMP; + case NV20_VP_SRC_REG_TYPE_INPUT: return NVS_FILE_ATTRIB; + case NV20_VP_SRC_REG_TYPE_CONST: return NVS_FILE_CONST; + default: + return NVS_FILE_UNKNOWN; + } + } +} + +static int +NV20VPGetSourceID(nvsFunc * shader, int merged, int pos) +{ + unsigned int src; + + switch (shader->GetSourceFile(shader, merged, pos)) { + case NVS_FILE_TEMP: + src = shader->GetSourceHW(shader, merged, pos); + return ((src & NV20_VP_SRC_REG_TEMP_ID_MASK) >> + NV20_VP_SRC_REG_TEMP_ID_SHIFT); + case NVS_FILE_CONST: + return ((shader->inst[1] & NV20_VP_INST_CONST_SRC_MASK) + >> NV20_VP_INST_CONST_SRC_SHIFT); + case NVS_FILE_ATTRIB: + src = ((shader->inst[1] & NV20_VP_INST_INPUT_SRC_MASK) + >> NV20_VP_INST_INPUT_SRC_SHIFT); + switch (src) { + case NV20_VP_INST_INPUT_SRC_POS : return NVS_FR_POSITION; + case NV20_VP_INST_INPUT_SRC_COL0 : return NVS_FR_COL0; + case NV20_VP_INST_INPUT_SRC_COL1 : return NVS_FR_COL1; + case NV20_VP_INST_INPUT_SRC_TC(0): return NVS_FR_TEXCOORD0; + case NV20_VP_INST_INPUT_SRC_TC(1): return NVS_FR_TEXCOORD1; + case NV20_VP_INST_INPUT_SRC_TC(2): return NVS_FR_TEXCOORD2; + case NV20_VP_INST_INPUT_SRC_TC(3): return NVS_FR_TEXCOORD3; + default: + return NVS_FR_UNKNOWN; + } + default: + return -1; + } +} + +static int +NV20VPGetSourceNegate(nvsFunc * shader, int merged, int pos) +{ + unsigned int src; + + src = shader->GetSourceHW(shader, merged, pos); + + return ((src & NV20_VP_SRC_REG_NEGATE) ? 1 : 0); +} + +static int +NV20VPGetSourceAbs(nvsFunc * shader, int merged, int pos) +{ + /* NV20 can't do ABS on sources? Appears to be emulated with + * MAX reg, reg, -reg + */ + return 0; +} + +static void +NV20VPGetSourceSwizzle(nvsFunc * shader, int merged, int pos, nvsSwzComp *swz) +{ + unsigned int src; + int swzbits; + + src = shader->GetSourceHW(shader, merged, pos); + swzbits = + (src & NV20_VP_SRC_REG_SWZ_ALL_MASK) >> NV20_VP_SRC_REG_SWZ_ALL_SHIFT; + return NV20VPTXSwizzle(swzbits, swz); +} + +static int +NV20VPGetSourceIndexed(nvsFunc * shader, int merged, int pos) +{ + /* I don't think NV20 can index into attribs, at least no GL + * extension is exposed that will allow it. + */ + if (shader->GetSourceFile(shader, merged, pos) != NVS_FILE_CONST) + return 0; + if (shader->inst[3] & NV20_VP_INST_INDEX_CONST) + return 1; + return 0; +} + +static int +NV20VPGetAddressRegID(nvsFunc * shader) +{ + /* Only 1 address reg */ + return 0; +} + +static nvsSwzComp +NV20VPGetAddressRegSwizzle(nvsFunc * shader) +{ + /* Only A0.x available */ + return NVS_SWZ_X; +} + +void +NV20VPInitShaderFuncs(nvsFunc * shader) +{ + MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_NOP, NVS_OP_NOP, -1, -1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MOV, NVS_OP_MOV, 0, -1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MUL, NVS_OP_MUL, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_ADD, NVS_OP_ADD, 0, 2, -1); + MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MAD, NVS_OP_MAD, 0, 1, 2); + MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_DP3, NVS_OP_DP3, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_DPH, NVS_OP_DPH, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_DP4, NVS_OP_DP4, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_DST, NVS_OP_DST, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MIN, NVS_OP_MIN, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_MAX, NVS_OP_MAX, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_SLT, NVS_OP_SLT, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_SGE, NVS_OP_SGE, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV20_VP_INST_OPCODE_ARL, NVS_OP_ARL, 0, -1, -1); + + MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_NOP, NVS_OP_NOP, -1, -1, -1); + MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_RCP, NVS_OP_RCP, 2, -1, -1); + MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_RCC, NVS_OP_RCC, 2, -1, -1); + MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_RSQ, NVS_OP_RSQ, 2, -1, -1); + MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_EXP, NVS_OP_EXP, 2, -1, -1); + MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_LOG, NVS_OP_LOG, 2, -1, -1); + MOD_OPCODE(NVVP_TX_SOP, NV20_VP_INST_OPCODE_LIT, NVS_OP_LIT, 2, -1, -1); + + shader->UploadToHW = NV20VPUploadToHW; + shader->UpdateConst = NV20VPUpdateConst; + + shader->GetOPTXRec = NV20VPGetOPTXRec; + shader->GetOPTXFromSOP = NV20VPGetOPTXFromSOP; + + shader->HasMergedInst = NV20VPHasMergedInst; + shader->IsLastInst = NV20VPIsLastInst; + shader->GetOffsetNext = NV20VPGetOffsetNext; + shader->GetOpcodeSlot = NV20VPGetOpcodeSlot; + shader->GetOpcode = NV20VPGetOpcode; + shader->GetOpcodeHW = NV20VPGetOpcodeHW; + shader->GetDestFile = NV20VPGetDestFile; + shader->GetDestID = NV20VPGetDestID; + shader->GetDestMask = NV20VPGetDestMask; + shader->GetSourceHW = NV20VPGetSourceHW; + shader->GetSourceFile = NV20VPGetSourceFile; + shader->GetSourceID = NV20VPGetSourceID; + shader->GetSourceNegate = NV20VPGetSourceNegate; + shader->GetSourceAbs = NV20VPGetSourceAbs; + shader->GetSourceSwizzle = NV20VPGetSourceSwizzle; + shader->GetSourceIndexed = NV20VPGetSourceIndexed; + shader->GetRelAddressRegID = NV20VPGetAddressRegID; + shader->GetRelAddressSwizzle = NV20VPGetAddressRegSwizzle; +} diff --git a/src/mesa/drivers/dri/nouveau/nv30_fragprog.c b/src/mesa/drivers/dri/nouveau/nv30_fragprog.c new file mode 100644 index 0000000000..2e35d08c07 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nv30_fragprog.c @@ -0,0 +1,707 @@ +#include + +#include "glheader.h" +#include "macros.h" + +#include "nouveau_context.h" +#include "nouveau_fifo.h" +#include "nouveau_reg.h" +#include "nouveau_drm.h" +#include "nouveau_shader.h" +#include "nouveau_object.h" +#include "nouveau_msg.h" +#include "nv30_shader.h" + +unsigned int NVFP_TX_AOP_COUNT = 64; +struct _op_xlat NVFP_TX_AOP[64]; + +/******************************************************************************* + * Support routines + */ + +/*XXX: bad bad bad bad */ +static uint64_t fragprog_ofs; +static uint32_t *fragprog_buf = NULL; + +static void +NV30FPUploadToHW(GLcontext *ctx, nouveauShader *nvs) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + drm_nouveau_mem_alloc_t mem; + + if (!fragprog_buf) { + mem.flags = NOUVEAU_MEM_FB|NOUVEAU_MEM_MAPPED; + mem.size = nvs->program_size * sizeof(uint32_t); + mem.alignment = 0; + mem.region_offset = &fragprog_ofs; + if (drmCommandWriteRead(nmesa->driFd, DRM_NOUVEAU_MEM_ALLOC, &mem, + sizeof(mem))) { + fprintf(stderr, "MEM_ALLOC fail\n"); + return; + } + + if (drmMap(nmesa->driFd, fragprog_ofs, mem.size, &fragprog_buf)) { + fprintf(stderr, "MEM_MAP fail\n"); + return; + } + } + + /*XXX: should do a DMA.. and not copy over a possibly in-use program.. */ + /* not using state cache here, updated programs at the same address + * seem to not take effect unless ACTIVE_PROGRAM is called again. hw + * caches the program somewhere? so, maybe not so bad to just clobber the + * old program in vram.. + */ + memcpy(fragprog_buf, nvs->program, nvs->program_size * sizeof(uint32_t)); + BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_FP_ACTIVE_PROGRAM, 1); + OUT_RING(((uint32_t)fragprog_ofs-0xE0000000)|1); +} + +static void +NV30FPUpdateConst(GLcontext *ctx, nouveauShader *nvs, int id) +{ + uint32_t *current = nvs->program + nvs->params[id].hw_index; + uint32_t *new = nvs->params[id].source_val ? + nvs->params[id].source_val : nvs->params[id].val; + + COPY_4V(current, new); + nvs->on_hardware = 0; +} + +/******************************************************************************* + * Assembly helpers + */ +static struct _op_xlat * +NV30FPGetOPTXFromSOP(nvsOpcode op, int *id) +{ + int i; + + for (i=0; iGetOPTXFromSOP(op, NULL)) + return 1; + return 0; +} + +static void +NV30FPSetOpcode(nvsFunc *shader, unsigned int opcode, int slot) +{ + shader->inst[0] |= (opcode << NV30_FP_OP_OPCODE_SHIFT); +} + +static void +NV30FPSetCCUpdate(nvsFunc *shader) +{ + shader->inst[0] |= NV30_FP_OP_COND_WRITE_ENABLE; +} + +static void +NV30FPSetCondition(nvsFunc *shader, int on, nvsCond cond, int reg, + nvsSwzComp *swz) +{ + nvsSwzComp default_swz[4] = { NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W }; + unsigned int hwcond; + + /* cond masking is always enabled */ + if (!on) { + cond = NVS_COND_TR; + reg = 0; + swz = default_swz; + } + + switch (cond) { + case NVS_COND_TR: hwcond = NV30_FP_OP_COND_TR; break; + case NVS_COND_FL: hwcond = NV30_FP_OP_COND_FL; break; + case NVS_COND_LT: hwcond = NV30_FP_OP_COND_LT; break; + case NVS_COND_GT: hwcond = NV30_FP_OP_COND_GT; break; + case NVS_COND_LE: hwcond = NV30_FP_OP_COND_LE; break; + case NVS_COND_GE: hwcond = NV30_FP_OP_COND_GE; break; + case NVS_COND_EQ: hwcond = NV30_FP_OP_COND_EQ; break; + case NVS_COND_NE: hwcond = NV30_FP_OP_COND_NE; break; + default: + WARN_ONCE("unknown fp condmask=%d\n", cond); + hwcond = NV30_FP_OP_COND_TR; + break; + } + + shader->inst[1] |= (hwcond << NV30_FP_OP_COND_SHIFT); + shader->inst[1] |= (swz[NVS_SWZ_X] << NV30_FP_OP_COND_SWZ_X_SHIFT); + shader->inst[1] |= (swz[NVS_SWZ_Y] << NV30_FP_OP_COND_SWZ_Y_SHIFT); + shader->inst[1] |= (swz[NVS_SWZ_Z] << NV30_FP_OP_COND_SWZ_Z_SHIFT); + shader->inst[1] |= (swz[NVS_SWZ_W] << NV30_FP_OP_COND_SWZ_W_SHIFT); +} + +static void +NV30FPSetResult(nvsFunc *shader, nvsRegister *reg, unsigned int mask, int slot) +{ + unsigned int hwreg, hwmask = 0; + + if (mask & SMASK_X) shader->inst[0] |= NV30_FP_OP_OUT_X; + if (mask & SMASK_Y) shader->inst[0] |= NV30_FP_OP_OUT_Y; + if (mask & SMASK_Z) shader->inst[0] |= NV30_FP_OP_OUT_Z; + if (mask & SMASK_W) shader->inst[0] |= NV30_FP_OP_OUT_W; + + if (reg->file == NVS_FILE_RESULT) { + hwreg = 0; /* FIXME: this is only fragment.color */ + /* This is *not* correct, I have no idea what it is either */ + shader->inst[0] |= NV30_FP_OP_UNK0_7; + } else + hwreg = reg->index; + shader->inst[0] |= (hwreg << NV30_FP_OP_OUT_REG_SHIFT); +} + +static void +NV30FPSetSource(nvsFunc *shader, nvsRegister *reg, int pos) +{ + unsigned int hwsrc = 0; + + switch (reg->file) { + case NVS_FILE_TEMP: + hwsrc |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT); + hwsrc |= (reg->index << NV30_FP_REG_SRC_SHIFT); + break; + case NVS_FILE_ATTRIB: + { + unsigned int hwin; + + switch (reg->index) { + case NVS_FR_POSITION : hwin = NV30_FP_OP_INPUT_SRC_POSITION; break; + case NVS_FR_COL0 : hwin = NV30_FP_OP_INPUT_SRC_COL0; break; + case NVS_FR_COL1 : hwin = NV30_FP_OP_INPUT_SRC_COL1; break; + case NVS_FR_FOGCOORD : hwin = NV30_FP_OP_INPUT_SRC_FOGC; break; + case NVS_FR_TEXCOORD0: hwin = NV30_FP_OP_INPUT_SRC_TC(0); break; + case NVS_FR_TEXCOORD1: hwin = NV30_FP_OP_INPUT_SRC_TC(1); break; + case NVS_FR_TEXCOORD2: hwin = NV30_FP_OP_INPUT_SRC_TC(2); break; + case NVS_FR_TEXCOORD3: hwin = NV30_FP_OP_INPUT_SRC_TC(3); break; + case NVS_FR_TEXCOORD4: hwin = NV30_FP_OP_INPUT_SRC_TC(4); break; + case NVS_FR_TEXCOORD5: hwin = NV30_FP_OP_INPUT_SRC_TC(5); break; + case NVS_FR_TEXCOORD6: hwin = NV30_FP_OP_INPUT_SRC_TC(6); break; + case NVS_FR_TEXCOORD7: hwin = NV30_FP_OP_INPUT_SRC_TC(7); break; + default: + WARN_ONCE("unknown fp input %d\n", reg->index); + hwin = NV30_FP_OP_INPUT_SRC_COL0; + break; + } + shader->inst[0] |= (hwin << NV30_FP_OP_INPUT_SRC_SHIFT); + hwsrc |= (hwin << NV30_FP_REG_SRC_SHIFT); + } + hwsrc |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); + break; + case NVS_FILE_CONST: + /* consts are inlined after the inst */ + hwsrc |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); + break; + default: + assert(0); + break; + } + + if (reg->negate) + hwsrc |= NV30_FP_REG_NEGATE; + if (reg->abs) + shader->inst[1] |= (1 << (29+pos)); + hwsrc |= (reg->swizzle[NVS_SWZ_X] << NV30_FP_REG_SWZ_X_SHIFT); + hwsrc |= (reg->swizzle[NVS_SWZ_Y] << NV30_FP_REG_SWZ_Y_SHIFT); + hwsrc |= (reg->swizzle[NVS_SWZ_Z] << NV30_FP_REG_SWZ_Z_SHIFT); + hwsrc |= (reg->swizzle[NVS_SWZ_W] << NV30_FP_REG_SWZ_W_SHIFT); + + shader->inst[pos+1] |= hwsrc; +} + +static void +NV30FPSetUnusedSource(nvsFunc *shader, int pos) +{ + shader->inst[pos+1] |= ( + (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT) | + (NVS_SWZ_X << NV30_FP_REG_SWZ_X_SHIFT) | + (NVS_SWZ_Y << NV30_FP_REG_SWZ_Y_SHIFT) | + (NVS_SWZ_Z << NV30_FP_REG_SWZ_Z_SHIFT) | + (NVS_SWZ_W << NV30_FP_REG_SWZ_W_SHIFT) + ); +} + +static void +NV30FPSetTexImageUnit(nvsFunc *shader, int unit) +{ + shader->inst[0] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT); +} + +static void +NV30FPSetSaturate(nvsFunc *shader) +{ + shader->inst[0] |= NV30_FP_OP_OUT_SAT; +} + +static void +NV30FPSetLastInst(nvsFunc *shader) +{ + shader->inst[0] |= 1; + +} + +/******************************************************************************* + * Disassembly helpers + */ +static struct _op_xlat * +NV30FPGetOPTXRec(nvsFunc * shader, int merged) +{ + int op; + + op = shader->GetOpcodeHW(shader, 0); + if (op > NVFP_TX_AOP_COUNT) + return NULL; + if (NVFP_TX_AOP[op].SOP == NVS_OP_UNKNOWN) + return NULL; + return &NVFP_TX_AOP[op]; +} + +static int +NV30FPHasMergedInst(nvsFunc * shader) +{ + return 0; +} + +static int +NV30FPIsLastInst(nvsFunc * shader) +{ + return ((shader->inst[0] & NV30_FP_OP_PROGRAM_END) ? 1 : 0); +} + +static int +NV30FPGetOffsetNext(nvsFunc * shader) +{ + int i; + + for (i = 0; i < 3; i++) + if (shader->GetSourceFile(shader, 0, i) == NVS_FILE_CONST) + return 8; + return 4; +} + +static nvsOpcode +NV30FPGetOpcode(nvsFunc * shader, int merged) +{ + struct _op_xlat *opr; + + opr = shader->GetOPTXRec(shader, merged); + if (!opr) + return NVS_OP_UNKNOWN; + + return opr->SOP; +} + +static unsigned int +NV30FPGetOpcodeHW(nvsFunc * shader, int slot) +{ + int op; + + op = (shader->inst[0] & NV30_FP_OP_OPCODE_MASK) >> NV30_FP_OP_OPCODE_SHIFT; + + return op; +} + +static nvsRegFile +NV30FPGetDestFile(nvsFunc * shader, int merged) +{ + /* Result regs overlap temporary regs */ + return NVS_FILE_TEMP; +} + +static unsigned int +NV30FPGetDestID(nvsFunc * shader, int merged) +{ + int id; + + switch (shader->GetDestFile(shader, merged)) { + case NVS_FILE_TEMP: + id = ((shader->inst[0] & NV30_FP_OP_OUT_REG_MASK) + >> NV30_FP_OP_OUT_REG_SHIFT); + return id; + default: + return -1; + } +} + +static unsigned int +NV30FPGetDestMask(nvsFunc * shader, int merged) +{ + unsigned int mask = 0; + + if (shader->inst[0] & NV30_FP_OP_OUT_X) mask |= SMASK_X; + if (shader->inst[0] & NV30_FP_OP_OUT_Y) mask |= SMASK_Y; + if (shader->inst[0] & NV30_FP_OP_OUT_Z) mask |= SMASK_Z; + if (shader->inst[0] & NV30_FP_OP_OUT_W) mask |= SMASK_W; + + return mask; +} + +static unsigned int +NV30FPGetSourceHW(nvsFunc * shader, int merged, int pos) +{ + struct _op_xlat *opr; + + opr = shader->GetOPTXRec(shader, merged); + if (!opr || opr->srcpos[pos] == -1) + return -1; + + return shader->inst[opr->srcpos[pos] + 1]; +} + +static nvsRegFile +NV30FPGetSourceFile(nvsFunc * shader, int merged, int pos) +{ + unsigned int src; + struct _op_xlat *opr; + int file; + + opr = shader->GetOPTXRec(shader, merged); + if (!opr || opr->srcpos[pos] == -1) + return NVS_FILE_UNKNOWN; + + switch (opr->srcpos[pos]) { + case SPOS_ADDRESS: return NVS_FILE_ADDRESS; + default: + src = shader->GetSourceHW(shader, merged, pos); + file = (src & NV30_FP_REG_TYPE_MASK) >> NV30_FP_REG_TYPE_SHIFT; + + switch (file) { + case NV30_FP_REG_TYPE_TEMP : return NVS_FILE_TEMP; + case NV30_FP_REG_TYPE_INPUT: return NVS_FILE_ATTRIB; + case NV30_FP_REG_TYPE_CONST: return NVS_FILE_CONST; + default: + return NVS_FILE_UNKNOWN; + } + } +} + +static int +NV30FPGetSourceID(nvsFunc * shader, int merged, int pos) +{ + switch (shader->GetSourceFile(shader, merged, pos)) { + case NVS_FILE_ATTRIB: + switch ((shader->inst[0] & NV30_FP_OP_INPUT_SRC_MASK) + >> NV30_FP_OP_INPUT_SRC_SHIFT) { + case NV30_FP_OP_INPUT_SRC_POSITION: return NVS_FR_POSITION; + case NV30_FP_OP_INPUT_SRC_COL0 : return NVS_FR_COL0; + case NV30_FP_OP_INPUT_SRC_COL1 : return NVS_FR_COL1; + case NV30_FP_OP_INPUT_SRC_FOGC : return NVS_FR_FOGCOORD; + case NV30_FP_OP_INPUT_SRC_TC(0) : return NVS_FR_TEXCOORD0; + case NV30_FP_OP_INPUT_SRC_TC(1) : return NVS_FR_TEXCOORD1; + case NV30_FP_OP_INPUT_SRC_TC(2) : return NVS_FR_TEXCOORD2; + case NV30_FP_OP_INPUT_SRC_TC(3) : return NVS_FR_TEXCOORD3; + case NV30_FP_OP_INPUT_SRC_TC(4) : return NVS_FR_TEXCOORD4; + case NV30_FP_OP_INPUT_SRC_TC(5) : return NVS_FR_TEXCOORD5; + case NV30_FP_OP_INPUT_SRC_TC(6) : return NVS_FR_TEXCOORD6; + case NV30_FP_OP_INPUT_SRC_TC(7) : return NVS_FR_TEXCOORD7; + default: + return -1; + } + break; + case NVS_FILE_TEMP: + { + unsigned int src; + + src = shader->GetSourceHW(shader, merged, pos); + return ((src & NV30_FP_REG_SRC_MASK) >> NV30_FP_REG_SRC_SHIFT); + } + case NVS_FILE_CONST: /* inlined into fragprog */ + default: + return -1; + } +} + +static int +NV30FPGetTexImageUnit(nvsFunc *shader) +{ + return ((shader->inst[0] & NV30_FP_OP_TEX_UNIT_MASK) + >> NV30_FP_OP_TEX_UNIT_SHIFT); +} + +static int +NV30FPGetSourceNegate(nvsFunc * shader, int merged, int pos) +{ + unsigned int src; + + src = shader->GetSourceHW(shader, merged, pos); + + if (src == -1) + return -1; + return ((src & NV30_FP_REG_NEGATE) ? 1 : 0); +} + +static int +NV30FPGetSourceAbs(nvsFunc * shader, int merged, int pos) +{ + struct _op_xlat *opr; + static unsigned int abspos[3] = { + NV30_FP_OP_OUT_ABS, + (1 << 30), /* guess */ + (1 << 31) /* guess */ + }; + + opr = shader->GetOPTXRec(shader, merged); + if (!opr || opr->srcpos[pos] == -1) + return -1; + + return ((shader->inst[1] & abspos[opr->srcpos[pos]]) ? 1 : 0); +} + +nvsSwzComp NV30FP_TX_SWIZZLE[4] = {NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W }; + +static void +NV30FPTXSwizzle(int hwswz, nvsSwzComp *swz) +{ + swz[NVS_SWZ_W] = NV30FP_TX_SWIZZLE[(hwswz & 0xC0) >> 6]; + swz[NVS_SWZ_Z] = NV30FP_TX_SWIZZLE[(hwswz & 0x30) >> 4]; + swz[NVS_SWZ_Y] = NV30FP_TX_SWIZZLE[(hwswz & 0x0C) >> 2]; + swz[NVS_SWZ_X] = NV30FP_TX_SWIZZLE[(hwswz & 0x03) >> 0]; +} + +static void +NV30FPGetSourceSwizzle(nvsFunc * shader, int merged, int pos, nvsSwzComp *swz) +{ + unsigned int src; + int swzbits; + + src = shader->GetSourceHW(shader, merged, pos); + swzbits = (src & NV30_FP_REG_SWZ_ALL_MASK) >> NV30_FP_REG_SWZ_ALL_SHIFT; + NV30FPTXSwizzle(swzbits, swz); +} + +static int +NV30FPGetSourceIndexed(nvsFunc * shader, int merged, int pos) +{ + switch (shader->GetSourceFile(shader, merged, pos)) { + case NVS_FILE_ATTRIB: + return ((shader->inst[3] & NV30_FP_OP_INDEX_INPUT) ? 1 : 0); + default: + return 0; + } +} + +static void +NV30FPGetSourceConstVal(nvsFunc * shader, int merged, int pos, float *val) +{ + val[0] = *(float *) &(shader->inst[4]); + val[1] = *(float *) &(shader->inst[5]); + val[2] = *(float *) &(shader->inst[6]); + val[3] = *(float *) &(shader->inst[7]); +} + +static int +NV30FPGetSourceScale(nvsFunc * shader, int merged, int pos) +{ +/*FIXME: is this per-source, only for a specific source, or all sources??*/ + return (1 << ((shader->inst[2] & NV30_FP_OP_SRC_SCALE_MASK) + >> NV30_FP_OP_SRC_SCALE_SHIFT)); +} + +static int +NV30FPGetAddressRegID(nvsFunc * shader) +{ + return 0; +} + +static nvsSwzComp +NV30FPGetAddressRegSwizzle(nvsFunc * shader) +{ + return NVS_SWZ_X; +} + +static int +NV30FPSupportsConditional(nvsFunc * shader) +{ + /*FIXME: Is this true of all ops? */ + return 1; +} + +static int +NV30FPGetConditionUpdate(nvsFunc * shader) +{ + return ((shader->inst[0] & NV30_FP_OP_COND_WRITE_ENABLE) ? 1 : 0); +} + +static int +NV30FPGetConditionTest(nvsFunc * shader) +{ + /*FIXME: always? */ + return 1; +} + +static nvsCond +NV30FPGetCondition(nvsFunc * shader) +{ + int cond; + + cond = ((shader->inst[1] & NV30_FP_OP_COND_MASK) + >> NV30_FP_OP_COND_SHIFT); + + switch (cond) { + case NV30_FP_OP_COND_FL: return NVS_COND_FL; + case NV30_FP_OP_COND_LT: return NVS_COND_LT; + case NV30_FP_OP_COND_EQ: return NVS_COND_EQ; + case NV30_FP_OP_COND_LE: return NVS_COND_LE; + case NV30_FP_OP_COND_GT: return NVS_COND_GT; + case NV30_FP_OP_COND_NE: return NVS_COND_NE; + case NV30_FP_OP_COND_GE: return NVS_COND_GE; + case NV30_FP_OP_COND_TR: return NVS_COND_TR; + default: + return NVS_COND_UNKNOWN; + } +} + +static void +NV30FPGetCondRegSwizzle(nvsFunc * shader, nvsSwzComp *swz) +{ + int swzbits; + + swzbits = (shader->inst[1] & NV30_FP_OP_COND_SWZ_ALL_MASK) + >> NV30_FP_OP_COND_SWZ_ALL_SHIFT; + NV30FPTXSwizzle(swzbits, swz); +} + +static int +NV30FPGetCondRegID(nvsFunc * shader) +{ + return 0; +} + +static nvsPrecision +NV30FPGetPrecision(nvsFunc * shader) +{ + int p; + + p = (shader->inst[0] & NV30_FP_OP_PRECISION_MASK) + >> NV30_FP_OP_PRECISION_SHIFT; + + switch (p) { + case NV30_FP_PRECISION_FP32: return NVS_PREC_FLOAT32; + case NV30_FP_PRECISION_FP16: return NVS_PREC_FLOAT16; + case NV30_FP_PRECISION_FX12: return NVS_PREC_FIXED12; + default: + return NVS_PREC_UNKNOWN; + } +} + +static int +NV30FPGetSaturate(nvsFunc * shader) +{ + return ((shader->inst[0] & NV30_FP_OP_OUT_SAT) ? 1 : 0); +} + +/******************************************************************************* + * Init + */ +void +NV30FPInitShaderFuncs(nvsFunc * shader) +{ + /* These are probably bogus, I made them up... */ + shader->MaxInst = 1024; + shader->MaxAttrib = 16; + shader->MaxTemp = 32; + shader->MaxAddress = 1; + shader->MaxConst = 256; + shader->caps = SCAP_SRC_ABS; + + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_MOV, NVS_OP_MOV, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_MUL, NVS_OP_MUL, 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_ADD, NVS_OP_ADD, 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_MAD, NVS_OP_MAD, 0, 1, 2); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_DP3, NVS_OP_DP3, 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_DP4, NVS_OP_DP4, 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_DST, NVS_OP_DST, 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_MIN, NVS_OP_MIN, 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_MAX, NVS_OP_MAX, 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_SLT, NVS_OP_SLT, 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_SGE, NVS_OP_SGE, 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_FRC, NVS_OP_FRC, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_FLR, NVS_OP_FLR, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_TEX, NVS_OP_TEX, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_TXD, NVS_OP_TXD, 0, 1, 2); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_TXP, NVS_OP_TXP, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_TXB, NVS_OP_TXB, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_SEQ, NVS_OP_SEQ, 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_SGT, NVS_OP_SGT, 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_SLE, NVS_OP_SLE, 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_SNE, NVS_OP_SNE, 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_RCP, NVS_OP_RCP, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_LG2, NVS_OP_LG2, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_EX2, NVS_OP_EX2, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_COS, NVS_OP_COS, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_SIN, NVS_OP_SIN, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_NOP, NVS_OP_NOP, -1, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_DDX, NVS_OP_DDX, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_DDY, NVS_OP_DDY, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_KIL, NVS_OP_KIL, -1, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_PK4B, NVS_OP_PK4B, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_UP4B, NVS_OP_UP4B, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_PK2H, NVS_OP_PK2H, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_UP2H, NVS_OP_UP2H, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_PK4UB, NVS_OP_PK4UB, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_UP4UB, NVS_OP_UP4UB, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_PK2US, NVS_OP_PK2US, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_UP2US, NVS_OP_UP2US, 0, -1, -1); + /*FIXME: Haven't confirmed the source positions for the below opcodes */ + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_LIT, NVS_OP_LIT, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_LRP, NVS_OP_LRP, 0, 1, 2); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_POW, NVS_OP_POW, 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_RSQ, NVS_OP_RSQ, 0, -1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV30_FP_OP_OPCODE_RFL, NVS_OP_RFL, 0, 1, -1); + + shader->GetOPTXRec = NV30FPGetOPTXRec; + shader->GetOPTXFromSOP = NV30FPGetOPTXFromSOP; + + shader->UploadToHW = NV30FPUploadToHW; + shader->UpdateConst = NV30FPUpdateConst; + + shader->SupportsOpcode = NV30FPSupportsOpcode; + shader->SetOpcode = NV30FPSetOpcode; + shader->SetCCUpdate = NV30FPSetCCUpdate; + shader->SetCondition = NV30FPSetCondition; + shader->SetResult = NV30FPSetResult; + shader->SetSource = NV30FPSetSource; + shader->SetUnusedSource = NV30FPSetUnusedSource; + shader->SetTexImageUnit = NV30FPSetTexImageUnit; + shader->SetSaturate = NV30FPSetSaturate; + shader->SetLastInst = NV30FPSetLastInst; + + shader->HasMergedInst = NV30FPHasMergedInst; + shader->IsLastInst = NV30FPIsLastInst; + shader->GetOffsetNext = NV30FPGetOffsetNext; + shader->GetOpcode = NV30FPGetOpcode; + shader->GetOpcodeHW = NV30FPGetOpcodeHW; + shader->GetDestFile = NV30FPGetDestFile; + shader->GetDestID = NV30FPGetDestID; + shader->GetDestMask = NV30FPGetDestMask; + shader->GetSourceHW = NV30FPGetSourceHW; + shader->GetSourceFile = NV30FPGetSourceFile; + shader->GetSourceID = NV30FPGetSourceID; + shader->GetTexImageUnit = NV30FPGetTexImageUnit; + shader->GetSourceNegate = NV30FPGetSourceNegate; + shader->GetSourceAbs = NV30FPGetSourceAbs; + shader->GetSourceSwizzle = NV30FPGetSourceSwizzle; + shader->GetSourceIndexed = NV30FPGetSourceIndexed; + shader->GetSourceConstVal = NV30FPGetSourceConstVal; + shader->GetSourceScale = NV30FPGetSourceScale; + shader->GetRelAddressRegID = NV30FPGetAddressRegID; + shader->GetRelAddressSwizzle = NV30FPGetAddressRegSwizzle; + shader->GetPrecision = NV30FPGetPrecision; + shader->GetSaturate = NV30FPGetSaturate; + shader->SupportsConditional = NV30FPSupportsConditional; + shader->GetConditionUpdate = NV30FPGetConditionUpdate; + shader->GetConditionTest = NV30FPGetConditionTest; + shader->GetCondition = NV30FPGetCondition; + shader->GetCondRegSwizzle = NV30FPGetCondRegSwizzle; + shader->GetCondRegID = NV30FPGetCondRegID; +} diff --git a/src/mesa/drivers/dri/nouveau/nv30_shader.h b/src/mesa/drivers/dri/nouveau/nv30_shader.h new file mode 100644 index 0000000000..d0bf639930 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nv30_shader.h @@ -0,0 +1,378 @@ +#ifndef __NV30_SHADER_H__ +#define __NV30_SHADER_H__ + +/* Vertex programs instruction set + * + * 128bit opcodes, split into 4 32-bit ones for ease of use. + * + * Non-native instructions + * ABS - MOV + NV40_VP_INST0_DEST_ABS + * POW - EX2 + MUL + LG2 + * SUB - ADD, second source negated + * SWZ - MOV + * XPD - + * + * Register access + * - Only one INPUT can be accessed per-instruction (move extras into TEMPs) + * - Only one CONST can be accessed per-instruction (move extras into TEMPs) + * + * Relative Addressing + * According to the value returned for MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB + * there are only two address registers available. The destination in the ARL + * instruction is set to TEMP (The temp isn't actually written). + * + * When using vanilla ARB_v_p, the proprietary driver will squish both the available + * ADDRESS regs into the first hardware reg in the X and Y components. + * + * To use an address reg as an index into consts, the CONST_SRC is set to + * (const_base + offset) and INDEX_CONST is set. + * + * To access the second address reg use ADDR_REG_SELECT_1. A particular component + * of the address regs is selected with ADDR_SWZ. + * + * Only one address register can be accessed per instruction. + * + * Conditional execution (see NV_vertex_program{2,3} for details) + * Conditional execution of an instruction is enabled by setting COND_TEST_ENABLE, and + * selecting the condition which will allow the test to pass with COND_{FL,LT,...}. + * It is possible to swizzle the values in the condition register, which allows for + * testing against an individual component. + * + * Branching + * The BRA/CAL instructions seem to follow a slightly different opcode layout. The + * destination instruction ID (IADDR) overlaps a source field. Instruction ID's seem to + * be numbered based on the UPLOAD_FROM_ID FIFO command, and is incremented automatically + * on each UPLOAD_INST FIFO command. + * + * Conditional branching is achieved by using the condition tests described above. + * There doesn't appear to be dedicated looping instructions, but this can be done + * using a temp reg + conditional branching. + * + * Subroutines may be uploaded before the main program itself, but the first executed + * instruction is determined by the PROGRAM_START_ID FIFO command. + * + */ + +/* DWORD 0 */ +#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */ +#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */ +#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */ +#define NV30_VP_INST_OUT_RESULT (1 << 20) +#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16 +#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16) +#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15) +#define NV30_VP_INST_COND_TEST_ENABLE (1<<14) +#define NV30_VP_INST_COND_SHIFT 11 +#define NV30_VP_INST_COND_MASK (0x07 << 11) +# define NV30_VP_INST_COND_FL 0 /* guess */ +# define NV30_VP_INST_COND_LT 1 +# define NV30_VP_INST_COND_EQ 2 +# define NV30_VP_INST_COND_LE 3 +# define NV30_VP_INST_COND_GT 4 +# define NV30_VP_INST_COND_NE 5 +# define NV30_VP_INST_COND_GE 6 +# define NV30_VP_INST_COND_TR 7 /* guess */ +#define NV30_VP_INST_COND_SWZ_X_SHIFT 9 +#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9) +#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7 +#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7) +#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5 +#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5) +#define NV30_VP_INST_COND_SWZ_W_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3) +#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3) +#define NV30_VP_INST_ADDR_SWZ_SHIFT 1 +#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1) +#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0 +#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0) + +/* DWORD 1 */ +#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28 +#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28) +# define NV30_VP_INST_OP_NOP 0x00 +# define NV30_VP_INST_OP_RCP 0x02 +# define NV30_VP_INST_OP_RCC 0x03 +# define NV30_VP_INST_OP_RSQ 0x04 +# define NV30_VP_INST_OP_EXP 0x05 +# define NV30_VP_INST_OP_LOG 0x06 +# define NV30_VP_INST_OP_LIT 0x07 +# define NV30_VP_INST_OP_BRA 0x09 +# define NV30_VP_INST_OP_CAL 0x0B +# define NV30_VP_INST_OP_RET 0x0C +# define NV30_VP_INST_OP_LG2 0x0D +# define NV30_VP_INST_OP_EX2 0x0E +# define NV30_VP_INST_OP_SIN 0x0F +# define NV30_VP_INST_OP_COS 0x10 +#define NV30_VP_INST_VEC_OPCODE_SHIFT 23 +#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23) +# define NV30_VP_INST_OP_NOPV 0x00 +# define NV30_VP_INST_OP_MOV 0x01 +# define NV30_VP_INST_OP_MUL 0x02 +# define NV30_VP_INST_OP_ADD 0x03 +# define NV30_VP_INST_OP_MAD 0x04 +# define NV30_VP_INST_OP_DP3 0x05 +# define NV30_VP_INST_OP_DP4 0x07 +# define NV30_VP_INST_OP_DPH 0x06 +# define NV30_VP_INST_OP_DST 0x08 +# define NV30_VP_INST_OP_MIN 0x09 +# define NV30_VP_INST_OP_MAX 0x0A +# define NV30_VP_INST_OP_SLT 0x0B +# define NV30_VP_INST_OP_SGE 0x0C +# define NV30_VP_INST_OP_ARL 0x0D +# define NV30_VP_INST_OP_FRC 0x0E +# define NV30_VP_INST_OP_FLR 0x0F +# define NV30_VP_INST_OP_SEQ 0x10 +# define NV30_VP_INST_OP_SFL 0x11 +# define NV30_VP_INST_OP_SGT 0x12 +# define NV30_VP_INST_OP_SLE 0x13 +# define NV30_VP_INST_OP_SNE 0x14 +# define NV30_VP_INST_OP_STR 0x15 +# define NV30_VP_INST_OP_SSG 0x16 +# define NV30_VP_INST_OP_ARR 0x17 +# define NV30_VP_INST_OP_ARA 0x18 +#define NV30_VP_INST_CONST_SRC_SHIFT 14 +#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14) +#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/ +#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/ +# define NV30_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */ +# define NV30_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */ +# define NV30_VP_INST_IN_NORMAL 2 +# define NV30_VP_INST_IN_COL0 3 /* Should probably confirm them all though */ +# define NV30_VP_INST_IN_COL1 4 +# define NV30_VP_INST_IN_FOGC 5 +# define NV30_VP_INST_IN_TC0 8 +# define NV30_VP_INST_IN_TC(n) (8+n) +#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/ + +/* DWORD 2 */ +#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/ +#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /*NV20*/ +#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/ +#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/ +#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /*NV20*/ +#define NV30_VP_INST_IADDR_SHIFT 2 +#define NV30_VP_INST_IADDR_MASK (0xFF << 2) /* guess */ + +/* DWORD 3 */ +#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/ +#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/ +#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24 +#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24) +#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20 +#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20) +#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16 +#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16) +#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/ +#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/ +#define NV30_VP_INST_DEST_ID_SHIFT 2 +#define NV30_VP_INST_DEST_ID_MASK (0x0F << 2) +# define NV30_VP_INST_DEST_POS 0 +# define NV30_VP_INST_DEST_COL0 3 +# define NV30_VP_INST_DEST_COL1 4 +# define NV30_VP_INST_DEST_TC(n) (8+n) + +/* Source-register definition - matches NV20 exactly */ +#define NV30_VP_SRC_REG_NEGATE (1<<14) +#define NV30_VP_SRC_REG_SWZ_X_SHIFT 12 +#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12) +#define NV30_VP_SRC_REG_SWZ_Y_SHIFT 10 +#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10) +#define NV30_VP_SRC_REG_SWZ_Z_SHIFT 8 +#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8) +#define NV30_VP_SRC_REG_SWZ_W_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6) +#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6) +#define NV30_VP_SRC_REG_TEMP_ID_SHIFT 2 +#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0) +#define NV30_VP_SRC_REG_TYPE_SHIFT 0 +#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0) +#define NV30_VP_SRC_REG_TYPE_TEMP 1 +#define NV30_VP_SRC_REG_TYPE_INPUT 2 +#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */ + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + * 0 - Opcode, output reg/mask, ATTRIB source + * 1 - Source 0 + * 2 - Source 1 + * 3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + * result.color == R0.xyzw + * result.depth == R1.z + * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 + * otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + * + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords. As such instructions such as: + * + * ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO + * is implemented simply by not writing to the relevant components of the destination. + * + * Conditional execution + * TODO + * + * Non-native instructions: + * LIT + * LRP - MAD+MAD + * SUB - ADD, negate second source + * RSQ - LG2 + EX2 + * POW - LG2 + MUL + EX2 + * SCS - COS + SIN + * XPD + */ + +//== Opcode / Destination selection == +#define NV30_FP_OP_PROGRAM_END (1 << 0) +#define NV30_FP_OP_OUT_REG_SHIFT 1 +#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */ +/* Needs to be set when writing outputs to get expected result.. */ +#define NV30_FP_OP_UNK0_7 (1 << 7) +#define NV30_FP_OP_COND_WRITE_ENABLE (1 << 8) +#define NV30_FP_OP_OUTMASK_SHIFT 9 +#define NV30_FP_OP_OUTMASK_MASK (0xF << 9) +# define NV30_FP_OP_OUT_X (1<<9) +# define NV30_FP_OP_OUT_Y (1<<10) +# define NV30_FP_OP_OUT_Z (1<<11) +# define NV30_FP_OP_OUT_W (1<<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. + */ +#define NV30_FP_OP_INPUT_SRC_SHIFT 13 +#define NV30_FP_OP_INPUT_SRC_MASK (15 << 13) +# define NV30_FP_OP_INPUT_SRC_POSITION 0x0 +# define NV30_FP_OP_INPUT_SRC_COL0 0x1 +# define NV30_FP_OP_INPUT_SRC_COL1 0x2 +# define NV30_FP_OP_INPUT_SRC_FOGC 0x3 +# define NV30_FP_OP_INPUT_SRC_TC0 0x4 +# define NV30_FP_OP_INPUT_SRC_TC(n) (0x4 + n) +#define NV30_FP_OP_TEX_UNIT_SHIFT 17 +#define NV30_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */ +#define NV30_FP_OP_PRECISION_SHIFT 22 +#define NV30_FP_OP_PRECISION_MASK (3 << 22) +# define NV30_FP_PRECISION_FP32 0 +# define NV30_FP_PRECISION_FP16 1 +# define NV30_FP_PRECISION_FX12 2 +#define NV30_FP_OP_OPCODE_SHIFT 24 +#define NV30_FP_OP_OPCODE_MASK (0x3F << 24) +# define NV30_FP_OP_OPCODE_NOP 0x00 +# define NV30_FP_OP_OPCODE_MOV 0x01 +# define NV30_FP_OP_OPCODE_MUL 0x02 +# define NV30_FP_OP_OPCODE_ADD 0x03 +# define NV30_FP_OP_OPCODE_MAD 0x04 +# define NV30_FP_OP_OPCODE_DP3 0x05 +# define NV30_FP_OP_OPCODE_DP4 0x06 +# define NV30_FP_OP_OPCODE_DST 0x07 +# define NV30_FP_OP_OPCODE_MIN 0x08 +# define NV30_FP_OP_OPCODE_MAX 0x09 +# define NV30_FP_OP_OPCODE_SLT 0x0A +# define NV30_FP_OP_OPCODE_SGE 0x0B +# define NV30_FP_OP_OPCODE_SLE 0x0C +# define NV30_FP_OP_OPCODE_SGT 0x0D +# define NV30_FP_OP_OPCODE_SNE 0x0E +# define NV30_FP_OP_OPCODE_SEQ 0x0F +# define NV30_FP_OP_OPCODE_FRC 0x10 +# define NV30_FP_OP_OPCODE_FLR 0x11 +# define NV30_FP_OP_OPCODE_KIL 0x12 +# define NV30_FP_OP_OPCODE_PK4B 0x13 +# define NV30_FP_OP_OPCODE_UP4B 0x14 +# define NV30_FP_OP_OPCODE_DDX 0x15 /* can only write XY */ +# define NV30_FP_OP_OPCODE_DDY 0x16 /* can only write XY */ +# define NV30_FP_OP_OPCODE_TEX 0x17 +# define NV30_FP_OP_OPCODE_TXP 0x18 +# define NV30_FP_OP_OPCODE_TXD 0x19 +# define NV30_FP_OP_OPCODE_RCP 0x1A +# define NV30_FP_OP_OPCODE_RSQ 0x1B +# define NV30_FP_OP_OPCODE_EX2 0x1C +# define NV30_FP_OP_OPCODE_LG2 0x1D +# define NV30_FP_OP_OPCODE_LIT 0x1E +# define NV30_FP_OP_OPCODE_LRP 0x1F +# define NV30_FP_OP_OPCODE_COS 0x22 +# define NV30_FP_OP_OPCODE_SIN 0x23 +# define NV30_FP_OP_OPCODE_PK2H 0x24 +# define NV30_FP_OP_OPCODE_UP2H 0x25 +# define NV30_FP_OP_OPCODE_POW 0x26 +# define NV30_FP_OP_OPCODE_PK4UB 0x27 +# define NV30_FP_OP_OPCODE_UP4UB 0x28 +# define NV30_FP_OP_OPCODE_PK2US 0x29 +# define NV30_FP_OP_OPCODE_UP2US 0x2A +# define NV30_FP_OP_OPCODE_DP2A 0x2E +# define NV30_FP_OP_OPCODE_TXB 0x31 +# define NV30_FP_OP_OPCODE_RFL 0x36 +#define NV30_FP_OP_OUT_SAT (1 << 31) + +/* high order bits of SRC0 */ +#define NV30_FP_OP_OUT_ABS (1 << 29) +#define NV30_FP_OP_COND_SWZ_W_SHIFT 27 +#define NV30_FP_OP_COND_SWZ_W_MASK (3 << 27) +#define NV30_FP_OP_COND_SWZ_Z_SHIFT 25 +#define NV30_FP_OP_COND_SWZ_Z_MASK (3 << 25) +#define NV30_FP_OP_COND_SWZ_Y_SHIFT 23 +#define NV30_FP_OP_COND_SWZ_Y_MASK (3 << 23) +#define NV30_FP_OP_COND_SWZ_X_SHIFT 21 +#define NV30_FP_OP_COND_SWZ_X_MASK (3 << 21) +#define NV30_FP_OP_COND_SWZ_ALL_SHIFT 21 +#define NV30_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) +#define NV30_FP_OP_COND_SHIFT 18 +#define NV30_FP_OP_COND_MASK (0x07 << 18) +# define NV30_FP_OP_COND_FL 0 +# define NV30_FP_OP_COND_LT 1 +# define NV30_FP_OP_COND_EQ 2 +# define NV30_FP_OP_COND_LE 3 +# define NV30_FP_OP_COND_GT 4 +# define NV30_FP_OP_COND_NE 5 +# define NV30_FP_OP_COND_GE 6 +# define NV30_FP_OP_COND_TR 7 + +/* high order bits of SRC1 */ +#define NV30_FP_OP_SRC_SCALE_SHIFT 28 +#define NV30_FP_OP_SRC_SCALE_MASK (3 << 28) + +/* high order bits of SRC2 */ +#define NV30_FP_OP_INDEX_INPUT (1 << 30) + +//== Register selection == +#define NV30_FP_REG_TYPE_SHIFT 0 +#define NV30_FP_REG_TYPE_MASK (3 << 0) +# define NV30_FP_REG_TYPE_TEMP 0 +# define NV30_FP_REG_TYPE_INPUT 1 +# define NV30_FP_REG_TYPE_CONST 2 +#define NV30_FP_REG_SRC_SHIFT 2 /* uncertain */ +#define NV30_FP_REG_SRC_MASK (31 << 2) +#define NV30_FP_REG_UNK_0 (1 << 8) +#define NV30_FP_REG_SWZ_ALL_SHIFT 9 +#define NV30_FP_REG_SWZ_ALL_MASK (255 << 9) +#define NV30_FP_REG_SWZ_X_SHIFT 9 +#define NV30_FP_REG_SWZ_X_MASK (3 << 9) +#define NV30_FP_REG_SWZ_Y_SHIFT 11 +#define NV30_FP_REG_SWZ_Y_MASK (3 << 11) +#define NV30_FP_REG_SWZ_Z_SHIFT 13 +#define NV30_FP_REG_SWZ_Z_MASK (3 << 13) +#define NV30_FP_REG_SWZ_W_SHIFT 15 +#define NV30_FP_REG_SWZ_W_MASK (3 << 15) +# define NV30_FP_SWIZZLE_X 0 +# define NV30_FP_SWIZZLE_Y 1 +# define NV30_FP_SWIZZLE_Z 2 +# define NV30_FP_SWIZZLE_W 3 +#define NV30_FP_REG_NEGATE (1 << 17) + +#endif diff --git a/src/mesa/drivers/dri/nouveau/nv30_state.c b/src/mesa/drivers/dri/nouveau/nv30_state.c index 851641c0c9..3ffb5d3a41 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_state.c +++ b/src/mesa/drivers/dri/nouveau/nv30_state.c @@ -327,7 +327,7 @@ static void nv30Fogfv(GLcontext *ctx, GLenum pname, const GLfloat *params) switch(pname) { case GL_FOG_MODE: - BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_FOG_MODE, 1); + //BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_FOG_MODE, 1); //OUT_RING_CACHE (params); break; /* TODO: unsure about the rest.*/ diff --git a/src/mesa/drivers/dri/nouveau/nv30_vertprog.c b/src/mesa/drivers/dri/nouveau/nv30_vertprog.c new file mode 100644 index 0000000000..e60422dad1 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nv30_vertprog.c @@ -0,0 +1,356 @@ +#include "nouveau_context.h" +#include "nouveau_object.h" +#include "nouveau_fifo.h" +#include "nouveau_reg.h" + +#include "nouveau_shader.h" +#include "nv30_shader.h" + +extern nvsSwzComp NV20VP_TX_SWIZZLE[4]; +extern void NV20VPTXSwizzle(int hwswz, nvsSwzComp *swz); + +/***************************************************************************** + * Support routines + */ +static void +NV30VPUploadToHW(GLcontext *ctx, nouveauShader *nvs) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + int i; + + /* We can do better here and keep more than one VP on the hardware, and + * switch between them with PROGRAM_START_ID.. + */ + BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_UPLOAD_FROM_ID, 1); + OUT_RING(0); + for (i=0; iprogram_size; i+=4) { + BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0, 4); + OUT_RING(nvs->program[i + 0]); + OUT_RING(nvs->program[i + 1]); + OUT_RING(nvs->program[i + 2]); + OUT_RING(nvs->program[i + 3]); + } + BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_PROGRAM_START_ID, 1); + OUT_RING(0); +} + +static void +NV30VPUpdateConst(GLcontext *ctx, nouveauShader *nvs, int id) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + GLfloat *val; + + val = nvs->params[id].source_val ? + nvs->params[id].source_val : nvs->params[id].val; + + BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_UPLOAD_CONST_ID, 5); + OUT_RING (id); + OUT_RINGp(val, 4); +} + +/***************************************************************************** + * Assembly routines + */ + +/***************************************************************************** + * Disassembly routines + */ +static unsigned int +NV30VPGetOpcodeHW(nvsFunc * shader, int slot) +{ + int op; + + if (slot) { + op = (shader->inst[1] & NV30_VP_INST_SCA_OPCODEL_MASK) + >> NV30_VP_INST_SCA_OPCODEL_SHIFT; + op |= ((shader->inst[0] & NV30_VP_INST_SCA_OPCODEH_MASK) + >> NV30_VP_INST_SCA_OPCODEH_SHIFT) << 4; + } + else { + op = (shader->inst[1] & NV30_VP_INST_VEC_OPCODE_MASK) + >> NV30_VP_INST_VEC_OPCODE_SHIFT; + } + + return op; +} + +static nvsRegFile +NV30VPGetDestFile(nvsFunc * shader, int merged) +{ + switch (shader->GetOpcode(shader, merged)) { + case NVS_OP_ARL: + case NVS_OP_ARR: + case NVS_OP_ARA: + return NVS_FILE_ADDRESS; + default: + /*FIXME: This probably isn't correct.. */ + if ((shader->inst[3] & NV30_VP_INST_VDEST_WRITEMASK_MASK) != 0) + return NVS_FILE_RESULT; + if ((shader->inst[3] & NV30_VP_INST_SDEST_WRITEMASK_MASK) != 0) + return NVS_FILE_RESULT; + return NVS_FILE_TEMP; + } +} + +static unsigned int +NV30VPGetDestID(nvsFunc * shader, int merged) +{ + int id; + + switch (shader->GetDestFile(shader, merged)) { + case NVS_FILE_RESULT: + id = ((shader->inst[3] & NV30_VP_INST_DEST_ID_MASK) + >> NV30_VP_INST_DEST_ID_SHIFT); + switch (id) { + case NV30_VP_INST_DEST_POS : return NVS_FR_POSITION; + case NV30_VP_INST_DEST_COL0 : return NVS_FR_COL0; + case NV30_VP_INST_DEST_COL1 : return NVS_FR_COL1; + case NV30_VP_INST_DEST_TC(0): return NVS_FR_TEXCOORD0; + case NV30_VP_INST_DEST_TC(1): return NVS_FR_TEXCOORD1; + case NV30_VP_INST_DEST_TC(2): return NVS_FR_TEXCOORD2; + case NV30_VP_INST_DEST_TC(3): return NVS_FR_TEXCOORD3; + case NV30_VP_INST_DEST_TC(4): return NVS_FR_TEXCOORD4; + case NV30_VP_INST_DEST_TC(5): return NVS_FR_TEXCOORD5; + case NV30_VP_INST_DEST_TC(6): return NVS_FR_TEXCOORD6; + case NV30_VP_INST_DEST_TC(7): return NVS_FR_TEXCOORD7; + default: + return -1; + } + case NVS_FILE_ADDRESS: + case NVS_FILE_TEMP: + return (shader->inst[0] & NV30_VP_INST_DEST_TEMP_ID_MASK) + >> NV30_VP_INST_DEST_TEMP_ID_SHIFT; + default: + return -1; + } +} + +static unsigned int +NV30VPGetDestMask(nvsFunc * shader, int merged) +{ + int hwmask, mask = 0; + + if (shader->GetDestFile(shader, merged) == NVS_FILE_RESULT) + if (shader->GetOpcodeSlot(shader, merged)) + hwmask = (shader->inst[3] & NV30_VP_INST_SDEST_WRITEMASK_MASK) + >> NV30_VP_INST_SDEST_WRITEMASK_SHIFT; + else + hwmask = (shader->inst[3] & NV30_VP_INST_VDEST_WRITEMASK_MASK) + >> NV30_VP_INST_VDEST_WRITEMASK_SHIFT; + else if (shader->GetOpcodeSlot(shader, merged)) + hwmask = (shader->inst[3] & NV30_VP_INST_STEMP_WRITEMASK_MASK) + >> NV30_VP_INST_STEMP_WRITEMASK_SHIFT; + else + hwmask = (shader->inst[3] & NV30_VP_INST_VTEMP_WRITEMASK_MASK) + >> NV30_VP_INST_VTEMP_WRITEMASK_SHIFT; + + if (hwmask & (1 << 3)) mask |= SMASK_X; + if (hwmask & (1 << 2)) mask |= SMASK_Y; + if (hwmask & (1 << 1)) mask |= SMASK_Z; + if (hwmask & (1 << 0)) mask |= SMASK_W; + + return mask; +} + +static int +NV30VPGetSourceID(nvsFunc * shader, int merged, int pos) +{ + unsigned int src; + + switch (shader->GetSourceFile(shader, merged, pos)) { + case NVS_FILE_TEMP: + src = shader->GetSourceHW(shader, merged, pos); + return ((src & NV30_VP_SRC_REG_TEMP_ID_MASK) >> + NV30_VP_SRC_REG_TEMP_ID_SHIFT); + case NVS_FILE_CONST: + return ((shader->inst[1] & NV30_VP_INST_CONST_SRC_MASK) + >> NV30_VP_INST_CONST_SRC_SHIFT); + case NVS_FILE_ATTRIB: + src = ((shader->inst[1] & NV30_VP_INST_INPUT_SRC_MASK) + >> NV30_VP_INST_INPUT_SRC_SHIFT); + switch (src) { + case NV30_VP_INST_IN_POS : return NVS_FR_POSITION; + case NV30_VP_INST_IN_COL0 : return NVS_FR_COL0; + case NV30_VP_INST_IN_COL1 : return NVS_FR_COL1; + case NV30_VP_INST_IN_TC(0): return NVS_FR_TEXCOORD0; + case NV30_VP_INST_IN_TC(1): return NVS_FR_TEXCOORD1; + case NV30_VP_INST_IN_TC(2): return NVS_FR_TEXCOORD2; + case NV30_VP_INST_IN_TC(3): return NVS_FR_TEXCOORD3; + case NV30_VP_INST_IN_TC(4): return NVS_FR_TEXCOORD4; + case NV30_VP_INST_IN_TC(5): return NVS_FR_TEXCOORD5; + case NV30_VP_INST_IN_TC(6): return NVS_FR_TEXCOORD6; + case NV30_VP_INST_IN_TC(7): return NVS_FR_TEXCOORD7; + default: + return NVS_FR_UNKNOWN; + } + default: + return -1; + } +} + +static int +NV30VPGetSourceAbs(nvsFunc * shader, int merged, int pos) +{ + struct _op_xlat *opr; + static unsigned int abspos[3] = { + NV30_VP_INST_SRC0_ABS, + NV30_VP_INST_SRC1_ABS, + NV30_VP_INST_SRC2_ABS, + }; + + opr = shader->GetOPTXRec(shader, merged); + if (!opr || opr->srcpos[pos] == -1 || opr->srcpos[pos] > 2) + return 0; + + return ((shader->inst[0] & abspos[opr->srcpos[pos]]) ? 1 : 0); +} + +static int +NV30VPGetRelAddressRegID(nvsFunc * shader) +{ + return ((shader->inst[0] & NV30_VP_INST_ADDR_REG_SELECT_1) ? 1 : 0); +} + +static nvsSwzComp +NV30VPGetRelAddressSwizzle(nvsFunc * shader) +{ + nvsSwzComp swz; + + swz = NV20VP_TX_SWIZZLE[(shader->inst[0] & NV30_VP_INST_ADDR_SWZ_MASK) + >> NV30_VP_INST_ADDR_SWZ_SHIFT]; + return swz; +} + +static int +NV30VPSupportsConditional(nvsFunc * shader) +{ + /*FIXME: Is this true of all ops? */ + return 1; +} + +static int +NV30VPGetConditionUpdate(nvsFunc * shader) +{ + return ((shader->inst[0] & NV30_VP_INST_COND_UPDATE_ENABLE) ? 1 : 0); +} + +static int +NV30VPGetConditionTest(nvsFunc * shader) +{ + int op; + + /* The condition test is unconditionally enabled on some + * instructions. ie: the condition test bit does *NOT* have + * to be set. + * + * FIXME: check other relevant ops for this situation. + */ + op = shader->GetOpcodeHW(shader, 1); + switch (op) { + case NV30_VP_INST_OP_BRA: + return 1; + default: + return ((shader->inst[0] & NV30_VP_INST_COND_TEST_ENABLE) ? 1 : 0); + } +} + +static nvsCond +NV30VPGetCondition(nvsFunc * shader) +{ + int cond; + + cond = ((shader->inst[0] & NV30_VP_INST_COND_MASK) + >> NV30_VP_INST_COND_SHIFT); + + switch (cond) { + case NV30_VP_INST_COND_FL: return NVS_COND_FL; + case NV30_VP_INST_COND_LT: return NVS_COND_LT; + case NV30_VP_INST_COND_EQ: return NVS_COND_EQ; + case NV30_VP_INST_COND_LE: return NVS_COND_LE; + case NV30_VP_INST_COND_GT: return NVS_COND_GT; + case NV30_VP_INST_COND_NE: return NVS_COND_NE; + case NV30_VP_INST_COND_GE: return NVS_COND_GE; + case NV30_VP_INST_COND_TR: return NVS_COND_TR; + default: + return NVS_COND_UNKNOWN; + } +} + +static void +NV30VPGetCondRegSwizzle(nvsFunc * shader, nvsSwzComp *swz) +{ + int swzbits; + + swzbits = (shader->inst[0] & NV30_VP_INST_COND_SWZ_ALL_MASK) + >> NV30_VP_INST_COND_SWZ_ALL_SHIFT; + NV20VPTXSwizzle(swzbits, swz); +} + +static int +NV30VPGetCondRegID(nvsFunc * shader) +{ + return 0; +} + + +static int +NV30VPGetBranch(nvsFunc * shader) +{ + return ((shader->inst[2] & NV30_VP_INST_IADDR_MASK) + >> NV30_VP_INST_IADDR_SHIFT); +} + +void +NV30VPInitShaderFuncs(nvsFunc * shader) +{ + /* Inherit NV20 code, a lot of it is the same */ + NV20VPInitShaderFuncs(shader); + + /* Increase max valid opcode ID, and add new instructions */ + NVVP_TX_VOP_COUNT = NVVP_TX_NVS_OP_COUNT = 32; + + MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_FRC, NVS_OP_FRC, 0, -1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_FLR, NVS_OP_FLR, 0, -1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_SEQ, NVS_OP_SEQ, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_SFL, NVS_OP_SFL, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_SGT, NVS_OP_SGT, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_SLE, NVS_OP_SLE, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_SNE, NVS_OP_SNE, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_STR, NVS_OP_STR, 0, 1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_SSG, NVS_OP_SSG, 0, -1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_ARR, NVS_OP_ARR, 0, -1, -1); + MOD_OPCODE(NVVP_TX_VOP, NV30_VP_INST_OP_ARA, NVS_OP_ARA, 3, -1, -1); + + MOD_OPCODE(NVVP_TX_SOP, NV30_VP_INST_OP_BRA, NVS_OP_BRA, -1, -1, -1); + MOD_OPCODE(NVVP_TX_SOP, NV30_VP_INST_OP_CAL, NVS_OP_CAL, -1, -1, -1); + MOD_OPCODE(NVVP_TX_SOP, NV30_VP_INST_OP_RET, NVS_OP_RET, -1, -1, -1); + MOD_OPCODE(NVVP_TX_SOP, NV30_VP_INST_OP_LG2, NVS_OP_LG2, 2, -1, -1); + MOD_OPCODE(NVVP_TX_SOP, NV30_VP_INST_OP_EX2, NVS_OP_EX2, 2, -1, -1); + MOD_OPCODE(NVVP_TX_SOP, NV30_VP_INST_OP_SIN, NVS_OP_SIN, 2, -1, -1); + MOD_OPCODE(NVVP_TX_SOP, NV30_VP_INST_OP_COS, NVS_OP_COS, 2, -1, -1); + + shader->UploadToHW = NV30VPUploadToHW; + shader->UpdateConst = NV30VPUpdateConst; + + shader->GetOpcodeHW = NV30VPGetOpcodeHW; + + shader->GetDestFile = NV30VPGetDestFile; + shader->GetDestID = NV30VPGetDestID; + shader->GetDestMask = NV30VPGetDestMask; + + shader->GetSourceID = NV30VPGetSourceID; + shader->GetSourceAbs = NV30VPGetSourceAbs; + + shader->GetRelAddressRegID = NV30VPGetRelAddressRegID; + shader->GetRelAddressSwizzle = NV30VPGetRelAddressSwizzle; + + shader->SupportsConditional = NV30VPSupportsConditional; + shader->GetConditionUpdate = NV30VPGetConditionUpdate; + shader->GetConditionTest = NV30VPGetConditionTest; + shader->GetCondition = NV30VPGetCondition; + shader->GetCondRegSwizzle = NV30VPGetCondRegSwizzle; + shader->GetCondRegID = NV30VPGetCondRegID; + + shader->GetBranch = NV30VPGetBranch; +} + diff --git a/src/mesa/drivers/dri/nouveau/nv40_fragprog.c b/src/mesa/drivers/dri/nouveau/nv40_fragprog.c new file mode 100644 index 0000000000..3d58d6b666 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nv40_fragprog.c @@ -0,0 +1,152 @@ +#include "nouveau_shader.h" +#include "nv40_shader.h" + +/* branching ops */ +unsigned int NVFP_TX_BOP_COUNT = 5; +struct _op_xlat NVFP_TX_BOP[64]; + +static struct _op_xlat * +NV40FPGetOPTXRec(nvsFunc * shader, int merged) +{ + struct _op_xlat *opr; + int op; + + op = shader->GetOpcodeHW(shader, 0); + if (shader->inst[2] & NV40_FP_OP_OPCODE_IS_BRANCH) { + opr = NVFP_TX_BOP; + op &= ~NV40_FP_OP_OPCODE_IS_BRANCH; + if (op > NVFP_TX_BOP_COUNT) + return NULL; + } + else { + opr = NVFP_TX_AOP; + if (op > NVFP_TX_AOP_COUNT) + return NULL; + } + + if (opr[op].SOP == NVS_OP_UNKNOWN) + return NULL; + return &opr[op]; +} + +static int +NV40FPGetSourceID(nvsFunc * shader, int merged, int pos) +{ + switch (shader->GetSourceFile(shader, merged, pos)) { + case NVS_FILE_ATTRIB: + switch ((shader->inst[0] & NV40_FP_OP_INPUT_SRC_MASK) + >> NV40_FP_OP_INPUT_SRC_SHIFT) { + case NV40_FP_OP_INPUT_SRC_POSITION: return NVS_FR_POSITION; + case NV40_FP_OP_INPUT_SRC_COL0 : return NVS_FR_COL0; + case NV40_FP_OP_INPUT_SRC_COL1 : return NVS_FR_COL1; + case NV40_FP_OP_INPUT_SRC_FOGC : return NVS_FR_FOGCOORD; + case NV40_FP_OP_INPUT_SRC_TC(0) : return NVS_FR_TEXCOORD0; + case NV40_FP_OP_INPUT_SRC_TC(1) : return NVS_FR_TEXCOORD1; + case NV40_FP_OP_INPUT_SRC_TC(2) : return NVS_FR_TEXCOORD2; + case NV40_FP_OP_INPUT_SRC_TC(3) : return NVS_FR_TEXCOORD3; + case NV40_FP_OP_INPUT_SRC_TC(4) : return NVS_FR_TEXCOORD4; + case NV40_FP_OP_INPUT_SRC_TC(5) : return NVS_FR_TEXCOORD5; + case NV40_FP_OP_INPUT_SRC_TC(6) : return NVS_FR_TEXCOORD6; + case NV40_FP_OP_INPUT_SRC_TC(7) : return NVS_FR_TEXCOORD7; + case NV40_FP_OP_INPUT_SRC_FACING : return NVS_FR_FACING; + default: + return -1; + } + break; + case NVS_FILE_TEMP: + { + unsigned int src; + + src = shader->GetSourceHW(shader, merged, pos); + return ((src & NV40_FP_REG_SRC_MASK) >> NV40_FP_REG_SRC_SHIFT); + } + case NVS_FILE_CONST: /* inlined into fragprog */ + default: + return -1; + } +} + +static int +NV40FPGetBranch(nvsFunc * shader) +{ + return ((shader->inst[2] & NV40_FP_OP_IADDR_MASK) + >> NV40_FP_OP_IADDR_SHIFT);; +} + +static int +NV40FPGetBranchElse(nvsFunc * shader) +{ + return ((shader->inst[2] & NV40_FP_OP_ELSE_ID_MASK) + >> NV40_FP_OP_ELSE_ID_SHIFT); +} + +static int +NV40FPGetBranchEnd(nvsFunc * shader) +{ + return ((shader->inst[3] & NV40_FP_OP_END_ID_MASK) + >> NV40_FP_OP_END_ID_SHIFT); +} + +static int +NV40FPGetLoopCount(nvsFunc * shader) +{ + return ((shader->inst[2] & NV40_FP_OP_LOOP_COUNT_MASK) + >> NV40_FP_OP_LOOP_COUNT_SHIFT); +} + +static int +NV40FPGetLoopInitial(nvsFunc * shader) +{ + return ((shader->inst[2] & NV40_FP_OP_LOOP_INDEX_MASK) + >> NV40_FP_OP_LOOP_INDEX_SHIFT); +} + +static int +NV40FPGetLoopIncrement(nvsFunc * shader) +{ + return ((shader->inst[2] & NV40_FP_OP_LOOP_INCR_MASK) + >> NV40_FP_OP_LOOP_INCR_SHIFT); +} + +void +NV40FPInitShaderFuncs(nvsFunc * shader) +{ + /* Inherit NV30 FP code, it's mostly the same */ + NV30FPInitShaderFuncs(shader); + + /* Kill off opcodes seen on NV30, but not seen on NV40 - need to find + * out if these actually work or not. + * + * update: either LIT/RSQ don't work on nv40, or I generate bad code for + * them. haven't tested the others yet + */ + MOD_OPCODE(NVFP_TX_AOP, 0x1B, NVS_OP_UNKNOWN, -1, -1, -1); /* NV30 RSQ */ + MOD_OPCODE(NVFP_TX_AOP, 0x1E, NVS_OP_UNKNOWN, -1, -1, -1); /* NV30 LIT */ + MOD_OPCODE(NVFP_TX_AOP, 0x1F, NVS_OP_UNKNOWN, -1, -1, -1); /* NV30 LRP */ + MOD_OPCODE(NVFP_TX_AOP, 0x26, NVS_OP_UNKNOWN, -1, -1, -1); /* NV30 POW */ + MOD_OPCODE(NVFP_TX_AOP, 0x36, NVS_OP_UNKNOWN, -1, -1, -1); /* NV30 RFL */ + + /* Extra opcodes supported on NV40 */ + MOD_OPCODE(NVFP_TX_AOP, NV40_FP_OP_OPCODE_DIV , NVS_OP_DIV , 0, 1, -1); + MOD_OPCODE(NVFP_TX_AOP, NV40_FP_OP_OPCODE_DP2A , NVS_OP_DP2A, 0, 1, 2); + MOD_OPCODE(NVFP_TX_AOP, NV40_FP_OP_OPCODE_TXL , NVS_OP_TXL , 0, -1, -1); + + MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_BRK , NVS_OP_BRK , -1, -1, -1); + MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_CAL , NVS_OP_CAL , -1, -1, -1); + MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_IF , NVS_OP_IF , -1, -1, -1); + MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_LOOP, NVS_OP_LOOP, -1, -1, -1); + MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_REP , NVS_OP_REP , -1, -1, -1); + MOD_OPCODE(NVFP_TX_BOP, NV40_FP_OP_BRA_OPCODE_RET , NVS_OP_RET , -1, -1, -1); + + /* fragment.facing */ + shader->GetSourceID = NV40FPGetSourceID; + + /* branching */ + shader->GetOPTXRec = NV40FPGetOPTXRec; + shader->GetBranch = NV40FPGetBranch; + shader->GetBranchElse = NV40FPGetBranchElse; + shader->GetBranchEnd = NV40FPGetBranchEnd; + shader->GetLoopCount = NV40FPGetLoopCount; + shader->GetLoopInitial = NV40FPGetLoopInitial; + shader->GetLoopIncrement = NV40FPGetLoopIncrement; +} diff --git a/src/mesa/drivers/dri/nouveau/nv40_shader.h b/src/mesa/drivers/dri/nouveau/nv40_shader.h new file mode 100644 index 0000000000..2a2b5639b6 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nv40_shader.h @@ -0,0 +1,467 @@ +#ifndef __NV40_SHADER_H__ +#define __NV40_SHADER_H__ + +/* Vertex programs instruction set + * + * The NV40 instruction set is very similar to NV30. Most fields are in + * a slightly different position in the instruction however. + * + * Merged instructions + * In some cases it is possible to put two instructions into one opcode + * slot. The rules for when this is OK is not entirely clear to me yet. + * + * There are separate writemasks and dest temp register fields for each + * grouping of instructions. There is however only one field with the + * ID of a result register. Writing to temp/result regs is selected by + * setting VEC_RESULT/SCA_RESULT. + * + * Temporary registers + * The source/dest temp register fields have been extended by 1 bit, to + * give a total of 32 temporary registers. + * + * Relative Addressing + * NV40 can use an address register to index into vertex attribute regs. + * This is done by putting the offset value into INPUT_SRC and setting + * the INDEX_INPUT flag. + * + * Conditional execution (see NV_vertex_program{2,3} for details) + * There is a second condition code register on NV40, it's use is enabled + * by setting the COND_REG_SELECT_1 flag. + * + * Texture lookup + * TODO + */ + +/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */ +#define NV40_VP_INST_VEC_RESULT (1 << 30) +/* uncertain.. */ +#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) +/* use address reg as index into attribs */ +#define NV40_VP_INST_INDEX_INPUT (1 << 27) +#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25) +#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV40_VP_INST_SRC2_ABS (1 << 23) +#define NV40_VP_INST_SRC1_ABS (1 << 22) +#define NV40_VP_INST_SRC0_ABS (1 << 21) +#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT 15 +#define NV40_VP_INST_VEC_DEST_TEMP_MASK (0x1F << 15) +#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) +#define NV40_VP_INST_COND_SHIFT 10 +#define NV40_VP_INST_COND_MASK (0x7 << 10) +# define NV40_VP_INST_COND_FL 0 +# define NV40_VP_INST_COND_LT 1 +# define NV40_VP_INST_COND_EQ 2 +# define NV40_VP_INST_COND_LE 3 +# define NV40_VP_INST_COND_GT 4 +# define NV40_VP_INST_COND_NE 5 +# define NV40_VP_INST_COND_GE 6 +# define NV40_VP_INST_COND_TR 7 +#define NV40_VP_INST_COND_SWZ_X_SHIFT 8 +#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8) +#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6 +#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6) +#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4 +#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4) +#define NV40_VP_INST_COND_SWZ_W_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2) +#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2 +#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2) +#define NV40_VP_INST_ADDR_SWZ_SHIFT 0 +#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0) +#define NV40_VP_INST0_KNOWN ( \ + NV40_VP_INST_INDEX_INPUT | \ + NV40_VP_INST_COND_REG_SELECT_1 | \ + NV40_VP_INST_ADDR_REG_SELECT_1 | \ + NV40_VP_INST_SRC2_ABS | \ + NV40_VP_INST_SRC1_ABS | \ + NV40_VP_INST_SRC0_ABS | \ + NV40_VP_INST_VEC_DEST_TEMP_MASK | \ + NV40_VP_INST_COND_TEST_ENABLE | \ + NV40_VP_INST_COND_MASK | \ + NV40_VP_INST_COND_SWZ_ALL_MASK | \ + NV40_VP_INST_ADDR_SWZ_MASK) + +/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */ +#define NV40_VP_INST_VEC_OPCODE_SHIFT 22 +#define NV40_VP_INST_VEC_OPCODE_MASK (0x1F << 22) +# define NV40_VP_INST_OP_NOP 0x00 +# define NV40_VP_INST_OP_MOV 0x01 +# define NV40_VP_INST_OP_MUL 0x02 +# define NV40_VP_INST_OP_ADD 0x03 +# define NV40_VP_INST_OP_MAD 0x04 +# define NV40_VP_INST_OP_DP3 0x05 +# define NV40_VP_INST_OP_DP4 0x07 +# define NV40_VP_INST_OP_DPH 0x06 +# define NV40_VP_INST_OP_DST 0x08 +# define NV40_VP_INST_OP_MIN 0x09 +# define NV40_VP_INST_OP_MAX 0x0A +# define NV40_VP_INST_OP_SLT 0x0B +# define NV40_VP_INST_OP_SGE 0x0C +# define NV40_VP_INST_OP_ARL 0x0D +# define NV40_VP_INST_OP_FRC 0x0E +# define NV40_VP_INST_OP_FLR 0x0F +# define NV40_VP_INST_OP_SEQ 0x10 +# define NV40_VP_INST_OP_SFL 0x11 +# define NV40_VP_INST_OP_SGT 0x12 +# define NV40_VP_INST_OP_SLE 0x13 +# define NV40_VP_INST_OP_SNE 0x14 +# define NV40_VP_INST_OP_STR 0x15 +# define NV40_VP_INST_OP_SSG 0x16 +# define NV40_VP_INST_OP_ARR 0x17 +# define NV40_VP_INST_OP_ARA 0x18 +# define NV40_VP_INST_OP_TXWHAT 0x19 +#define NV40_VP_INST_SCA_OPCODE_SHIFT 27 +#define NV40_VP_INST_SCA_OPCODE_MASK (0x1F << 27) +# define NV40_VP_INST_OP_RCP 0x02 +# define NV40_VP_INST_OP_RCC 0x03 +# define NV40_VP_INST_OP_RSQ 0x04 +# define NV40_VP_INST_OP_EXP 0x05 +# define NV40_VP_INST_OP_LOG 0x06 +# define NV40_VP_INST_OP_LIT 0x07 +# define NV40_VP_INST_OP_BRA 0x09 +# define NV40_VP_INST_OP_CAL 0x0B +# define NV40_VP_INST_OP_RET 0x0C +# define NV40_VP_INST_OP_LG2 0x0D +# define NV40_VP_INST_OP_EX2 0x0E +# define NV40_VP_INST_OP_SIN 0x0F +# define NV40_VP_INST_OP_COS 0x10 +# define NV40_VP_INST_OP_PUSHA 0x13 +# define NV40_VP_INST_OP_POPA 0x14 +#define NV40_VP_INST_CONST_SRC_SHIFT 12 +#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12) +#define NV40_VP_INST_INPUT_SRC_SHIFT 8 +#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8) +# define NV40_VP_INST_IN_POS 0 +# define NV40_VP_INST_IN_WEIGHT 1 +# define NV40_VP_INST_IN_NORMAL 2 +# define NV40_VP_INST_IN_COL0 3 +# define NV40_VP_INST_IN_COL1 4 +# define NV40_VP_INST_IN_FOGC 5 +# define NV40_VP_INST_IN_TC0 8 +# define NV40_VP_INST_IN_TC(n) (8+n) +#define NV40_VP_INST_SRC0H_SHIFT 0 +#define NV40_VP_INST_SRC0H_MASK (0xFF << 0) +#define NV40_VP_INST1_KNOWN ( \ + NV40_VP_INST_VEC_OPCODE_MASK | \ + NV40_VP_INST_SCA_OPCODE_MASK | \ + NV40_VP_INST_CONST_SRC_MASK | \ + NV40_VP_INST_INPUT_SRC_MASK | \ + NV40_VP_INST_SRC0H_MASK \ + ) + +/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */ +#define NV40_VP_INST_SRC0L_SHIFT 23 +#define NV40_VP_INST_SRC0L_MASK (0x1FF << 23) +#define NV40_VP_INST_SRC1_SHIFT 6 +#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6) +#define NV40_VP_INST_SRC2H_SHIFT 0 +#define NV40_VP_INST_SRC2H_MASK (0x3F << 0) +#define NV40_VP_INST_IADDRH_SHIFT 0 +#define NV40_VP_INST_IADDRH_MASK (0x1F << 0) + +/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */ +#define NV40_VP_INST_IADDRL_SHIFT 29 +#define NV40_VP_INST_IADDRL_MASK (7 << 29) +#define NV40_VP_INST_SRC2L_SHIFT 21 +#define NV40_VP_INST_SRC2L_MASK (0x7FF << 21) +#define NV40_VP_INST_SCA_WRITEMASK_SHIFT 17 +#define NV40_VP_INST_SCA_WRITEMASK_MASK (0xF << 17) +# define NV40_VP_INST_SCA_WRITEMASK_X (1 << 20) +# define NV40_VP_INST_SCA_WRITEMASK_Y (1 << 19) +# define NV40_VP_INST_SCA_WRITEMASK_Z (1 << 18) +# define NV40_VP_INST_SCA_WRITEMASK_W (1 << 17) +#define NV40_VP_INST_VEC_WRITEMASK_SHIFT 13 +#define NV40_VP_INST_VEC_WRITEMASK_MASK (0xF << 13) +# define NV40_VP_INST_VEC_WRITEMASK_X (1 << 16) +# define NV40_VP_INST_VEC_WRITEMASK_Y (1 << 15) +# define NV40_VP_INST_VEC_WRITEMASK_Z (1 << 14) +# define NV40_VP_INST_VEC_WRITEMASK_W (1 << 13) +#define NV40_VP_INST_SCA_RESULT (1 << 12) +#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT 7 +#define NV40_VP_INST_SCA_DEST_TEMP_MASK (0x1F << 7) +#define NV40_VP_INST_DEST_SHIFT 2 +#define NV40_VP_INST_DEST_MASK (31 << 2) +# define NV40_VP_INST_DEST_POS 0 +# define NV40_VP_INST_DEST_COL0 1 +# define NV40_VP_INST_DEST_COL1 2 +# define NV40_VP_INST_DEST_BFC0 3 +# define NV40_VP_INST_DEST_BFC1 4 +# define NV40_VP_INST_DEST_FOGC 5 +# define NV40_VP_INST_DEST_PSZ 6 +# define NV40_VP_INST_DEST_TC0 7 +# define NV40_VP_INST_DEST_TC(n) (7+n) +# define NV40_VP_INST_DEST_TEMP 0x1F +#define NV40_VP_INST_INDEX_CONST (1 << 1) +#define NV40_VP_INST_LAST (1 << 0) +#define NV40_VP_INST3_KNOWN ( \ + NV40_VP_INST_SRC2L_MASK |\ + NV40_VP_INST_SCA_WRITEMASK_MASK |\ + NV40_VP_INST_VEC_WRITEMASK_MASK |\ + NV40_VP_INST_SCA_DEST_TEMP_MASK |\ + NV40_VP_INST_DEST_MASK |\ + NV40_VP_INST_INDEX_CONST) + +/* Useful to split the source selection regs into their pieces */ +#define NV40_VP_SRC0_HIGH_SHIFT 9 +#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00 +#define NV40_VP_SRC0_LOW_MASK 0x000001FF +#define NV40_VP_SRC2_HIGH_SHIFT 11 +#define NV40_VP_SRC2_HIGH_MASK 0x0001F800 +#define NV40_VP_SRC2_LOW_MASK 0x000007FF + +/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */ +#define NV40_VP_SRC_NEGATE (1 << 16) +#define NV40_VP_SRC_SWZ_X_SHIFT 14 +#define NV40_VP_SRC_SWZ_X_MASK (3 << 14) +#define NV40_VP_SRC_SWZ_Y_SHIFT 12 +#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12) +#define NV40_VP_SRC_SWZ_Z_SHIFT 10 +#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10) +#define NV40_VP_SRC_SWZ_W_SHIFT 8 +#define NV40_VP_SRC_SWZ_W_MASK (3 << 8) +#define NV40_VP_SRC_SWZ_ALL_SHIFT 8 +#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8) +#define NV40_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV40_VP_SRC_TEMP_SRC_MASK (0x1F << 2) +#define NV40_VP_SRC_REG_TYPE_SHIFT 0 +#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0) +# define NV40_VP_SRC_REG_TYPE_UNK0 0 +# define NV40_VP_SRC_REG_TYPE_TEMP 1 +# define NV40_VP_SRC_REG_TYPE_INPUT 2 +# define NV40_VP_SRC_REG_TYPE_CONST 3 + + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + * 0 - Opcode, output reg/mask, ATTRIB source + * 1 - Source 0 + * 2 - Source 1 + * 3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + * result.color == R0.xyzw + * result.depth == R1.z + * When the fragprog contains instructions to write depth, + * NV30_TCL_PRIMITIVE_3D_UNK1D78=0 otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + * + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords. As such instructions such as: + * + * ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. + * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO and + * SWIZZLE_ONE. + * + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as + * SWIZZLE_ZERO is implemented simply by not writing to the relevant components + * of the destination. + * + * Looping + * Loops appear to be fairly expensive on NV40 at least, the proprietary + * driver goes to a lot of effort to avoid using the native looping + * instructions. If the total number of *executed* instructions between + * REP/ENDREP or LOOP/ENDLOOP is <=500, the driver will unroll the loop. + * The maximum loop count is 255. + * + * Conditional execution + * TODO + * + * Non-native instructions: + * LIT + * LRP - MAD+MAD + * SUB - ADD, negate second source + * RSQ - LG2 + EX2 + * POW - LG2 + MUL + EX2 + * SCS - COS + SIN + * XPD + * DP2 - MUL + ADD + * NRM + */ + +//== Opcode / Destination selection == +#define NV40_FP_OP_PROGRAM_END (1 << 0) +#define NV40_FP_OP_OUT_REG_SHIFT 1 +#define NV40_FP_OP_OUT_REG_MASK (31 << 1) +/* Needs to be set when writing outputs to get expected result.. */ +#define NV40_FP_OP_UNK0_7 (1 << 7) +#define NV40_FP_OP_COND_WRITE_ENABLE (1 << 8) +#define NV40_FP_OP_OUTMASK_SHIFT 9 +#define NV40_FP_OP_OUTMASK_MASK (0xF << 9) +# define NV40_FP_OP_OUT_X (1 << 9) +# define NV40_FP_OP_OUT_Y (1 <<10) +# define NV40_FP_OP_OUT_Z (1 <<11) +# define NV40_FP_OP_OUT_W (1 <<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. + */ +#define NV40_FP_OP_INPUT_SRC_SHIFT 13 +#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13) +# define NV40_FP_OP_INPUT_SRC_POSITION 0x0 +# define NV40_FP_OP_INPUT_SRC_COL0 0x1 +# define NV40_FP_OP_INPUT_SRC_COL1 0x2 +# define NV40_FP_OP_INPUT_SRC_FOGC 0x3 +# define NV40_FP_OP_INPUT_SRC_TC0 0x4 +# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + n) +# define NV40_FP_OP_INPUT_SRC_FACING 0xE +#define NV40_FP_OP_TEX_UNIT_SHIFT 17 +#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17) +#define NV40_FP_OP_PRECISION_SHIFT 22 +#define NV40_FP_OP_PRECISION_MASK (3 << 22) +# define NV40_FP_PRECISION_FP32 0 +# define NV40_FP_PRECISION_FP16 1 +# define NV40_FP_PRECISION_FX12 2 +#define NV40_FP_OP_OPCODE_SHIFT 24 +#define NV40_FP_OP_OPCODE_MASK (0x3F << 24) +# define NV40_FP_OP_OPCODE_NOP 0x00 +# define NV40_FP_OP_OPCODE_MOV 0x01 +# define NV40_FP_OP_OPCODE_MUL 0x02 +# define NV40_FP_OP_OPCODE_ADD 0x03 +# define NV40_FP_OP_OPCODE_MAD 0x04 +# define NV40_FP_OP_OPCODE_DP3 0x05 +# define NV40_FP_OP_OPCODE_DP4 0x06 +# define NV40_FP_OP_OPCODE_DST 0x07 +# define NV40_FP_OP_OPCODE_MIN 0x08 +# define NV40_FP_OP_OPCODE_MAX 0x09 +# define NV40_FP_OP_OPCODE_SLT 0x0A +# define NV40_FP_OP_OPCODE_SGE 0x0B +# define NV40_FP_OP_OPCODE_SLE 0x0C +# define NV40_FP_OP_OPCODE_SGT 0x0D +# define NV40_FP_OP_OPCODE_SNE 0x0E +# define NV40_FP_OP_OPCODE_SEQ 0x0F +# define NV40_FP_OP_OPCODE_FRC 0x10 +# define NV40_FP_OP_OPCODE_FLR 0x11 +# define NV40_FP_OP_OPCODE_KIL 0x12 +# define NV40_FP_OP_OPCODE_PK4B 0x13 +# define NV40_FP_OP_OPCODE_UP4B 0x14 +/* DDX/DDY can only write to XY */ +# define NV40_FP_OP_OPCODE_DDX 0x15 +# define NV40_FP_OP_OPCODE_DDY 0x16 +# define NV40_FP_OP_OPCODE_TEX 0x17 +# define NV40_FP_OP_OPCODE_TXP 0x18 +# define NV40_FP_OP_OPCODE_TXD 0x19 +# define NV40_FP_OP_OPCODE_RCP 0x1A +# define NV40_FP_OP_OPCODE_EX2 0x1C +# define NV40_FP_OP_OPCODE_LG2 0x1D +# define NV40_FP_OP_OPCODE_COS 0x22 +# define NV40_FP_OP_OPCODE_SIN 0x23 +# define NV40_FP_OP_OPCODE_PK2H 0x24 +# define NV40_FP_OP_OPCODE_UP2H 0x25 +# define NV40_FP_OP_OPCODE_PK4UB 0x27 +# define NV40_FP_OP_OPCODE_UP4UB 0x28 +# define NV40_FP_OP_OPCODE_PK2US 0x29 +# define NV40_FP_OP_OPCODE_UP2US 0x2A +# define NV40_FP_OP_OPCODE_DP2A 0x2E +# define NV40_FP_OP_OPCODE_TXL 0x2F +# define NV40_FP_OP_OPCODE_TXB 0x31 +# define NV40_FP_OP_OPCODE_DIV 0x3A +/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/ +# define NV40_FP_OP_BRA_OPCODE_BRK 0x0 +# define NV40_FP_OP_BRA_OPCODE_CAL 0x1 +# define NV40_FP_OP_BRA_OPCODE_IF 0x2 +# define NV40_FP_OP_BRA_OPCODE_LOOP 0x3 +# define NV40_FP_OP_BRA_OPCODE_REP 0x4 +# define NV40_FP_OP_BRA_OPCODE_RET 0x5 +#define NV40_FP_OP_OUT_SAT (1 << 31) + +/* high order bits of SRC0 */ +#define NV40_FP_OP_OUT_ABS (1 << 29) +#define NV40_FP_OP_COND_SWZ_W_SHIFT 27 +#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27) +#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25 +#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25) +#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23 +#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23) +#define NV40_FP_OP_COND_SWZ_X_SHIFT 21 +#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21) +#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21 +#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) +#define NV40_FP_OP_COND_SHIFT 18 +#define NV40_FP_OP_COND_MASK (0x07 << 18) +# define NV40_FP_OP_COND_FL 0 +# define NV40_FP_OP_COND_LT 1 +# define NV40_FP_OP_COND_EQ 2 +# define NV40_FP_OP_COND_LE 3 +# define NV40_FP_OP_COND_GT 4 +# define NV40_FP_OP_COND_NE 5 +# define NV40_FP_OP_COND_GE 6 +# define NV40_FP_OP_COND_TR 7 + +/* high order bits of SRC1 */ +#define NV40_FP_OP_OPCODE_IS_BRANCH (1<<31) +#define NV40_FP_OP_SRC_SCALE_SHIFT 28 +#define NV40_FP_OP_SRC_SCALE_MASK (3 << 28) + +/* SRC1 LOOP */ +#define NV40_FP_OP_LOOP_INCR_SHIFT 19 +#define NV40_FP_OP_LOOP_INCR_MASK (0xFF << 19) +#define NV40_FP_OP_LOOP_INDEX_SHIFT 10 +#define NV40_FP_OP_LOOP_INDEX_MASK (0xFF << 10) +#define NV40_FP_OP_LOOP_COUNT_SHIFT 2 +#define NV40_FP_OP_LOOP_COUNT_MASK (0xFF << 2) + +/* SRC1 IF */ +#define NV40_FP_OP_ELSE_ID_SHIFT 2 +#define NV40_FP_OP_ELSE_ID_MASK (0xFF << 2) + +/* SRC1 CAL */ +#define NV40_FP_OP_IADDR_SHIFT 2 +#define NV40_FP_OP_IADDR_MASK (0xFF << 2) + +/* SRC1 REP + * I have no idea why there are 3 count values here.. but they + * have always been filled with the same value in my tests so + * far.. + */ +#define NV40_FP_OP_REP_COUNT1_SHIFT 2 +#define NV40_FP_OP_REP_COUNT1_MASK (0xFF << 2) +#define NV40_FP_OP_REP_COUNT2_SHIFT 10 +#define NV40_FP_OP_REP_COUNT2_MASK (0xFF << 10) +#define NV40_FP_OP_REP_COUNT3_SHIFT 19 +#define NV40_FP_OP_REP_COUNT3_MASK (0xFF << 19) + +/* SRC2 REP/IF */ +#define NV40_FP_OP_END_ID_SHIFT 2 +#define NV40_FP_OP_END_ID_MASK (0xFF << 2) + +// SRC2 high-order +#define NV40_FP_OP_INDEX_INPUT (1 << 30) +#define NV40_FP_OP_ADDR_INDEX_SHIFT 19 +#define NV40_FP_OP_ADDR_INDEX_MASK (0xF << 19) + +//== Register selection == +#define NV40_FP_REG_TYPE_SHIFT 0 +#define NV40_FP_REG_TYPE_MASK (3 << 0) +# define NV40_FP_REG_TYPE_TEMP 0 +# define NV40_FP_REG_TYPE_INPUT 1 +# define NV40_FP_REG_TYPE_CONST 2 +#define NV40_FP_REG_SRC_SHIFT 2 +#define NV40_FP_REG_SRC_MASK (31 << 2) +#define NV40_FP_REG_UNK_0 (1 << 8) +#define NV40_FP_REG_SWZ_ALL_SHIFT 9 +#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9) +#define NV40_FP_REG_SWZ_X_SHIFT 9 +#define NV40_FP_REG_SWZ_X_MASK (3 << 9) +#define NV40_FP_REG_SWZ_Y_SHIFT 11 +#define NV40_FP_REG_SWZ_Y_MASK (3 << 11) +#define NV40_FP_REG_SWZ_Z_SHIFT 13 +#define NV40_FP_REG_SWZ_Z_MASK (3 << 13) +#define NV40_FP_REG_SWZ_W_SHIFT 15 +#define NV40_FP_REG_SWZ_W_MASK (3 << 15) +# define NV40_FP_SWIZZLE_X 0 +# define NV40_FP_SWIZZLE_Y 1 +# define NV40_FP_SWIZZLE_Z 2 +# define NV40_FP_SWIZZLE_W 3 +#define NV40_FP_REG_NEGATE (1 << 17) + +#endif diff --git a/src/mesa/drivers/dri/nouveau/nv40_vertprog.c b/src/mesa/drivers/dri/nouveau/nv40_vertprog.c new file mode 100644 index 0000000000..111c6de71b --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nv40_vertprog.c @@ -0,0 +1,647 @@ +#include "nouveau_shader.h" +#include "nouveau_msg.h" +#include "nv40_shader.h" + +extern nvsSwzComp NV20VP_TX_SWIZZLE[4]; +extern void NV20VPTXSwizzle(int hwswz, nvsSwzComp *swz); + +/***************************************************************************** + * Assembly routines + */ +static int +NV40VPSupportsOpcode(nvsFunc * shader, nvsOpcode op) +{ + if (shader->GetOPTXFromSOP(op, NULL)) + return 1; + return 0; +} + +static void +NV40VPSetOpcode(nvsFunc *shader, unsigned int opcode, int slot) +{ + if (slot) shader->inst[1] |= (opcode << NV40_VP_INST_SCA_OPCODE_SHIFT); + else shader->inst[1] |= (opcode << NV40_VP_INST_VEC_OPCODE_SHIFT); +} + +static void +NV40VPSetCCUpdate(nvsFunc *shader) +{ + shader->inst[0] |= NV40_VP_INST_COND_UPDATE_ENABLE; +} + +static void +NV40VPSetCondition(nvsFunc *shader, int on, nvsCond cond, int reg, + nvsSwzComp *swizzle) +{ + unsigned int hwcond; + + if (on ) shader->inst[0] |= NV40_VP_INST_COND_TEST_ENABLE; + if (reg) shader->inst[0] |= NV40_VP_INST_COND_REG_SELECT_1; + + switch (cond) { + case NVS_COND_TR: hwcond = NV40_VP_INST_COND_TR; break; + case NVS_COND_FL: hwcond = NV40_VP_INST_COND_FL; break; + case NVS_COND_LT: hwcond = NV40_VP_INST_COND_LT; break; + case NVS_COND_GT: hwcond = NV40_VP_INST_COND_GT; break; + case NVS_COND_NE: hwcond = NV40_VP_INST_COND_NE; break; + case NVS_COND_EQ: hwcond = NV40_VP_INST_COND_EQ; break; + case NVS_COND_GE: hwcond = NV40_VP_INST_COND_GE; break; + case NVS_COND_LE: hwcond = NV40_VP_INST_COND_LE; break; + default: + WARN_ONCE("unknown vp cond %d\n", cond); + hwcond = NV40_VP_INST_COND_TR; + break; + } + shader->inst[0] |= (hwcond << NV40_VP_INST_COND_SHIFT); + + shader->inst[0] |= (swizzle[NVS_SWZ_X] << NV40_VP_INST_COND_SWZ_X_SHIFT); + shader->inst[0] |= (swizzle[NVS_SWZ_Y] << NV40_VP_INST_COND_SWZ_Y_SHIFT); + shader->inst[0] |= (swizzle[NVS_SWZ_Z] << NV40_VP_INST_COND_SWZ_Z_SHIFT); + shader->inst[0] |= (swizzle[NVS_SWZ_W] << NV40_VP_INST_COND_SWZ_W_SHIFT); +} + +static void +NV40VPSetResult(nvsFunc *shader, nvsRegister * dest, unsigned int mask, + int slot) +{ + unsigned int hwmask = 0; + + if (mask & SMASK_X) hwmask |= (1 << 3); + if (mask & SMASK_Y) hwmask |= (1 << 2); + if (mask & SMASK_Z) hwmask |= (1 << 1); + if (mask & SMASK_W) hwmask |= (1 << 0); + + if (dest->file == NVS_FILE_RESULT) { + int hwidx; + + switch (dest->index) { + case NVS_FR_POSITION : hwidx = NV40_VP_INST_DEST_POS; break; + case NVS_FR_COL0 : hwidx = NV40_VP_INST_DEST_COL0; break; + case NVS_FR_COL1 : hwidx = NV40_VP_INST_DEST_COL1; break; + case NVS_FR_BFC0 : hwidx = NV40_VP_INST_DEST_BFC0; break; + case NVS_FR_BFC1 : hwidx = NV40_VP_INST_DEST_BFC1; break; + case NVS_FR_FOGCOORD : hwidx = NV40_VP_INST_DEST_FOGC; break; + case NVS_FR_POINTSZ : hwidx = NV40_VP_INST_DEST_PSZ; break; + case NVS_FR_TEXCOORD0: hwidx = NV40_VP_INST_DEST_TC(0); break; + case NVS_FR_TEXCOORD1: hwidx = NV40_VP_INST_DEST_TC(1); break; + case NVS_FR_TEXCOORD2: hwidx = NV40_VP_INST_DEST_TC(2); break; + case NVS_FR_TEXCOORD3: hwidx = NV40_VP_INST_DEST_TC(3); break; + case NVS_FR_TEXCOORD4: hwidx = NV40_VP_INST_DEST_TC(4); break; + case NVS_FR_TEXCOORD5: hwidx = NV40_VP_INST_DEST_TC(5); break; + case NVS_FR_TEXCOORD6: hwidx = NV40_VP_INST_DEST_TC(6); break; + case NVS_FR_TEXCOORD7: hwidx = NV40_VP_INST_DEST_TC(7); break; + default: + WARN_ONCE("unknown vtxprog output %d\n", dest->index); + hwidx = 0; + break; + } + shader->inst[3] |= (hwidx << NV40_VP_INST_DEST_SHIFT); + + if (slot) { + shader->inst[3] |= NV40_VP_INST_SCA_RESULT; + shader->inst[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + } else { + shader->inst[0] |= NV40_VP_INST_VEC_RESULT; + shader->inst[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + } + } else { + /* NVS_FILE_TEMP || NVS_FILE_ADDRESS */ + if (slot) + shader->inst[3] |= (dest->index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT); + else + shader->inst[0] |= (dest->index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT); + } + + if (slot) shader->inst[3] |= (hwmask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + else shader->inst[3] |= (hwmask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); +} + +static void +NV40VPInsertSource(nvsFunc *shader, unsigned int hw, int pos) +{ + switch (pos) { + case 0: + shader->inst[1] |= ((hw & NV40_VP_SRC0_HIGH_MASK) >> + NV40_VP_SRC0_HIGH_SHIFT) + << NV40_VP_INST_SRC0H_SHIFT; + shader->inst[2] |= (hw & NV40_VP_SRC0_LOW_MASK) + << NV40_VP_INST_SRC0L_SHIFT; + break; + case 1: + shader->inst[2] |= hw + << NV40_VP_INST_SRC1_SHIFT; + break; + case 2: + shader->inst[2] |= ((hw & NV40_VP_SRC2_HIGH_MASK) >> + NV40_VP_SRC2_HIGH_SHIFT) + << NV40_VP_INST_SRC2H_SHIFT; + shader->inst[3] |= (hw & NV40_VP_SRC2_LOW_MASK) + << NV40_VP_INST_SRC2L_SHIFT; + break; + default: + assert(0); + break; + } +} + +static void +NV40VPSetSource(nvsFunc *shader, nvsRegister * src, int pos) +{ + unsigned int hw = 0; + + switch (src->file) { + case NVS_FILE_ADDRESS: + break; + case NVS_FILE_ATTRIB: + hw |= (NV40_VP_SRC_REG_TYPE_INPUT << NV40_VP_SRC_REG_TYPE_SHIFT); + + shader->inst[1] |= (src->index << NV40_VP_INST_INPUT_SRC_SHIFT); + if (src->indexed) { + shader->inst[0] |= NV40_VP_INST_INDEX_INPUT; + if (src->addr_reg) + shader->inst[0] |= NV40_VP_INST_ADDR_REG_SELECT_1; + shader->inst[0] |= (src->addr_comp << NV40_VP_INST_ADDR_SWZ_SHIFT); + } + break; + case NVS_FILE_CONST: + hw |= (NV40_VP_SRC_REG_TYPE_CONST << NV40_VP_SRC_REG_TYPE_SHIFT); + + shader->inst[1] |= (src->index << NV40_VP_INST_CONST_SRC_SHIFT); + if (src->indexed) { + shader->inst[3] |= NV40_VP_INST_INDEX_CONST; + if (src->addr_reg) + shader->inst[0] |= NV40_VP_INST_ADDR_REG_SELECT_1; + shader->inst[0] |= (src->addr_comp << NV40_VP_INST_ADDR_SWZ_SHIFT); + } + break; + case NVS_FILE_TEMP: + hw |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT); + hw |= (src->index << NV40_VP_SRC_TEMP_SRC_SHIFT); + break; + default: + fprintf(stderr, "unknown source file %d\n", src->file); + assert(0); + break; + } + + if (src->file != NVS_FILE_ADDRESS) { + if (src->negate) + hw |= NV40_VP_SRC_NEGATE; + if (src->abs) + shader->inst[0] |= (1 << (21 + pos)); + hw |= (src->swizzle[0] << NV40_VP_SRC_SWZ_X_SHIFT); + hw |= (src->swizzle[1] << NV40_VP_SRC_SWZ_Y_SHIFT); + hw |= (src->swizzle[2] << NV40_VP_SRC_SWZ_Z_SHIFT); + hw |= (src->swizzle[3] << NV40_VP_SRC_SWZ_W_SHIFT); + + NV40VPInsertSource(shader, hw, pos); + } +} + +static void +NV40VPSetUnusedSource(nvsFunc *shader, int pos) +{ + unsigned int hw; + + hw = ((NV40_VP_SRC_REG_TYPE_INPUT << NV40_VP_SRC_REG_TYPE_SHIFT) | + (NVS_SWZ_X << NV40_VP_SRC_SWZ_X_SHIFT) | + (NVS_SWZ_Y << NV40_VP_SRC_SWZ_Y_SHIFT) | + (NVS_SWZ_Z << NV40_VP_SRC_SWZ_Z_SHIFT) | + (NVS_SWZ_W << NV40_VP_SRC_SWZ_W_SHIFT)); + + NV40VPInsertSource(shader, hw, pos); +} + +static void +NV40VPSetLastInst(nvsFunc *shader, int pos) +{ + shader->inst[3] |= 1; +} + +/***************************************************************************** + * Disassembly routines + */ +static int +NV40VPHasMergedInst(nvsFunc * shader) +{ + if (shader->GetOpcodeHW(shader, 0) != NV40_VP_INST_OP_NOP && + shader->GetOpcodeHW(shader, 1) != NV40_VP_INST_OP_NOP) + return 1; + return 0; +} + +static unsigned int +NV40VPGetOpcodeHW(nvsFunc * shader, int slot) +{ + int op; + + if (slot) + op = (shader->inst[1] & NV40_VP_INST_SCA_OPCODE_MASK) + >> NV40_VP_INST_SCA_OPCODE_SHIFT; + else + op = (shader->inst[1] & NV40_VP_INST_VEC_OPCODE_MASK) + >> NV40_VP_INST_VEC_OPCODE_SHIFT; + + return op; +} + +static nvsRegFile +NV40VPGetDestFile(nvsFunc * shader, int merged) +{ + nvsOpcode op; + + op = shader->GetOpcode(shader, merged); + switch (op) { + case NVS_OP_ARL: + case NVS_OP_ARR: + case NVS_OP_ARA: + case NVS_OP_POPA: + return NVS_FILE_ADDRESS; + default: + if (shader->GetOpcodeSlot(shader, merged)) { + if (shader->inst[3] & NV40_VP_INST_SCA_RESULT) + return NVS_FILE_RESULT; + } + else { + if (shader->inst[0] & NV40_VP_INST_VEC_RESULT) + return NVS_FILE_RESULT; + } + return NVS_FILE_TEMP; + } + +} + +static unsigned int +NV40VPGetDestID(nvsFunc * shader, int merged) +{ + int id; + + switch (shader->GetDestFile(shader, merged)) { + case NVS_FILE_RESULT: + id = ((shader->inst[3] & NV40_VP_INST_DEST_MASK) + >> NV40_VP_INST_DEST_SHIFT); + switch (id) { + case NV40_VP_INST_DEST_POS : return NVS_FR_POSITION; + case NV40_VP_INST_DEST_COL0: return NVS_FR_COL0; + case NV40_VP_INST_DEST_COL1: return NVS_FR_COL1; + case NV40_VP_INST_DEST_BFC0: return NVS_FR_BFC0; + case NV40_VP_INST_DEST_BFC1: return NVS_FR_BFC1; + case NV40_VP_INST_DEST_FOGC: { + int mask = shader->GetDestMask(shader, merged); + switch (mask) { + case SMASK_X: return NVS_FR_FOGCOORD; + case SMASK_Y: return NVS_FR_CLIP0; + case SMASK_Z: return NVS_FR_CLIP1; + case SMASK_W: return NVS_FR_CLIP2; + default: + printf("more than 1 mask component set in FOGC writemask!\n"); + return NVS_FR_UNKNOWN; + } + } + case NV40_VP_INST_DEST_PSZ: + { + int mask = shader->GetDestMask(shader, merged); + switch (mask) { + case SMASK_X: return NVS_FR_POINTSZ; + case SMASK_Y: return NVS_FR_CLIP3; + case SMASK_Z: return NVS_FR_CLIP4; + case SMASK_W: return NVS_FR_CLIP5; + default: + printf("more than 1 mask component set in PSZ writemask!\n"); + return NVS_FR_UNKNOWN; + } + } + case NV40_VP_INST_DEST_TC(0): return NVS_FR_TEXCOORD0; + case NV40_VP_INST_DEST_TC(1): return NVS_FR_TEXCOORD1; + case NV40_VP_INST_DEST_TC(2): return NVS_FR_TEXCOORD2; + case NV40_VP_INST_DEST_TC(3): return NVS_FR_TEXCOORD3; + case NV40_VP_INST_DEST_TC(4): return NVS_FR_TEXCOORD4; + case NV40_VP_INST_DEST_TC(5): return NVS_FR_TEXCOORD5; + case NV40_VP_INST_DEST_TC(6): return NVS_FR_TEXCOORD6; + case NV40_VP_INST_DEST_TC(7): return NVS_FR_TEXCOORD7; + default: + return -1; + } + case NVS_FILE_ADDRESS: + /* Instructions that write address regs are encoded as if + * they would write temps. + */ + case NVS_FILE_TEMP: + if (shader->GetOpcodeSlot(shader, merged)) + id = ((shader->inst[3] & NV40_VP_INST_SCA_DEST_TEMP_MASK) + >> NV40_VP_INST_SCA_DEST_TEMP_SHIFT); + else + id = ((shader->inst[0] & NV40_VP_INST_VEC_DEST_TEMP_MASK) + >> NV40_VP_INST_VEC_DEST_TEMP_SHIFT); + return id; + default: + return -1; + } +} + +static unsigned int +NV40VPGetDestMask(nvsFunc * shader, int merged) +{ + unsigned int mask = 0; + + if (shader->GetOpcodeSlot(shader, merged)) { + if (shader->inst[3] & NV40_VP_INST_SCA_WRITEMASK_X) mask |= SMASK_X; + if (shader->inst[3] & NV40_VP_INST_SCA_WRITEMASK_Y) mask |= SMASK_Y; + if (shader->inst[3] & NV40_VP_INST_SCA_WRITEMASK_Z) mask |= SMASK_Z; + if (shader->inst[3] & NV40_VP_INST_SCA_WRITEMASK_W) mask |= SMASK_W; + } else { + if (shader->inst[3] & NV40_VP_INST_VEC_WRITEMASK_X) mask |= SMASK_X; + if (shader->inst[3] & NV40_VP_INST_VEC_WRITEMASK_Y) mask |= SMASK_Y; + if (shader->inst[3] & NV40_VP_INST_VEC_WRITEMASK_Z) mask |= SMASK_Z; + if (shader->inst[3] & NV40_VP_INST_VEC_WRITEMASK_W) mask |= SMASK_W; + } + + return mask; +} + +static unsigned int +NV40VPGetSourceHW(nvsFunc * shader, int merged, int pos) +{ + struct _op_xlat *opr; + unsigned int src; + + opr = shader->GetOPTXRec(shader, merged); + if (!opr) + return -1; + + switch (opr->srcpos[pos]) { + case 0: + src = ((shader->inst[1] & NV40_VP_INST_SRC0H_MASK) + >> NV40_VP_INST_SRC0H_SHIFT) + << NV40_VP_SRC0_HIGH_SHIFT; + src |= ((shader->inst[2] & NV40_VP_INST_SRC0L_MASK) + >> NV40_VP_INST_SRC0L_SHIFT); + break; + case 1: + src = ((shader->inst[2] & NV40_VP_INST_SRC1_MASK) + >> NV40_VP_INST_SRC1_SHIFT); + break; + case 2: + src = ((shader->inst[2] & NV40_VP_INST_SRC2H_MASK) + >> NV40_VP_INST_SRC2H_SHIFT) + << NV40_VP_SRC2_HIGH_SHIFT; + src |= ((shader->inst[3] & NV40_VP_INST_SRC2L_MASK) + >> NV40_VP_INST_SRC2L_SHIFT); + break; + default: + src = -1; + } + + return src; +} + +static nvsRegFile +NV40VPGetSourceFile(nvsFunc * shader, int merged, int pos) +{ + unsigned int src; + struct _op_xlat *opr; + int file; + + opr = shader->GetOPTXRec(shader, merged); + if (!opr || opr->srcpos[pos] == -1) + return -1; + + switch (opr->srcpos[pos]) { + case SPOS_ADDRESS: return NVS_FILE_ADDRESS; + default: + src = shader->GetSourceHW(shader, merged, pos); + file = (src & NV40_VP_SRC_REG_TYPE_MASK) >> NV40_VP_SRC_REG_TYPE_SHIFT; + + switch (file) { + case NV40_VP_SRC_REG_TYPE_TEMP : return NVS_FILE_TEMP; + case NV40_VP_SRC_REG_TYPE_INPUT: return NVS_FILE_ATTRIB; + case NV40_VP_SRC_REG_TYPE_CONST: return NVS_FILE_CONST; + default: + return NVS_FILE_UNKNOWN; + } + } +} + +static int +NV40VPGetSourceID(nvsFunc * shader, int merged, int pos) +{ + switch (shader->GetSourceFile(shader, merged, pos)) { + case NVS_FILE_ATTRIB: + switch ((shader->inst[1] & NV40_VP_INST_INPUT_SRC_MASK) + >> NV40_VP_INST_INPUT_SRC_SHIFT) { + case NV40_VP_INST_IN_POS: return NVS_FR_POSITION; + case NV40_VP_INST_IN_WEIGHT: return NVS_FR_WEIGHT; + case NV40_VP_INST_IN_NORMAL: return NVS_FR_NORMAL; + case NV40_VP_INST_IN_COL0: return NVS_FR_COL0; + case NV40_VP_INST_IN_COL1: return NVS_FR_COL1; + case NV40_VP_INST_IN_FOGC: return NVS_FR_FOGCOORD; + case NV40_VP_INST_IN_TC(0): return NVS_FR_TEXCOORD0; + case NV40_VP_INST_IN_TC(1): return NVS_FR_TEXCOORD1; + case NV40_VP_INST_IN_TC(2): return NVS_FR_TEXCOORD2; + case NV40_VP_INST_IN_TC(3): return NVS_FR_TEXCOORD3; + case NV40_VP_INST_IN_TC(4): return NVS_FR_TEXCOORD4; + case NV40_VP_INST_IN_TC(5): return NVS_FR_TEXCOORD5; + case NV40_VP_INST_IN_TC(6): return NVS_FR_TEXCOORD6; + case NV40_VP_INST_IN_TC(7): return NVS_FR_TEXCOORD7; + default: + return -1; + } + break; + case NVS_FILE_CONST: + return ((shader->inst[1] & NV40_VP_INST_CONST_SRC_MASK) + >> NV40_VP_INST_CONST_SRC_SHIFT); + case NVS_FILE_TEMP: + { + unsigned int src; + + src = shader->GetSourceHW(shader, merged, pos); + return ((src & NV40_VP_SRC_TEMP_SRC_MASK) >> + NV40_VP_SRC_TEMP_SRC_SHIFT); + } + default: + return -1; + } +} + +static int +NV40VPGetSourceNegate(nvsFunc * shader, int merged, int pos) +{ + unsigned int src; + + src = shader->GetSourceHW(shader, merged, pos); + + if (src == -1) + return -1; + return ((src & NV40_VP_SRC_NEGATE) ? 1 : 0); +} + +static void +NV40VPGetSourceSwizzle(nvsFunc * shader, int merged, int pos, nvsSwzComp *swz) +{ + unsigned int src; + int swzbits; + + src = shader->GetSourceHW(shader, merged, pos); + swzbits = (src & NV40_VP_SRC_SWZ_ALL_MASK) >> NV40_VP_SRC_SWZ_ALL_SHIFT; + NV20VPTXSwizzle(swzbits, swz); +} + +static int +NV40VPGetSourceIndexed(nvsFunc * shader, int merged, int pos) +{ + switch (shader->GetSourceFile(shader, merged, pos)) { + case NVS_FILE_ATTRIB: + return ((shader->inst[0] & NV40_VP_INST_INDEX_INPUT) ? 1 : 0); + case NVS_FILE_CONST: + return ((shader->inst[3] & NV40_VP_INST_INDEX_CONST) ? 1 : 0); + default: + return 0; + } +} + +static nvsSwzComp +NV40VPGetAddressRegSwizzle(nvsFunc * shader) +{ + nvsSwzComp swz; + + swz = NV20VP_TX_SWIZZLE[(shader->inst[0] & NV40_VP_INST_ADDR_SWZ_MASK) + >> NV40_VP_INST_ADDR_SWZ_SHIFT]; + return swz; +} + +static int +NV40VPSupportsConditional(nvsFunc * shader) +{ + /*FIXME: Is this true of all ops? */ + return 1; +} + +static int +NV40VPGetConditionUpdate(nvsFunc * shader) +{ + return ((shader->inst[0] & NV40_VP_INST_COND_UPDATE_ENABLE) ? 1 : 0); +} + +static int +NV40VPGetConditionTest(nvsFunc * shader) +{ + int op; + + /* The condition test is unconditionally enabled on some + * instructions. ie: the condition test bit does *NOT* have + * to be set. + * + * FIXME: check other relevant ops for this situation. + */ + op = shader->GetOpcodeHW(shader, 1); + switch (op) { + case NV40_VP_INST_OP_BRA: + return 1; + default: + return ((shader->inst[0] & NV40_VP_INST_COND_TEST_ENABLE) ? 1 : 0); + } +} + +static nvsCond +NV40VPGetCondition(nvsFunc * shader) +{ + int cond; + + cond = ((shader->inst[0] & NV40_VP_INST_COND_MASK) + >> NV40_VP_INST_COND_SHIFT); + + switch (cond) { + case NV40_VP_INST_COND_FL: return NVS_COND_FL; + case NV40_VP_INST_COND_LT: return NVS_COND_LT; + case NV40_VP_INST_COND_EQ: return NVS_COND_EQ; + case NV40_VP_INST_COND_LE: return NVS_COND_LE; + case NV40_VP_INST_COND_GT: return NVS_COND_GT; + case NV40_VP_INST_COND_NE: return NVS_COND_NE; + case NV40_VP_INST_COND_GE: return NVS_COND_GE; + case NV40_VP_INST_COND_TR: return NVS_COND_TR; + default: + return NVS_COND_UNKNOWN; + } +} + +static void +NV40VPGetCondRegSwizzle(nvsFunc * shader, nvsSwzComp *swz) +{ + int swzbits; + + swzbits = (shader->inst[0] & NV40_VP_INST_COND_SWZ_ALL_MASK) + >> NV40_VP_INST_COND_SWZ_ALL_SHIFT; + NV20VPTXSwizzle(swzbits, swz); +} + +static int +NV40VPGetCondRegID(nvsFunc * shader) +{ + return ((shader->inst[0] & NV40_VP_INST_COND_REG_SELECT_1) ? 1 : 0); +} + +static int +NV40VPGetBranch(nvsFunc * shader) +{ + int addr; + + addr = ((shader->inst[2] & NV40_VP_INST_IADDRH_MASK) + >> NV40_VP_INST_IADDRH_SHIFT) << 3; + addr |= ((shader->inst[3] & NV40_VP_INST_IADDRL_MASK) + >> NV40_VP_INST_IADDRL_SHIFT); + return addr; +} + +void +NV40VPInitShaderFuncs(nvsFunc * shader) +{ + /* Inherit NV30 VP code, we share some of it */ + NV30VPInitShaderFuncs(shader); + + /* Limits */ + shader->MaxInst = 4096; + shader->MaxAttrib = 16; + shader->MaxTemp = 32; + shader->MaxAddress = 2; + shader->MaxConst = 256; + shader->caps = SCAP_SRC_ABS; + + /* Add extra opcodes for NV40+ */ +// MOD_OPCODE(NVVP_TX_VOP, NV40_VP_INST_OP_TXWHAT, NVS_OP_TEX , 0, 4, -1); + MOD_OPCODE(NVVP_TX_SOP, NV40_VP_INST_OP_PUSHA, NVS_OP_PUSHA, 3, -1, -1); + MOD_OPCODE(NVVP_TX_SOP, NV40_VP_INST_OP_POPA , NVS_OP_POPA , -1, -1, -1); + + shader->SupportsOpcode = NV40VPSupportsOpcode; + shader->SetOpcode = NV40VPSetOpcode; + shader->SetCCUpdate = NV40VPSetCCUpdate; + shader->SetCondition = NV40VPSetCondition; + shader->SetResult = NV40VPSetResult; + shader->SetSource = NV40VPSetSource; + shader->SetUnusedSource = NV40VPSetUnusedSource; + shader->SetLastInst = NV40VPSetLastInst; + + shader->HasMergedInst = NV40VPHasMergedInst; + shader->GetOpcodeHW = NV40VPGetOpcodeHW; + + shader->GetDestFile = NV40VPGetDestFile; + shader->GetDestID = NV40VPGetDestID; + shader->GetDestMask = NV40VPGetDestMask; + + shader->GetSourceHW = NV40VPGetSourceHW; + shader->GetSourceFile = NV40VPGetSourceFile; + shader->GetSourceID = NV40VPGetSourceID; + shader->GetSourceNegate = NV40VPGetSourceNegate; + shader->GetSourceSwizzle = NV40VPGetSourceSwizzle; + shader->GetSourceIndexed = NV40VPGetSourceIndexed; + + shader->GetRelAddressSwizzle = NV40VPGetAddressRegSwizzle; + + shader->SupportsConditional = NV40VPSupportsConditional; + shader->GetConditionUpdate = NV40VPGetConditionUpdate; + shader->GetConditionTest = NV40VPGetConditionTest; + shader->GetCondition = NV40VPGetCondition; + shader->GetCondRegSwizzle = NV40VPGetCondRegSwizzle; + shader->GetCondRegID = NV40VPGetCondRegID; + + shader->GetBranch = NV40VPGetBranch; +} -- cgit v1.2.3 From b4dcb99cbc21ad1dafa12e31086f9e0d5fc05e81 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Sun, 3 Dec 2006 02:01:49 +0000 Subject: Fix a copy+paste'o that caused some *very* strange bugs.. --- src/mesa/drivers/dri/nouveau/nouveau_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_object.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.c b/src/mesa/drivers/dri/nouveau/nouveau_object.c index cd46feff7c..032cdee2f7 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.c @@ -33,7 +33,7 @@ static GLboolean nouveauCreateDmaObject(nouveauContextPtr nmesa, dma.target = target; dma.access = access; dma.offset = offset; - dma.handle = handle; + dma.size = size; ret = drmCommandWriteRead(nmesa->driFd, DRM_NOUVEAU_DMA_OBJECT_INIT, &dma, sizeof(dma)); return ret == 0; -- cgit v1.2.3 From 4cfb762c3eb2ea9a764c7ba0811c338ef5fba8fe Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 8 Dec 2006 03:01:33 +0000 Subject: Some work on buffer handling, most likely not entirely correct and incomplete. But, it works well enough that windows can be moved/resized. --- src/mesa/drivers/dri/nouveau/Makefile | 1 + src/mesa/drivers/dri/nouveau/nouveau_buffers.c | 331 +++++++++++++++++++++++++ src/mesa/drivers/dri/nouveau/nouveau_buffers.h | 41 +++ src/mesa/drivers/dri/nouveau/nouveau_context.c | 79 +++++- src/mesa/drivers/dri/nouveau/nouveau_context.h | 20 ++ src/mesa/drivers/dri/nouveau/nouveau_driver.c | 29 +++ src/mesa/drivers/dri/nouveau/nouveau_driver.h | 5 +- src/mesa/drivers/dri/nouveau/nouveau_lock.c | 18 ++ src/mesa/drivers/dri/nouveau/nouveau_object.c | 23 +- src/mesa/drivers/dri/nouveau/nouveau_object.h | 9 +- src/mesa/drivers/dri/nouveau/nouveau_screen.c | 128 +++++----- src/mesa/drivers/dri/nouveau/nouveau_shader.c | 2 + src/mesa/drivers/dri/nouveau/nouveau_shader.h | 2 + src/mesa/drivers/dri/nouveau/nouveau_span.c | 14 +- src/mesa/drivers/dri/nouveau/nouveau_span.h | 3 +- src/mesa/drivers/dri/nouveau/nouveau_state.c | 42 ++-- src/mesa/drivers/dri/nouveau/nouveau_state.h | 6 +- src/mesa/drivers/dri/nouveau/nv30_fragprog.c | 29 +-- src/mesa/drivers/dri/nouveau/nv30_state.c | 152 +++++++++++- 19 files changed, 784 insertions(+), 150 deletions(-) create mode 100644 src/mesa/drivers/dri/nouveau/nouveau_buffers.c create mode 100644 src/mesa/drivers/dri/nouveau/nouveau_buffers.h (limited to 'src/mesa/drivers/dri/nouveau/nouveau_object.c') diff --git a/src/mesa/drivers/dri/nouveau/Makefile b/src/mesa/drivers/dri/nouveau/Makefile index 384713eeeb..1a76169156 100644 --- a/src/mesa/drivers/dri/nouveau/Makefile +++ b/src/mesa/drivers/dri/nouveau/Makefile @@ -8,6 +8,7 @@ LIBNAME = nouveau_dri.so MINIGLX_SOURCES = DRIVER_SOURCES = \ + nouveau_buffers.c \ nouveau_card.c \ nouveau_context.c \ nouveau_driver.c \ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_buffers.c b/src/mesa/drivers/dri/nouveau/nouveau_buffers.c new file mode 100644 index 0000000000..a356fd1212 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_buffers.c @@ -0,0 +1,331 @@ +#include "utils.h" +#include "framebuffer.h" +#include "renderbuffer.h" +#include "fbobject.h" + +#include "nouveau_context.h" +#include "nouveau_buffers.h" + +void +nouveau_mem_free(GLcontext *ctx, nouveau_mem *mem) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + drm_nouveau_mem_free_t memf; + + if (mem->map) + drmUnmap(mem->map, mem->size); + memf.flags = mem->type; + memf.region_offset = mem->offset; + drmCommandWrite(nmesa->driFd, DRM_NOUVEAU_MEM_FREE, &memf, sizeof(memf)); + FREE(mem); +} + +nouveau_mem * +nouveau_mem_alloc(GLcontext *ctx, int type, GLuint size, GLuint align) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + drm_nouveau_mem_alloc_t mema; + nouveau_mem *mem; + int ret; + + mem = CALLOC(sizeof(nouveau_mem)); + if (!mem) + return NULL; + + mema.flags = mem->type = type; + mema.size = mem->size = size; + mema.alignment = align; + mem->map = NULL; + ret = drmCommandWriteRead(nmesa->driFd, DRM_NOUVEAU_MEM_ALLOC, + &mema, sizeof(mema)); + if (ret) { + FREE(mem); + return NULL; + } + mem->offset = mema.region_offset; + + if (type & NOUVEAU_MEM_MAPPED) + ret = drmMap(nmesa->driFd, mem->offset, mem->size, &mem->map); + if (ret) { + mem->map = NULL; + nouveau_mem_free(ctx, mem); + mem = NULL; + } + + return mem; +} + +uint32_t +nouveau_mem_gpu_offset_get(GLcontext *ctx, nouveau_mem *mem) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + + if (mem->type & NOUVEAU_MEM_FB) + return (uint32_t)mem->offset - nmesa->vram_phys; + else if (mem->type & NOUVEAU_MEM_AGP) + return (uint32_t)mem->offset - nmesa->agp_phys; + else + return 0xDEADF00D; +} + +static GLboolean +nouveau_renderbuffer_pixelformat(nouveau_renderbuffer *nrb, + GLenum internalFormat) +{ + nrb->mesa.InternalFormat = internalFormat; + + /*TODO: We probably want to extend this a bit, and maybe make + * card-specific? + */ + switch (internalFormat) { + case GL_RGBA: + case GL_RGBA8: + nrb->mesa._BaseFormat = GL_RGBA; + nrb->mesa._ActualFormat= GL_RGBA8; + nrb->mesa.DataType = GL_UNSIGNED_BYTE; + nrb->mesa.RedBits = 8; + nrb->mesa.GreenBits = 8; + nrb->mesa.BlueBits = 8; + nrb->mesa.AlphaBits = 8; + nrb->cpp = 4; + break; + case GL_RGB5: + nrb->mesa._BaseFormat = GL_RGB; + nrb->mesa._ActualFormat= GL_RGB5; + nrb->mesa.DataType = GL_UNSIGNED_BYTE; + nrb->mesa.RedBits = 5; + nrb->mesa.GreenBits = 6; + nrb->mesa.BlueBits = 5; + nrb->mesa.AlphaBits = 0; + nrb->cpp = 2; + break; + case GL_DEPTH_COMPONENT16: + nrb->mesa._BaseFormat = GL_DEPTH_COMPONENT; + nrb->mesa._ActualFormat= GL_DEPTH_COMPONENT16; + nrb->mesa.DataType = GL_UNSIGNED_SHORT; + nrb->mesa.DepthBits = 16; + nrb->cpp = 2; + break; + case GL_DEPTH_COMPONENT24: + nrb->mesa._BaseFormat = GL_DEPTH_COMPONENT; + nrb->mesa._ActualFormat= GL_DEPTH24_STENCIL8_EXT; + nrb->mesa.DataType = GL_UNSIGNED_INT_24_8_EXT; + nrb->mesa.DepthBits = 24; + nrb->cpp = 4; + break; + case GL_STENCIL_INDEX8_EXT: + nrb->mesa._BaseFormat = GL_STENCIL_INDEX; + nrb->mesa._ActualFormat= GL_DEPTH24_STENCIL8_EXT; + nrb->mesa.DataType = GL_UNSIGNED_INT_24_8_EXT; + nrb->mesa.StencilBits = 8; + nrb->cpp = 4; + break; + case GL_DEPTH24_STENCIL8_EXT: + nrb->mesa._BaseFormat = GL_DEPTH_STENCIL_EXT; + nrb->mesa._ActualFormat= GL_DEPTH24_STENCIL8_EXT; + nrb->mesa.DataType = GL_UNSIGNED_INT_24_8_EXT; + nrb->mesa.DepthBits = 24; + nrb->mesa.StencilBits = 8; + nrb->cpp = 4; + break; + default: + return GL_FALSE; + break; + } + + return GL_TRUE; +} + +static GLboolean +nouveau_renderbuffer_storage(GLcontext *ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, + GLuint width, + GLuint height) +{ + nouveau_renderbuffer *nrb = (nouveau_renderbuffer*)rb; + + if (!nouveau_renderbuffer_pixelformat(nrb, internalFormat)) { + fprintf(stderr, "%s: unknown internalFormat\n", __func__); + return GL_FALSE; + } + + /* If this buffer isn't statically alloc'd, we may need to ask the + * drm for more memory */ + if (!nrb->map && (rb->Width != width || rb->Height != height)) { + GLuint pitch; + + /* align pitches to 64 bytes */ + pitch = ((width * nrb->cpp) + 63) & ~63; + + if (nrb->mem) + nouveau_mem_free(ctx, nrb->mem); + nrb->mem = nouveau_mem_alloc(ctx, + NOUVEAU_MEM_FB | NOUVEAU_MEM_MAPPED, + pitch*height, + 0); + if (!nrb->mem) + return GL_FALSE; + + /* update nouveau_renderbuffer info */ + nrb->offset = nouveau_mem_gpu_offset_get(ctx, nrb->mem); + nrb->pitch = pitch; + } + + rb->Width = width; + rb->Height = height; + rb->InternalFormat = internalFormat; + return GL_TRUE; +} + +static void +nouveau_renderbuffer_delete(struct gl_renderbuffer *rb) +{ + GET_CURRENT_CONTEXT(ctx); + nouveau_renderbuffer *nrb = (nouveau_renderbuffer*)rb; + + if (nrb->mem) + nouveau_mem_free(ctx, nrb->mem); + FREE(nrb); +} + +nouveau_renderbuffer * +nouveau_renderbuffer_new(GLenum internalFormat, GLvoid *map, + GLuint offset, GLuint pitch, + __DRIdrawablePrivate *dPriv) +{ + nouveau_renderbuffer *nrb; + + nrb = CALLOC_STRUCT(nouveau_renderbuffer_t); + if (nrb) { + _mesa_init_renderbuffer(&nrb->mesa, 0); + + nouveau_renderbuffer_pixelformat(nrb, internalFormat); + + nrb->mesa.AllocStorage = nouveau_renderbuffer_storage; + nrb->mesa.Delete = nouveau_renderbuffer_delete; + + nrb->dPriv = dPriv; + nrb->offset = offset; + nrb->pitch = pitch; + nrb->map = map; + } + + return nrb; +} + +void +nouveau_window_moved(GLcontext *ctx) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + + /* Viewport depends on window size/position, nouveauCalcViewport + * will take care of calling the hw-specific WindowMoved + */ + ctx->Driver.Viewport(ctx, ctx->Viewport.X, ctx->Viewport.Y, + ctx->Viewport.Width, ctx->Viewport.Height); + /* Scissor depends on window position */ + ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y, + ctx->Scissor.Width, ctx->Scissor.Height); +} + +GLboolean +nouveau_build_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + nouveau_renderbuffer *color[MAX_DRAW_BUFFERS]; + nouveau_renderbuffer *depth; + + _mesa_update_framebuffer(ctx); + _mesa_update_draw_buffer_bounds(ctx); + + color[0] = (nouveau_renderbuffer *)fb->_ColorDrawBuffers[0][0]; + depth = (nouveau_renderbuffer *)fb->_DepthBuffer; + + if (!nmesa->hw_func.BindBuffers(nmesa, 1, color, depth)) + return GL_FALSE; + nouveau_window_moved(ctx); + + return GL_TRUE; +} + +nouveau_renderbuffer * +nouveau_current_draw_buffer(GLcontext *ctx) +{ + struct gl_framebuffer *fb = ctx->DrawBuffer; + nouveau_renderbuffer *nrb; + + if (!fb) + return NULL; + + if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_FRONT_LEFT) + nrb = (nouveau_renderbuffer *) + fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; + else if (fb->_ColorDrawBufferMask[0] == BUFFER_BIT_BACK_LEFT) + nrb = (nouveau_renderbuffer *) + fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; + else + nrb = NULL; + return nrb; +} + +static struct gl_framebuffer * +nouveauNewFramebuffer(GLcontext *ctx, GLuint name) +{ + return _mesa_new_framebuffer(ctx, name); +} + +static struct gl_renderbuffer * +nouveauNewRenderbuffer(GLcontext *ctx, GLuint name) +{ + nouveau_renderbuffer *nrb; + + nrb = CALLOC_STRUCT(nouveau_renderbuffer_t); + if (nrb) { + _mesa_init_renderbuffer(&nrb->mesa, name); + + nrb->mesa.AllocStorage = nouveau_renderbuffer_storage; + nrb->mesa.Delete = nouveau_renderbuffer_delete; + } + return &nrb->mesa; +} + +static void +nouveauBindFramebuffer(GLcontext *ctx, GLenum target, struct gl_framebuffer *fb) +{ + nouveau_build_framebuffer(ctx, fb); +} + +static void +nouveauFramebufferRenderbuffer(GLcontext *ctx, + struct gl_framebuffer *fb, + GLenum attachment, + struct gl_renderbuffer *rb) +{ + _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); + nouveau_build_framebuffer(ctx, fb); +} + +static void +nouveauRenderTexture(GLcontext *ctx, + struct gl_framebuffer *fb, + struct gl_renderbuffer_attachment *att) +{ +} + +static void +nouveauFinishRenderTexture(GLcontext *ctx, + struct gl_renderbuffer_attachment *att) +{ +} + +void +nouveauInitBufferFuncs(struct dd_function_table *func) +{ + func->NewFramebuffer = nouveauNewFramebuffer; + func->NewRenderbuffer = nouveauNewRenderbuffer; + func->BindFramebuffer = nouveauBindFramebuffer; + func->FramebufferRenderbuffer = nouveauFramebufferRenderbuffer; + func->RenderTexture = nouveauRenderTexture; + func->FinishRenderTexture = nouveauFinishRenderTexture; +} + diff --git a/src/mesa/drivers/dri/nouveau/nouveau_buffers.h b/src/mesa/drivers/dri/nouveau/nouveau_buffers.h new file mode 100644 index 0000000000..bb297ad558 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_buffers.h @@ -0,0 +1,41 @@ +#ifndef __NOUVEAU_BUFFERS_H__ +#define __NOUVEAU_BUFFERS_H__ + +#include +#include "mtypes.h" +#include "utils.h" +#include "renderbuffer.h" + +typedef struct nouveau_mem_t { + int type; + uint64_t offset; + uint64_t size; + void* map; +} nouveau_mem; + +extern nouveau_mem *nouveau_mem_alloc(GLcontext *ctx, int type, + GLuint size, GLuint align); +extern void nouveau_mem_free(GLcontext *ctx, nouveau_mem *mem); +extern uint32_t nouveau_mem_gpu_offset_get(GLcontext *ctx, nouveau_mem *mem); + +typedef struct nouveau_renderbuffer_t { + struct gl_renderbuffer mesa; /* must be first! */ + __DRIdrawablePrivate *dPriv; + + nouveau_mem *mem; + void * map; + + int cpp; + uint32_t offset; + uint32_t pitch; +} nouveau_renderbuffer; + +extern nouveau_renderbuffer *nouveau_renderbuffer_new(GLenum internalFormat, + GLvoid *map, GLuint offset, GLuint pitch, __DRIdrawablePrivate *dPriv); +extern void nouveau_window_moved(GLcontext *ctx); +extern GLboolean nouveau_build_framebuffer(GLcontext *, struct gl_framebuffer *); +extern nouveau_renderbuffer *nouveau_current_draw_buffer(GLcontext *ctx); + +extern void nouveauInitBufferFuncs(struct dd_function_table *func); + +#endif diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c index b208d6c9f5..f48c54416a 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c @@ -32,6 +32,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "array_cache/acache.h" +#include "framebuffer.h" #include "tnl/tnl.h" #include "tnl/t_pipeline.h" @@ -47,6 +48,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "nouveau_fifo.h" #include "nouveau_tex.h" #include "nouveau_msg.h" +#include "nouveau_reg.h" #include "nv10_swtcl.h" #include "vblank.h" @@ -96,10 +98,17 @@ GLboolean nouveauCreateContext( const __GLcontextModes *glVisual, screen=nmesa->screen; /* Create the hardware context */ + if (!nouveauDRMGetParam(nmesa, NOUVEAU_GETPARAM_FB_PHYSICAL, + &nmesa->vram_phys)) + return GL_FALSE; + if (!nouveauDRMGetParam(nmesa, NOUVEAU_GETPARAM_AGP_PHYSICAL, + &nmesa->agp_phys)) + return GL_FALSE; if (!nouveauFifoInit(nmesa)) return GL_FALSE; nouveauObjectInit(nmesa); + /* Init default driver functions then plug in our nouveau-specific functions * (the texture functions are especially important) */ @@ -169,6 +178,7 @@ GLboolean nouveauCreateContext( const __GLcontextModes *glVisual, break; } + nmesa->hw_func.InitCard(nmesa); nouveauInitState(ctx); driContextPriv->driverPrivate = (void *)nmesa; @@ -208,17 +218,26 @@ GLboolean nouveauMakeCurrent( __DRIcontextPrivate *driContextPriv, __DRIdrawablePrivate *driReadPriv ) { if ( driContextPriv ) { - GET_CURRENT_CONTEXT(ctx); - nouveauContextPtr oldNOUVEAUCtx = ctx ? NOUVEAU_CONTEXT(ctx) : NULL; - nouveauContextPtr newNOUVEAUCtx = (nouveauContextPtr) driContextPriv->driverPrivate; - - driDrawableInitVBlank(driDrawPriv, newNOUVEAUCtx->vblank_flags, &newNOUVEAUCtx->vblank_seq ); - newNOUVEAUCtx->driDrawable = driDrawPriv; - - _mesa_make_current( newNOUVEAUCtx->glCtx, - (GLframebuffer *) driDrawPriv->driverPrivate, - (GLframebuffer *) driReadPriv->driverPrivate ); - + nouveauContextPtr nmesa = (nouveauContextPtr) driContextPriv->driverPrivate; + struct gl_framebuffer *draw_fb = + (struct gl_framebuffer*)driDrawPriv->driverPrivate; + struct gl_framebuffer *read_fb = + (struct gl_framebuffer*)driReadPriv->driverPrivate; + + driDrawableInitVBlank(driDrawPriv, nmesa->vblank_flags, &nmesa->vblank_seq ); + nmesa->driDrawable = driDrawPriv; + + _mesa_resize_framebuffer(nmesa->glCtx, draw_fb, + driDrawPriv->w, driDrawPriv->h); + if (draw_fb != read_fb) { + _mesa_resize_framebuffer(nmesa->glCtx, draw_fb, + driReadPriv->w, + driReadPriv->h); + } + _mesa_make_current(nmesa->glCtx, draw_fb, read_fb); + + nouveau_build_framebuffer(nmesa->glCtx, + driDrawPriv->driverPrivate); } else { _mesa_make_current( NULL, NULL, NULL ); } @@ -234,8 +253,46 @@ GLboolean nouveauUnbindContext( __DRIcontextPrivate *driContextPriv ) return GL_TRUE; } +static void nouveauDoSwapBuffers(nouveauContextPtr nmesa, + __DRIdrawablePrivate *dPriv) +{ + struct gl_framebuffer *fb; + nouveau_renderbuffer *src, *dst; + + fb = (struct gl_framebuffer *)dPriv->driverPrivate; + dst = (nouveau_renderbuffer*) + fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; + src = (nouveau_renderbuffer*) + fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; + +#ifdef ALLOW_MULTI_SUBCHANNEL + /* Ignore this.. it's a hack to test double-buffering, and not how + * SwapBuffers should look :) + */ + BEGIN_RING_SIZE(NvSubCtxSurf2D, NV10_CONTEXT_SURFACES_2D_FORMAT, 4); + OUT_RING (6); /* X8R8G8B8 */ + OUT_RING ((dst->pitch << 16) | src->pitch); + OUT_RING (src->offset); + OUT_RING (dst->offset); + + BEGIN_RING_SIZE(NvSubImageBlit, NV10_IMAGE_BLIT_SET_POINT, 3); + OUT_RING ((0 << 16) | 0); /* src point */ + OUT_RING ((0 << 16) | 0); /* dst point */ + OUT_RING ((fb->Height << 16) | fb->Width); /* width/height */ +#endif +} + void nouveauSwapBuffers(__DRIdrawablePrivate *dPriv) { + if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { + nouveauContextPtr nmesa = dPriv->driContextPriv->driverPrivate; + + if (nmesa->glCtx->Visual.doubleBufferMode) { + _mesa_notifySwapBuffers(nmesa->glCtx); + nouveauDoSwapBuffers(nmesa, dPriv); + } + + } } void nouveauCopySubBuffer(__DRIdrawablePrivate *dPriv, diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.h b/src/mesa/drivers/dri/nouveau/nouveau_context.h index 947e95d18b..211d4e0a6d 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.h @@ -38,6 +38,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "nouveau_screen.h" #include "nouveau_state_cache.h" +#include "nouveau_buffers.h" #include "nouveau_shader.h" #include "xmlconfig.h" @@ -75,6 +76,17 @@ typedef void (*nouveau_line_func)( struct nouveau_context*, typedef void (*nouveau_point_func)( struct nouveau_context*, nouveauVertex * ); +typedef struct nouveau_hw_func_t { + /* Initialise any card-specific non-GL related state */ + GLboolean (*InitCard)(struct nouveau_context *); + /* Update buffer offset/pitch/format */ + GLboolean (*BindBuffers)(struct nouveau_context *, int num_color, + nouveau_renderbuffer **color, + nouveau_renderbuffer *depth); + /* Update anything that depends on the window position/size */ + void (*WindowMoved)(struct nouveau_context *); +} nouveau_hw_func; + typedef struct nouveau_context { /* Mesa context */ GLcontext *glCtx; @@ -85,6 +97,13 @@ typedef struct nouveau_context { /* The read-only regs */ volatile unsigned char* mmio; + /* Physical addresses of AGP/VRAM apertures */ + uint64_t vram_phys; + uint64_t agp_phys; + + /* Additional hw-specific functions */ + nouveau_hw_func hw_func; + /* FIXME : do we want to put all state into a separate struct ? */ /* State for tris */ GLuint color_offset; @@ -132,6 +151,7 @@ typedef struct nouveau_context { __DRIcontextPrivate *driContext; /* DRI context */ __DRIscreenPrivate *driScreen; /* DRI screen */ __DRIdrawablePrivate *driDrawable; /* DRI drawable bound to this ctx */ + GLint lastStamp; drm_context_t hHWContext; drm_hw_lock_t *driHwLock; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c index a45530e451..f85dc62e74 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_driver.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c @@ -36,6 +36,35 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "utils.h" +/* Wrapper for DRM_NOUVEAU_GETPARAM ioctl */ +GLboolean nouveauDRMGetParam(nouveauContextPtr nmesa, + unsigned int param, + uint64_t* value) +{ + drm_nouveau_getparam_t getp; + + getp.param = param; + if (!value || drmCommandWriteRead(nmesa->driFd, DRM_NOUVEAU_GETPARAM, + &getp, sizeof(getp))) + return GL_FALSE; + *value = getp.value; + return GL_TRUE; +} + +/* Wrapper for DRM_NOUVEAU_GETPARAM ioctl */ +GLboolean nouveauDRMSetParam(nouveauContextPtr nmesa, + unsigned int param, + uint64_t value) +{ + drm_nouveau_setparam_t setp; + + setp.param = param; + setp.value = value; + if (drmCommandWrite(nmesa->driFd, DRM_NOUVEAU_SETPARAM, &setp, + sizeof(setp))) + return GL_FALSE; + return GL_TRUE; +} /* Return the width and height of the current color buffer */ static void nouveauGetBufferSize( GLframebuffer *buffer, diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.h b/src/mesa/drivers/dri/nouveau/nouveau_driver.h index e1541aa3c5..6164012b5b 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_driver.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.h @@ -33,7 +33,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define DRIVER_AUTHOR "Stephane Marchesin" extern void nouveauDriverInitFunctions( struct dd_function_table *functions ); - +extern GLboolean nouveauDRMGetParam(nouveauContextPtr nmesa, unsigned int param, + uint64_t *value); +extern GLboolean nouveauDRMSetParam(nouveauContextPtr nmesa, unsigned int param, + uint64_t value); #endif /* __NOUVEAU_DRIVER_H__ */ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_lock.c b/src/mesa/drivers/dri/nouveau/nouveau_lock.c index 7dd67a143a..c119d14dd7 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_lock.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_lock.c @@ -29,6 +29,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "nouveau_lock.h" #include "drirenderbuffer.h" +#include "framebuffer.h" /* Update the hardware state. This is called if another context has @@ -57,6 +58,23 @@ void nouveauGetLock( nouveauContextPtr nmesa, GLuint flags ) */ DRI_VALIDATE_DRAWABLE_INFO( sPriv, dPriv ); + /* If timestamps don't match, the window has been changed */ + if (nmesa->lastStamp != dPriv->lastStamp) { + struct gl_framebuffer *fb = (struct gl_framebuffer *)dPriv->driverPrivate; + + /* _mesa_resize_framebuffer will take care of calling the renderbuffer's + * AllocStorage function if we need more memory to hold it */ + if (fb->Width != dPriv->w || fb->Height != dPriv->h) { + _mesa_resize_framebuffer(nmesa->glCtx, fb, dPriv->w, dPriv->h); + /* resize buffers, will call nouveau_window_moved */ + nouveau_build_framebuffer(nmesa->glCtx, fb); + } else { + nouveau_window_moved(nmesa->glCtx); + } + + nmesa->lastStamp = dPriv->lastStamp; + } + nmesa->numClipRects = dPriv->numClipRects; nmesa->pClipRects = dPriv->pClipRects; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.c b/src/mesa/drivers/dri/nouveau/nouveau_object.c index 032cdee2f7..ef8a428c22 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.c @@ -1,6 +1,7 @@ #include "nouveau_fifo.h" #include "nouveau_object.h" +#include "nouveau_reg.h" static GLboolean nouveauCreateContextObject(nouveauContextPtr nmesa, int handle, int class, uint32_t flags, uint32_t dma_in, uint32_t dma_out, uint32_t dma_notifier) @@ -51,14 +52,30 @@ void nouveauObjectInit(nouveauContextPtr nmesa) return; #endif - nouveauCreateContextObject(nmesa, Nv3D, nmesa->screen->card->class_3d, 0, 0, 0, 0); - nouveauObjectOnSubchannel(nmesa, NvSub3D, Nv3D); /* We need to know vram size.. */ -#if 0 nouveauCreateDmaObject( nmesa, NvDmaFB, 0, (256*1024*1024), 0 /*NV_DMA_TARGET_FB*/, 0 /*NV_DMA_ACCESS_RW*/); + + nouveauCreateContextObject(nmesa, Nv3D, nmesa->screen->card->class_3d, + 0, 0, 0, 0); + nouveauCreateContextObject(nmesa, NvCtxSurf2D, NV10_CONTEXT_SURFACES_2D, + 0, 0, 0, 0); + nouveauCreateContextObject(nmesa, NvImageBlit, NV10_IMAGE_BLIT, + NV_DMA_CONTEXT_FLAGS_PATCH_SRCCOPY, 0, 0, 0); + +#ifdef ALLOW_MULTI_SUBCHANNEL + nouveauObjectOnSubchannel(nmesa, NvSubCtxSurf2D, NvCtxSurf2D); + BEGIN_RING_SIZE(NvSubCtxSurf2D, NV10_CONTEXT_SURFACES_2D_SET_DMA_IN_MEMORY0, 2); + OUT_RING(NvDmaFB); + OUT_RING(NvDmaFB); + + nouveauObjectOnSubchannel(nmesa, NvSubImageBlit, NvImageBlit); + BEGIN_RING_SIZE(NvSubImageBlit, NV10_IMAGE_BLIT_SET_CONTEXT_SURFACES_2D, 1); + OUT_RING(NvCtxSurf2D); #endif + + nouveauObjectOnSubchannel(nmesa, NvSub3D, Nv3D); } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.h b/src/mesa/drivers/dri/nouveau/nouveau_object.h index 8386f923c3..f555eba9b4 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.h @@ -3,14 +3,21 @@ #include "nouveau_context.h" +#define ALLOW_MULTI_SUBCHANNEL + void nouveauObjectInit(nouveauContextPtr nmesa); enum DMAObjects { Nv3D = 0x80000019, - NvDmaFB = 0xD0FB0001 + NvCtxSurf2D = 0x80000020, + NvImageBlit = 0x80000021, + NvDmaFB = 0xD0FB0001, + NvDmaAGP = 0xD0AA0001 }; enum DMASubchannel { + NvSubCtxSurf2D = 0, + NvSubImageBlit = 1, NvSub3D = 7, }; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_screen.c b/src/mesa/drivers/dri/nouveau/nouveau_screen.c index 51f9fb1487..15c1c40925 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_screen.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_screen.c @@ -120,86 +120,69 @@ nouveauCreateBuffer(__DRIscreenPrivate *driScrnPriv, GLboolean isPixmap) { nouveauScreenPtr screen = (nouveauScreenPtr) driScrnPriv->private; + nouveau_renderbuffer *nrb; + struct gl_framebuffer *fb; + const GLboolean swAccum = mesaVis->accumRedBits > 0; + const GLboolean swStencil = mesaVis->stencilBits > 0 && mesaVis->depthBits != 24; - if (isPixmap) { + if (isPixmap) return GL_FALSE; /* not implemented */ - } - else { - const GLboolean swDepth = GL_FALSE; - const GLboolean swAlpha = GL_FALSE; - const GLboolean swAccum = mesaVis->accumRedBits > 0; - const GLboolean swStencil = mesaVis->stencilBits > 0 && mesaVis->depthBits != 24; - struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis); - - /* front color renderbuffer */ - { - driRenderbuffer *frontRb - = driNewRenderbuffer(GL_RGBA, - driScrnPriv->pFB + screen->frontOffset, - screen->fbFormat, - screen->frontOffset, screen->frontPitch, - driDrawPriv); - nouveauSpanSetFunctions(frontRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base); - } - /* back color renderbuffer */ - if (mesaVis->doubleBufferMode) { - driRenderbuffer *backRb - = driNewRenderbuffer(GL_RGBA, - driScrnPriv->pFB + screen->backOffset, - screen->fbFormat, - screen->backOffset, screen->backPitch, - driDrawPriv); - nouveauSpanSetFunctions(backRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base); - } + fb = _mesa_create_framebuffer(mesaVis); + if (!fb) + return GL_FALSE; - /* depth renderbuffer */ - if (mesaVis->depthBits == 16) { - driRenderbuffer *depthRb - = driNewRenderbuffer(GL_DEPTH_COMPONENT16, - driScrnPriv->pFB + screen->depthOffset, - screen->fbFormat, - screen->depthOffset, screen->depthPitch, - driDrawPriv); - nouveauSpanSetFunctions(depthRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); - } - else if (mesaVis->depthBits == 24) { - driRenderbuffer *depthRb - = driNewRenderbuffer(GL_DEPTH_COMPONENT24, - driScrnPriv->pFB + screen->depthOffset, - screen->fbFormat, - screen->depthOffset, screen->depthPitch, - driDrawPriv); - nouveauSpanSetFunctions(depthRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); + /* Front buffer */ + nrb = nouveau_renderbuffer_new(GL_RGBA, + driScrnPriv->pFB + screen->frontOffset, + screen->frontOffset, + screen->frontPitch * 4, + driDrawPriv); + nouveauSpanSetFunctions(nrb, mesaVis); + _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &nrb->mesa); + + if (0 /* unified buffers if we choose to support them.. */) { + } else { + if (mesaVis->doubleBufferMode) { + nrb = nouveau_renderbuffer_new(GL_RGBA, NULL, + 0, 0, + driDrawPriv); + nouveauSpanSetFunctions(nrb, mesaVis); + _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &nrb->mesa); } - /* stencil renderbuffer */ - if (mesaVis->stencilBits > 0 && !swStencil) { - driRenderbuffer *stencilRb - = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT, - driScrnPriv->pFB + screen->depthOffset, - screen->fbFormat, - screen->depthOffset, screen->depthPitch, - driDrawPriv); - nouveauSpanSetFunctions(stencilRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base); + if (mesaVis->depthBits == 24 && mesaVis->stencilBits == 8) { + nrb = nouveau_renderbuffer_new(GL_DEPTH24_STENCIL8_EXT, NULL, + 0, 0, + driDrawPriv); + nouveauSpanSetFunctions(nrb, mesaVis); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &nrb->mesa); + _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &nrb->mesa); + } else if (mesaVis->depthBits == 24) { + nrb = nouveau_renderbuffer_new(GL_DEPTH_COMPONENT24, NULL, + 0, 0, + driDrawPriv); + nouveauSpanSetFunctions(nrb, mesaVis); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &nrb->mesa); + } else if (mesaVis->depthBits == 16) { + nrb = nouveau_renderbuffer_new(GL_DEPTH_COMPONENT16, NULL, + 0, 0, + driDrawPriv); + nouveauSpanSetFunctions(nrb, mesaVis); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &nrb->mesa); } + } - _mesa_add_soft_renderbuffers(fb, - GL_FALSE, /* color */ - swDepth, - swStencil, - swAccum, - swAlpha, - GL_FALSE /* aux */); - driDrawPriv->driverPrivate = (void *) fb; + _mesa_add_soft_renderbuffers(fb, + GL_FALSE, /* color */ + GL_FALSE, /* depth */ + swStencil, + swAccum, + GL_FALSE, /* alpha */ + GL_FALSE /* aux */); - return (driDrawPriv->driverPrivate != NULL); - } + driDrawPriv->driverPrivate = (void *) fb; + return (driDrawPriv->driverPrivate != NULL); } @@ -363,7 +346,8 @@ void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIsc *driver_modes = nouveauFillInModes(dri_priv->bpp, (dri_priv->bpp == 16) ? 16 : 24, (dri_priv->bpp == 16) ? 0 : 8, - (dri_priv->back_offset != dri_priv->depth_offset)); + 1 + ); /* Calling driInitExtensions here, with a NULL context pointer, does not actually * enable the extensions. It just makes sure that all the dispatch offsets for all diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.c b/src/mesa/drivers/dri/nouveau/nouveau_shader.c index 63da8420b2..4dedefe5a3 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.c @@ -132,6 +132,8 @@ nvsUpdateShader(GLcontext *ctx, nouveauShader *nvs) */ nvs->func->UpdateConst(ctx, nvs, i); } else if (plist->Parameters[i].Type == PROGRAM_STATE_VAR) { + if (!nvs->params[i].source_val) /* this is a workaround when consts aren't alloc'd from id=0.. */ + continue; /* update any changed state parameters */ if (!TEST_EQ_4V(nvs->params[i].val, nvs->params[i].source_val)) nvs->func->UpdateConst(ctx, nvs, i); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.h b/src/mesa/drivers/dri/nouveau/nouveau_shader.h index a1e7794487..dce2e23f46 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.h @@ -2,6 +2,7 @@ #define __SHADER_COMMON_H__ #include "mtypes.h" +#include "nouveau_buffers.h" typedef struct _nvsFunc nvsFunc; @@ -40,6 +41,7 @@ typedef struct _nouveauShader { unsigned int program_alloc_size; unsigned int program_start_id; unsigned int program_current; + nouveau_mem *program_buffer; unsigned int inputs_read; unsigned int outputs_written; int inst_count; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_span.c b/src/mesa/drivers/dri/nouveau/nouveau_span.c index 1763b37e53..6d99728b85 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_span.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_span.c @@ -109,14 +109,10 @@ void nouveauSpanInitFunctions( GLcontext *ctx ) * Plug in the Get/Put routines for the given driRenderbuffer. */ void -nouveauSpanSetFunctions(driRenderbuffer *drb, const GLvisual *vis) +nouveauSpanSetFunctions(nouveau_renderbuffer *nrb, const GLvisual *vis) { - if (drb->Base.InternalFormat == GL_RGBA) { - if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) { - nouveauInitPointers_RGB565(&drb->Base); - } - else { - nouveauInitPointers_ARGB8888(&drb->Base); - } - } + if (nrb->mesa._ActualFormat == GL_RGBA8) + nouveauInitPointers_ARGB8888(&nrb->mesa); + else if (nrb->mesa._ActualFormat == GL_RGB5) + nouveauInitPointers_RGB565(&nrb->mesa); } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_span.h b/src/mesa/drivers/dri/nouveau/nouveau_span.h index f5e5733ba8..bc39ecd17b 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_span.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_span.h @@ -30,9 +30,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define __NOUVEAU_SPAN_H__ #include "drirenderbuffer.h" +#include "nouveau_buffers.h" extern void nouveauSpanInitFunctions( GLcontext *ctx ); -extern void nouveauSpanSetFunctions(driRenderbuffer *rb, const GLvisual *vis); +extern void nouveauSpanSetFunctions(nouveau_renderbuffer *nrb, const GLvisual *vis); #endif /* __NOUVEAU_SPAN_H__ */ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_state.c b/src/mesa/drivers/dri/nouveau/nouveau_state.c index 88a8c9ed59..6406b2d9cd 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_state.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_state.c @@ -59,20 +59,33 @@ static void nouveauCalcViewport(GLcontext *ctx) /* Calculate the Viewport Matrix */ nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + nouveau_renderbuffer *nrb; const GLfloat *v = ctx->Viewport._WindowMap.m; GLfloat *m = nmesa->viewport.m; + GLfloat xoffset, yoffset; GLint h = 0; - - if (nmesa->driDrawable) - h = nmesa->driDrawable->h + SUBPIXEL_Y; - + + nrb = nouveau_current_draw_buffer(ctx); + nmesa->depth_scale = 1.0 / ctx->DrawBuffer->_DepthMaxF; + + if (nrb && nrb->map) { + /* Window */ + xoffset = nrb->dPriv->x; + yoffset = nrb->dPriv->y; + } else { + /* Offscreen or back buffer */ + xoffset = 0.0; + yoffset = 0.0; + } + m[MAT_SX] = v[MAT_SX]; - m[MAT_TX] = v[MAT_TX] + SUBPIXEL_X; + m[MAT_TX] = v[MAT_TX] + xoffset + SUBPIXEL_X; m[MAT_SY] = - v[MAT_SY]; - m[MAT_TY] = - v[MAT_TY] + h; + m[MAT_TY] = v[MAT_TY] + yoffset + SUBPIXEL_Y; m[MAT_SZ] = v[MAT_SZ] * nmesa->depth_scale; m[MAT_TZ] = v[MAT_TZ] * nmesa->depth_scale; + nmesa->hw_func.WindowMoved(nmesa); } static void nouveauViewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) @@ -96,7 +109,7 @@ static void nouveauViewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei nouveauCalcViewport(ctx); } -static void nouveauDepthRange(GLcontext *ctx) +static void nouveauDepthRange(GLcontext *ctx, GLclampd near, GLclampd far) { nouveauCalcViewport(ctx); } @@ -161,15 +174,15 @@ void nouveauDDInitState(nouveauContextPtr nmesa) /* No TCL engines for these ones */ break; case NV_10: - nv10InitStateFuncs(&nmesa->glCtx->Driver); + nv10InitStateFuncs(nmesa->glCtx, &nmesa->glCtx->Driver); break; case NV_20: - nv20InitStateFuncs(&nmesa->glCtx->Driver); + nv20InitStateFuncs(nmesa->glCtx, &nmesa->glCtx->Driver); break; case NV_30: case NV_40: case NV_50: - nv30InitStateFuncs(&nmesa->glCtx->Driver); + nv30InitStateFuncs(nmesa->glCtx, &nmesa->glCtx->Driver); break; default: break; @@ -270,7 +283,6 @@ void nouveauInitState(GLcontext *ctx) STATE_INIT(CullFace)( ctx, ctx->Polygon.CullFaceMode ); STATE_INIT(DepthFunc)( ctx, ctx->Depth.Func ); STATE_INIT(DepthMask)( ctx, ctx->Depth.Mask ); - STATE_INIT(DepthRange)( ctx, ctx->Viewport.Near, ctx->Viewport.Far ); STATE_INIT(Enable)( ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled ); STATE_INIT(Enable)( ctx, GL_BLEND, ctx->Color.BlendEnabled ); @@ -320,8 +332,6 @@ void nouveauInitState(GLcontext *ctx) ctx->Polygon.OffsetFactor, ctx->Polygon.OffsetUnits ); STATE_INIT(PolygonStipple)( ctx, (const GLubyte *)ctx->PolygonStipple ); - STATE_INIT(Scissor)( ctx, ctx->Scissor.X, ctx->Scissor.Y, - ctx->Scissor.Width, ctx->Scissor.Height ); STATE_INIT(ShadeModel)( ctx, ctx->Light.ShadeModel ); STATE_INIT(StencilFuncSeparate)( ctx, GL_FRONT, ctx->Stencil.Function[0], @@ -341,10 +351,4 @@ void nouveauInitState(GLcontext *ctx) ctx->Stencil.FailFunc[1], ctx->Stencil.ZFailFunc[1], ctx->Stencil.ZPassFunc[1]); - - STATE_INIT(Viewport)( ctx, - ctx->Viewport.X, ctx->Viewport.Y, - ctx->Viewport.Width, ctx->Viewport.Height ); - - STATE_INIT(DrawBuffer)( ctx, ctx->Color.DrawBuffer[0] ); } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_state.h b/src/mesa/drivers/dri/nouveau/nouveau_state.h index 37f04f41bd..16d63a6ac2 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_state.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_state.h @@ -32,9 +32,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. extern void nouveauDDInitState(nouveauContextPtr nmesa); extern void nouveauDDInitStateFuncs(GLcontext *ctx); -extern void nv10InitStateFuncs(struct dd_function_table *func); -extern void nv20InitStateFuncs(struct dd_function_table *func); -extern void nv30InitStateFuncs(struct dd_function_table *func); +extern void nv10InitStateFuncs(GLcontext *ctx, struct dd_function_table *func); +extern void nv20InitStateFuncs(GLcontext *ctx, struct dd_function_table *func); +extern void nv30InitStateFuncs(GLcontext *ctx, struct dd_function_table *func); extern void nouveauInitState(GLcontext *ctx); diff --git a/src/mesa/drivers/dri/nouveau/nv30_fragprog.c b/src/mesa/drivers/dri/nouveau/nv30_fragprog.c index 1c2664ec70..98aa27ea9c 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_fragprog.c +++ b/src/mesa/drivers/dri/nouveau/nv30_fragprog.c @@ -10,6 +10,7 @@ #include "nouveau_shader.h" #include "nouveau_object.h" #include "nouveau_msg.h" +#include "nouveau_buffers.h" #include "nv30_shader.h" unsigned int NVFP_TX_AOP_COUNT = 64; @@ -19,31 +20,22 @@ struct _op_xlat NVFP_TX_AOP[64]; * Support routines */ -/*XXX: bad bad bad bad */ -static uint64_t fragprog_ofs; -static uint32_t *fragprog_buf = NULL; - static void NV30FPUploadToHW(GLcontext *ctx, nouveauShader *nvs) { nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); drm_nouveau_mem_alloc_t mem; - if (!fragprog_buf) { - mem.flags = NOUVEAU_MEM_FB|NOUVEAU_MEM_MAPPED; - mem.size = nvs->program_size * sizeof(uint32_t); - mem.alignment = 0; - mem.region_offset = &fragprog_ofs; - if (drmCommandWriteRead(nmesa->driFd, DRM_NOUVEAU_MEM_ALLOC, &mem, - sizeof(mem))) { - fprintf(stderr, "MEM_ALLOC fail\n"); - return; - } + if (!nvs->program_buffer) { + nouveau_mem *fpbuf; - if (drmMap(nmesa->driFd, fragprog_ofs, mem.size, &fragprog_buf)) { - fprintf(stderr, "MEM_MAP fail\n"); + fpbuf = nouveau_mem_alloc(ctx, NOUVEAU_MEM_FB|NOUVEAU_MEM_MAPPED, + nvs->program_size * sizeof(uint32_t), 0); + if (!fpbuf) { + fprintf(stderr, "fragprog vram alloc fail!\n"); return; } + nvs->program_buffer = fpbuf; } /*XXX: should do a DMA.. and not copy over a possibly in-use program.. */ @@ -52,9 +44,10 @@ NV30FPUploadToHW(GLcontext *ctx, nouveauShader *nvs) * caches the program somewhere? so, maybe not so bad to just clobber the * old program in vram.. */ - memcpy(fragprog_buf, nvs->program, nvs->program_size * sizeof(uint32_t)); + memcpy(nvs->program_buffer->map, nvs->program, + nvs->program_size * sizeof(uint32_t)); BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_FP_ACTIVE_PROGRAM, 1); - OUT_RING(((uint32_t)fragprog_ofs-0xE0000000)|1); + OUT_RING(nouveau_mem_gpu_offset_get(ctx, nvs->program_buffer) | 1); } static void diff --git a/src/mesa/drivers/dri/nouveau/nv30_state.c b/src/mesa/drivers/dri/nouveau/nv30_state.c index 3ffb5d3a41..3228320623 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_state.c +++ b/src/mesa/drivers/dri/nouveau/nv30_state.c @@ -304,10 +304,16 @@ static void nv30Enable(GLcontext *ctx, GLenum cap, GLboolean state) // case GL_POST_COLOR_MATRIX_COLOR_TABLE: // case GL_POST_CONVOLUTION_COLOR_TABLE: // case GL_RESCALE_NORMAL: -// case GL_SCISSOR_TEST: + case GL_SCISSOR_TEST: + /* No enable bit, nv30Scissor will adjust to max range */ + ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y, + ctx->Scissor.Width, ctx->Scissor.Height); + break; // case GL_SEPARABLE_2D: case GL_STENCIL_TEST: // TODO BACK and FRONT ? + BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_STENCIL_FRONT_ENABLE, 1); + OUT_RING_CACHE(state); BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_STENCIL_BACK_ENABLE, 1); OUT_RING_CACHE(state); break; @@ -514,9 +520,26 @@ void (*RenderMode)(GLcontext *ctx, GLenum mode ); static void nv30Scissor(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) { nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + nouveau_renderbuffer *nrb; + + /* Adjust offsets if drawing to a window */ + nrb = nouveau_current_draw_buffer(ctx); + if (nrb && nrb->map) { + x += nrb->dPriv->x; + y += nrb->dPriv->y; + } + + /* There's no scissor enable bit, so adjust the scissor to cover the + * maximum draw buffer bounds + */ + if (!ctx->Scissor.Enabled) { + x = y = 0; + w = h = 4095; + } + BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_SCISSOR_WIDTH_XPOS, 2); - OUT_RING_CACHE((w << 16) | x); - OUT_RING_CACHE((h << 16) | y); + OUT_RING_CACHE(((w) << 16) | x); + OUT_RING_CACHE(((h) << 16) | y); } /** Select flat or smooth shading */ @@ -602,18 +625,117 @@ static void nv30TextureMatrix(GLcontext *ctx, GLuint unit, const GLmatrix *mat) OUT_RING_CACHEp(mat->m, 16); } -/** Set the viewport */ -static void nv30Viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) +static void nv30WindowMoved(nouveauContextPtr nmesa) { - /* TODO: Where do the VIEWPORT_XFRM_* regs come in? */ - nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + GLcontext *ctx = nmesa->glCtx; + nouveau_renderbuffer *nrb; + GLfloat *v = nmesa->viewport.m; + GLuint w = ctx->Viewport.Width; + GLuint h = ctx->Viewport.Height; + GLuint x = ctx->Viewport.X; + GLuint y = ctx->Viewport.Y; + + /* Adjust offsets if drawing to a window */ + nrb = nouveau_current_draw_buffer(ctx); + if (nrb && nrb->map) { + x += nrb->dPriv->x; + y += nrb->dPriv->y; + } + BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VIEWPORT_DIMS_0, 2); OUT_RING_CACHE((w << 16) | x); OUT_RING_CACHE((h << 16) | y); -} - -void nv30InitStateFuncs(struct dd_function_table *func) + /* something to do with clears, possibly doesn't belong here */ + BEGIN_RING_CACHE(NvSub3D, + NV30_TCL_PRIMITIVE_3D_VIEWPORT_COLOR_BUFFER_OFS0, 2); + OUT_RING_CACHE(((w+x) << 16) | x); + OUT_RING_CACHE(((h+y) << 16) | y); + /* viewport transform */ + BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VIEWPORT_XFRM_OX, 8); + OUT_RING_CACHEf (v[MAT_TX]); + OUT_RING_CACHEf (v[MAT_TY]); + OUT_RING_CACHEf (v[MAT_TZ]); + OUT_RING_CACHEf (0.0); + OUT_RING_CACHEf (v[MAT_SX]); + OUT_RING_CACHEf (v[MAT_SY]); + OUT_RING_CACHEf (v[MAT_SZ]); + OUT_RING_CACHEf (0.0); + + ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y, + ctx->Scissor.Width, ctx->Scissor.Height); +} + +static GLboolean nv30InitCard(nouveauContextPtr nmesa) +{ + /* Need some love.. */ + return GL_FALSE; +} + +static GLboolean nv40InitCard(nouveauContextPtr nmesa) +{ + nouveauObjectOnSubchannel(nmesa, NvSub3D, Nv3D); + + BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_SET_OBJECT1, 2); + OUT_RING(NvDmaFB); + OUT_RING(NvDmaFB); + BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_SET_OBJECT8, 1); + OUT_RING(NvDmaFB); + BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_SET_OBJECT4, 2); + OUT_RING(NvDmaFB); + OUT_RING(NvDmaFB); + BEGIN_RING_SIZE(NvSub3D, 0x0220, 1); + OUT_RING(1); + BEGIN_RING_SIZE(NvSub3D, 0x1fc8, 2); + OUT_RING(0xedcba987); + OUT_RING(0x00000021); + BEGIN_RING_SIZE(NvSub3D, 0x1d60, 1); + OUT_RING(0x03008000); + + return GL_TRUE; +} + +static GLboolean nv30BindBuffers(nouveauContextPtr nmesa, int num_color, + nouveau_renderbuffer **color, + nouveau_renderbuffer *depth) +{ + nouveau_renderbuffer *nrb; + GLuint x, y, w, h; + + /* Adjust offsets if drawing to a window */ + nrb = nouveau_current_draw_buffer(nmesa->glCtx); + w = nrb->mesa.Width; + h = nrb->mesa.Height; + if (nrb && nrb->map) { + x = nrb->dPriv->x; + y = nrb->dPriv->y; + } else { + x = 0; + y = 0; + } + + if (num_color != 1) + return GL_FALSE; + BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VIEWPORT_COLOR_BUFFER_DIM0, 5); + OUT_RING (((w+x)<<16)|x); + OUT_RING (((h+y)<<16)|y); + OUT_RING (0x148); + OUT_RING (color[0]->pitch); + OUT_RING (color[0]->offset); + + if (depth) { + BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_DEPTH_OFFSET, 1); + OUT_RING (depth->offset); + BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_LMA_DEPTH_BUFFER_PITCH, 1); + OUT_RING (depth->pitch); + } + + return GL_TRUE; +} + +void nv30InitStateFuncs(GLcontext *ctx, struct dd_function_table *func) { + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + func->AlphaFunc = nv30AlphaFunc; func->BlendColor = nv30BlendColor; func->BlendEquationSeparate = nv30BlendEquationSeparate; @@ -628,7 +750,6 @@ void nv30InitStateFuncs(struct dd_function_table *func) func->FrontFace = nv30FrontFace; func->DepthFunc = nv30DepthFunc; func->DepthMask = nv30DepthMask; - func->DepthRange = nv30DepthRange; func->Enable = nv30Enable; func->Fogfv = nv30Fogfv; func->Hint = nv30Hint; @@ -656,6 +777,13 @@ void nv30InitStateFuncs(struct dd_function_table *func) func->TexParameter = nv30TexParameter; #endif func->TextureMatrix = nv30TextureMatrix; - func->Viewport = nv30Viewport; + + + if (nmesa->screen->card->type >= NV_40) + nmesa->hw_func.InitCard = nv40InitCard; + else + nmesa->hw_func.InitCard = nv30InitCard; + nmesa->hw_func.BindBuffers = nv30BindBuffers; + nmesa->hw_func.WindowMoved = nv30WindowMoved; } -- cgit v1.2.3 From 15c7e8896ba4c0fedbe3510cb04c44ba3e8d644b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 14 Dec 2006 03:24:57 +0000 Subject: Some more voodoo to get 3D going with a minimal initial context. --- src/mesa/drivers/dri/nouveau/nouveau_object.c | 2 ++ src/mesa/drivers/dri/nouveau/nv30_state.c | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_object.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.c b/src/mesa/drivers/dri/nouveau/nouveau_object.c index ef8a428c22..dda547c916 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.c @@ -73,6 +73,8 @@ void nouveauObjectInit(nouveauContextPtr nmesa) nouveauObjectOnSubchannel(nmesa, NvSubImageBlit, NvImageBlit); BEGIN_RING_SIZE(NvSubImageBlit, NV10_IMAGE_BLIT_SET_CONTEXT_SURFACES_2D, 1); OUT_RING(NvCtxSurf2D); + BEGIN_RING_SIZE(NvSubImageBlit, NV10_IMAGE_BLIT_SET_OPERATION, 1); + OUT_RING(3); /* SRCCOPY */ #endif nouveauObjectOnSubchannel(nmesa, NvSub3D, Nv3D); diff --git a/src/mesa/drivers/dri/nouveau/nv30_state.c b/src/mesa/drivers/dri/nouveau/nv30_state.c index e30dc8a37c..eb3606b6e1 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_state.c +++ b/src/mesa/drivers/dri/nouveau/nv30_state.c @@ -736,9 +736,28 @@ static GLboolean nv40InitCard(nouveauContextPtr nmesa) OUT_RING(NvDmaFB); BEGIN_RING_SIZE(NvSub3D, 0x0220, 1); OUT_RING(1); + + BEGIN_RING_SIZE(NvSub3D, 0x1ea4, 3); + OUT_RING(0x00000010); + OUT_RING(0x01000100); + OUT_RING(0xff800006); + BEGIN_RING_SIZE(NvSub3D, 0x1fc4, 1); + OUT_RING(0x06144321); BEGIN_RING_SIZE(NvSub3D, 0x1fc8, 2); OUT_RING(0xedcba987); OUT_RING(0x00000021); + BEGIN_RING_SIZE(NvSub3D, 0x1fd0, 1); + OUT_RING(0x00171615); + BEGIN_RING_SIZE(NvSub3D, 0x1fd4, 1); + OUT_RING(0x001b1a19); + + BEGIN_RING_SIZE(NvSub3D, 0x1ef8, 1); + OUT_RING(0x0020ffff); + BEGIN_RING_SIZE(NvSub3D, 0x1d64, 1); + OUT_RING(0x00d30000); + BEGIN_RING_SIZE(NvSub3D, 0x1e94, 1); + OUT_RING(0x00000001); + BEGIN_RING_SIZE(NvSub3D, 0x1d60, 1); OUT_RING(0x03008000); -- cgit v1.2.3 From c0a63d8e5e33b7fe3057e32f04c22969ac2adc1d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 26 Dec 2006 20:59:49 +1100 Subject: nouveau: Add notifier support functions --- src/mesa/drivers/dri/nouveau/Makefile | 1 + src/mesa/drivers/dri/nouveau/nouveau_context.c | 1 + src/mesa/drivers/dri/nouveau/nouveau_context.h | 4 + src/mesa/drivers/dri/nouveau/nouveau_object.c | 14 +-- src/mesa/drivers/dri/nouveau/nouveau_object.h | 16 +++- src/mesa/drivers/dri/nouveau/nouveau_sync.c | 115 +++++++++++++++++++++++++ src/mesa/drivers/dri/nouveau/nouveau_sync.h | 36 ++++++++ 7 files changed, 179 insertions(+), 8 deletions(-) create mode 100644 src/mesa/drivers/dri/nouveau/nouveau_sync.c create mode 100644 src/mesa/drivers/dri/nouveau/nouveau_sync.h (limited to 'src/mesa/drivers/dri/nouveau/nouveau_object.c') diff --git a/src/mesa/drivers/dri/nouveau/Makefile b/src/mesa/drivers/dri/nouveau/Makefile index 1a76169156..962978dc7f 100644 --- a/src/mesa/drivers/dri/nouveau/Makefile +++ b/src/mesa/drivers/dri/nouveau/Makefile @@ -24,6 +24,7 @@ DRIVER_SOURCES = \ nouveau_shader_2.c \ nouveau_tex.c \ nouveau_swtcl.c \ + nouveau_sync.c \ nv10_swtcl.c \ nv10_state.c \ nv20_state.c \ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c index ac940ac595..7aca31d0d3 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c @@ -222,6 +222,7 @@ GLboolean nouveauCreateContext( const __GLcontextModes *glVisual, break; } + nouveauSyncInitFuncs(ctx); nmesa->hw_func.InitCard(nmesa); nouveauInitState(ctx); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.h b/src/mesa/drivers/dri/nouveau/nouveau_context.h index ea28506b74..f54ac9a7c8 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.h @@ -40,6 +40,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "nouveau_state_cache.h" #include "nouveau_buffers.h" #include "nouveau_shader.h" +#include "nouveau_sync.h" #include "xmlconfig.h" @@ -101,6 +102,9 @@ typedef struct nouveau_context { uint64_t vram_phys; uint64_t agp_phys; + /* Channel synchronisation */ + nouveau_notifier *syncNotifier; + /* Additional hw-specific functions */ nouveau_hw_func hw_func; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.c b/src/mesa/drivers/dri/nouveau/nouveau_object.c index dda547c916..cf7284d2d5 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.c @@ -4,7 +4,7 @@ #include "nouveau_reg.h" -static GLboolean nouveauCreateContextObject(nouveauContextPtr nmesa, int handle, int class, uint32_t flags, uint32_t dma_in, uint32_t dma_out, uint32_t dma_notifier) +GLboolean nouveauCreateContextObject(nouveauContextPtr nmesa, int handle, int class, uint32_t flags, uint32_t dma_in, uint32_t dma_out, uint32_t dma_notifier) { drm_nouveau_object_init_t cto; int ret; @@ -20,12 +20,12 @@ static GLboolean nouveauCreateContextObject(nouveauContextPtr nmesa, int handle, return ret == 0; } -static GLboolean nouveauCreateDmaObject(nouveauContextPtr nmesa, - uint32_t handle, - uint32_t offset, - uint32_t size, - int target, - int access) +GLboolean nouveauCreateDmaObject(nouveauContextPtr nmesa, + uint32_t handle, + uint32_t offset, + uint32_t size, + int target, + int access) { drm_nouveau_dma_object_init_t dma; int ret; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.h b/src/mesa/drivers/dri/nouveau/nouveau_object.h index a49a39719b..87f2dc9ae7 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.h @@ -12,7 +12,8 @@ enum DMAObjects { NvCtxSurf2D = 0x80000020, NvImageBlit = 0x80000021, NvDmaFB = 0xD0FB0001, - NvDmaAGP = 0xD0AA0001 + NvDmaAGP = 0xD0AA0001, + NvSyncNotify = 0xD0000001 }; enum DMASubchannel { @@ -22,4 +23,17 @@ enum DMASubchannel { }; extern void nouveauObjectOnSubchannel(nouveauContextPtr nmesa, int subchannel, int handle); + +extern GLboolean nouveauCreateContextObject(nouveauContextPtr nmesa, + int handle, int class, + uint32_t flags, + uint32_t dma_in, + uint32_t dma_out, + uint32_t dma_notifier); +extern GLboolean nouveauCreateDmaObject(nouveauContextPtr nmesa, + uint32_t handle, + uint32_t offset, + uint32_t size, + int target, + int access); #endif diff --git a/src/mesa/drivers/dri/nouveau/nouveau_sync.c b/src/mesa/drivers/dri/nouveau/nouveau_sync.c new file mode 100644 index 0000000000..698f778c4b --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_sync.c @@ -0,0 +1,115 @@ +#include "vblank.h" /* for DO_USLEEP */ + +#include "nouveau_context.h" +#include "nouveau_buffers.h" +#include "nouveau_object.h" +#include "nouveau_fifo.h" +#include "nouveau_reg.h" +#include "nouveau_msg.h" +#include "nouveau_sync.h" + +nouveau_notifier * +nouveau_notifier_new(GLcontext *ctx, GLuint handle) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + nouveau_notifier *notifier; + + notifier = CALLOC_STRUCT(nouveau_notifier_t); + if (!notifier) + return NULL; + + notifier->mem = nouveau_mem_alloc(ctx, NOUVEAU_MEM_FB, 32, 0); + if (!notifier->mem) { + FREE(notifier); + return NULL; + } + + if (!nouveauCreateDmaObject(nmesa, handle, notifier->mem->offset, + notifier->mem->size, + 0 /* NV_DMA_TARGET_FB */, + 0 /* NV_DMA_ACCESS_RW */)) { + nouveau_mem_free(ctx, notifier->mem); + FREE(notifier); + return NULL; + } + + notifier->handle = handle; + return notifier; +} + +void +nouveau_notifier_destroy(GLcontext *ctx, nouveau_notifier *notifier) +{ + /*XXX: free DMA object.. */ + nouveau_mem_free(ctx, notifier->mem); + FREE(notifier); +} + +void +nouveau_notifier_reset(nouveau_notifier *notifier) +{ + volatile GLuint *n = notifier->mem->map; + + n[NV_NOTIFY_TIME_0 /4] = 0x00000000; + n[NV_NOTIFY_TIME_1 /4] = 0x00000000; + n[NV_NOTIFY_RETURN_VALUE/4] = 0x00000000; + n[NV_NOTIFY_STATE /4] = (NV_NOTIFY_STATE_STATUS_IN_PROCESS << + NV_NOTIFY_STATE_STATUS_SHIFT); +} + +GLboolean +nouveau_notifier_wait_status(nouveau_notifier *notifier, GLuint status, + GLuint timeout) +{ + volatile GLuint *n = notifier->mem->map; + unsigned int time = 0; + + while (time <= timeout) { + if (n[NV_NOTIFY_STATE] & NV_NOTIFY_STATE_ERROR_CODE_MASK) { + MESSAGE("Notifier returned error: 0x%04x\n", + n[NV_NOTIFY_STATE] & + NV_NOTIFY_STATE_ERROR_CODE_MASK); + return GL_FALSE; + } + + if (((n[NV_NOTIFY_STATE] & NV_NOTIFY_STATE_STATUS_MASK) >> + NV_NOTIFY_STATE_STATUS_SHIFT) == status) + return GL_TRUE; + + if (timeout) { + DO_USLEEP(1); + time++; + } + } + + MESSAGE("Notifier timed out\n"); + return GL_FALSE; +} + +void +nouveau_notifier_wait_nop(GLcontext *ctx, nouveau_notifier *notifier, + GLuint subc) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + GLboolean ret; + + nouveau_notifier_reset(notifier); + + BEGIN_RING_SIZE(subc, NV_NOTIFY, 1); + OUT_RING (NV_NOTIFY_STYLE_WRITE_ONLY); + BEGIN_RING_SIZE(subc, NV_NOP, 1); + OUT_RING (0); + + ret = nouveau_notifier_wait_status(notifier, + NV_NOTIFY_STATE_STATUS_COMPLETED, + 0 /* no timeout */); + if (ret) MESSAGE("wait on notifier failed\n"); +} + +void nouveauSyncInitFuncs(GLcontext *ctx) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + + nmesa->syncNotifier = nouveau_notifier_new(ctx, NvSyncNotify); +} + diff --git a/src/mesa/drivers/dri/nouveau/nouveau_sync.h b/src/mesa/drivers/dri/nouveau/nouveau_sync.h new file mode 100644 index 0000000000..b20c2565ca --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nouveau_sync.h @@ -0,0 +1,36 @@ +#ifndef __NOUVEAU_SYNC_H__ +#define __NOUVEAU_SYNC_H__ + +#include "nouveau_buffers.h" + +#define NV_NOTIFY_TIME_0 0x00000000 +#define NV_NOTIFY_TIME_1 0x00000004 +#define NV_NOTIFY_RETURN_VALUE 0x00000008 +#define NV_NOTIFY_STATE 0x0000000C +#define NV_NOTIFY_STATE_STATUS_MASK 0xFF000000 +#define NV_NOTIFY_STATE_STATUS_SHIFT 24 +#define NV_NOTIFY_STATE_STATUS_COMPLETED 0x00 +#define NV_NOTIFY_STATE_STATUS_IN_PROCESS 0x01 +#define NV_NOTIFY_STATE_ERROR_CODE_MASK 0x0000FFFF +#define NV_NOTIFY_STATE_ERROR_CODE_SHIFT 0 + +/* Methods that (hopefully) all objects have */ +#define NV_NOP 0x00000100 +#define NV_NOTIFY 0x00000104 +#define NV_NOTIFY_STYLE_WRITE_ONLY 0 + +typedef struct nouveau_notifier_t { + GLuint handle; + nouveau_mem *mem; +} nouveau_notifier; + +extern nouveau_notifier *nouveau_notifier_new(GLcontext *, GLuint handle); +extern void nouveau_notifier_destroy(GLcontext *, nouveau_notifier *); +extern void nouveau_notifier_reset(nouveau_notifier *); +extern GLboolean nouveau_notifier_wait_status(nouveau_notifier *r, + GLuint status, GLuint timeout); +extern void nouveau_notifier_wait_nop(GLcontext *ctx, + nouveau_notifier *, GLuint subc); + +extern void nouveauSyncInitFuncs(GLcontext *ctx); +#endif -- cgit v1.2.3 From 297a35eb69382193a4cc9ba4b51619984a8969db Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 27 Dec 2006 00:02:38 +1100 Subject: nouveau: Add simple wrapper for NV_MEMORY_TO_MEMORY_FORMAT. --- src/mesa/drivers/dri/nouveau/nouveau_buffers.c | 59 ++++++++++++++++++++++++++ src/mesa/drivers/dri/nouveau/nouveau_buffers.h | 5 +++ src/mesa/drivers/dri/nouveau/nouveau_object.c | 10 ++++- src/mesa/drivers/dri/nouveau/nouveau_object.h | 2 + 4 files changed, 75 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_object.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_buffers.c b/src/mesa/drivers/dri/nouveau/nouveau_buffers.c index f6a03ecd9c..92329e514f 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_buffers.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_buffers.c @@ -5,6 +5,65 @@ #include "nouveau_context.h" #include "nouveau_buffers.h" +#include "nouveau_object.h" +#include "nouveau_fifo.h" +#include "nouveau_reg.h" +#include "nouveau_msg.h" + +#define MAX_MEMFMT_LENGTH 32768 + +/* Unstrided blit using NV_MEMORY_TO_MEMORY_FORMAT */ +GLboolean +nouveau_memformat_flat_emit(GLcontext *ctx, + nouveau_mem *dst, nouveau_mem *src, + GLuint dst_offset, GLuint src_offset, + GLuint size) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + uint32_t src_handle, dst_handle; + GLuint count; + + if (src_offset + size > src->size) { + MESSAGE("src out of nouveau_mem bounds\n"); + return GL_FALSE; + } + if (dst_offset + size > dst->size) { + MESSAGE("dst out of nouveau_mem bounds\n"); + return GL_FALSE; + } + + src_handle = (src->type & NOUVEAU_MEM_FB) ? NvDmaFB : NvDmaAGP; + dst_handle = (src->type & NOUVEAU_MEM_FB) ? NvDmaFB : NvDmaAGP; + src_offset += nouveau_mem_gpu_offset_get(ctx, src); + dst_offset += nouveau_mem_gpu_offset_get(ctx, dst); + + BEGIN_RING_SIZE(NvSubMemFormat, NV_MEMORY_TO_MEMORY_FORMAT_OBJECT_IN, 2); + OUT_RING (src_handle); + OUT_RING (dst_handle); + + count = (size / MAX_MEMFMT_LENGTH) + ((size % MAX_MEMFMT_LENGTH) ? 1 : 0); + + while (count--) { + GLuint length = (size > MAX_MEMFMT_LENGTH) ? MAX_MEMFMT_LENGTH : size; + + BEGIN_RING_SIZE(NvSubMemFormat, NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); + OUT_RING (src_offset); + OUT_RING (dst_offset); + OUT_RING (0); /* pitch in */ + OUT_RING (0); /* pitch out */ + OUT_RING (length); /* line length */ + OUT_RING (1); /* number of lines */ + OUT_RING ((1 << 8) /* dst_inc */ | (1 << 0) /* src_inc */); + OUT_RING (0); /* buffer notify? */ + FIRE_RING(); + + src_offset += length; + dst_offset += length; + size -= length; + } + + return GL_TRUE; +} void nouveau_mem_free(GLcontext *ctx, nouveau_mem *mem) diff --git a/src/mesa/drivers/dri/nouveau/nouveau_buffers.h b/src/mesa/drivers/dri/nouveau/nouveau_buffers.h index bb297ad558..a8d85b279b 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_buffers.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_buffers.h @@ -18,6 +18,11 @@ extern nouveau_mem *nouveau_mem_alloc(GLcontext *ctx, int type, extern void nouveau_mem_free(GLcontext *ctx, nouveau_mem *mem); extern uint32_t nouveau_mem_gpu_offset_get(GLcontext *ctx, nouveau_mem *mem); +extern GLboolean nouveau_memformat_flat_emit(GLcontext *ctx, + nouveau_mem *dst, nouveau_mem *src, + GLuint dst_offset, GLuint src_offset, + GLuint size); + typedef struct nouveau_renderbuffer_t { struct gl_renderbuffer mesa; /* must be first! */ __DRIdrawablePrivate *dPriv; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.c b/src/mesa/drivers/dri/nouveau/nouveau_object.c index cf7284d2d5..1558f2963d 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.c @@ -52,10 +52,13 @@ void nouveauObjectInit(nouveauContextPtr nmesa) return; #endif -/* We need to know vram size.. */ +/* We need to know vram size.. and AGP size (and even if the card is AGP..) */ nouveauCreateDmaObject( nmesa, NvDmaFB, 0, (256*1024*1024), 0 /*NV_DMA_TARGET_FB*/, 0 /*NV_DMA_ACCESS_RW*/); + nouveauCreateDmaObject( nmesa, NvDmaAGP, + nmesa->agp_phys, (128*1024*1024), + 3 /* AGP */, 0 /* RW */); nouveauCreateContextObject(nmesa, Nv3D, nmesa->screen->card->class_3d, 0, 0, 0, 0); @@ -63,6 +66,9 @@ void nouveauObjectInit(nouveauContextPtr nmesa) 0, 0, 0, 0); nouveauCreateContextObject(nmesa, NvImageBlit, NV10_IMAGE_BLIT, NV_DMA_CONTEXT_FLAGS_PATCH_SRCCOPY, 0, 0, 0); + nouveauCreateContextObject(nmesa, NvMemFormat, + NV_MEMORY_TO_MEMORY_FORMAT, + 0, 0, 0, 0); #ifdef ALLOW_MULTI_SUBCHANNEL nouveauObjectOnSubchannel(nmesa, NvSubCtxSurf2D, NvCtxSurf2D); @@ -75,6 +81,8 @@ void nouveauObjectInit(nouveauContextPtr nmesa) OUT_RING(NvCtxSurf2D); BEGIN_RING_SIZE(NvSubImageBlit, NV10_IMAGE_BLIT_SET_OPERATION, 1); OUT_RING(3); /* SRCCOPY */ + + nouveauObjectOnSubchannel(nmesa, NvSubMemFormat, NvMemFormat); #endif nouveauObjectOnSubchannel(nmesa, NvSub3D, Nv3D); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.h b/src/mesa/drivers/dri/nouveau/nouveau_object.h index 87f2dc9ae7..d5fcc6df8d 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.h @@ -11,6 +11,7 @@ enum DMAObjects { Nv3D = 0x80000019, NvCtxSurf2D = 0x80000020, NvImageBlit = 0x80000021, + NvMemFormat = 0x80000022, NvDmaFB = 0xD0FB0001, NvDmaAGP = 0xD0AA0001, NvSyncNotify = 0xD0000001 @@ -19,6 +20,7 @@ enum DMAObjects { enum DMASubchannel { NvSubCtxSurf2D = 0, NvSubImageBlit = 1, + NvSubMemFormat = 2, NvSub3D = 7, }; -- cgit v1.2.3 From 3c0961d29993a2203323b4c308ae6d7e418ac5aa Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Sun, 21 Jan 2007 04:06:57 +0100 Subject: nouveau: add nv04 state support, and small nv04 fixes. --- src/mesa/drivers/dri/nouveau/Makefile | 11 +- src/mesa/drivers/dri/nouveau/nouveau_context.c | 3 +- src/mesa/drivers/dri/nouveau/nouveau_context.h | 18 +- src/mesa/drivers/dri/nouveau/nouveau_object.c | 11 +- src/mesa/drivers/dri/nouveau/nouveau_state.c | 4 +- src/mesa/drivers/dri/nouveau/nouveau_state.h | 1 + src/mesa/drivers/dri/nouveau/nv04_state.c | 497 +++++++++++++++++++++++++ 7 files changed, 527 insertions(+), 18 deletions(-) create mode 100644 src/mesa/drivers/dri/nouveau/nv04_state.c (limited to 'src/mesa/drivers/dri/nouveau/nouveau_object.c') diff --git a/src/mesa/drivers/dri/nouveau/Makefile b/src/mesa/drivers/dri/nouveau/Makefile index 9718f3bf46..492e743360 100644 --- a/src/mesa/drivers/dri/nouveau/Makefile +++ b/src/mesa/drivers/dri/nouveau/Makefile @@ -19,6 +19,7 @@ DRIVER_SOURCES = \ nouveau_screen.c \ nouveau_span.c \ nouveau_state.c \ + nouveau_state_cache.c \ nouveau_shader.c \ nouveau_shader_0.c \ nouveau_shader_1.c \ @@ -26,18 +27,18 @@ DRIVER_SOURCES = \ nouveau_tex.c \ nouveau_swtcl.c \ nouveau_sync.c \ + nv04_state.c \ nv04_swtcl.c \ - nv10_swtcl.c \ nv10_state.c \ + nv10_swtcl.c \ nv20_state.c \ - nv30_state.c \ - nv50_state.c \ - nouveau_state_cache.c \ nv20_vertprog.c \ + nv30_state.c \ nv30_fragprog.c \ nv30_vertprog.c \ nv40_fragprog.c \ - nv40_vertprog.c + nv40_vertprog.c \ + nv50_state.c C_SOURCES = \ $(COMMON_SOURCES) \ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c index 79da46fc0b..5db93eb012 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c @@ -50,6 +50,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "nouveau_msg.h" #include "nouveau_reg.h" #include "nouveau_lock.h" +#include "nv04_swtcl.h" #include "nv10_swtcl.h" #include "vblank.h" @@ -212,7 +213,7 @@ GLboolean nouveauCreateContext( const __GLcontextModes *glVisual, break; case NV_04: case NV_05: - //nv04TriInitFunctions( ctx ); + nv04TriInitFunctions( ctx ); break; case NV_10: case NV_20: diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.h b/src/mesa/drivers/dri/nouveau/nouveau_context.h index c7bf387210..bcfbb9fb8d 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.h @@ -170,15 +170,15 @@ typedef struct nouveau_context { /* Configuration cache */ driOptionCache optionCache; - /* vblank stuff */ - uint32_t vblank_flags; - uint32_t vblank_seq; - - GLuint new_state; - GLuint new_render_state; - GLuint render_index; - GLmatrix viewport; - GLfloat depth_scale; + /* vblank stuff */ + uint32_t vblank_flags; + uint32_t vblank_seq; + + GLuint new_state; + GLuint new_render_state; + GLuint render_index; + GLmatrix viewport; + GLfloat depth_scale; }nouveauContextRec, *nouveauContextPtr; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.c b/src/mesa/drivers/dri/nouveau/nouveau_object.c index 1558f2963d..26086e16e4 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.c @@ -62,10 +62,17 @@ void nouveauObjectInit(nouveauContextPtr nmesa) nouveauCreateContextObject(nmesa, Nv3D, nmesa->screen->card->class_3d, 0, 0, 0, 0); - nouveauCreateContextObject(nmesa, NvCtxSurf2D, NV10_CONTEXT_SURFACES_2D, + if (nmesa->screen->card->type>=NV_10) { + nouveauCreateContextObject(nmesa, NvCtxSurf2D, NV10_CONTEXT_SURFACES_2D, 0, 0, 0, 0); - nouveauCreateContextObject(nmesa, NvImageBlit, NV10_IMAGE_BLIT, + nouveauCreateContextObject(nmesa, NvImageBlit, NV10_IMAGE_BLIT, NV_DMA_CONTEXT_FLAGS_PATCH_SRCCOPY, 0, 0, 0); + } else { + nouveauCreateContextObject(nmesa, NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D, + 0, 0, 0, 0); + nouveauCreateContextObject(nmesa, NvImageBlit, NV_IMAGE_BLIT, + NV_DMA_CONTEXT_FLAGS_PATCH_SRCCOPY, 0, 0, 0); + } nouveauCreateContextObject(nmesa, NvMemFormat, NV_MEMORY_TO_MEMORY_FORMAT, 0, 0, 0, 0); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_state.c b/src/mesa/drivers/dri/nouveau/nouveau_state.c index 1ff881f054..18f6ffb2ad 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_state.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_state.c @@ -156,9 +156,11 @@ void nouveauDDInitState(nouveauContextPtr nmesa) switch(type) { case NV_03: + /* Unimplemented */ + break; case NV_04: case NV_05: - /* No TCL engines for these ones */ + nv04InitStateFuncs(nmesa->glCtx, &nmesa->glCtx->Driver); break; case NV_10: nv10InitStateFuncs(nmesa->glCtx, &nmesa->glCtx->Driver); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_state.h b/src/mesa/drivers/dri/nouveau/nouveau_state.h index 16d63a6ac2..5b85287445 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_state.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_state.h @@ -32,6 +32,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. extern void nouveauDDInitState(nouveauContextPtr nmesa); extern void nouveauDDInitStateFuncs(GLcontext *ctx); +extern void nv04InitStateFuncs(GLcontext *ctx, struct dd_function_table *func); extern void nv10InitStateFuncs(GLcontext *ctx, struct dd_function_table *func); extern void nv20InitStateFuncs(GLcontext *ctx, struct dd_function_table *func); extern void nv30InitStateFuncs(GLcontext *ctx, struct dd_function_table *func); diff --git a/src/mesa/drivers/dri/nouveau/nv04_state.c b/src/mesa/drivers/dri/nouveau/nv04_state.c new file mode 100644 index 0000000000..83ad8ae432 --- /dev/null +++ b/src/mesa/drivers/dri/nouveau/nv04_state.c @@ -0,0 +1,497 @@ +/************************************************************************** + +Copyright 2007 Stephane Marchesin +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +ERIC ANHOLT OR SILICON INTEGRATED SYSTEMS CORP BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +#include "nouveau_context.h" +#include "nouveau_object.h" +#include "nouveau_fifo.h" +#include "nouveau_reg.h" +#include "nouveau_msg.h" + +#include "tnl/t_pipeline.h" + +#include "mtypes.h" +#include "colormac.h" + +static uint32_t nv04_compare_func(GLcontext *ctx,GLuint f) +{ + switch ( ctx->Color.AlphaFunc ) { + case GL_NEVER: return 1; + case GL_LESS: return 2; + case GL_EQUAL: return 3; + case GL_LEQUAL: return 4; + case GL_GREATER: return 5; + case GL_NOTEQUAL: return 6; + case GL_GEQUAL: return 7; + case GL_ALWAYS: return 8; + } + WARN_ONCE("Unable to find the function\n"); + return 0; +} + +static uint32_t nv04_blend_func(GLcontext *ctx,GLuint f) +{ + switch ( ctx->Color.AlphaFunc ) { + case GL_ZERO: return 0x1; + case GL_ONE: return 0x2; + case GL_SRC_COLOR: return 0x3; + case GL_ONE_MINUS_SRC_COLOR: return 0x4; + case GL_SRC_ALPHA: return 0x5; + case GL_ONE_MINUS_SRC_ALPHA: return 0x6; + case GL_DST_ALPHA: return 0x7; + case GL_ONE_MINUS_DST_ALPHA: return 0x8; + case GL_DST_COLOR: return 0x9; + case GL_ONE_MINUS_DST_COLOR: return 0xA; + case GL_SRC_ALPHA_SATURATE: return 0xB; + } + WARN_ONCE("Unable to find the function\n"); + return 0; +} + +static void nv04_emit_control(GLcontext *ctx) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + uint32_t control,cull; + GLubyte alpha_ref; + + CLAMPED_FLOAT_TO_UBYTE(alpha_ref, ctx->Color.AlphaRef); + control=alpha_ref; + control|=(nv04_compare_func(ctx,ctx->Color.AlphaFunc)<<8); + control|=(ctx->Color.AlphaEnabled<<12); + control|=(1<<13); + control|=(ctx->Depth.Test<<14); + control|=(nv04_compare_func(ctx,ctx->Depth.Func)<<16); + if ((ctx->Polygon.CullFlag)&&(ctx->Polygon.CullFaceMode!=GL_FRONT_AND_BACK)) + { + if ((ctx->Polygon.FrontFace==GL_CW)&&(ctx->Polygon.CullFaceMode==GL_FRONT)) + cull=2; + if ((ctx->Polygon.FrontFace==GL_CW)&&(ctx->Polygon.CullFaceMode==GL_BACK)) + cull=3; + if ((ctx->Polygon.FrontFace==GL_CCW)&&(ctx->Polygon.CullFaceMode==GL_FRONT)) + cull=3; + if ((ctx->Polygon.FrontFace==GL_CCW)&&(ctx->Polygon.CullFaceMode==GL_BACK)) + cull=2; + } + else + if (ctx->Polygon.CullFaceMode==GL_FRONT_AND_BACK) + cull=0; + else + cull=1; + control|=(cull<<20); + control|=(ctx->Color.DitherFlag<<22); + if ((ctx->Depth.Test)&&(ctx->Depth.Mask)) + control|=(1<<24); + + control|=(1<<30); // integer zbuffer format + + BEGIN_RING_CACHE(NvSub3D, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING_CACHE(control); +} + +static void nv04_emit_blend(GLcontext *ctx) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + uint32_t blend; + + blend=0x4; // texture MODULATE_ALPHA + blend|=0x20; // alpha is MSB + switch(ctx->Light.ShadeModel) { + case GL_SMOOTH:blend|=(1<<6);break; + case GL_FLAT: blend|=(2<<6);break; + default:break; + } + if (ctx->Hint.PerspectiveCorrection!=GL_FASTEST) + blend|=(1<<8); + blend|=(ctx->Fog.Enabled<<16); + blend|=(ctx->Color.BlendEnabled<<20); + blend|=(nv04_blend_func(ctx,ctx->Color.BlendSrcRGB)<<24); + blend|=(nv04_blend_func(ctx,ctx->Color.BlendDstRGB)<<28); + + BEGIN_RING_CACHE(NvSub3D, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); + OUT_RING_CACHE(blend); +} + +static void nv04_emit_fog_color(GLcontext *ctx) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + GLubyte c[4]; + c[0] = FLOAT_TO_UBYTE( ctx->Fog.Color[0] ); + c[1] = FLOAT_TO_UBYTE( ctx->Fog.Color[1] ); + c[2] = FLOAT_TO_UBYTE( ctx->Fog.Color[2] ); + c[3] = FLOAT_TO_UBYTE( ctx->Fog.Color[3] ); + BEGIN_RING_CACHE(NvSub3D, NV04_DX5_TEXTURED_TRIANGLE_FOG_COLOR, 1); + OUT_RING_CACHE(PACK_COLOR_8888_REV(c[0],c[1],c[2],c[3])); +} + +static void nv04AlphaFunc(GLcontext *ctx, GLenum func, GLfloat ref) +{ + nv04_emit_control(ctx); +} + +static void nv04BlendColor(GLcontext *ctx, const GLfloat color[4]) +{ + nv04_emit_blend(ctx); +} + +static void nv04BlendEquationSeparate(GLcontext *ctx, GLenum modeRGB, GLenum modeA) +{ + nv04_emit_blend(ctx); +} + + +static void nv04BlendFuncSeparate(GLcontext *ctx, GLenum sfactorRGB, GLenum dfactorRGB, + GLenum sfactorA, GLenum dfactorA) +{ + nv04_emit_blend(ctx); +} + +static void nv04Clear(GLcontext *ctx, GLbitfield mask) +{ + /* TODO */ +} + +static void nv04ClearColor(GLcontext *ctx, const GLfloat color[4]) +{ + /* TODO */ +} + +static void nv04ClearDepth(GLcontext *ctx, GLclampd d) +{ + /* TODO */ +} + +static void nv04ClearStencil(GLcontext *ctx, GLint s) +{ + /* TODO */ +} + +static void nv04ClipPlane(GLcontext *ctx, GLenum plane, const GLfloat *equation) +{ + /* TODO */ +} + +static void nv04ColorMask(GLcontext *ctx, GLboolean rmask, GLboolean gmask, + GLboolean bmask, GLboolean amask ) +{ + /* TODO */ +} + +static void nv04ColorMaterial(GLcontext *ctx, GLenum face, GLenum mode) +{ + /* TODO I need love */ +} + +static void nv04CullFace(GLcontext *ctx, GLenum mode) +{ + nv04_emit_control(ctx); +} + +static void nv04FrontFace(GLcontext *ctx, GLenum mode) +{ + /* TODO */ +} + +static void nv04DepthFunc(GLcontext *ctx, GLenum func) +{ + nv04_emit_control(ctx); +} + +static void nv04DepthMask(GLcontext *ctx, GLboolean flag) +{ + /* TODO */ +} + +static void nv04DepthRange(GLcontext *ctx, GLclampd nearval, GLclampd farval) +{ + /* TODO */ +} + +/** Specify the current buffer for writing */ +//void (*DrawBuffer)( GLcontext *ctx, GLenum buffer ); +/** Specify the buffers for writing for fragment programs*/ +//void (*DrawBuffers)( GLcontext *ctx, GLsizei n, const GLenum *buffers ); + +static void nv04Enable(GLcontext *ctx, GLenum cap, GLboolean state) +{ + switch(cap) + { + case GL_ALPHA_TEST: + nv04_emit_control(ctx); + break; +// case GL_AUTO_NORMAL: + case GL_BLEND: + nv04_emit_blend(ctx); + break; +// case GL_CLIP_PLANE0: +// case GL_CLIP_PLANE1: +// case GL_CLIP_PLANE2: +// case GL_CLIP_PLANE3: +// case GL_CLIP_PLANE4: +// case GL_CLIP_PLANE5: +// case GL_COLOR_LOGIC_OP: +// case GL_COLOR_MATERIAL: +// case GL_COLOR_SUM_EXT: +// case GL_COLOR_TABLE: +// case GL_CONVOLUTION_1D: +// case GL_CONVOLUTION_2D: + case GL_CULL_FACE: + nv04_emit_control(ctx); + break; + case GL_DEPTH_TEST: + nv04_emit_control(ctx); + break; + case GL_DITHER: + nv04_emit_control(ctx); + break; + case GL_FOG: + nv04_emit_blend(ctx); + break; +// case GL_HISTOGRAM: +// case GL_INDEX_LOGIC_OP: +// case GL_LIGHT0: +// case GL_LIGHT1: +// case GL_LIGHT2: +// case GL_LIGHT3: +// case GL_LIGHT4: +// case GL_LIGHT5: +// case GL_LIGHT6: +// case GL_LIGHT7: +// case GL_LIGHTING: +// case GL_LINE_SMOOTH: +// case GL_LINE_STIPPLE: +// case GL_MAP1_COLOR_4: +// case GL_MAP1_INDEX: +// case GL_MAP1_NORMAL: +// case GL_MAP1_TEXTURE_COORD_1: +// case GL_MAP1_TEXTURE_COORD_2: +// case GL_MAP1_TEXTURE_COORD_3: +// case GL_MAP1_TEXTURE_COORD_4: +// case GL_MAP1_VERTEX_3: +// case GL_MAP1_VERTEX_4: +// case GL_MAP2_COLOR_4: +// case GL_MAP2_INDEX: +// case GL_MAP2_NORMAL: +// case GL_MAP2_TEXTURE_COORD_1: +// case GL_MAP2_TEXTURE_COORD_2: +// case GL_MAP2_TEXTURE_COORD_3: +// case GL_MAP2_TEXTURE_COORD_4: +// case GL_MAP2_VERTEX_3: +// case GL_MAP2_VERTEX_4: +// case GL_MINMAX: +// case GL_NORMALIZE: +// case GL_POINT_SMOOTH: +// case GL_POLYGON_OFFSET_POINT: +// case GL_POLYGON_OFFSET_LINE: +// case GL_POLYGON_OFFSET_FILL: +// case GL_POLYGON_SMOOTH: +// case GL_POLYGON_STIPPLE: +// case GL_POST_COLOR_MATRIX_COLOR_TABLE: +// case GL_POST_CONVOLUTION_COLOR_TABLE: +// case GL_RESCALE_NORMAL: +// case GL_SCISSOR_TEST: +// case GL_SEPARABLE_2D: +// case GL_STENCIL_TEST: +// case GL_TEXTURE_GEN_Q: +// case GL_TEXTURE_GEN_R: +// case GL_TEXTURE_GEN_S: +// case GL_TEXTURE_GEN_T: +// case GL_TEXTURE_1D: +// case GL_TEXTURE_2D: +// case GL_TEXTURE_3D: + } +} + +static void nv04Fogfv(GLcontext *ctx, GLenum pname, const GLfloat *params) +{ + nv04_emit_blend(ctx); + nv04_emit_fog_color(ctx); +} + +static void nv04Hint(GLcontext *ctx, GLenum target, GLenum mode) +{ + switch(target) + { + case GL_PERSPECTIVE_CORRECTION_HINT:nv04_emit_blend(ctx);break; + default:break; + } +} + +static void nv04LineStipple(GLcontext *ctx, GLint factor, GLushort pattern ) +{ + /* TODO not even in your dreams */ +} + +static void nv04LineWidth(GLcontext *ctx, GLfloat width) +{ + /* TODO */ +} + +static void nv04LogicOpcode(GLcontext *ctx, GLenum opcode) +{ + /* TODO */ +} + +static void nv04PointParameterfv(GLcontext *ctx, GLenum pname, const GLfloat *params) +{ + /* TODO */ +} + +static void nv04PointSize(GLcontext *ctx, GLfloat size) +{ + /* TODO */ +} + +static void nv04PolygonMode(GLcontext *ctx, GLenum face, GLenum mode) +{ + /* TODO */ +} + +/** Set the scale and units used to calculate depth values */ +static void nv04PolygonOffset(GLcontext *ctx, GLfloat factor, GLfloat units) +{ + /* TODO */ +} + +/** Set the polygon stippling pattern */ +static void nv04PolygonStipple(GLcontext *ctx, const GLubyte *mask ) +{ + /* TODO */ +} + +/* Specifies the current buffer for reading */ +void (*ReadBuffer)( GLcontext *ctx, GLenum buffer ); +/** Set rasterization mode */ +void (*RenderMode)(GLcontext *ctx, GLenum mode ); + +/** Define the scissor box */ +static void nv04Scissor(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) +{ + /* TODO */ +} + +/** Select flat or smooth shading */ +static void nv04ShadeModel(GLcontext *ctx, GLenum mode) +{ + nv04_emit_blend(ctx); +} + +/** OpenGL 2.0 two-sided StencilFunc */ +static void nv04StencilFuncSeparate(GLcontext *ctx, GLenum face, GLenum func, + GLint ref, GLuint mask) +{ + /* TODO */ +} + +/** OpenGL 2.0 two-sided StencilMask */ +static void nv04StencilMaskSeparate(GLcontext *ctx, GLenum face, GLuint mask) +{ + /* TODO */ +} + +/** OpenGL 2.0 two-sided StencilOp */ +static void nv04StencilOpSeparate(GLcontext *ctx, GLenum face, GLenum fail, + GLenum zfail, GLenum zpass) +{ + /* TODO */ +} + +/** Control the generation of texture coordinates */ +void (*TexGen)(GLcontext *ctx, GLenum coord, GLenum pname, + const GLfloat *params); +/** Set texture environment parameters */ +void (*TexEnv)(GLcontext *ctx, GLenum target, GLenum pname, + const GLfloat *param); +/** Set texture parameters */ +void (*TexParameter)(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj, + GLenum pname, const GLfloat *params); + +/* Update anything that depends on the window position/size */ +static void nv04WindowMoved(nouveauContextPtr nmesa) +{ +} + +/* Initialise any card-specific non-GL related state */ +static GLboolean nv04InitCard(nouveauContextPtr nmesa) +{ + return GL_TRUE; +} + +/* Update buffer offset/pitch/format */ +static GLboolean nv04BindBuffers(nouveauContextPtr nmesa, int num_color, + nouveau_renderbuffer **color, + nouveau_renderbuffer *depth) +{ + return GL_TRUE; +} + +void nv04InitStateFuncs(GLcontext *ctx, struct dd_function_table *func) +{ + nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); + + func->AlphaFunc = nv04AlphaFunc; + func->BlendColor = nv04BlendColor; + func->BlendEquationSeparate = nv04BlendEquationSeparate; + func->BlendFuncSeparate = nv04BlendFuncSeparate; + func->Clear = nv04Clear; + func->ClearColor = nv04ClearColor; + func->ClearDepth = nv04ClearDepth; + func->ClearStencil = nv04ClearStencil; + func->ClipPlane = nv04ClipPlane; + func->ColorMask = nv04ColorMask; + func->ColorMaterial = nv04ColorMaterial; + func->CullFace = nv04CullFace; + func->FrontFace = nv04FrontFace; + func->DepthFunc = nv04DepthFunc; + func->DepthMask = nv04DepthMask; + func->DepthRange = nv04DepthRange; + func->Enable = nv04Enable; + func->Fogfv = nv04Fogfv; + func->Hint = nv04Hint; +/* func->Lightfv = nv04Lightfv;*/ +/* func->LightModelfv = nv04LightModelfv; */ + func->LineStipple = nv04LineStipple; /* Not for NV04 */ + func->LineWidth = nv04LineWidth; + func->LogicOpcode = nv04LogicOpcode; + func->PointParameterfv = nv04PointParameterfv; + func->PointSize = nv04PointSize; + func->PolygonMode = nv04PolygonMode; + func->PolygonOffset = nv04PolygonOffset; + func->PolygonStipple = nv04PolygonStipple; /* Not for NV04 */ +/* func->ReadBuffer = nv04ReadBuffer;*/ +/* func->RenderMode = nv04RenderMode;*/ + func->Scissor = nv04Scissor; + func->ShadeModel = nv04ShadeModel; + func->StencilFuncSeparate = nv04StencilFuncSeparate; + func->StencilMaskSeparate = nv04StencilMaskSeparate; + func->StencilOpSeparate = nv04StencilOpSeparate; +/* func->TexGen = nv04TexGen;*/ +/* func->TexParameter = nv04TexParameter;*/ +/* func->TextureMatrix = nv04TextureMatrix;*/ + + nmesa->hw_func.InitCard = nv04InitCard; + nmesa->hw_func.BindBuffers = nv04BindBuffers; + nmesa->hw_func.WindowMoved = nv04WindowMoved; +} -- cgit v1.2.3 From 0931e21eb62af217564f450e9e56bc7b6f0e15c7 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Sat, 27 Jan 2007 03:03:25 +0100 Subject: nouveau: more work on nv04, this time buffer format/pitches/... --- src/mesa/drivers/dri/nouveau/nouveau_object.c | 2 ++ src/mesa/drivers/dri/nouveau/nouveau_object.h | 2 ++ src/mesa/drivers/dri/nouveau/nv04_state.c | 45 +++++++++++++++++++++++++-- 3 files changed, 47 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri/nouveau/nouveau_object.c') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.c b/src/mesa/drivers/dri/nouveau/nouveau_object.c index 26086e16e4..302009c8b1 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.c @@ -70,6 +70,8 @@ void nouveauObjectInit(nouveauContextPtr nmesa) } else { nouveauCreateContextObject(nmesa, NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D, 0, 0, 0, 0); + nouveauCreateContextObject(nmesa, NvCtxSurf3D, NV04_CONTEXT_SURFACES_3D, + 0, 0, 0, 0); nouveauCreateContextObject(nmesa, NvImageBlit, NV_IMAGE_BLIT, NV_DMA_CONTEXT_FLAGS_PATCH_SRCCOPY, 0, 0, 0); } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.h b/src/mesa/drivers/dri/nouveau/nouveau_object.h index b1ff5a5d0d..e154e0acff 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.h @@ -12,6 +12,7 @@ enum DMAObjects { NvCtxSurf2D = 0x80000020, NvImageBlit = 0x80000021, NvMemFormat = 0x80000022, + NvCtxSurf3D = 0x80000023, NvDmaFB = 0xD0FB0001, NvDmaAGP = 0xD0AA0001, NvSyncNotify = 0xD0000001 @@ -21,6 +22,7 @@ enum DMASubchannel { NvSubCtxSurf2D = 0, NvSubImageBlit = 1, NvSubMemFormat = 2, + NvSubCtxSurf3D = 3, NvSub3D = 7, }; diff --git a/src/mesa/drivers/dri/nouveau/nv04_state.c b/src/mesa/drivers/dri/nouveau/nv04_state.c index 83ad8ae432..ec4cd40d87 100644 --- a/src/mesa/drivers/dri/nouveau/nv04_state.c +++ b/src/mesa/drivers/dri/nouveau/nv04_state.c @@ -267,6 +267,7 @@ static void nv04Enable(GLcontext *ctx, GLenum cap, GLboolean state) break; case GL_FOG: nv04_emit_blend(ctx); + nv04_emit_fog_color(ctx); break; // case GL_HISTOGRAM: // case GL_INDEX_LOGIC_OP: @@ -436,14 +437,54 @@ static void nv04WindowMoved(nouveauContextPtr nmesa) /* Initialise any card-specific non-GL related state */ static GLboolean nv04InitCard(nouveauContextPtr nmesa) { + nouveauObjectOnSubchannel(nmesa, NvSub3D, Nv3D); + nouveauObjectOnSubchannel(nmesa, NvSubCtxSurf3D, NvCtxSurf3D); + + BEGIN_RING_SIZE(NvSubCtxSurf3D, NV04_CONTEXT_SURFACES_3D_DMA_NOTIFY, 3); + OUT_RING(NvDmaFB); + OUT_RING(NvDmaFB); + OUT_RING(NvDmaFB); + BEGIN_RING_SIZE(NvSub3D, NV04_DX5_TEXTURED_TRIANGLE_SURFACE, 1); + OUT_RING(NvCtxSurf3D); return GL_TRUE; } /* Update buffer offset/pitch/format */ static GLboolean nv04BindBuffers(nouveauContextPtr nmesa, int num_color, - nouveau_renderbuffer **color, - nouveau_renderbuffer *depth) + nouveau_renderbuffer **color, + nouveau_renderbuffer *depth) { + GLuint x, y, w, h; + + w = color[0]->mesa.Width; + h = color[0]->mesa.Height; + x = nmesa->drawX; + y = nmesa->drawY; + + /* FIXME pitches have to be aligned ! */ + BEGIN_RING_SIZE(NvSubCtxSurf3D, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(color[0]->pitch|(depth->pitch<<16)); + OUT_RING(color[0]->offset); + + if (depth) { + BEGIN_RING_SIZE(NvSubCtxSurf3D, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RING(depth->offset); + } + + BEGIN_RING_SIZE(NvSubCtxSurf3D, NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL, 2); + OUT_RING((w<<16)|x); + OUT_RING((h<<16)|y); + + /* FIXME not sure... */ + BEGIN_RING_SIZE(NvSubCtxSurf3D, NV04_CONTEXT_SURFACES_3D_CLIP_SIZE, 1); + OUT_RING((h<<16)|w); + + BEGIN_RING_SIZE(NvSubCtxSurf3D, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); + if (color[0]->mesa._ActualFormat == GL_RGBA8) + OUT_RING(108/*A8R8G8B8*/); + else + OUT_RING(103/*R5G6B5*/); + return GL_TRUE; } -- cgit v1.2.3