diff options
Diffstat (limited to 'src/gallium/drivers/nv50')
-rw-r--r-- | src/gallium/drivers/nv50/Makefile | 29 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_clear.c | 92 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_context.c | 90 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_context.h | 207 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_draw.c | 89 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_miptree.c | 320 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_program.c | 1784 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_program.h | 45 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_query.c | 134 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_screen.c | 356 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_screen.h | 34 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_state.c | 664 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_state_validate.c | 313 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_surface.c | 230 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_tex.c | 163 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_texture.h | 129 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_vbo.c | 254 |
17 files changed, 4933 insertions, 0 deletions
diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile new file mode 100644 index 0000000000..be30400c03 --- /dev/null +++ b/src/gallium/drivers/nv50/Makefile @@ -0,0 +1,29 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv50 + +DRIVER_SOURCES = \ + nv50_clear.c \ + nv50_context.c \ + nv50_draw.c \ + nv50_miptree.c \ + nv50_query.c \ + nv50_program.c \ + nv50_screen.c \ + nv50_state.c \ + nv50_state_validate.c \ + nv50_surface.c \ + nv50_tex.c \ + nv50_vbo.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c new file mode 100644 index 0000000000..f9bc3b53ca --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_clear.c @@ -0,0 +1,92 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv50_context.h" + +void +nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct pipe_framebuffer_state fb, s_fb = nv50->framebuffer; + struct pipe_scissor_state sc, s_sc = nv50->scissor; + unsigned dirty = nv50->dirty; + + nv50->dirty = 0; + + if (ps->format == PIPE_FORMAT_Z24S8_UNORM || + ps->format == PIPE_FORMAT_Z16_UNORM) { + fb.nr_cbufs = 0; + fb.zsbuf = ps; + } else { + fb.nr_cbufs = 1; + fb.cbufs[0] = ps; + fb.zsbuf = NULL; + } + fb.width = ps->width; + fb.height = ps->height; + pipe->set_framebuffer_state(pipe, &fb); + + sc.minx = sc.miny = 0; + sc.maxx = fb.width; + sc.maxy = fb.height; + pipe->set_scissor_state(pipe, &sc); + + nv50_state_validate(nv50); + + switch (ps->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + BEGIN_RING(chan, tesla, 0x0d80, 4); + OUT_RINGf (chan, ubyte_to_float((clearValue >> 16) & 0xff)); + OUT_RINGf (chan, ubyte_to_float((clearValue >> 8) & 0xff)); + OUT_RINGf (chan, ubyte_to_float((clearValue >> 0) & 0xff)); + OUT_RINGf (chan, ubyte_to_float((clearValue >> 24) & 0xff)); + BEGIN_RING(chan, tesla, 0x19d0, 1); + OUT_RING (chan, 0x3c); + break; + case PIPE_FORMAT_Z24S8_UNORM: + BEGIN_RING(chan, tesla, 0x0d90, 1); + OUT_RINGf (chan, (float)(clearValue >> 8) * (1.0 / 16777215.0)); + BEGIN_RING(chan, tesla, 0x0da0, 1); + OUT_RING (chan, clearValue & 0xff); + BEGIN_RING(chan, tesla, 0x19d0, 1); + OUT_RING (chan, 0x03); + break; + default: + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, + clearValue); + break; + } + + pipe->set_framebuffer_state(pipe, &s_fb); + pipe->set_scissor_state(pipe, &s_sc); + nv50->dirty |= dirty; + + ps->status = PIPE_SURFACE_STATUS_CLEAR; +} + diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c new file mode 100644 index 0000000000..565a5da668 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -0,0 +1,90 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv50_context.h" +#include "nv50_screen.h" + +static void +nv50_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + + FIRE_RING(nv50->screen->nvws->channel); +} + +static void +nv50_destroy(struct pipe_context *pipe) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + + draw_destroy(nv50->draw); + FREE(nv50); +} + + +static void +nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + +struct pipe_context * +nv50_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ + struct pipe_winsys *pipe_winsys = pscreen->winsys; + struct nv50_screen *screen = nv50_screen(pscreen); + struct nv50_context *nv50; + + nv50 = CALLOC_STRUCT(nv50_context); + if (!nv50) + return NULL; + nv50->screen = screen; + nv50->pctx_id = pctx_id; + + nv50->pipe.winsys = pipe_winsys; + nv50->pipe.screen = pscreen; + + nv50->pipe.destroy = nv50_destroy; + + nv50->pipe.set_edgeflags = nv50_set_edgeflags; + nv50->pipe.draw_arrays = nv50_draw_arrays; + nv50->pipe.draw_elements = nv50_draw_elements; + nv50->pipe.clear = nv50_clear; + + nv50->pipe.flush = nv50_flush; + + nv50_init_surface_functions(nv50); + nv50_init_state_functions(nv50); + nv50_init_query_functions(nv50); + + nv50->draw = draw_create(); + assert(nv50->draw); + draw_set_rasterize_stage(nv50->draw, nv50_draw_render_stage(nv50)); + + return &nv50->pipe; +} + + diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h new file mode 100644 index 0000000000..1e9d45cb34 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -0,0 +1,207 @@ +#ifndef __NV50_CONTEXT_H__ +#define __NV50_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nv50_screen.h" +#include "nv50_program.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +/* Constant buffer assignment */ +#define NV50_CB_PMISC 0 +#define NV50_CB_PVP 1 +#define NV50_CB_PFP 2 +#define NV50_CB_PGP 3 +#define NV50_CB_TIC 4 +#define NV50_CB_TSC 5 +#define NV50_CB_PUPLOAD 6 + +#define NV50_NEW_BLEND (1 << 0) +#define NV50_NEW_ZSA (1 << 1) +#define NV50_NEW_BLEND_COLOUR (1 << 2) +#define NV50_NEW_STIPPLE (1 << 3) +#define NV50_NEW_SCISSOR (1 << 4) +#define NV50_NEW_VIEWPORT (1 << 5) +#define NV50_NEW_RASTERIZER (1 << 6) +#define NV50_NEW_FRAMEBUFFER (1 << 7) +#define NV50_NEW_VERTPROG (1 << 8) +#define NV50_NEW_VERTPROG_CB (1 << 9) +#define NV50_NEW_FRAGPROG (1 << 10) +#define NV50_NEW_FRAGPROG_CB (1 << 11) +#define NV50_NEW_ARRAYS (1 << 12) +#define NV50_NEW_SAMPLER (1 << 13) +#define NV50_NEW_TEXTURE (1 << 14) + +struct nv50_blend_stateobj { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv50_zsa_stateobj { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv50_rasterizer_stateobj { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv50_miptree_level { + struct pipe_buffer **image; + int *image_offset; + unsigned image_dirty_cpu[512/32]; + unsigned image_dirty_gpu[512/32]; +}; + +struct nv50_miptree { + struct pipe_texture base; + struct pipe_buffer *buffer; + + struct nv50_miptree_level level[PIPE_MAX_TEXTURE_LEVELS]; + int image_nr; + int total_size; +}; + +static INLINE struct nv50_miptree * +nv50_miptree(struct pipe_texture *pt) +{ + return (struct nv50_miptree *)pt; +} + +struct nv50_surface { + struct pipe_surface base; +}; + +static INLINE struct nv50_surface * +nv50_surface(struct pipe_surface *pt) +{ + return (struct nv50_surface *)pt; +} + +static INLINE struct pipe_buffer * +nv50_surface_buffer(struct pipe_surface *surface) +{ + struct nv50_miptree *mt = (struct nv50_miptree *)surface->texture; + return mt->buffer; +} + +struct nv50_state { + unsigned dirty; + + struct nouveau_stateobj *fb; + struct nouveau_stateobj *blend; + struct nouveau_stateobj *blend_colour; + struct nouveau_stateobj *zsa; + struct nouveau_stateobj *rast; + struct nouveau_stateobj *stipple; + struct nouveau_stateobj *scissor; + unsigned scissor_enabled; + struct nouveau_stateobj *viewport; + unsigned viewport_bypass; + struct nouveau_stateobj *tsc_upload; + struct nouveau_stateobj *tic_upload; + struct nouveau_stateobj *vertprog; + struct nouveau_stateobj *fragprog; + struct nouveau_stateobj *vtxfmt; + struct nouveau_stateobj *vtxbuf; +}; + +struct nv50_context { + struct pipe_context pipe; + + struct nv50_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + struct nv50_state state; + + unsigned dirty; + struct nv50_blend_stateobj *blend; + struct nv50_zsa_stateobj *zsa; + struct nv50_rasterizer_stateobj *rasterizer; + struct pipe_blend_color blend_colour; + struct pipe_poly_stipple stipple; + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct nv50_program *vertprog; + struct nv50_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned vtxbuf_nr; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; + unsigned vtxelt_nr; + unsigned *sampler[PIPE_MAX_SAMPLERS]; + unsigned sampler_nr; + struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS]; + unsigned miptree_nr; +}; + +static INLINE struct nv50_context * +nv50_context(struct pipe_context *pipe) +{ + return (struct nv50_context *)pipe; +} + +extern void nv50_init_surface_functions(struct nv50_context *nv50); +extern void nv50_init_state_functions(struct nv50_context *nv50); +extern void nv50_init_query_functions(struct nv50_context *nv50); + +extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen); + +extern int +nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, + int dx, int dy, struct pipe_surface *src, int sx, int sy, + int w, int h); + +/* nv50_draw.c */ +extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); + +/* nv50_vbo.c */ +extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv50_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); +extern void nv50_vbo_validate(struct nv50_context *nv50); + +/* nv50_clear.c */ +extern void nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +/* nv50_program.c */ +extern void nv50_vertprog_validate(struct nv50_context *nv50); +extern void nv50_fragprog_validate(struct nv50_context *nv50); +extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); + +/* nv50_state_validate.c */ +extern boolean nv50_state_validate(struct nv50_context *nv50); + +/* nv50_tex.c */ +extern void nv50_tex_validate(struct nv50_context *); + +/* nv50_miptree.c */ +extern void nv50_miptree_sync(struct pipe_screen *, struct nv50_miptree *, + unsigned level, unsigned image); + +#endif diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c new file mode 100644 index 0000000000..2f6f607261 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_draw.c @@ -0,0 +1,89 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "draw/draw_pipe.h" + +#include "nv50_context.h" + +struct nv50_render_stage { + struct draw_stage stage; + struct nv50_context *nv50; +}; + +static INLINE struct nv50_render_stage * +nv50_render_stage(struct draw_stage *stage) +{ + return (struct nv50_render_stage *)stage; +} + +static void +nv50_render_point(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_line(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_tri(struct draw_stage *stage, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_flush(struct draw_stage *stage, unsigned flags) +{ +} + +static void +nv50_render_reset_stipple_counter(struct draw_stage *stage) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv50_render_destroy(struct draw_stage *stage) +{ + FREE(stage); +} + +struct draw_stage * +nv50_draw_render_stage(struct nv50_context *nv50) +{ + struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage); + + rs->nv50 = nv50; + rs->stage.draw = nv50->draw; + rs->stage.destroy = nv50_render_destroy; + rs->stage.point = nv50_render_point; + rs->stage.line = nv50_render_line; + rs->stage.tri = nv50_render_tri; + rs->stage.flush = nv50_render_flush; + rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter; + + return &rs->stage; +} + diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c new file mode 100644 index 0000000000..91091d53f5 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -0,0 +1,320 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv50_context.h" + +static struct pipe_texture * +nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) +{ + struct pipe_winsys *ws = pscreen->winsys; + struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree); + struct pipe_texture *pt = &mt->base; + unsigned usage, width = tmp->width[0], height = tmp->height[0]; + unsigned depth = tmp->depth[0]; + int i, l; + + mt->base = *tmp; + mt->base.refcount = 1; + mt->base.screen = pscreen; + + usage = PIPE_BUFFER_USAGE_PIXEL; + switch (pt->format) { + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + usage |= NOUVEAU_BUFFER_USAGE_ZETA; + break; + default: + break; + } + + switch (pt->target) { + case PIPE_TEXTURE_3D: + mt->image_nr = pt->depth[0]; + break; + case PIPE_TEXTURE_CUBE: + mt->image_nr = 6; + break; + default: + mt->image_nr = 1; + break; + } + + for (l = 0; l <= pt->last_level; l++) { + struct nv50_miptree_level *lvl = &mt->level[l]; + + pt->width[l] = width; + pt->height[l] = height; + pt->depth[l] = depth; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + + lvl->image_offset = CALLOC(mt->image_nr, sizeof(int)); + lvl->image = CALLOC(mt->image_nr, sizeof(struct pipe_buffer *)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + depth = MAX2(1, depth >> 1); + } + + for (i = 0; i < mt->image_nr; i++) { + for (l = 0; l <= pt->last_level; l++) { + struct nv50_miptree_level *lvl = &mt->level[l]; + int size; + + size = align(pt->width[l], 8) * pt->block.size; + size = align(size, 64); + size *= align(pt->height[l], 8) * pt->block.size; + + lvl->image[i] = ws->buffer_create(ws, 256, 0, size); + lvl->image_offset[i] = mt->total_size; + + mt->total_size += size; + } + } + + mt->buffer = ws->buffer_create(ws, 256, usage, mt->total_size); + if (!mt->buffer) { + FREE(mt); + return NULL; + } + + return &mt->base; +} + +static struct pipe_texture * +nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, + const unsigned *stride, struct pipe_buffer *pb) +{ + struct nv50_miptree *mt; + + /* Only supports 2D, non-mipmapped textures for the moment */ + if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || + pt->depth[0] != 1) + return NULL; + + mt = CALLOC_STRUCT(nv50_miptree); + if (!mt) + return NULL; + + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + mt->image_nr = 1; + mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + + pipe_buffer_reference(pscreen, &mt->buffer, pb); + return &mt->base; +} + +static INLINE void +mark_dirty(uint32_t *flags, unsigned image) +{ + flags[image / 32] |= (1 << (image % 32)); +} + +static INLINE void +mark_clean(uint32_t *flags, unsigned image) +{ + flags[image / 32] &= ~(1 << (image % 32)); +} + +static INLINE int +is_dirty(uint32_t *flags, unsigned image) +{ + return !!(flags[image / 32] & (1 << (image % 32))); +} + +static void +nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) +{ + struct pipe_texture *pt = *ppt; + + *ppt = NULL; + + if (--pt->refcount <= 0) { + struct nv50_miptree *mt = nv50_miptree(pt); + + pipe_buffer_reference(pscreen, &mt->buffer, NULL); + FREE(mt); + } +} + +void +nv50_miptree_sync(struct pipe_screen *pscreen, struct nv50_miptree *mt, + unsigned level, unsigned image) +{ + struct nv50_screen *nvscreen = nv50_screen(pscreen); + struct nv50_miptree_level *lvl = &mt->level[level]; + struct pipe_surface *dst, *src; + unsigned face = 0, zslice = 0; + + if (!is_dirty(lvl->image_dirty_cpu, image)) + return; + + if (mt->base.target == PIPE_TEXTURE_CUBE) + face = image; + else + if (mt->base.target == PIPE_TEXTURE_3D) + zslice = image; + + /* Mark as clean already - so we don't continually call this function + * trying to get a GPU_WRITE pipe_surface! + */ + mark_clean(lvl->image_dirty_cpu, image); + + /* Pretend we're doing CPU access so we get the backing pipe_surface + * and not a view into the larger miptree. + */ + src = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice, + PIPE_BUFFER_USAGE_CPU_READ); + + /* Pretend we're only reading with the GPU so surface doesn't get marked + * as dirtied by the GPU. + */ + dst = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice, + PIPE_BUFFER_USAGE_GPU_READ); + + nv50_surface_do_copy(nvscreen, dst, 0, 0, src, 0, 0, dst->width, dst->height); + + pscreen->tex_surface_release(pscreen, &dst); + pscreen->tex_surface_release(pscreen, &src); +} + +/* The reverse of the above */ +static void +nv50_miptree_sync_cpu(struct pipe_screen *pscreen, struct nv50_miptree *mt, + unsigned level, unsigned image) +{ + struct nv50_screen *nvscreen = nv50_screen(pscreen); + struct nv50_miptree_level *lvl = &mt->level[level]; + struct pipe_surface *dst, *src; + unsigned face = 0, zslice = 0; + + if (!is_dirty(lvl->image_dirty_gpu, image)) + return; + + if (mt->base.target == PIPE_TEXTURE_CUBE) + face = image; + else + if (mt->base.target == PIPE_TEXTURE_3D) + zslice = image; + + mark_clean(lvl->image_dirty_gpu, image); + + src = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice, + PIPE_BUFFER_USAGE_GPU_READ); + dst = pscreen->get_tex_surface(pscreen, &mt->base, face, level, zslice, + PIPE_BUFFER_USAGE_CPU_READ); + + nv50_surface_do_copy(nvscreen, dst, 0, 0, src, 0, 0, dst->width, dst->height); + + pscreen->tex_surface_release(pscreen, &dst); + pscreen->tex_surface_release(pscreen, &src); +} + +static struct pipe_surface * +nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct nv50_miptree *mt = nv50_miptree(pt); + struct nv50_miptree_level *lvl = &mt->level[level]; + struct pipe_surface *ps; + int img; + + if (pt->target == PIPE_TEXTURE_CUBE) + img = face; + else + if (pt->target == PIPE_TEXTURE_3D) + img = zslice; + else + img = 0; + + ps = CALLOC_STRUCT(pipe_surface); + if (!ps) + return NULL; + pipe_texture_reference(&ps->texture, pt); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = ps->width * ps->block.size; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; + ps->refcount = 1; + ps->face = face; + ps->level = level; + ps->zslice = zslice; + + if (flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { + assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE)); + nv50_miptree_sync_cpu(pscreen, mt, level, img); + + ps->offset = 0; + pipe_texture_reference(&ps->texture, pt); + + if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) + mark_dirty(lvl->image_dirty_cpu, img); + } else { + nv50_miptree_sync(pscreen, mt, level, img); + + ps->offset = lvl->image_offset[img]; + pipe_texture_reference(&ps->texture, pt); + + if (flags & PIPE_BUFFER_USAGE_GPU_WRITE) + mark_dirty(lvl->image_dirty_gpu, img); + } + + return ps; +} + +static void +nv50_miptree_surface_del(struct pipe_screen *pscreen, + struct pipe_surface **psurface) +{ + struct pipe_surface *ps = *psurface; + struct nv50_surface *s = nv50_surface(ps); + + *psurface = NULL; + + if (--ps->refcount <= 0) { + pipe_texture_reference(&ps->texture, NULL); + FREE(s); + } +} + +void +nv50_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv50_miptree_create; + pscreen->texture_blanket = nv50_miptree_blanket; + pscreen->texture_release = nv50_miptree_release; + pscreen->get_tex_surface = nv50_miptree_surface_new; + pscreen->tex_surface_release = nv50_miptree_surface_del; +} + diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c new file mode 100644 index 0000000000..14c5d47e79 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -0,0 +1,1784 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv50_context.h" + +#define NV50_SU_MAX_TEMP 64 +//#define NV50_PROGRAM_DUMP + +/* ARL - gallium craps itself on progs/vp/arl.txt + * + * MSB - Like MAD, but MUL+SUB + * - Fuck it off, introduce a way to negate args for ops that + * support it. + * + * Look into inlining IMMD for ops other than MOV (make it general?) + * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, + * but can emit to P_TEMP first - then MOV later. NVIDIA does this + * + * In ops such as ADD it's possible to construct a bad opcode in the !is_long() + * case, if the emit_src() causes the inst to suddenly become long. + * + * Verify half-insns work where expected - and force disable them where they + * don't work - MUL has it forcibly disabled atm as it fixes POW.. + * + * FUCK! watch dst==src vectors, can overwrite components that are needed. + * ie. SUB R0, R0.yzxw, R0 + * + * Things to check with renouveau: + * FP attr/result assignment - how? + * attrib + * - 0x16bc maps vp output onto fp hpos + * - 0x16c0 maps vp output onto fp col0 + * result + * - colr always 0-3 + * - depr always 4 + * 0x16bc->0x16e8 --> some binding between vp/fp regs + * 0x16b8 --> VP output count + * + * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005 + * "MOV rcol.x, fcol.y" = 0x00000004 + * 0x19a8 --> as above but 0x00000100 and 0x00000000 + * - 0x00100000 used when KIL used + * 0x196c --> as above but 0x00000011 and 0x00000000 + * + * 0x1988 --> 0xXXNNNNNN + * - XX == FP high something + */ +struct nv50_reg { + enum { + P_TEMP, + P_ATTR, + P_RESULT, + P_CONST, + P_IMMD + } type; + int index; + + int hw; + int neg; +}; + +struct nv50_pc { + struct nv50_program *p; + + /* hw resources */ + struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; + + /* tgsi resources */ + struct nv50_reg *temp; + int temp_nr; + struct nv50_reg *attr; + int attr_nr; + struct nv50_reg *result; + int result_nr; + struct nv50_reg *param; + int param_nr; + struct nv50_reg *immd; + float *immd_buf; + int immd_nr; + + struct nv50_reg *temp_temp[16]; + unsigned temp_temp_nr; +}; + +static void +alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) +{ + int i; + + if (reg->type == P_RESULT) { + if (pc->p->cfg.high_result < (reg->hw + 1)) + pc->p->cfg.high_result = reg->hw + 1; + } + + if (reg->type != P_TEMP) + return; + + if (reg->hw >= 0) { + /*XXX: do this here too to catch FP temp-as-attr usage.. + * not clean, but works */ + if (pc->p->cfg.high_temp < (reg->hw + 1)) + pc->p->cfg.high_temp = reg->hw + 1; + return; + } + + for (i = 0; i < NV50_SU_MAX_TEMP; i++) { + if (!(pc->r_temp[i])) { + pc->r_temp[i] = reg; + reg->hw = i; + if (pc->p->cfg.high_temp < (i + 1)) + pc->p->cfg.high_temp = i + 1; + return; + } + } + + assert(0); +} + +static struct nv50_reg * +alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) +{ + struct nv50_reg *r; + int i; + + if (dst && dst->type == P_TEMP && dst->hw == -1) + return dst; + + for (i = 0; i < NV50_SU_MAX_TEMP; i++) { + if (!pc->r_temp[i]) { + r = CALLOC_STRUCT(nv50_reg); + r->type = P_TEMP; + r->index = -1; + r->hw = i; + pc->r_temp[i] = r; + return r; + } + } + + assert(0); + return NULL; +} + +static void +free_temp(struct nv50_pc *pc, struct nv50_reg *r) +{ + if (r->index == -1) { + unsigned hw = r->hw; + + FREE(pc->r_temp[hw]); + pc->r_temp[hw] = NULL; + } +} + +static int +alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx) +{ + int i; + + if ((idx + 4) >= NV50_SU_MAX_TEMP) + return 1; + + if (pc->r_temp[idx] || pc->r_temp[idx + 1] || + pc->r_temp[idx + 2] || pc->r_temp[idx + 3]) + return alloc_temp4(pc, dst, idx + 1); + + for (i = 0; i < 4; i++) { + dst[i] = CALLOC_STRUCT(nv50_reg); + dst[i]->type = P_TEMP; + dst[i]->index = -1; + dst[i]->hw = idx + i; + pc->r_temp[idx + i] = dst[i]; + } + + return 0; +} + +static void +free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4]) +{ + int i; + + for (i = 0; i < 4; i++) + free_temp(pc, reg[i]); +} + +static struct nv50_reg * +temp_temp(struct nv50_pc *pc) +{ + if (pc->temp_temp_nr >= 16) + assert(0); + + pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); + return pc->temp_temp[pc->temp_temp_nr++]; +} + +static void +kill_temp_temp(struct nv50_pc *pc) +{ + int i; + + for (i = 0; i < pc->temp_temp_nr; i++) + free_temp(pc, pc->temp_temp[i]); + pc->temp_temp_nr = 0; +} + +static int +ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w) +{ + pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * r * sizeof(float)), + (pc->immd_nr + 1) * 4 * sizeof(float)); + pc->immd_buf[(pc->immd_nr * 4) + 0] = x; + pc->immd_buf[(pc->immd_nr * 4) + 1] = y; + pc->immd_buf[(pc->immd_nr * 4) + 2] = z; + pc->immd_buf[(pc->immd_nr * 4) + 3] = w; + + return pc->immd_nr++; +} + +static struct nv50_reg * +alloc_immd(struct nv50_pc *pc, float f) +{ + struct nv50_reg *r = CALLOC_STRUCT(nv50_reg); + unsigned hw; + + hw = ctor_immd(pc, f, 0, 0, 0) * 4; + r->type = P_IMMD; + r->hw = hw; + r->index = -1; + return r; +} + +static struct nv50_program_exec * +exec(struct nv50_pc *pc) +{ + struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec); + + e->param.index = -1; + return e; +} + +static void +emit(struct nv50_pc *pc, struct nv50_program_exec *e) +{ + struct nv50_program *p = pc->p; + + if (p->exec_tail) + p->exec_tail->next = e; + if (!p->exec_head) + p->exec_head = e; + p->exec_tail = e; + p->exec_size += (e->inst[0] & 1) ? 2 : 1; +} + +static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); + +static boolean +is_long(struct nv50_program_exec *e) +{ + if (e->inst[0] & 1) + return TRUE; + return FALSE; +} + +static boolean +is_immd(struct nv50_program_exec *e) +{ + if (is_long(e) && (e->inst[1] & 3) == 3) + return TRUE; + return FALSE; +} + +static INLINE void +set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx, + struct nv50_program_exec *e) +{ + set_long(pc, e); + e->inst[1] &= ~((0x1f << 7) | (0x3 << 12)); + e->inst[1] |= (pred << 7) | (idx << 12); +} + +static INLINE void +set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx, + struct nv50_program_exec *e) +{ + set_long(pc, e); + e->inst[1] &= ~((0x3 << 4) | (1 << 6)); + e->inst[1] |= (idx << 4) | (on << 6); +} + +static INLINE void +set_long(struct nv50_pc *pc, struct nv50_program_exec *e) +{ + if (is_long(e)) + return; + + e->inst[0] |= 1; + set_pred(pc, 0xf, 0, e); + set_pred_wr(pc, 0, 0, e); +} + +static INLINE void +set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) +{ + if (dst->type == P_RESULT) { + set_long(pc, e); + e->inst[1] |= 0x00000008; + } + + alloc_reg(pc, dst); + e->inst[0] |= (dst->hw << 2); +} + +static INLINE void +set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) +{ + unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ + + set_long(pc, e); + /*XXX: can't be predicated - bits overlap.. catch cases where both + * are required and avoid them. */ + set_pred(pc, 0, 0, e); + set_pred_wr(pc, 0, 0, e); + + e->inst[1] |= 0x00000002 | 0x00000001; + e->inst[0] |= (val & 0x3f) << 16; + e->inst[1] |= (val >> 6) << 2; +} + +static void +emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src, struct nv50_reg *iv) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0x80000000; + set_dst(pc, dst, e); + alloc_reg(pc, src); + e->inst[0] |= (src->hw << 16); + if (iv) { + e->inst[0] |= (1 << 25); + alloc_reg(pc, iv); + e->inst[0] |= (iv->hw << 9); + } + + emit(pc, e); +} + +static void +set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, + struct nv50_program_exec *e) +{ + set_long(pc, e); +#if 1 + e->inst[1] |= (1 << 22); +#else + if (src->type == P_IMMD) { + e->inst[1] |= (NV50_CB_PMISC << 22); + } else { + if (pc->p->type == PIPE_SHADER_VERTEX) + e->inst[1] |= (NV50_CB_PVP << 22); + else + e->inst[1] |= (NV50_CB_PFP << 22); + } +#endif + + e->param.index = src->hw; + e->param.shift = s; + e->param.mask = m << (s % 32); +} + +static void +emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0x10000000; + + set_dst(pc, dst, e); + + if (0 && dst->type != P_RESULT && src->type == P_IMMD) { + set_immd(pc, src, e); + /*XXX: 32-bit, but steals part of "half" reg space - need to + * catch and handle this case if/when we do half-regs + */ + e->inst[0] |= 0x00008000; + } else + if (src->type == P_IMMD || src->type == P_CONST) { + set_long(pc, e); + set_data(pc, src, 0x7f, 9, e); + e->inst[1] |= 0x20000000; /* src0 const? */ + } else { + if (src->type == P_ATTR) { + set_long(pc, e); + e->inst[1] |= 0x00200000; + } + + alloc_reg(pc, src); + e->inst[0] |= (src->hw << 9); + } + + /* We really should support "half" instructions here at some point, + * but I don't feel confident enough about them yet. + */ + set_long(pc, e); + if (is_long(e) && !is_immd(e)) { + e->inst[1] |= 0x04000000; /* 32-bit */ + e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ + } + + emit(pc, e); +} + +static boolean +check_swap_src_0_1(struct nv50_pc *pc, + struct nv50_reg **s0, struct nv50_reg **s1) +{ + struct nv50_reg *src0 = *s0, *src1 = *s1; + + if (src0->type == P_CONST) { + if (src1->type != P_CONST) { + *s0 = src1; + *s1 = src0; + return TRUE; + } + } else + if (src1->type == P_ATTR) { + if (src0->type != P_ATTR) { + *s0 = src1; + *s1 = src0; + return TRUE; + } + } + + return FALSE; +} + +static void +set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) +{ + if (src->type == P_ATTR) { + set_long(pc, e); + e->inst[1] |= 0x00200000; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } + + alloc_reg(pc, src); + e->inst[0] |= (src->hw << 9); +} + +static void +set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) +{ + if (src->type == P_ATTR) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + assert(!(e->inst[0] & 0x00800000)); + if (e->inst[0] & 0x01000000) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else { + set_data(pc, src, 0x7f, 16, e); + e->inst[0] |= 0x00800000; + } + } + + alloc_reg(pc, src); + e->inst[0] |= (src->hw << 16); +} + +static void +set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) +{ + set_long(pc, e); + + if (src->type == P_ATTR) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + assert(!(e->inst[0] & 0x01000000)); + if (e->inst[0] & 0x00800000) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else { + set_data(pc, src, 0x7f, 32+14, e); + e->inst[0] |= 0x01000000; + } + } + + alloc_reg(pc, src); + e->inst[1] |= (src->hw << 14); +} + +static void +emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xc0000000; + set_long(pc, e); + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + + emit(pc, e); +} + +static void +emit_add(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xb0000000; + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + if (is_long(e)) + set_src_2(pc, src1, e); + else + set_src_1(pc, src1, e); + + emit(pc, e); +} + +static void +emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + + set_long(pc, e); + e->inst[0] |= 0xb0000000; + e->inst[1] |= (sub << 29); + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + + emit(pc, e); +} + +static void +emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xb0000000; + + set_long(pc, e); + if (check_swap_src_0_1(pc, &src0, &src1)) + e->inst[1] |= 0x04000000; + else + e->inst[1] |= 0x08000000; + + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_2(pc, src1, e); + + emit(pc, e); +} + +static void +emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1, struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xe0000000; + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + set_src_2(pc, src2, e); + + emit(pc, e); +} + +static void +emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1, struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xe0000000; + set_long(pc, e); + e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */ + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + set_src_2(pc, src2, e); + + emit(pc, e); +} + +static void +emit_flop(struct nv50_pc *pc, unsigned sub, + struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0x90000000; + if (sub) { + set_long(pc, e); + e->inst[1] |= (sub << 29); + } + + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void +emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xb0000000; + + set_dst(pc, dst, e); + set_src_0(pc, src, e); + set_long(pc, e); + e->inst[1] |= (6 << 29) | 0x00004000; + + emit(pc, e); +} + +static void +emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xb0000000; + + set_dst(pc, dst, e); + set_src_0(pc, src, e); + set_long(pc, e); + e->inst[1] |= (6 << 29); + + emit(pc, e); +} + +static void +emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + unsigned inv_cop[8] = { 0, 4, 2, 6, 1, 5, 3, 7 }; + struct nv50_reg *rdst; + + assert(c_op <= 7); + if (check_swap_src_0_1(pc, &src0, &src1)) + c_op = inv_cop[c_op]; + + rdst = dst; + if (dst->type != P_TEMP) + dst = alloc_temp(pc, NULL); + + /* set.u32 */ + set_long(pc, e); + e->inst[0] |= 0xb0000000; + e->inst[1] |= (3 << 29); + e->inst[1] |= (c_op << 14); + /*XXX: breaks things, .u32 by default? + * decuda will disasm as .u16 and use .lo/.hi regs, but this + * doesn't seem to match what the hw actually does. + inst[1] |= 0x04000000; << breaks things.. .u32 by default? + */ + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + emit(pc, e); + + /* cvt.f32.u32 */ + e = exec(pc); + e->inst[0] = 0xa0000001; + e->inst[1] = 0x64014780; + set_dst(pc, rdst, e); + set_src_0(pc, dst, e); + emit(pc, e); + + if (dst != rdst) + free_temp(pc, dst); +} + +static void +emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x08000000; /* integer mode */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */ + e->inst[1] |= (1 << 14); /* src .f32 */ + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void +emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *v, struct nv50_reg *e) +{ + struct nv50_reg *temp = alloc_temp(pc, NULL); + + emit_flop(pc, 3, temp, v); + emit_mul(pc, temp, temp, e); + emit_preex2(pc, temp, temp); + emit_flop(pc, 6, dst, temp); + + free_temp(pc, temp); +} + +static void +emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= (1 << 14); /* src .f32 */ + e->inst[1] |= ((1 << 6) << 14); /* .abs */ + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void +emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, + struct nv50_reg **src) +{ + struct nv50_reg *one = alloc_immd(pc, 1.0); + struct nv50_reg *zero = alloc_immd(pc, 0.0); + struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); + struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); + struct nv50_reg *tmp[4]; + + if (mask & (1 << 0)) + emit_mov(pc, dst[0], one); + + if (mask & (1 << 3)) + emit_mov(pc, dst[3], one); + + if (mask & (3 << 1)) { + if (mask & (1 << 1)) + tmp[0] = dst[1]; + else + tmp[0] = temp_temp(pc); + emit_minmax(pc, 4, tmp[0], src[0], zero); + } + + if (mask & (1 << 2)) { + set_pred_wr(pc, 1, 0, pc->p->exec_tail); + + tmp[1] = temp_temp(pc); + emit_minmax(pc, 4, tmp[1], src[1], zero); + + tmp[3] = temp_temp(pc); + emit_minmax(pc, 4, tmp[3], src[3], neg128); + emit_minmax(pc, 5, tmp[3], tmp[3], pos128); + + emit_pow(pc, dst[2], tmp[1], tmp[3]); + emit_mov(pc, dst[2], zero); + set_pred(pc, 3, 0, pc->p->exec_tail); + } +} + +static void +emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + set_long(pc, e); + e->inst[0] |= 0xa0000000; /* delta */ + e->inst[1] |= (7 << 29); /* delta */ + e->inst[1] |= 0x04000000; /* negate arg0? probably not */ + e->inst[1] |= (1 << 14); /* src .f32 */ + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void +emit_kil(struct nv50_pc *pc, struct nv50_reg *src) +{ + struct nv50_program_exec *e; + const int r_pred = 1; + + /* Sets predicate reg ? */ + e = exec(pc); + e->inst[0] = 0xa00001fd; + e->inst[1] = 0xc4014788; + set_src_0(pc, src, e); + set_pred_wr(pc, 1, r_pred, e); + emit(pc, e); + + /* This is probably KILP */ + e = exec(pc); + e->inst[0] = 0x000001fe; + set_long(pc, e); + set_pred(pc, 1 /* LT? */, r_pred, e); + emit(pc, e); +} + +static struct nv50_reg * +tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) +{ + switch (dst->DstRegister.File) { + case TGSI_FILE_TEMPORARY: + return &pc->temp[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_OUTPUT: + return &pc->result[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_NULL: + return NULL; + default: + break; + } + + return NULL; +} + +static struct nv50_reg * +tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) +{ + struct nv50_reg *r = NULL; + struct nv50_reg *temp; + unsigned c; + + c = tgsi_util_get_full_src_register_extswizzle(src, chan); + switch (c) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch (src->SrcRegister.File) { + case TGSI_FILE_INPUT: + r = &pc->attr[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_TEMPORARY: + r = &pc->temp[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_CONSTANT: + r = &pc->param[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_IMMEDIATE: + r = &pc->immd[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_SAMPLER: + break; + default: + assert(0); + break; + } + break; + case TGSI_EXTSWIZZLE_ZERO: + r = alloc_immd(pc, 0.0); + break; + case TGSI_EXTSWIZZLE_ONE: + r = alloc_immd(pc, 1.0); + break; + default: + assert(0); + break; + } + + switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { + case TGSI_UTIL_SIGN_KEEP: + break; + case TGSI_UTIL_SIGN_CLEAR: + temp = temp_temp(pc); + emit_abs(pc, temp, r); + r = temp; + break; + case TGSI_UTIL_SIGN_TOGGLE: + temp = temp_temp(pc); + emit_neg(pc, temp, r); + r = temp; + break; + case TGSI_UTIL_SIGN_SET: + temp = temp_temp(pc); + emit_abs(pc, temp, r); + emit_neg(pc, temp, r); + r = temp; + break; + default: + assert(0); + break; + } + + return r; +} + +static boolean +nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) +{ + const struct tgsi_full_instruction *inst = &tok->FullInstruction; + struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; + unsigned mask, sat, unit; + int i, c; + + mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; + + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); + else + dst[c] = NULL; + } + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; + + if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) + unit = fs->SrcRegister.Index; + + for (c = 0; c < 4; c++) + src[i][c] = tgsi_src(pc, c, fs); + } + + if (sat) { + for (c = 0; c < 4; c++) { + rdst[c] = dst[c]; + dst[c] = temp_temp(pc); + } + } + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_abs(pc, dst[c], src[0][c]); + } + break; + case TGSI_OPCODE_ADD: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_add(pc, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_COS: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 5, temp, temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + break; + case TGSI_OPCODE_DP3: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_DP4: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + emit_mad(pc, temp, src[0][3], src[1][3], temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_DPH: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + emit_add(pc, temp, src[1][3], temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_DST: + { + struct nv50_reg *one = alloc_immd(pc, 1.0); + if (mask & (1 << 0)) + emit_mov(pc, dst[0], one); + if (mask & (1 << 1)) + emit_mul(pc, dst[1], src[0][1], src[1][1]); + if (mask & (1 << 2)) + emit_mov(pc, dst[2], src[0][2]); + if (mask & (1 << 3)) + emit_mov(pc, dst[3], src[1][3]); + FREE(one); + } + break; + case TGSI_OPCODE_EX2: + temp = alloc_temp(pc, NULL); + emit_preex2(pc, temp, src[0][0]); + emit_flop(pc, 6, temp, temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_FLR: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flr(pc, dst[c], src[0][c]); + } + break; + case TGSI_OPCODE_FRC: + temp = alloc_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flr(pc, temp, src[0][c]); + emit_sub(pc, dst[c], src[0][c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_KIL: + emit_kil(pc, src[0][0]); + emit_kil(pc, src[0][1]); + emit_kil(pc, src[0][2]); + emit_kil(pc, src[0][3]); + break; + case TGSI_OPCODE_LIT: + emit_lit(pc, &dst[0], mask, &src[0][0]); + break; + case TGSI_OPCODE_LG2: + temp = alloc_temp(pc, NULL); + emit_flop(pc, 3, temp, src[0][0]); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + break; + case TGSI_OPCODE_LRP: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + /*XXX: we can do better than this */ + temp = alloc_temp(pc, NULL); + emit_neg(pc, temp, src[0][c]); + emit_mad(pc, temp, temp, src[2][c], src[2][c]); + emit_mad(pc, dst[c], src[0][c], src[1][c], temp); + free_temp(pc, temp); + } + break; + case TGSI_OPCODE_MAD: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); + } + break; + case TGSI_OPCODE_MAX: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_MIN: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_MOV: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], src[0][c]); + } + break; + case TGSI_OPCODE_MUL: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul(pc, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_POW: + temp = alloc_temp(pc, NULL); + emit_pow(pc, temp, src[0][0], src[1][0]); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_RCP: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flop(pc, 0, dst[c], src[0][0]); + } + break; + case TGSI_OPCODE_RSQ: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flop(pc, 2, dst[c], src[0][0]); + } + break; + case TGSI_OPCODE_SCS: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + if (mask & (1 << 0)) + emit_flop(pc, 5, dst[0], temp); + if (mask & (1 << 1)) + emit_flop(pc, 4, dst[1], temp); + break; + case TGSI_OPCODE_SGE: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_set(pc, 6, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_SIN: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 4, temp, temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + break; + case TGSI_OPCODE_SLT: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_set(pc, 1, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_SUB: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_sub(pc, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + { + struct nv50_reg *t[4]; + struct nv50_program_exec *e; + + alloc_temp4(pc, t, 0); + emit_mov(pc, t[0], src[0][0]); + emit_mov(pc, t[1], src[0][1]); + + e = exec(pc); + e->inst[0] = 0xf6400000; + e->inst[0] |= (unit << 9); + set_long(pc, e); + e->inst[1] |= 0x0000c004; + set_dst(pc, t[0], e); + emit(pc, e); + + if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]); + if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]); + if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]); + if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]); + + free_temp4(pc, t); + } + break; + case TGSI_OPCODE_XPD: + temp = alloc_temp(pc, NULL); + if (mask & (1 << 0)) { + emit_mul(pc, temp, src[0][2], src[1][1]); + emit_msb(pc, dst[0], src[0][1], src[1][2], temp); + } + if (mask & (1 << 1)) { + emit_mul(pc, temp, src[0][0], src[1][2]); + emit_msb(pc, dst[1], src[0][2], src[1][0], temp); + } + if (mask & (1 << 2)) { + emit_mul(pc, temp, src[0][1], src[1][0]); + emit_msb(pc, dst[2], src[0][0], src[1][1], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_END: + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); + return FALSE; + } + + if (sat) { + for (c = 0; c < 4; c++) { + struct nv50_program_exec *e; + + if (!(mask & (1 << c))) + continue; + e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= (1 << 14); /* src .f32 */ + e->inst[1] |= ((1 << 5) << 14); /* .sat */ + set_dst(pc, rdst[c], e); + set_src_0(pc, dst[c], e); + emit(pc, e); + } + } + + kill_temp_temp(pc); + return TRUE; +} + +static boolean +nv50_program_tx_prep(struct nv50_pc *pc) +{ + struct tgsi_parse_context p; + boolean ret = FALSE; + unsigned i, c; + + tgsi_parse_init(&p, pc->p->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch (tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm = + &p.FullToken.FullImmediate; + + ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, + imm->u.ImmediateFloat32[1].Float, + imm->u.ImmediateFloat32[2].Float, + imm->u.ImmediateFloat32[3].Float); + } + break; + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *d; + unsigned last; + + d = &p.FullToken.FullDeclaration; + last = d->DeclarationRange.Last; + + switch (d->Declaration.File) { + case TGSI_FILE_TEMPORARY: + if (pc->temp_nr < (last + 1)) + pc->temp_nr = last + 1; + break; + case TGSI_FILE_OUTPUT: + if (pc->result_nr < (last + 1)) + pc->result_nr = last + 1; + break; + case TGSI_FILE_INPUT: + if (pc->attr_nr < (last + 1)) + pc->attr_nr = last + 1; + break; + case TGSI_FILE_CONSTANT: + if (pc->param_nr < (last + 1)) + pc->param_nr = last + 1; + break; + case TGSI_FILE_SAMPLER: + break; + default: + NOUVEAU_ERR("bad decl file %d\n", + d->Declaration.File); + goto out_err; + } + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + break; + default: + break; + } + } + + if (pc->temp_nr) { + pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg)); + if (!pc->temp) + goto out_err; + + for (i = 0; i < pc->temp_nr; i++) { + for (c = 0; c < 4; c++) { + pc->temp[i*4+c].type = P_TEMP; + pc->temp[i*4+c].hw = -1; + pc->temp[i*4+c].index = i; + } + } + } + + if (pc->attr_nr) { + struct nv50_reg *iv = NULL; + int aid = 0; + + pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg)); + if (!pc->attr) + goto out_err; + + if (pc->p->type == PIPE_SHADER_FRAGMENT) { + iv = alloc_temp(pc, NULL); + emit_interp(pc, iv, iv, NULL); + emit_flop(pc, 0, iv, iv); + aid++; + } + + for (i = 0; i < pc->attr_nr; i++) { + struct nv50_reg *a = &pc->attr[i*4]; + + for (c = 0; c < 4; c++) { + if (pc->p->type == PIPE_SHADER_FRAGMENT) { + struct nv50_reg *at = + alloc_temp(pc, NULL); + pc->attr[i*4+c].type = at->type; + pc->attr[i*4+c].hw = at->hw; + pc->attr[i*4+c].index = at->index; + } else { + pc->p->cfg.vp.attr[aid/32] |= + (1 << (aid % 32)); + pc->attr[i*4+c].type = P_ATTR; + pc->attr[i*4+c].hw = aid++; + pc->attr[i*4+c].index = i; + } + } + + if (pc->p->type != PIPE_SHADER_FRAGMENT) + continue; + + emit_interp(pc, &a[0], &a[0], iv); + emit_interp(pc, &a[1], &a[1], iv); + emit_interp(pc, &a[2], &a[2], iv); + emit_interp(pc, &a[3], &a[3], iv); + } + + if (iv) + free_temp(pc, iv); + } + + if (pc->result_nr) { + int rid = 0; + + pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg)); + if (!pc->result) + goto out_err; + + for (i = 0; i < pc->result_nr; i++) { + for (c = 0; c < 4; c++) { + if (pc->p->type == PIPE_SHADER_FRAGMENT) { + pc->result[i*4+c].type = P_TEMP; + pc->result[i*4+c].hw = -1; + } else { + pc->result[i*4+c].type = P_RESULT; + pc->result[i*4+c].hw = rid++; + } + pc->result[i*4+c].index = i; + } + } + } + + if (pc->param_nr) { + int rid = 0; + + pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg)); + if (!pc->param) + goto out_err; + + for (i = 0; i < pc->param_nr; i++) { + for (c = 0; c < 4; c++) { + pc->param[i*4+c].type = P_CONST; + pc->param[i*4+c].hw = rid++; + pc->param[i*4+c].index = i; + } + } + } + + if (pc->immd_nr) { + int rid = pc->param_nr * 4; + + pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg)); + if (!pc->immd) + goto out_err; + + for (i = 0; i < pc->immd_nr; i++) { + for (c = 0; c < 4; c++) { + pc->immd[i*4+c].type = P_IMMD; + pc->immd[i*4+c].hw = rid++; + pc->immd[i*4+c].index = i; + } + } + } + + ret = TRUE; +out_err: + tgsi_parse_free(&p); + return ret; +} + +static boolean +nv50_program_tx(struct nv50_program *p) +{ + struct tgsi_parse_context parse; + struct nv50_pc *pc; + boolean ret; + + pc = CALLOC_STRUCT(nv50_pc); + if (!pc) + return FALSE; + pc->p = p; + pc->p->cfg.high_temp = 4; + + ret = nv50_program_tx_prep(pc); + if (ret == FALSE) + goto out_cleanup; + + tgsi_parse_init(&parse, pc->p->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&parse)) { + const union tgsi_full_token *tok = &parse.FullToken; + + tgsi_parse_token(&parse); + + switch (tok->Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + ret = nv50_program_tx_insn(pc, tok); + if (ret == FALSE) + goto out_err; + break; + default: + break; + } + } + + if (p->type == PIPE_SHADER_FRAGMENT) { + struct nv50_reg out; + + out.type = P_TEMP; + for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++) + emit_mov(pc, &out, &pc->result[out.hw]); + } + + assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head)); + pc->p->exec_tail->inst[1] |= 0x00000001; + + p->param_nr = pc->param_nr * 4; + p->immd_nr = pc->immd_nr * 4; + p->immd = pc->immd_buf; + +out_err: + tgsi_parse_free(&parse); + +out_cleanup: + return ret; +} + +static void +nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) +{ + if (nv50_program_tx(p) == FALSE) + assert(0); + p->translated = TRUE; +} + +static void +nv50_program_upload_data(struct nv50_context *nv50, float *map, + unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + + while (count) { + unsigned nr = count > 2047 ? 2047 : count; + + BEGIN_RING(chan, tesla, 0x00000f00, 1); + OUT_RING (chan, (NV50_CB_PMISC << 0) | (start << 8)); + BEGIN_RING(chan, tesla, 0x40000f04, nr); + OUT_RINGp (chan, map, nr); + + map += nr; + start += nr; + count -= nr; + } +} + +static void +nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) +{ + struct nouveau_winsys *nvws = nv50->screen->nvws; + struct pipe_winsys *ws = nv50->pipe.winsys; + unsigned nr = p->param_nr + p->immd_nr; + + if (!p->data && nr) { + struct nouveau_resource *heap = nv50->screen->vp_data_heap; + + if (nvws->res_alloc(heap, nr, p, &p->data)) { + while (heap->next && heap->size < nr) { + struct nv50_program *evict = heap->next->priv; + nvws->res_free(&evict->data); + } + + if (nvws->res_alloc(heap, nr, p, &p->data)) + assert(0); + } + } + + if (p->param_nr) { + float *map = ws->buffer_map(ws, nv50->constbuf[p->type], + PIPE_BUFFER_USAGE_CPU_READ); + nv50_program_upload_data(nv50, map, p->data->start, + p->param_nr); + ws->buffer_unmap(ws, nv50->constbuf[p->type]); + } + + if (p->immd_nr) { + nv50_program_upload_data(nv50, p->immd, + p->data->start + p->param_nr, + p->immd_nr); + } +} + +static void +nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) +{ + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct pipe_winsys *ws = nv50->pipe.winsys; + struct nv50_program_exec *e; + struct nouveau_stateobj *so; + const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; + unsigned start, count, *up, *ptr; + boolean upload = FALSE; + + if (!p->buffer) { + p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); + upload = TRUE; + } + + if (p->data && p->data->start != p->data_start) { + for (e = p->exec_head; e; e = e->next) { + unsigned ei, ci; + + if (e->param.index < 0) + continue; + ei = e->param.shift >> 5; + ci = e->param.index + p->data->start; + + e->inst[ei] &= ~e->param.mask; + e->inst[ei] |= (ci << e->param.shift); + } + + p->data_start = p->data->start; + upload = TRUE; + } + + if (!upload) + return; + +#ifdef NV50_PROGRAM_DUMP + NOUVEAU_ERR("-------\n"); + up = ptr = MALLOC(p->exec_size * 4); + for (e = p->exec_head; e; e = e->next) { + NOUVEAU_ERR("0x%08x\n", e->inst[0]); + if (is_long(e)) + NOUVEAU_ERR("0x%08x\n", e->inst[1]); + } + +#endif + + up = ptr = MALLOC(p->exec_size * 4); + for (e = p->exec_head; e; e = e->next) { + *(ptr++) = e->inst[0]; + if (is_long(e)) + *(ptr++) = e->inst[1]; + } + + so = so_new(4,2); + so_method(so, nv50->screen->tesla, 0x1280, 3); + so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4)); + + start = 0; count = p->exec_size; + while (count) { + struct nouveau_winsys *nvws = nv50->screen->nvws; + unsigned nr; + + so_emit(nvws, so); + + nr = MIN2(count, 2047); + nr = MIN2(nvws->channel->pushbuf->remaining, nr); + if (nvws->channel->pushbuf->remaining < (nr + 3)) { + FIRE_RING(chan); + continue; + } + + BEGIN_RING(chan, tesla, 0x0f00, 1); + OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD); + BEGIN_RING(chan, tesla, 0x40000f04, nr); + OUT_RINGp (chan, up + start, nr); + + start += nr; + count -= nr; + } + + FREE(up); + so_ref(NULL, &so); +} + +void +nv50_vertprog_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->vertprog; + struct nouveau_stateobj *so; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); + + so = so_new(13, 2); + so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, 0x1650, 2); + so_data (so, p->cfg.vp.attr[0]); + so_data (so, p->cfg.vp.attr[1]); + so_method(so, tesla, 0x16b8, 1); + so_data (so, p->cfg.high_result); + so_method(so, tesla, 0x16ac, 2); + so_data (so, p->cfg.high_result); //8); + so_data (so, p->cfg.high_temp); + so_method(so, tesla, 0x140c, 1); + so_data (so, 0); /* program start offset */ + so_ref(so, &nv50->state.vertprog); +} + +void +nv50_fragprog_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->fragprog; + struct nouveau_stateobj *so; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); + + so = so_new(64, 2); + so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, 0x1904, 4); + so_data (so, 0x00040404); /* p: 0x01000404 */ + so_data (so, 0x00000004); + so_data (so, 0x00000000); + so_data (so, 0x00000000); + so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */ + so_data (so, 0x03020100); + so_data (so, 0x07060504); + so_data (so, 0x0b0a0908); + so_method(so, tesla, 0x1988, 2); + so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */ + so_data (so, p->cfg.high_temp); + so_method(so, tesla, 0x1414, 1); + so_data (so, 0); /* program start offset */ + so_ref(so, &nv50->state.fragprog); +} + +void +nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) +{ + struct pipe_screen *pscreen = nv50->pipe.screen; + + while (p->exec_head) { + struct nv50_program_exec *e = p->exec_head; + + p->exec_head = e->next; + FREE(e); + } + p->exec_tail = NULL; + p->exec_size = 0; + + if (p->buffer) + pipe_buffer_reference(pscreen, &p->buffer, NULL); + + nv50->screen->nvws->res_free(&p->data); + + p->translated = 0; +} + diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h new file mode 100644 index 0000000000..78deed6a38 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -0,0 +1,45 @@ +#ifndef __NV50_PROGRAM_H__ +#define __NV50_PROGRAM_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv50_program_exec { + struct nv50_program_exec *next; + + unsigned inst[2]; + struct { + int index; + unsigned mask; + unsigned shift; + } param; +}; + +struct nv50_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + boolean translated; + + unsigned type; + struct nv50_program_exec *exec_head; + struct nv50_program_exec *exec_tail; + unsigned exec_size; + struct nouveau_resource *data; + unsigned data_start; + + struct pipe_buffer *buffer; + + float *immd; + unsigned immd_nr; + unsigned param_nr; + + struct { + unsigned high_temp; + unsigned high_result; + struct { + unsigned attr[2]; + } vp; + } cfg; +}; + +#endif diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c new file mode 100644 index 0000000000..20745ceab8 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -0,0 +1,134 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" + +#include "nv50_context.h" + +struct nv50_query { + struct pipe_buffer *buffer; + unsigned type; + boolean ready; + uint64_t result; +}; + +static INLINE struct nv50_query * +nv50_query(struct pipe_query *pipe) +{ + return (struct nv50_query *)pipe; +} + +static struct pipe_query * +nv50_query_create(struct pipe_context *pipe, unsigned type) +{ + struct pipe_winsys *ws = pipe->winsys; + struct nv50_query *q = CALLOC_STRUCT(nv50_query); + + assert (q->type == PIPE_QUERY_OCCLUSION_COUNTER); + q->type = type; + + q->buffer = ws->buffer_create(ws, 256, 0, 16); + if (!q->buffer) { + FREE(q); + return NULL; + } + + return (struct pipe_query *)q; +} + +static void +nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv50_query *q = nv50_query(pq); + + if (q) { + pipe_buffer_reference(pipe->screen, &q->buffer, NULL); + FREE(q); + } +} + +static void +nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_query *q = nv50_query(pq); + + BEGIN_RING(chan, tesla, 0x1530, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, tesla, 0x1514, 1); + OUT_RING (chan, 1); + + q->ready = FALSE; +} + +static void +nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_query *q = nv50_query(pq); + + WAIT_RING (chan, 5); + BEGIN_RING(chan, tesla, 0x1b00, 4); + OUT_RELOCh(chan, q->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, q->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x0100f002); + FIRE_RING (chan); +} + +static boolean +nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, uint64_t *result) +{ + struct pipe_winsys *ws = pipe->winsys; + struct nv50_query *q = nv50_query(pq); + + /*XXX: Want to be able to return FALSE here instead of blocking + * until the result is available.. + */ + + if (!q->ready) { + uint32_t *map = ws->buffer_map(ws, q->buffer, + PIPE_BUFFER_USAGE_CPU_READ); + q->result = map[1]; + q->ready = TRUE; + ws->buffer_unmap(ws, q->buffer); + } + + *result = q->result; + return q->ready; +} + +void +nv50_init_query_functions(struct nv50_context *nv50) +{ + nv50->pipe.create_query = nv50_query_create; + nv50->pipe.destroy_query = nv50_query_destroy; + nv50->pipe.begin_query = nv50_query_begin; + nv50->pipe.end_query = nv50_query_end; + nv50->pipe.get_query_result = nv50_query_result; +} diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c new file mode 100644 index 0000000000..58d7a621a8 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -0,0 +1,356 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_screen.h" + +#include "util/u_simple_screen.h" + +#include "nv50_context.h" +#include "nv50_screen.h" + +#include "nouveau/nouveau_stateobj.h" + +#define NV5X_GRCLASS5097_CHIPSETS 0x00000001 +#define NV8X_GRCLASS8297_CHIPSETS 0x00000050 +#define NV9X_GRCLASS8297_CHIPSETS 0x00000014 + +static boolean +nv50_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return TRUE; + default: + break; + } + } + + return FALSE; +} + +static const char * +nv50_screen_get_name(struct pipe_screen *pscreen) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + struct nouveau_device *dev = screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv50_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "nouveau"; +} + +static int +nv50_screen_get_param(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 32; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 1; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 8; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; + case NOUVEAU_CAP_HW_VTXBUF: + return 1; + case NOUVEAU_CAP_HW_IDXBUF: + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv50_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static void +nv50_screen_destroy(struct pipe_screen *pscreen) +{ + FREE(pscreen); +} + +struct pipe_screen * +nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen); + struct nouveau_stateobj *so; + unsigned tesla_class = 0, ret; + unsigned chipset = nvws->channel->device->chipset; + int i; + + if (!screen) + return NULL; + screen->nvws = nvws; + + /* 2D object */ + ret = nvws->grobj_alloc(nvws, NV50_2D, &screen->eng2d); + if (ret) { + NOUVEAU_ERR("Error creating 2D object: %d\n", ret); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + /* 3D object */ + if ((chipset & 0xf0) != 0x50 && (chipset & 0xf0) != 0x80) { + NOUVEAU_ERR("Not a G8x chipset\n"); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + switch (chipset & 0xf0) { + case 0x50: + if (NV5X_GRCLASS5097_CHIPSETS & (1 << (chipset & 0x0f))) + tesla_class = 0x5097; + break; + case 0x80: + if (NV8X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) + tesla_class = 0x8297; + break; + case 0x90: + if (NV9X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) + tesla_class = 0x8297; + break; + default: + break; + } + + if (tesla_class == 0) { + NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + ret = nvws->grobj_alloc(nvws, tesla_class, &screen->tesla); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + /* Sync notifier */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + /* Static 2D init */ + so = so_new(64, 0); + so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4); + so_data (so, screen->sync->handle); + so_data (so, screen->nvws->channel->vram->handle); + so_data (so, screen->nvws->channel->vram->handle); + so_data (so, screen->nvws->channel->vram->handle); + so_method(so, screen->eng2d, NV50_2D_OPERATION, 1); + so_data (so, NV50_2D_OPERATION_SRCCOPY); + so_method(so, screen->eng2d, 0x0290, 1); + so_data (so, 0); + so_method(so, screen->eng2d, 0x0888, 1); + so_data (so, 1); + so_emit(nvws, so); + so_ref(NULL, &so); + + /* Static tesla init */ + so = so_new(256, 20); + + so_method(so, screen->tesla, 0x1558, 1); + so_data (so, 1); + so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); + so_data (so, screen->sync->handle); + so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0), + NV50TCL_DMA_UNK0__SIZE); + for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++) + so_data(so, nvws->channel->vram->handle); + so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0), + NV50TCL_DMA_UNK1__SIZE); + for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++) + so_data(so, nvws->channel->vram->handle); + so_method(so, screen->tesla, 0x121c, 1); + so_data (so, 1); + + so_method(so, screen->tesla, 0x13bc, 1); + so_data (so, 0x54); + so_method(so, screen->tesla, 0x13ac, 1); + so_data (so, 1); + so_method(so, screen->tesla, 0x16b8, 1); + so_data (so, 8); + + /* Shared constant buffer */ + screen->constbuf = ws->buffer_create(ws, 0, 0, 128 * 4 * 4); + if (nvws->res_init(&screen->vp_data_heap, 0, 128)) { + NOUVEAU_ERR("Error initialising constant buffer\n"); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PMISC << 16) | 0x00001000); + + /* Texture sampler/image unit setup - we abuse the constant buffer + * upload mechanism for the moment to upload data to the tex config + * blocks. At some point we *may* want to go the NVIDIA way of doing + * things? + */ + screen->tic = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_TIC << 16) | 0x0800); + so_method(so, screen->tesla, 0x1574, 3); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0x00000800); + + screen->tsc = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_TSC << 16) | 0x0800); + so_method(so, screen->tesla, 0x155c, 3); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0x00000800); + + + /* Vertex array limits - max them out */ + for (i = 0; i < 16; i++) { + so_method(so, screen->tesla, 0x1080 + (i * 8), 2); + so_data (so, 0x000000ff); + so_data (so, 0xffffffff); + } + + so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + + so_method(so, screen->tesla, 0x1234, 1); + so_data (so, 1); + so_method(so, screen->tesla, 0x1458, 1); + so_data (so, 1); + + so_emit(nvws, so); + so_ref(so, &screen->static_init); + nvws->push_flush(nvws, 0, NULL); + + screen->pipe.winsys = ws; + + screen->pipe.destroy = nv50_screen_destroy; + + screen->pipe.get_name = nv50_screen_get_name; + screen->pipe.get_vendor = nv50_screen_get_vendor; + screen->pipe.get_param = nv50_screen_get_param; + screen->pipe.get_paramf = nv50_screen_get_paramf; + + screen->pipe.is_format_supported = nv50_screen_is_format_supported; + + nv50_screen_init_miptree_functions(&screen->pipe); + nv50_surface_init_screen_functions(&screen->pipe); + u_simple_screen_init(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h new file mode 100644 index 0000000000..c888ca071c --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -0,0 +1,34 @@ +#ifndef __NV50_SCREEN_H__ +#define __NV50_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv50_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + + unsigned cur_pctx; + + struct nouveau_grobj *tesla; + struct nouveau_grobj *eng2d; + struct nouveau_notifier *sync; + + struct pipe_buffer *constbuf; + struct nouveau_resource *vp_data_heap; + + struct pipe_buffer *tic; + struct pipe_buffer *tsc; + + struct nouveau_stateobj *static_init; +}; + +static INLINE struct nv50_screen * +nv50_screen(struct pipe_screen *screen) +{ + return (struct nv50_screen *)screen; +} + +void nv50_surface_init_screen_functions(struct pipe_screen *); + +#endif diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c new file mode 100644 index 0000000000..787ff958ec --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -0,0 +1,664 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv50_context.h" +#include "nv50_texture.h" + +#include "nouveau/nouveau_stateobj.h" + +static void * +nv50_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; + struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj); + unsigned cmask = 0, i; + + /*XXX ignored: + * - dither + */ + + if (cso->blend_enable == 0) { + so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8); + for (i = 0; i < 8; i++) + so_data(so, 0); + } else { + so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8); + for (i = 0; i < 8; i++) + so_data(so, 1); + so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5); + so_data (so, nvgl_blend_eqn(cso->rgb_func)); + so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_src_factor)); + so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_dst_factor)); + so_data (so, nvgl_blend_eqn(cso->alpha_func)); + so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_src_factor)); + so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1); + so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_dst_factor)); + } + + if (cso->logicop_enable == 0 ) { + so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } else { + so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } + + if (cso->colormask & PIPE_MASK_R) + cmask |= (1 << 0); + if (cso->colormask & PIPE_MASK_G) + cmask |= (1 << 4); + if (cso->colormask & PIPE_MASK_B) + cmask |= (1 << 8); + if (cso->colormask & PIPE_MASK_A) + cmask |= (1 << 12); + so_method(so, tesla, NV50TCL_COLOR_MASK(0), 8); + for (i = 0; i < 8; i++) + so_data(so, cmask); + + bso->pipe = *cso; + so_ref(so, &bso->so); + return (void *)bso; +} + +static void +nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->blend = hwcso; + nv50->dirty |= NV50_NEW_BLEND; +} + +static void +nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_blend_stateobj *bso = hwcso; + + so_ref(NULL, &bso->so); + FREE(bso); +} + +static INLINE unsigned +wrap_mode(unsigned wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return NV50TSC_1_0_WRAPS_REPEAT; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return NV50TSC_1_0_WRAPS_MIRROR_REPEAT; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_CLAMP: + return NV50TSC_1_0_WRAPS_CLAMP; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return NV50TSC_1_0_WRAPS_MIRROR_CLAMP; + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + return NV50TSC_1_0_WRAPS_REPEAT; + } +} +static void * +nv50_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + unsigned *tsc = CALLOC(8, sizeof(unsigned)); + + tsc[0] = (0x00024000 | + (wrap_mode(cso->wrap_s) << 0) | + (wrap_mode(cso->wrap_t) << 3) | + (wrap_mode(cso->wrap_r) << 6)); + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + tsc[1] |= NV50TSC_1_1_MAGF_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + tsc[1] |= NV50TSC_1_1_MAGF_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + tsc[1] |= NV50TSC_1_1_MINF_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + tsc[1] |= NV50TSC_1_1_MINF_NEAREST; + break; + } + + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_LINEAR: + tsc[1] |= NV50TSC_1_1_MIPF_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NEAREST: + tsc[1] |= NV50TSC_1_1_MIPF_NEAREST; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + tsc[1] |= NV50TSC_1_1_MIPF_NONE; + break; + } + + if (cso->max_anisotropy >= 16.0) + tsc[0] |= (7 << 20); + else + if (cso->max_anisotropy >= 12.0) + tsc[0] |= (6 << 20); + else + if (cso->max_anisotropy >= 10.0) + tsc[0] |= (5 << 20); + else + if (cso->max_anisotropy >= 8.0) + tsc[0] |= (4 << 20); + else + if (cso->max_anisotropy >= 6.0) + tsc[0] |= (3 << 20); + else + if (cso->max_anisotropy >= 4.0) + tsc[0] |= (2 << 20); + else + if (cso->max_anisotropy >= 2.0) + tsc[0] |= (1 << 20); + + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + tsc[0] |= (1 << 8); + tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7); + } + + return (void *)tsc; +} + +static void +nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv50_context *nv50 = nv50_context(pipe); + int i; + + nv50->sampler_nr = nr; + for (i = 0; i < nv50->sampler_nr; i++) + nv50->sampler[i] = sampler[i]; + + nv50->dirty |= NV50_NEW_SAMPLER; +} + +static void +nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **pt) +{ + struct nv50_context *nv50 = nv50_context(pipe); + int i; + + for (i = 0; i < nr; i++) + pipe_texture_reference((void *)&nv50->miptree[i], pt[i]); + for (i = nr; i < nv50->miptree_nr; i++) + pipe_texture_reference((void *)&nv50->miptree[i], NULL); + + nv50->miptree_nr = nr; + nv50->dirty |= NV50_NEW_TEXTURE; +} + +static void * +nv50_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; + struct nv50_rasterizer_stateobj *rso = + CALLOC_STRUCT(nv50_rasterizer_stateobj); + + /*XXX: ignored + * - light_twosize + * - point_smooth + * - multisample + * - point_sprite / sprite_coord_mode + */ + + so_method(so, tesla, NV50TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT : + NV50TCL_SHADE_MODEL_SMOOTH); + + so_method(so, tesla, NV50TCL_LINE_WIDTH, 1); + so_data (so, fui(cso->line_width)); + so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1); + so_data (so, cso->line_smooth ? 1 : 0); + if (cso->line_stipple_enable) { + so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, tesla, NV50TCL_LINE_STIPPLE_PATTERN, 1); + so_data (so, (cso->line_stipple_pattern << 8) | + cso->line_stipple_factor); + } else { + so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, tesla, NV50TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + + so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + } else { + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + } + so_data(so, cso->poly_smooth ? 1 : 0); + + so_method(so, tesla, NV50TCL_CULL_FACE_ENABLE, 3); + so_data (so, cso->cull_mode != PIPE_WINDING_NONE); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, NV50TCL_FRONT_FACE_CCW); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV50TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + } + } else { + so_data(so, NV50TCL_FRONT_FACE_CW); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV50TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + } + } + + so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 1 : 0); + + so_method(so, tesla, NV50TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1); + so_data (so, fui(cso->offset_scale)); + so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1); + so_data (so, fui(cso->offset_units)); + } + + rso->pipe = *cso; + so_ref(so, &rso->so); + return (void *)rso; +} + +static void +nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->rasterizer = hwcso; + nv50->dirty |= NV50_NEW_RASTERIZER; +} + +static void +nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_rasterizer_stateobj *rso = hwcso; + + so_ref(NULL, &rso->so); + FREE(rso); +} + +static void * +nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; + struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj); + struct nouveau_stateobj *so = so_new(64, 0); + + so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); + so_data (so, cso->depth.writemask ? 1 : 0); + if (cso->depth.enabled) { + so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); + so_data (so, 1); + so_method(so, tesla, NV50TCL_DEPTH_TEST_FUNC, 1); + so_data (so, nvgl_comparison_op(cso->depth.func)); + } else { + so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); + so_data (so, 0); + } + + /*XXX: yes, I know they're backwards.. header needs fixing */ + if (cso->stencil[0].enabled) { + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5); + so_data (so, 1); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3); + so_data (so, cso->stencil[0].ref_value); + so_data (so, cso->stencil[0].writemask); + so_data (so, cso->stencil[0].valuemask); + } else { + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[1].enabled) { + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8); + so_data (so, 1); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_data (so, cso->stencil[1].ref_value); + so_data (so, cso->stencil[1].writemask); + so_data (so, cso->stencil[1].valuemask); + } else { + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } + + if (cso->alpha.enabled) { + so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1); + so_data (so, 1); + so_method(so, tesla, NV50TCL_ALPHA_TEST_REF, 2); + so_data (so, fui(cso->alpha.ref_value)); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + } else { + so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1); + so_data (so, 0); + } + + zsa->pipe = *cso; + so_ref(so, &zsa->so); + return (void *)zsa; +} + +static void +nv50_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->zsa = hwcso; + nv50->dirty |= NV50_NEW_ZSA; +} + +static void +nv50_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_zsa_stateobj *zsa = hwcso; + + so_ref(NULL, &zsa->so); + FREE(zsa); +} + +static void * +nv50_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv50_program *p = CALLOC_STRUCT(nv50_program); + + p->pipe.tokens = tgsi_dup_tokens(cso->tokens); + p->type = PIPE_SHADER_VERTEX; + tgsi_scan_shader(p->pipe.tokens, &p->info); + return (void *)p; +} + +static void +nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->vertprog = hwcso; + nv50->dirty |= NV50_NEW_VERTPROG; +} + +static void +nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_program *p = hwcso; + + nv50_program_destroy(nv50, p); + FREE((void*)p->pipe.tokens); + FREE(p); +} + +static void * +nv50_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv50_program *p = CALLOC_STRUCT(nv50_program); + + p->pipe.tokens = tgsi_dup_tokens(cso->tokens); + p->type = PIPE_SHADER_FRAGMENT; + tgsi_scan_shader(p->pipe.tokens, &p->info); + return (void *)p; +} + +static void +nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->fragprog = hwcso; + nv50->dirty |= NV50_NEW_FRAGPROG; +} + +static void +nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_program *p = hwcso; + + nv50_program_destroy(nv50, p); + FREE((void*)p->pipe.tokens); + FREE(p); +} + +static void +nv50_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->blend_colour = *bcol; + nv50->dirty |= NV50_NEW_BLEND_COLOUR; +} + +static void +nv50_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ +} + +static void +nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + if (shader == PIPE_SHADER_VERTEX) { + nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; + nv50->dirty |= NV50_NEW_VERTPROG_CB; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; + nv50->dirty |= NV50_NEW_FRAGPROG_CB; + } +} + +static void +nv50_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->framebuffer = *fb; + nv50->dirty |= NV50_NEW_FRAMEBUFFER; +} + +static void +nv50_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->stipple = *stipple; + nv50->dirty |= NV50_NEW_STIPPLE; +} + +static void +nv50_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->scissor = *s; + nv50->dirty |= NV50_NEW_SCISSOR; +} + +static void +nv50_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->viewport = *vpt; + nv50->dirty |= NV50_NEW_VIEWPORT; +} + +static void +nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count); + nv50->vtxbuf_nr = count; + + nv50->dirty |= NV50_NEW_ARRAYS; +} + +static void +nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + memcpy(nv50->vtxelt, ve, sizeof(*ve) * count); + nv50->vtxelt_nr = count; + + nv50->dirty |= NV50_NEW_ARRAYS; +} + +void +nv50_init_state_functions(struct nv50_context *nv50) +{ + nv50->pipe.create_blend_state = nv50_blend_state_create; + nv50->pipe.bind_blend_state = nv50_blend_state_bind; + nv50->pipe.delete_blend_state = nv50_blend_state_delete; + + nv50->pipe.create_sampler_state = nv50_sampler_state_create; + nv50->pipe.bind_sampler_states = nv50_sampler_state_bind; + nv50->pipe.delete_sampler_state = nv50_sampler_state_delete; + nv50->pipe.set_sampler_textures = nv50_set_sampler_texture; + + nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create; + nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind; + nv50->pipe.delete_rasterizer_state = nv50_rasterizer_state_delete; + + nv50->pipe.create_depth_stencil_alpha_state = + nv50_depth_stencil_alpha_state_create; + nv50->pipe.bind_depth_stencil_alpha_state = + nv50_depth_stencil_alpha_state_bind; + nv50->pipe.delete_depth_stencil_alpha_state = + nv50_depth_stencil_alpha_state_delete; + + nv50->pipe.create_vs_state = nv50_vp_state_create; + nv50->pipe.bind_vs_state = nv50_vp_state_bind; + nv50->pipe.delete_vs_state = nv50_vp_state_delete; + + nv50->pipe.create_fs_state = nv50_fp_state_create; + nv50->pipe.bind_fs_state = nv50_fp_state_bind; + nv50->pipe.delete_fs_state = nv50_fp_state_delete; + + nv50->pipe.set_blend_color = nv50_set_blend_color; + nv50->pipe.set_clip_state = nv50_set_clip_state; + nv50->pipe.set_constant_buffer = nv50_set_constant_buffer; + nv50->pipe.set_framebuffer_state = nv50_set_framebuffer_state; + nv50->pipe.set_polygon_stipple = nv50_set_polygon_stipple; + nv50->pipe.set_scissor_state = nv50_set_scissor_state; + nv50->pipe.set_viewport_state = nv50_set_viewport_state; + + nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers; + nv50->pipe.set_vertex_elements = nv50_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c new file mode 100644 index 0000000000..948112ffa9 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -0,0 +1,313 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nv50_context.h" +#include "nouveau/nouveau_stateobj.h" + +static void +nv50_state_validate_fb(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so = so_new(128, 18); + struct pipe_framebuffer_state *fb = &nv50->framebuffer; + unsigned i, w, h, gw = 0; + + for (i = 0; i < fb->nr_cbufs; i++) { + if (!gw) { + w = fb->cbufs[i]->width; + h = fb->cbufs[i]->height; + gw = 1; + } else { + assert(w == fb->cbufs[i]->width); + assert(h == fb->cbufs[i]->height); + } + + so_method(so, tesla, NV50TCL_RT_HORIZ(i), 2); + so_data (so, fb->cbufs[i]->width); + so_data (so, fb->cbufs[i]->height); + + so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5); + so_reloc (so, nv50_surface_buffer(fb->cbufs[i]), fb->cbufs[i]->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH | + NOUVEAU_BO_RDWR, 0, 0); + so_reloc (so, nv50_surface_buffer(fb->cbufs[i]), fb->cbufs[i]->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | + NOUVEAU_BO_RDWR, 0, 0); + switch (fb->cbufs[i]->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + so_data(so, 0xcf); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + so_data(so, 0xe8); + break; + default: + NOUVEAU_ERR("AIIII unknown format %s\n", + pf_name(fb->cbufs[i]->format)); + so_data(so, 0xe6); + break; + } + so_data(so, 0x00000000); + so_data(so, 0x00000000); + + so_method(so, tesla, 0x1224, 1); + so_data (so, 1); + } + + if (fb->zsbuf) { + if (!gw) { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + gw = 1; + } else { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } + + so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5); + so_reloc (so, nv50_surface_buffer(fb->zsbuf), fb->zsbuf->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH | + NOUVEAU_BO_RDWR, 0, 0); + so_reloc (so, nv50_surface_buffer(fb->zsbuf), fb->zsbuf->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | + NOUVEAU_BO_RDWR, 0, 0); + switch (fb->zsbuf->format) { + case PIPE_FORMAT_Z24S8_UNORM: + so_data(so, 0x16); + break; + case PIPE_FORMAT_Z16_UNORM: + so_data(so, 0x15); + break; + default: + NOUVEAU_ERR("AIIII unknown format %s\n", + pf_name(fb->zsbuf->format)); + so_data(so, 0x16); + break; + } + so_data(so, 0x00000000); + so_data(so, 0x00000000); + + so_method(so, tesla, 0x1538, 1); + so_data (so, 1); + so_method(so, tesla, 0x1228, 3); + so_data (so, fb->zsbuf->width); + so_data (so, fb->zsbuf->height); + so_data (so, 0x00010001); + } + + so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); + so_data (so, w << 16); + so_data (so, h << 16); + so_method(so, tesla, 0x0e04, 2); + so_data (so, w << 16); + so_data (so, h << 16); + so_method(so, tesla, 0xdf8, 2); + so_data (so, 0); + so_data (so, h); + + so_ref(so, &nv50->state.fb); +} + +static void +nv50_state_emit(struct nv50_context *nv50) +{ + struct nv50_screen *screen = nv50->screen; + struct nouveau_winsys *nvws = screen->nvws; + + if (nv50->pctx_id != screen->cur_pctx) { + nv50->state.dirty |= 0xffffffff; + screen->cur_pctx = nv50->pctx_id; + } + + if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER) + so_emit(nvws, nv50->state.fb); + if (nv50->state.dirty & NV50_NEW_BLEND) + so_emit(nvws, nv50->state.blend); + if (nv50->state.dirty & NV50_NEW_ZSA) + so_emit(nvws, nv50->state.zsa); + if (nv50->state.dirty & NV50_NEW_VERTPROG) + so_emit(nvws, nv50->state.vertprog); + if (nv50->state.dirty & NV50_NEW_FRAGPROG) + so_emit(nvws, nv50->state.fragprog); + if (nv50->state.dirty & NV50_NEW_RASTERIZER) + so_emit(nvws, nv50->state.rast); + if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) + so_emit(nvws, nv50->state.blend_colour); + if (nv50->state.dirty & NV50_NEW_STIPPLE) + so_emit(nvws, nv50->state.stipple); + if (nv50->state.dirty & NV50_NEW_SCISSOR) + so_emit(nvws, nv50->state.scissor); + if (nv50->state.dirty & NV50_NEW_VIEWPORT) + so_emit(nvws, nv50->state.viewport); + if (nv50->state.dirty & NV50_NEW_SAMPLER) + so_emit(nvws, nv50->state.tsc_upload); + if (nv50->state.dirty & NV50_NEW_TEXTURE) + so_emit(nvws, nv50->state.tic_upload); + if (nv50->state.dirty & NV50_NEW_ARRAYS) { + so_emit(nvws, nv50->state.vtxfmt); + so_emit(nvws, nv50->state.vtxbuf); + } + nv50->state.dirty = 0; + + so_emit_reloc_markers(nvws, nv50->state.fb); + so_emit_reloc_markers(nvws, nv50->state.vertprog); + so_emit_reloc_markers(nvws, nv50->state.fragprog); + so_emit_reloc_markers(nvws, nv50->state.vtxbuf); + so_emit_reloc_markers(nvws, nv50->screen->static_init); +} + +boolean +nv50_state_validate(struct nv50_context *nv50) +{ + const struct pipe_framebuffer_state *fb = &nv50->framebuffer; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so; + unsigned i; + + for (i = 0; i < fb->nr_cbufs; i++) + fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + + if (fb->zsbuf) + fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + + if (nv50->dirty & NV50_NEW_FRAMEBUFFER) + nv50_state_validate_fb(nv50); + + if (nv50->dirty & NV50_NEW_BLEND) + so_ref(nv50->blend->so, &nv50->state.blend); + + if (nv50->dirty & NV50_NEW_ZSA) + so_ref(nv50->zsa->so, &nv50->state.zsa); + + if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB)) + nv50_vertprog_validate(nv50); + + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) + nv50_fragprog_validate(nv50); + + if (nv50->dirty & NV50_NEW_RASTERIZER) + so_ref(nv50->rasterizer->so, &nv50->state.rast); + + if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { + so = so_new(5, 0); + so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4); + so_data (so, fui(nv50->blend_colour.color[0])); + so_data (so, fui(nv50->blend_colour.color[1])); + so_data (so, fui(nv50->blend_colour.color[2])); + so_data (so, fui(nv50->blend_colour.color[3])); + so_ref(so, &nv50->state.blend_colour); + } + + if (nv50->dirty & NV50_NEW_STIPPLE) { + so = so_new(33, 0); + so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nv50->stipple.stipple[i]); + so_ref(so, &nv50->state.stipple); + } + + if (nv50->dirty & (NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER)) { + struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe; + struct pipe_scissor_state *s = &nv50->scissor; + + if (nv50->state.scissor && + (rast->scissor == 0 && nv50->state.scissor_enabled == 0)) + goto scissor_uptodate; + nv50->state.scissor_enabled = rast->scissor; + + so = so_new(3, 0); + so_method(so, tesla, 0x0ff4, 2); + if (nv50->state.scissor_enabled) { + so_data(so, ((s->maxx - s->minx) << 16) | s->minx); + so_data(so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data(so, (8192 << 16)); + so_data(so, (8192 << 16)); + } + so_ref(so, &nv50->state.scissor); + nv50->state.dirty |= NV50_NEW_SCISSOR; + } +scissor_uptodate: + + if (nv50->dirty & NV50_NEW_VIEWPORT) { + unsigned bypass; + + if (!nv50->rasterizer->pipe.bypass_clipping) + bypass = 0; + else + bypass = 1; + + if (nv50->state.viewport && + (bypass || !(nv50->dirty & NV50_NEW_VIEWPORT)) && + nv50->state.viewport_bypass == bypass) + goto viewport_uptodate; + nv50->state.viewport_bypass = bypass; + + so = so_new(12, 0); + if (!bypass) { + so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3); + so_data (so, fui(nv50->viewport.translate[0])); + so_data (so, fui(nv50->viewport.translate[1])); + so_data (so, fui(nv50->viewport.translate[2])); + so_method(so, tesla, NV50TCL_VIEWPORT_UNK0(0), 3); + so_data (so, fui(nv50->viewport.scale[0])); + so_data (so, fui(-nv50->viewport.scale[1])); + so_data (so, fui(nv50->viewport.scale[2])); + so_method(so, tesla, 0x192c, 1); + so_data (so, 1); + so_method(so, tesla, 0x0f90, 1); + so_data (so, 0); + } else { + so_method(so, tesla, 0x192c, 1); + so_data (so, 0); + so_method(so, tesla, 0x0f90, 1); + so_data (so, 1); + } + + so_ref(so, &nv50->state.viewport); + } +viewport_uptodate: + + if (nv50->dirty & NV50_NEW_SAMPLER) { + int i; + + so = so_new(nv50->sampler_nr * 8 + 3, 0); + so_method(so, tesla, 0x0f00, 1); + so_data (so, NV50_CB_TSC); + so_method(so, tesla, 0x40000f04, nv50->sampler_nr * 8); + for (i = 0; i < nv50->sampler_nr; i++) + so_datap (so, nv50->sampler[i], 8); + so_ref(so, &nv50->state.tsc_upload); + } + + if (nv50->dirty & NV50_NEW_TEXTURE) + nv50_tex_validate(nv50); + + if (nv50->dirty & NV50_NEW_ARRAYS) + nv50_vbo_validate(nv50); + + nv50->state.dirty |= nv50->dirty; + nv50->dirty = 0; + nv50_state_emit(nv50); + + return TRUE; +} + diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c new file mode 100644 index 0000000000..f2dd2eb30b --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -0,0 +1,230 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define __NOUVEAU_PUSH_H__ +#include <stdint.h> +#include "nouveau/nouveau_pushbuf.h" +#include "nv50_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" + +#include "util/u_tile.h" + +static INLINE int +nv50_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return NV50_2D_DST_FORMAT_32BPP; + case PIPE_FORMAT_X8R8G8B8_UNORM: + return NV50_2D_DST_FORMAT_24BPP; + case PIPE_FORMAT_R5G6B5_UNORM: + return NV50_2D_DST_FORMAT_16BPP; + case PIPE_FORMAT_A8_UNORM: + return NV50_2D_DST_FORMAT_8BPP; + default: + return -1; + } +} + +static int +nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) +{ + struct nouveau_channel *chan = screen->nvws->channel; + struct nouveau_grobj *eng2d = screen->eng2d; + struct nouveau_bo *bo; + int format, mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; + int flags = NOUVEAU_BO_VRAM | (dst ? NOUVEAU_BO_WR : NOUVEAU_BO_RD); + + bo = screen->nvws->get_bo(nv50_miptree(ps->texture)->buffer); + if (!bo) + return 1; + + format = nv50_format(ps->format); + if (format < 0) + return 1; + + if (!bo->tiled) { + BEGIN_RING(chan, eng2d, mthd, 2); + OUT_RING (chan, format); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng2d, mthd + 0x14, 5); + OUT_RING (chan, ps->stride); + OUT_RING (chan, ps->width); + OUT_RING (chan, ps->height); + OUT_RELOCh(chan, bo, ps->offset, flags); + OUT_RELOCl(chan, bo, ps->offset, flags); + } else { + BEGIN_RING(chan, eng2d, mthd, 5); + OUT_RING (chan, format); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, eng2d, mthd + 0x18, 4); + OUT_RING (chan, ps->width); + OUT_RING (chan, ps->height); + OUT_RELOCh(chan, bo, ps->offset, flags); + OUT_RELOCl(chan, bo, ps->offset, flags); + } + +#if 0 + if (dst) { + BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, surf->width); + OUT_RING (chan, surf->height); + } +#endif + + return 0; +} + +int +nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, + int dx, int dy, struct pipe_surface *src, int sx, int sy, + int w, int h) +{ + struct nouveau_channel *chan = screen->nvws->channel; + struct nouveau_grobj *eng2d = screen->eng2d; + int ret; + + WAIT_RING (chan, 32); + + ret = nv50_surface_set(screen, dst, 1); + if (ret) + return ret; + + ret = nv50_surface_set(screen, src, 0); + if (ret) + return ret; + + BEGIN_RING(chan, eng2d, 0x088c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, eng2d, NV50_2D_BLIT_DST_X, 4); + OUT_RING (chan, dx); + OUT_RING (chan, dy); + OUT_RING (chan, w); + OUT_RING (chan, h); + BEGIN_RING(chan, eng2d, 0x08c0, 4); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng2d, 0x08d0, 4); + OUT_RING (chan, 0); + OUT_RING (chan, sx); + OUT_RING (chan, 0); + OUT_RING (chan, sy); + + return 0; +} + +static void +nv50_surface_copy(struct pipe_context *pipe, boolean flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + struct nv50_screen *screen = nv50->screen; + + assert(src->format == dest->format); + + if (flip) { + desty += height; + while (height--) { + nv50_surface_do_copy(screen, dest, destx, desty--, src, + srcx, srcy++, width, 1); + } + } else { + nv50_surface_do_copy(screen, dest, destx, desty, src, srcx, + srcy, width, height); + } +} + +static void +nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + struct nv50_screen *screen = nv50->screen; + struct nouveau_channel *chan = screen->nvws->channel; + struct nouveau_grobj *eng2d = screen->eng2d; + int format, ret; + + format = nv50_format(dest->format); + if (format < 0) + return; + + WAIT_RING (chan, 32); + + ret = nv50_surface_set(screen, dest, 1); + if (ret) + return; + + BEGIN_RING(chan, eng2d, 0x0580, 3); + OUT_RING (chan, 4); + OUT_RING (chan, format); + OUT_RING (chan, value); + BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4); + OUT_RING (chan, destx); + OUT_RING (chan, desty); + OUT_RING (chan, width); + OUT_RING (chan, height); +} + +static void * +nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *ps, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + + return ws->buffer_map(ws, nv50_surface_buffer(ps), flags); +} + +static void +nv50_surface_unmap(struct pipe_screen *pscreen, struct pipe_surface *ps) +{ + struct pipe_winsys *ws = pscreen->winsys; + + ws->buffer_unmap(ws, nv50_surface_buffer(ps)); +} + +void +nv50_init_surface_functions(struct nv50_context *nv50) +{ + nv50->pipe.surface_copy = nv50_surface_copy; + nv50->pipe.surface_fill = nv50_surface_fill; +} + +void +nv50_surface_init_screen_functions(struct pipe_screen *pscreen) +{ + pscreen->surface_map = nv50_surface_map; + pscreen->surface_unmap = nv50_surface_unmap; +} + diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c new file mode 100644 index 0000000000..675f9b20cb --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -0,0 +1,163 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nv50_context.h" +#include "nv50_texture.h" + +#include "nouveau/nouveau_stateobj.h" + +static int +nv50_tex_construct(struct nouveau_stateobj *so, struct nv50_miptree *mt) +{ + switch (mt->base.format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8_8_8_8); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_1_5_5_5); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_4_4_4_4); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_5_6_5); + break; + case PIPE_FORMAT_L8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_A8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_ZERO | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_ZERO | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_ZERO | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_I8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_A8L8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C1 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8_8); + break; + case PIPE_FORMAT_DXT1_RGB: + so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_DXT1); + break; + case PIPE_FORMAT_DXT1_RGBA: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_DXT1); + break; + case PIPE_FORMAT_DXT3_RGBA: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_DXT3); + break; + case PIPE_FORMAT_DXT5_RGBA: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_DXT5); + break; + default: + return 1; + } + + so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | + NOUVEAU_BO_RD, 0, 0); + so_data (so, 0xd0005000); + so_data (so, 0x00300000); + so_data (so, mt->base.width[0]); + so_data (so, (mt->base.depth[0] << 16) | mt->base.height[0]); + so_data (so, 0x03000000); + so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH | + NOUVEAU_BO_RD, 0, 0); + + return 0; +} + +void +nv50_tex_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so; + int unit, level, image; + + so = so_new(nv50->miptree_nr * 8 + 3, nv50->miptree_nr * 2); + so_method(so, tesla, 0x0f00, 1); + so_data (so, NV50_CB_TIC); + so_method(so, tesla, 0x40000f04, nv50->miptree_nr * 8); + for (unit = 0; unit < nv50->miptree_nr; unit++) { + struct nv50_miptree *mt = nv50->miptree[unit]; + + for (level = 0; level <= mt->base.last_level; level++) { + for (image = 0; image < mt->image_nr; image++) { + nv50_miptree_sync(&nv50->screen->pipe, mt, + level, image); + } + } + + if (nv50_tex_construct(so, mt)) { + NOUVEAU_ERR("failed tex validate\n"); + so_ref(NULL, &so); + return; + } + } + + so_ref(so, &nv50->state.tic_upload); +} + diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h new file mode 100644 index 0000000000..aca622c73b --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_texture.h @@ -0,0 +1,129 @@ +#ifndef __NV50_TEXTURE_H__ +#define __NV50_TEXTURE_H__ + +/* It'd be really nice to have these in nouveau_class.h generated by + * renouveau like the rest of the object header - but not sure it can + * handle non-object stuff nicely - need to look into it. + */ + +/* Texture image control block */ +#define NV50TIC_0_0_MAPA_MASK 0x38000000 +#define NV50TIC_0_0_MAPA_ZERO 0x00000000 +#define NV50TIC_0_0_MAPA_C0 0x10000000 +#define NV50TIC_0_0_MAPA_C1 0x18000000 +#define NV50TIC_0_0_MAPA_C2 0x20000000 +#define NV50TIC_0_0_MAPA_C3 0x28000000 +#define NV50TIC_0_0_MAPA_ONE 0x38000000 +#define NV50TIC_0_0_MAPR_MASK 0x07000000 +#define NV50TIC_0_0_MAPR_ZERO 0x00000000 +#define NV50TIC_0_0_MAPR_C0 0x02000000 +#define NV50TIC_0_0_MAPR_C1 0x03000000 +#define NV50TIC_0_0_MAPR_C2 0x04000000 +#define NV50TIC_0_0_MAPR_C3 0x05000000 +#define NV50TIC_0_0_MAPR_ONE 0x07000000 +#define NV50TIC_0_0_MAPG_MASK 0x00e00000 +#define NV50TIC_0_0_MAPG_ZERO 0x00000000 +#define NV50TIC_0_0_MAPG_C0 0x00400000 +#define NV50TIC_0_0_MAPG_C1 0x00600000 +#define NV50TIC_0_0_MAPG_C2 0x00800000 +#define NV50TIC_0_0_MAPG_C3 0x00a00000 +#define NV50TIC_0_0_MAPG_ONE 0x00e00000 +#define NV50TIC_0_0_MAPB_MASK 0x001c0000 +#define NV50TIC_0_0_MAPB_ZERO 0x00000000 +#define NV50TIC_0_0_MAPB_C0 0x00080000 +#define NV50TIC_0_0_MAPB_C1 0x000c0000 +#define NV50TIC_0_0_MAPB_C2 0x00100000 +#define NV50TIC_0_0_MAPB_C3 0x00140000 +#define NV50TIC_0_0_MAPB_ONE 0x001c0000 +#define NV50TIC_0_0_TYPEA_MASK 0x00038000 +#define NV50TIC_0_0_TYPEA_UNORM 0x00010000 +#define NV50TIC_0_0_TYPER_MASK 0x00007000 +#define NV50TIC_0_0_TYPER_UNORM 0x00002000 +#define NV50TIC_0_0_TYPEG_MASK 0x00000e00 +#define NV50TIC_0_0_TYPEG_UNORM 0x00000400 +#define NV50TIC_0_0_TYPEB_MASK 0x000001c0 +#define NV50TIC_0_0_TYPEB_UNORM 0x00000080 +#define NV50TIC_0_0_FMT_MASK 0x0000003c +#define NV50TIC_0_0_FMT_8_8_8_8 0x00000008 +#define NV50TIC_0_0_FMT_4_4_4_4 0x00000012 +#define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 +#define NV50TIC_0_0_FMT_5_6_5 0x00000015 +#define NV50TIC_0_0_FMT_8_8 0x00000018 +#define NV50TIC_0_0_FMT_8 0x0000001d +#define NV50TIC_0_0_FMT_DXT1 0x00000024 +#define NV50TIC_0_0_FMT_DXT3 0x00000025 +#define NV50TIC_0_0_FMT_DXT5 0x00000026 + +#define NV50TIC_0_1_OFFSET_LOW_MASK 0xffffffff +#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0 + +#define NV50TIC_0_2_UNKNOWN_MASK 0xffffffff + +#define NV50TIC_0_3_UNKNOWN_MASK 0xffffffff + +#define NV50TIC_0_4_WIDTH_MASK 0x0000ffff +#define NV50TIC_0_4_WIDTH_SHIFT 0 + +#define NV50TIC_0_5_DEPTH_MASK 0xffff0000 +#define NV50TIC_0_5_DEPTH_SHIFT 16 +#define NV50TIC_0_5_HEIGHT_MASK 0x0000ffff +#define NV50TIC_0_5_HEIGHT_SHIFT 0 + +#define NV50TIC_0_6_UNKNOWN_MASK 0xffffffff + +#define NV50TIC_0_7_OFFSET_HIGH_MASK 0xffffffff +#define NV50TIC_0_7_OFFSET_HIGH_SHIFT 0 + +/* Texture sampler control block */ +#define NV50TSC_1_0_WRAPS_MASK 0x00000007 +#define NV50TSC_1_0_WRAPS_REPEAT 0x00000000 +#define NV50TSC_1_0_WRAPS_MIRROR_REPEAT 0x00000001 +#define NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE 0x00000002 +#define NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER 0x00000003 +#define NV50TSC_1_0_WRAPS_CLAMP 0x00000004 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE 0x00000005 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER 0x00000006 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP 0x00000007 +#define NV50TSC_1_0_WRAPT_MASK 0x00000038 +#define NV50TSC_1_0_WRAPT_REPEAT 0x00000000 +#define NV50TSC_1_0_WRAPT_MIRROR_REPEAT 0x00000008 +#define NV50TSC_1_0_WRAPT_CLAMP_TO_EDGE 0x00000010 +#define NV50TSC_1_0_WRAPT_CLAMP_TO_BORDER 0x00000018 +#define NV50TSC_1_0_WRAPT_CLAMP 0x00000020 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_EDGE 0x00000028 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_BORDER 0x00000030 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP 0x00000038 +#define NV50TSC_1_0_WRAPR_MASK 0x000001c0 +#define NV50TSC_1_0_WRAPR_REPEAT 0x00000000 +#define NV50TSC_1_0_WRAPR_MIRROR_REPEAT 0x00000040 +#define NV50TSC_1_0_WRAPR_CLAMP_TO_EDGE 0x00000080 +#define NV50TSC_1_0_WRAPR_CLAMP_TO_BORDER 0x000000c0 +#define NV50TSC_1_0_WRAPR_CLAMP 0x00000100 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_EDGE 0x00000140 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_BORDER 0x00000180 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP 0x000001c0 + +#define NV50TSC_1_1_MAGF_MASK 0x00000003 +#define NV50TSC_1_1_MAGF_NEAREST 0x00000001 +#define NV50TSC_1_1_MAGF_LINEAR 0x00000002 +#define NV50TSC_1_1_MINF_MASK 0x00000030 +#define NV50TSC_1_1_MINF_NEAREST 0x00000010 +#define NV50TSC_1_1_MINF_LINEAR 0x00000020 +#define NV50TSC_1_1_MIPF_MASK 0x000000c0 +#define NV50TSC_1_1_MIPF_NONE 0x00000040 +#define NV50TSC_1_1_MIPF_NEAREST 0x00000080 +#define NV50TSC_1_1_MIPF_LINEAR 0x000000c0 + +#define NV50TSC_1_2_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_3_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_4_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_5_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_6_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_7_UNKNOWN_MASK 0xffffffff + +#endif diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c new file mode 100644 index 0000000000..0c970adb03 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -0,0 +1,254 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv50_context.h" + +static INLINE unsigned +nv50_prim(unsigned mode) +{ + switch (mode) { + case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS; + case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES; + case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP; + case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP; + case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES; + case PIPE_PRIM_TRIANGLE_STRIP: + return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP; + case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN; + case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS; + case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP; + case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON; + default: + break; + } + + NOUVEAU_ERR("invalid primitive type %d\n", mode); + return NV50TCL_VERTEX_BEGIN_POINTS; +} + +boolean +nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, + unsigned count) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + + nv50_state_validate(nv50); + + BEGIN_RING(chan, tesla, 0x142c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, 0x142c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, 0x1440, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, 0x1334, 1); + OUT_RING (chan, 0); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode)); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +static INLINE void +nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, + unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + + map += start; + + if (count & 1) { + BEGIN_RING(chan, tesla, 0x15e8, 1); + OUT_RING (chan, map[0]); + map++; + count--; + } + + while (count) { + unsigned nr = count > 2046 ? 2046 : count; + int i; + + BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + for (i = 0; i < nr; i += 2) + OUT_RING (chan, (map[1] << 16) | map[0]); + + count -= nr; + map += nr; + } +} + +static INLINE void +nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, + unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + + map += start; + + if (count & 1) { + BEGIN_RING(chan, tesla, 0x15e8, 1); + OUT_RING (chan, map[0]); + map++; + count--; + } + + while (count) { + unsigned nr = count > 2046 ? 2046 : count; + int i; + + BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + for (i = 0; i < nr; i += 2) + OUT_RING (chan, (map[1] << 16) | map[0]); + + count -= nr; + map += nr; + } +} + +static INLINE void +nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint8_t *map, + unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + + map += start; + + while (count) { + unsigned nr = count > 2047 ? 2047 : count; + + BEGIN_RING(chan, tesla, 0x400015e8, nr); + OUT_RINGp (chan, map, nr); + + count -= nr; + map += nr; + } +} + +boolean +nv50_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct pipe_winsys *ws = pipe->winsys; + void *map = ws->buffer_map(ws, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); + + nv50_state_validate(nv50); + + BEGIN_RING(chan, tesla, 0x142c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, 0x142c, 1); + OUT_RING (chan, 0); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode)); + switch (indexSize) { + case 1: + nv50_draw_elements_inline_u08(nv50, map, start, count); + break; + case 2: + nv50_draw_elements_inline_u16(nv50, map, start, count); + break; + case 4: + nv50_draw_elements_inline_u32(nv50, map, start, count); + break; + default: + assert(0); + } + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +void +nv50_vbo_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *vtxbuf, *vtxfmt; + int i, vpi = 0; + + vtxbuf = so_new(nv50->vtxelt_nr * 4, nv50->vtxelt_nr * 2); + vtxfmt = so_new(nv50->vtxelt_nr + 1, 0); + so_method(vtxfmt, tesla, 0x1ac0, nv50->vtxelt_nr); + + for (i = 0; i < nv50->vtxelt_nr; i++) { + struct pipe_vertex_element *ve = &nv50->vtxelt[i]; + struct pipe_vertex_buffer *vb = + &nv50->vtxbuf[ve->vertex_buffer_index]; + + switch (ve->src_format) { + case PIPE_FORMAT_R32G32B32A32_FLOAT: + so_data(vtxfmt, 0x7e080000 | i); + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + so_data(vtxfmt, 0x7e100000 | i); + break; + case PIPE_FORMAT_R32G32_FLOAT: + so_data(vtxfmt, 0x7e200000 | i); + break; + case PIPE_FORMAT_R32_FLOAT: + so_data(vtxfmt, 0x7e900000 | i); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + so_data(vtxfmt, 0x24500000 | i); + break; + default: + { + NOUVEAU_ERR("invalid vbo format %s\n", + pf_name(ve->src_format)); + assert(0); + return; + } + } + + so_method(vtxbuf, tesla, 0x900 + (i * 16), 3); + so_data (vtxbuf, 0x20000000 | vb->stride); + so_reloc (vtxbuf, vb->buffer, vb->buffer_offset + + ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (vtxbuf, vb->buffer, vb->buffer_offset + + ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + } + + so_ref (vtxfmt, &nv50->state.vtxfmt); + so_ref (vtxbuf, &nv50->state.vtxbuf); +} + |