From c65a8f3ed2ab1650df38a3ed32d1e91e84b50520 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 12:39:21 +0100 Subject: nv30, nv40: partially unify nv[34]0_state.c state.c is identical except for: 1. Sampler state creation is different 2. nv40 swtnl support 3. Separate blend equations on nv40 This patch unifies nv[34]0_state.c, except the sampler state creation code. --- src/gallium/drivers/nvfx/nvfx_state.c | 605 ++++++++++++++++++++++++++++++++++ 1 file changed, 605 insertions(+) create mode 100644 src/gallium/drivers/nvfx/nvfx_state.c (limited to 'src/gallium/drivers/nvfx/nvfx_state.c') diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c new file mode 100644 index 0000000000..76780dc4f5 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -0,0 +1,605 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" + +#include "draw/draw_context.h" + +#include "tgsi/tgsi_parse.h" + +#include "nvfx_context.h" +#include "nvfx_state.h" + +static void * +nvfx_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + struct nvfx_blend_state *bso = CALLOC(1, sizeof(*bso)); + struct nouveau_stateobj *so = so_new(5, 8, 0); + + if (cso->rt[0].blend_enable) { + so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 3); + so_data (so, 1); + so_data (so, (nvgl_blend_func(cso->rt[0].alpha_src_factor) << 16) | + nvgl_blend_func(cso->rt[0].rgb_src_factor)); + so_data (so, nvgl_blend_func(cso->rt[0].alpha_dst_factor) << 16 | + nvgl_blend_func(cso->rt[0].rgb_dst_factor)); + if(nvfx->screen->base.device->chipset < 0x40) { + so_method(so, eng3d, NV34TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->rt[0].rgb_func)); + } else { + so_method(so, eng3d, NV40TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->rt[0].alpha_func) << 16 | + nvgl_blend_eqn(cso->rt[0].rgb_func)); + } + } else { + so_method(so, eng3d, NV34TCL_BLEND_FUNC_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, eng3d, NV34TCL_COLOR_MASK, 1); + so_data (so, (((cso->rt[0].colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | + ((cso->rt[0].colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); + + /* TODO: add NV40 MRT color mask */ + + if (cso->logicop_enable) { + so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } else { + so_method(so, eng3d, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, eng3d, NV34TCL_DITHER_ENABLE, 1); + so_data (so, cso->dither ? 1 : 0); + + so_ref(so, &bso->so); + so_ref(NULL, &so); + bso->pipe = *cso; + return (void *)bso; +} + +static void +nvfx_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->blend = hwcso; + nvfx->dirty |= NVFX_NEW_BLEND; +} + +static void +nvfx_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_blend_state *bso = hwcso; + + so_ref(NULL, &bso->so); + FREE(bso); +} + +static void +nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nvfx->tex_sampler[unit] = sampler[unit]; + nvfx->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nvfx->nr_samplers; unit++) { + nvfx->tex_sampler[unit] = NULL; + nvfx->dirty_samplers |= (1 << unit); + } + + nvfx->nr_samplers = nr; + nvfx->dirty |= NVFX_NEW_SAMPLER; +} + +static void +nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nvfx_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nvfx->tex_miptree[unit], miptree[unit]); + nvfx->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nvfx->nr_textures; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nvfx->tex_miptree[unit], NULL); + nvfx->dirty_samplers |= (1 << unit); + } + + nvfx->nr_textures = nr; + nvfx->dirty |= NVFX_NEW_SAMPLER; +} + +static void * +nvfx_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); + struct nouveau_stateobj *so = so_new(9, 19, 0); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + + /*XXX: ignored: + * light_twoside + * point_smooth -nohw + * multisample + */ + + so_method(so, eng3d, NV34TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : + NV34TCL_SHADE_MODEL_SMOOTH); + + so_method(so, eng3d, NV34TCL_LINE_WIDTH, 2); + so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); + so_data (so, cso->line_smooth ? 1 : 0); + so_method(so, eng3d, NV34TCL_LINE_STIPPLE_ENABLE, 2); + so_data (so, cso->line_stipple_enable ? 1 : 0); + so_data (so, (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor); + + so_method(so, eng3d, NV34TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + + so_method(so, eng3d, NV34TCL_POLYGON_MODE_FRONT, 6); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV34TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV34TCL_FRONT_FACE_CCW); + } else { + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV34TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV34TCL_FRONT_FACE_CW); + } + so_data(so, cso->poly_smooth ? 1 : 0); + so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + + so_method(so, eng3d, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 1 : 0); + + so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, eng3d, NV34TCL_POLYGON_OFFSET_FACTOR, 2); + so_data (so, fui(cso->offset_scale)); + so_data (so, fui(cso->offset_units * 2)); + } + + so_method(so, eng3d, NV34TCL_POINT_SPRITE, 1); + if (cso->point_quad_rasterization) { + unsigned psctl = (1 << 0), i; + + for (i = 0; i < 8; i++) { + if ((cso->sprite_coord_enable >> i) & 1) + psctl |= (1 << (8 + i)); + } + + so_data(so, psctl); + } else { + so_data(so, 0); + } + + so_ref(so, &rsso->so); + so_ref(NULL, &so); + rsso->pipe = *cso; + return (void *)rsso; +} + +static void +nvfx_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->rasterizer = hwcso; + nvfx->dirty |= NVFX_NEW_RAST; + nvfx->draw_dirty |= NVFX_NEW_RAST; +} + +static void +nvfx_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_rasterizer_state *rsso = hwcso; + + so_ref(NULL, &rsso->so); + FREE(rsso); +} + +static void * +nvfx_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); + struct nouveau_stateobj *so = so_new(6, 20, 0); + struct nouveau_grobj *eng3d = nvfx->screen->eng3d; + + so_method(so, eng3d, NV34TCL_DEPTH_FUNC, 3); + so_data (so, nvgl_comparison_op(cso->depth.func)); + so_data (so, cso->depth.writemask ? 1 : 0); + so_data (so, cso->depth.enabled ? 1 : 0); + + so_method(so, eng3d, NV34TCL_ALPHA_FUNC_ENABLE, 3); + so_data (so, cso->alpha.enabled ? 1 : 0); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + so_data (so, float_to_ubyte(cso->alpha.ref_value)); + + if (cso->stencil[0].enabled) { + so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 3); + so_data (so, cso->stencil[0].enabled ? 1 : 0); + so_data (so, cso->stencil[0].writemask); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_method(so, eng3d, NV34TCL_STENCIL_FRONT_FUNC_MASK, 4); + so_data (so, cso->stencil[0].valuemask); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + } else { + so_method(so, eng3d, NV34TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[1].enabled) { + so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 3); + so_data (so, cso->stencil[1].enabled ? 1 : 0); + so_data (so, cso->stencil[1].writemask); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_method(so, eng3d, NV34TCL_STENCIL_BACK_FUNC_MASK, 4); + so_data (so, cso->stencil[1].valuemask); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + } else { + so_method(so, eng3d, NV34TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &zsaso->so); + so_ref(NULL, &so); + zsaso->pipe = *cso; + return (void *)zsaso; +} + +static void +nvfx_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->zsa = hwcso; + nvfx->dirty |= NVFX_NEW_ZSA; +} + +static void +nvfx_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_zsa_state *zsaso = hwcso; + + so_ref(NULL, &zsaso->so); + FREE(zsaso); +} + +static void * +nvfx_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_vertex_program *vp; + + vp = CALLOC(1, sizeof(struct nvfx_vertex_program)); + vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe); + + return (void *)vp; +} + +static void +nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->vertprog = hwcso; + nvfx->dirty |= NVFX_NEW_VERTPROG; + nvfx->draw_dirty |= NVFX_NEW_VERTPROG; +} + +static void +nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_vertex_program *vp = hwcso; + + draw_delete_vertex_shader(nvfx->draw, vp->draw); + if(!nvfx->is_nv4x) + nv30_vertprog_destroy(nvfx, vp); + else + nv40_vertprog_destroy(nvfx, vp); + FREE((void*)vp->pipe.tokens); + FREE(vp); +} + +static void * +nvfx_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nvfx_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nvfx_fragment_program)); + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + tgsi_scan_shader(fp->pipe.tokens, &fp->info); + + return (void *)fp; +} + +static void +nvfx_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->fragprog = hwcso; + nvfx->dirty |= NVFX_NEW_FRAGPROG; +} + +static void +nvfx_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_fragment_program *fp = hwcso; + + nvfx_fragprog_destroy(nvfx, fp); + FREE((void*)fp->pipe.tokens); + FREE(fp); +} + +static void +nvfx_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->blend_colour = *bcol; + nvfx->dirty |= NVFX_NEW_BCOL; +} + +static void +nvfx_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *sr) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->stencil_ref = *sr; + nvfx->dirty |= NVFX_NEW_SR; +} + +static void +nvfx_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->clip = *clip; + nvfx->dirty |= NVFX_NEW_UCP; + nvfx->draw_dirty |= NVFX_NEW_UCP; +} + +static void +nvfx_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + struct pipe_buffer *buf ) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->constbuf[shader] = buf; + nvfx->constbuf_nr[shader] = buf->size / (4 * sizeof(float)); + + if (shader == PIPE_SHADER_VERTEX) { + nvfx->dirty |= NVFX_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nvfx->dirty |= NVFX_NEW_FRAGPROG; + } +} + +static void +nvfx_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->framebuffer = *fb; + nvfx->dirty |= NVFX_NEW_FB; +} + +static void +nvfx_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + memcpy(nvfx->stipple, stipple->stipple, 4 * 32); + nvfx->dirty |= NVFX_NEW_STIPPLE; +} + +static void +nvfx_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->scissor = *s; + nvfx->dirty |= NVFX_NEW_SCISSOR; +} + +static void +nvfx_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->viewport = *vpt; + nvfx->dirty |= NVFX_NEW_VIEWPORT; + nvfx->draw_dirty |= NVFX_NEW_VIEWPORT; +} + +static void +nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + memcpy(nvfx->vtxbuf, vb, sizeof(*vb) * count); + nvfx->vtxbuf_nr = count; + + nvfx->dirty |= NVFX_NEW_ARRAYS; + nvfx->draw_dirty |= NVFX_NEW_ARRAYS; +} + +static void * +nvfx_vtxelts_state_create(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state); + + assert(num_elements < 16); /* not doing fallbacks yet */ + cso->num_elements = num_elements; + memcpy(cso->pipe, elements, num_elements * sizeof(*elements)); + +/* nvfx_vtxelt_construct(cso);*/ + + return (void *)cso; +} + +static void +nvfx_vtxelts_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + + nvfx->vtxelt = hwcso; + nvfx->dirty |= NVFX_NEW_ARRAYS; + /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/ +} + +void * +nv30_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso); + +void * +nv40_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso); + +void +nvfx_init_state_functions(struct nvfx_context *nvfx) +{ + nvfx->pipe.create_blend_state = nvfx_blend_state_create; + nvfx->pipe.bind_blend_state = nvfx_blend_state_bind; + nvfx->pipe.delete_blend_state = nvfx_blend_state_delete; + + if(nvfx->is_nv4x) + nvfx->pipe.create_sampler_state = nv40_sampler_state_create; + else + nvfx->pipe.create_sampler_state = nv30_sampler_state_create; + nvfx->pipe.bind_fragment_sampler_states = nv40_sampler_state_bind; + nvfx->pipe.delete_sampler_state = nv40_sampler_state_delete; + nvfx->pipe.set_fragment_sampler_textures = nvfx_set_sampler_texture; + + nvfx->pipe.create_rasterizer_state = nvfx_rasterizer_state_create; + nvfx->pipe.bind_rasterizer_state = nvfx_rasterizer_state_bind; + nvfx->pipe.delete_rasterizer_state = nvfx_rasterizer_state_delete; + + nvfx->pipe.create_depth_stencil_alpha_state = + nvfx_depth_stencil_alpha_state_create; + nvfx->pipe.bind_depth_stencil_alpha_state = + nvfx_depth_stencil_alpha_state_bind; + nvfx->pipe.delete_depth_stencil_alpha_state = + nvfx_depth_stencil_alpha_state_delete; + + nvfx->pipe.create_vs_state = nvfx_vp_state_create; + nvfx->pipe.bind_vs_state = nvfx_vp_state_bind; + nvfx->pipe.delete_vs_state = nvfx_vp_state_delete; + + nvfx->pipe.create_fs_state = nvfx_fp_state_create; + nvfx->pipe.bind_fs_state = nvfx_fp_state_bind; + nvfx->pipe.delete_fs_state = nvfx_fp_state_delete; + + nvfx->pipe.set_blend_color = nvfx_set_blend_color; + nvfx->pipe.set_stencil_ref = nvfx_set_stencil_ref; + nvfx->pipe.set_clip_state = nvfx_set_clip_state; + nvfx->pipe.set_constant_buffer = nvfx_set_constant_buffer; + nvfx->pipe.set_framebuffer_state = nvfx_set_framebuffer_state; + nvfx->pipe.set_polygon_stipple = nvfx_set_polygon_stipple; + nvfx->pipe.set_scissor_state = nvfx_set_scissor_state; + nvfx->pipe.set_viewport_state = nvfx_set_viewport_state; + + nvfx->pipe.create_vertex_elements_state = nvfx_vtxelts_state_create; + nvfx->pipe.delete_vertex_elements_state = nvfx_vtxelts_state_delete; + nvfx->pipe.bind_vertex_elements_state = nvfx_vtxelts_state_bind; + + nvfx->pipe.set_vertex_buffers = nvfx_set_vertex_buffers; +} -- cgit v1.2.3 From 840c36f5e6d940343a3154af7e76fec341ca46e6 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 21 Feb 2010 13:33:48 +0100 Subject: nv30, nv40: non-trivially unify nv[34]0_vertprog.c vertprog.c is similar but has substantial differences: 1. nv40 supports clip planes 2. nv40 uses a more advanced register allocator 3. Some register setup is different 4. Constants with the same name have different values This patch unifies the two files. nv30 gains clip plane support and the nv40 register allocator. A new NVFX_VP(x) macro is introduced that at runtime resolved to either the nv30 or the nv40 constant value. nv30 clip planes are not tested and might not work --- src/gallium/drivers/nv30/Makefile | 3 +- src/gallium/drivers/nv30/nv30_context.h | 3 - src/gallium/drivers/nv30/nv30_vertprog.c | 842 --------------------- src/gallium/drivers/nv40/Makefile | 3 +- src/gallium/drivers/nv40/nv40_context.h | 3 - src/gallium/drivers/nv40/nv40_vertprog.c | 1048 -------------------------- src/gallium/drivers/nvfx/Makefile | 3 +- src/gallium/drivers/nvfx/nvfx_context.h | 6 +- src/gallium/drivers/nvfx/nvfx_state.c | 5 +- src/gallium/drivers/nvfx/nvfx_state_emit.c | 2 +- src/gallium/drivers/nvfx/nvfx_vertprog.c | 1108 ++++++++++++++++++++++++++++ 11 files changed, 1117 insertions(+), 1909 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_vertprog.c delete mode 100644 src/gallium/drivers/nv40/nv40_vertprog.c create mode 100644 src/gallium/drivers/nvfx/nvfx_vertprog.c (limited to 'src/gallium/drivers/nvfx/nvfx_state.c') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 6ec93ee346..b71afbbb9a 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -7,8 +7,7 @@ C_SOURCES = \ nv30_context.c \ nv30_fragtex.c \ nv30_screen.c \ - nv30_state.c \ - nv30_vertprog.c + nv30_state.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/nvfx diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h index 16cf3cbaa4..eacbb1753d 100644 --- a/src/gallium/drivers/nv30/nv30_context.h +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -5,9 +5,6 @@ /* nv30_fragtex.c */ extern void nv30_fragtex_bind(struct nvfx_context *); - -/* nv30_state.c and friends */ -extern struct nvfx_state_entry nv30_state_vertprog; extern struct nvfx_state_entry nv30_state_fragtex; /* nvfx_context.c */ diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c deleted file mode 100644 index ec6d63889b..0000000000 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ /dev/null @@ -1,842 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_dump.h" - -#include "nv30_context.h" -#include "nvfx_state.h" - -/* TODO (at least...): - * 1. Indexed consts + ARL - * 2. Arb. swz/negation - * 3. NV_vp11, NV_vp2, NV_vp3 features - * - extra arith opcodes - * - branching - * - texture sampling - * - indexed attribs - * - indexed results - * 4. bugs - */ - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 8 -#define MASK_Y 4 -#define MASK_Z 2 -#define MASK_W 1 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE 0 -#define DEF_CTEST 0 -#include "nv30_shader.h" - -#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nvfx_sr_neg((s)) -#define abs(s) nvfx_sr_abs((s)) - -struct nv30_vpc { - struct nvfx_vertex_program *vp; - - struct nvfx_vertex_program_exec *vpi; - - unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; - - int high_temp; - int temp_temp_count; - - struct nvfx_sreg *imm; - unsigned nr_imm; -}; - -static struct nvfx_sreg -temp(struct nv30_vpc *vpc) -{ - int idx; - - idx = vpc->temp_temp_count++; - idx += vpc->high_temp + 1; - return nvfx_sr(NVFXSR_TEMP, idx); -} - -static struct nvfx_sreg -constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w) -{ - struct nvfx_vertex_program *vp = vpc->vp; - struct nvfx_vertex_program_data *vpd; - int idx; - - if (pipe >= 0) { - for (idx = 0; idx < vp->nr_consts; idx++) { - if (vp->consts[idx].index == pipe) - return nvfx_sr(NVFXSR_CONST, idx); - } - } - - idx = vp->nr_consts++; - vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); - vpd = &vp->consts[idx]; - - vpd->index = pipe; - vpd->value[0] = x; - vpd->value[1] = y; - vpd->value[2] = z; - vpd->value[3] = w; - return nvfx_sr(NVFXSR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv30_vp_arith((cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2)) - -static void -emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src) -{ - struct nvfx_vertex_program *vp = vpc->vp; - uint32_t sr = 0; - - switch (src.type) { - case NVFXSR_TEMP: - sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT); - sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT); - break; - case NVFXSR_INPUT: - sr |= (NV30_VP_SRC_REG_TYPE_INPUT << - NV30_VP_SRC_REG_TYPE_SHIFT); - vp->ir |= (1 << src.index); - hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT); - break; - case NVFXSR_CONST: - sr |= (NV30_VP_SRC_REG_TYPE_CONST << - NV30_VP_SRC_REG_TYPE_SHIFT); - assert(vpc->vpi->const_index == -1 || - vpc->vpi->const_index == src.index); - vpc->vpi->const_index = src.index; - break; - case NVFXSR_NONE: - sr |= (NV30_VP_SRC_REG_TYPE_INPUT << - NV30_VP_SRC_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NV30_VP_SRC_NEGATE; - - if (src.abs) - hw[0] |= (1 << (21 + pos)); - - sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) | - (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) | - (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) | - (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT)); - -/* - * |VVV| - * d�.�b - * \u/ - * - */ - - switch (pos) { - case 0: - hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >> - NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT; - hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) << - NV30_VP_INST_SRC0L_SHIFT; - break; - case 1: - hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT; - break; - case 2: - hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >> - NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT; - hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) << - NV30_VP_INST_SRC2L_SHIFT; - break; - default: - assert(0); - } -} - -static void -emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst) -{ - struct nvfx_vertex_program *vp = vpc->vp; - - switch (dst.type) { - case NVFXSR_TEMP: - hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); - break; - case NVFXSR_OUTPUT: - switch (dst.index) { - case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; - case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; - case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; - case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; - case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; - case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; - case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; - case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; - case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; - case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; - case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; - case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; - case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; - case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; - default: - break; - } - - hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); - hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); - - /*XXX: no way this is entirely correct, someone needs to - * figure out what exactly it is. - */ - hw[3] |= 0x800; - break; - default: - assert(0); - } -} - -static void -nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, - struct nvfx_sreg dst, int mask, - struct nvfx_sreg s0, struct nvfx_sreg s1, - struct nvfx_sreg s2) -{ - struct nvfx_vertex_program *vp = vpc->vp; - uint32_t *hw; - - vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); - vpc->vpi = &vp->insns[vp->nr_insns - 1]; - memset(vpc->vpi, 0, sizeof(*vpc->vpi)); - vpc->vpi->const_index = -1; - - hw = vpc->vpi->data; - - hw[0] |= (NVFX_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); - hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) | - (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) | - (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | - (3 << NV30_VP_INST_COND_SWZ_W_SHIFT)); - - hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); -// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; -// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); - - if (dst.type == NVFXSR_OUTPUT) { - if (slot) - hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); - else - hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); - } else { - if (slot) - hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); - else - hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); - } - - emit_dst(vpc, hw, slot, dst); - emit_src(vpc, hw, 0, s0); - emit_src(vpc, hw, 1, s1); - emit_src(vpc, hw, 2, s2); -} - -static INLINE struct nvfx_sreg -tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { - struct nvfx_sreg src; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index); - break; - case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); - break; - case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->Register.Index]; - break; - case TGSI_FILE_TEMPORARY: - if (vpc->high_temp < fsrc->Register.Index) - vpc->high_temp = fsrc->Register.Index; - src = nvfx_sr(NVFXSR_TEMP, fsrc->Register.Index); - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->Register.Absolute; - src.negate = fsrc->Register.Negate; - src.swz[0] = fsrc->Register.SwizzleX; - src.swz[1] = fsrc->Register.SwizzleY; - src.swz[2] = fsrc->Register.SwizzleZ; - src.swz[3] = fsrc->Register.SwizzleW; - return src; -} - -static INLINE struct nvfx_sreg -tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nvfx_sreg dst; - - switch (fdst->Register.File) { - case TGSI_FILE_OUTPUT: - dst = nvfx_sr(NVFXSR_OUTPUT, - vpc->output_map[fdst->Register.Index]); - - break; - case TGSI_FILE_TEMPORARY: - dst = nvfx_sr(NVFXSR_TEMP, fdst->Register.Index); - if (vpc->high_temp < dst.index) - vpc->high_temp = dst.index; - break; - default: - NOUVEAU_ERR("bad dst file\n"); - break; - } - - return dst; -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, - const struct tgsi_full_instruction *finst) -{ - struct nvfx_sreg src[3], dst, tmp; - struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - int mask; - int ai = -1, ci = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - vpc->temp_temp_count = 0; - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(vpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->Register.Index) { - ai = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - /*XXX: index comparison is broken now that consts come from - * two different register files. - */ - case TGSI_FILE_CONSTANT: - case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->Register.Index) { - ci = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(vpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].Register.WriteMask); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]); - break; - case TGSI_OPCODE_ARL: - arith(vpc, VEC, ARL, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DP3: - arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DST: - arith(vpc, VEC, DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(vpc, SCA, EX2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_EXP: - arith(vpc, SCA, EXP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_FLR: - arith(vpc, VEC, FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(vpc, VEC, FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_LG2: - arith(vpc, SCA, LG2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LIT: - arith(vpc, SCA, LIT, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LOG: - arith(vpc, SCA, LOG, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_MAD: - arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(vpc, VEC, MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - tmp = temp(vpc); - arith(vpc, SCA, LG2, tmp, MASK_X, none, none, - swz(src[0], X, X, X, X)); - arith(vpc, VEC, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(vpc, SCA, EX2, dst, mask, none, none, - swz(tmp, X, X, X, X)); - break; - case TGSI_OPCODE_RCP: - arith(vpc, SCA, RCP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_RET: - break; - case TGSI_OPCODE_RSQ: - arith(vpc, SCA, RSQ, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_SGE: - arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SGT: - arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1])); - break; - case TGSI_OPCODE_XPD: - tmp = temp(vpc); - arith(vpc, VEC, MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(vpc, VEC, MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - -static boolean -nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, - const struct tgsi_full_declaration *fdec) -{ - int hw; - - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = NV30_VP_INST_DEST_POS; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.Index == 0) { - hw = NV30_VP_INST_DEST_COL0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV30_VP_INST_DEST_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.Index == 0) { - hw = NV30_VP_INST_DEST_BFC0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV30_VP_INST_DEST_BFC1; - } else { - NOUVEAU_ERR("bad bcolour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NV30_VP_INST_DEST_FOGC; - break; - case TGSI_SEMANTIC_PSIZE: - hw = NV30_VP_INST_DEST_PSZ; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_EDGEFLAG: - NOUVEAU_ERR("cannot handle edgeflag output\n"); - return FALSE; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - vpc->output_map[fdec->Range.First] = hw; - return TRUE; -} - -static boolean -nv30_vertprog_prepare(struct nv30_vpc *vpc) -{ - struct tgsi_parse_context p; - int nr_imm = 0; - - tgsi_parse_init(&p, vpc->vp->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; - - tgsi_parse_token(&p); - switch(tok->Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - nr_imm++; - break; - default: - break; - } - } - tgsi_parse_free(&p); - - if (nr_imm) { - vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg)); - assert(vpc->imm); - } - - return TRUE; -} - -static void -nv30_vertprog_translate(struct nvfx_context *nvfx, - struct nvfx_vertex_program *vp) -{ - struct tgsi_parse_context parse; - struct nv30_vpc *vpc = NULL; - - tgsi_dump(vp->pipe.tokens,0); - - vpc = CALLOC(1, sizeof(struct nv30_vpc)); - if (!vpc) - return; - vpc->vp = vp; - vpc->high_temp = -1; - - if (!nv30_vertprog_prepare(vpc)) { - FREE(vpc); - return; - } - - tgsi_parse_init(&parse, vp->pipe.tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - fdec = &parse.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_OUTPUT: - if (!nv30_vertprog_parse_decl_output(vpc, fdec)) - goto out_err; - break; - default: - break; - } - } - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - const struct tgsi_full_immediate *imm; - - imm = &parse.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); - assert(imm->Immediate.NrTokens == 4 + 1); - vpc->imm[vpc->nr_imm++] = - constant(vpc, -1, - imm->u[0].Float, - imm->u[1].Float, - imm->u[2].Float, - imm->u[3].Float); - } - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - finst = &parse.FullToken.FullInstruction; - if (!nv30_vertprog_parse_instruction(vpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; - vp->translated = TRUE; -out_err: - tgsi_parse_free(&parse); - FREE(vpc); -} - -static boolean -nv30_vertprog_validate(struct nvfx_context *nvfx) -{ - struct pipe_screen *pscreen = nvfx->pipe.screen; - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - struct nvfx_vertex_program *vp; - struct pipe_buffer *constbuf; - boolean upload_code = FALSE, upload_data = FALSE; - int i; - - vp = nvfx->vertprog; - constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; - - /* Translate TGSI shader into hw bytecode */ - if (!vp->translated) { - nv30_vertprog_translate(nvfx, vp); - if (!vp->translated) - return FALSE; - } - - /* Allocate hw vtxprog exec slots */ - if (!vp->exec) { - struct nouveau_resource *heap = nvfx->screen->vp_exec_heap; - struct nouveau_stateobj *so; - uint vplen = vp->nr_insns; - - if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) { - while (heap->next && heap->size < vplen) { - struct nvfx_vertex_program *evict; - - evict = heap->next->priv; - nouveau_resource_free(&evict->exec); - } - - if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) - assert(0); - } - - so = so_new(1, 1, 0); - so_method(so, eng3d, NV34TCL_VP_START_FROM_ID, 1); - so_data (so, vp->exec->start); - so_ref(so, &vp->so); - so_ref(NULL, &so); - - upload_code = TRUE; - } - - /* Allocate hw vtxprog const slots */ - if (vp->nr_consts && !vp->data) { - struct nouveau_resource *heap = nvfx->screen->vp_data_heap; - - if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) { - while (heap->next && heap->size < vp->nr_consts) { - struct nvfx_vertex_program *evict; - - evict = heap->next->priv; - nouveau_resource_free(&evict->data); - } - - if (nouveau_resource_alloc(heap, vp->nr_consts, vp, - &vp->data)) - assert(0); - } - - /*XXX: handle this some day */ - assert(vp->data->start >= vp->data_start_min); - - upload_data = TRUE; - if (vp->data_start != vp->data->start) - upload_code = TRUE; - } - - /* If exec or data segments moved we need to patch the program to - * fixup offsets and register IDs. - */ - if (vp->exec_start != vp->exec->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->has_branch_offset) { - assert(0); - } - } - - vp->exec_start = vp->exec->start; - } - - if (vp->nr_consts && vp->data_start != vp->data->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->const_index >= 0) { - vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK; - vpi->data[1] |= - (vpi->const_index + vp->data->start) << - NV30_VP_INST_CONST_SRC_SHIFT; - - } - } - - vp->data_start = vp->data->start; - } - - /* Update + Upload constant values */ - if (vp->nr_consts) { - float *map = NULL; - - if (constbuf) { - map = pipe_buffer_map(pscreen, constbuf, - PIPE_BUFFER_USAGE_CPU_READ); - } - - for (i = 0; i < vp->nr_consts; i++) { - struct nvfx_vertex_program_data *vpd = &vp->consts[i]; - - if (vpd->index >= 0) { - if (!upload_data && - !memcmp(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float))) - continue; - memcpy(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float)); - } - - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (chan, i + vp->data->start); - OUT_RINGp (chan, (uint32_t *)vpd->value, 4); - } - - if (constbuf) - pipe_buffer_unmap(pscreen, constbuf); - } - - /* Upload vtxprog */ - if (upload_code) { -#if 0 - for (i = 0; i < vp->nr_insns; i++) { - NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n", - i, vp->insns[i].data[0], vp->insns[i].data[1], - vp->insns[i].data[2], vp->insns[i].data[3]); - } -#endif - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (chan, vp->exec->start); - for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (chan, vp->insns[i].data, 4); - } - } - - if (vp->so != nvfx->state.hw[NVFX_STATE_VERTPROG]) { - so_ref(vp->so, &nvfx->state.hw[NVFX_STATE_VERTPROG]); - return TRUE; - } - - return FALSE; -} - -void -nv30_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp) -{ - vp->translated = FALSE; - - if (vp->nr_insns) { - FREE(vp->insns); - vp->insns = NULL; - vp->nr_insns = 0; - } - - if (vp->nr_consts) { - FREE(vp->consts); - vp->consts = NULL; - vp->nr_consts = 0; - } - - nouveau_resource_free(&vp->exec); - vp->exec_start = 0; - nouveau_resource_free(&vp->data); - vp->data_start = 0; - vp->data_start_min = 0; - - vp->ir = vp->or = 0; - so_ref(NULL, &vp->so); -} - -struct nvfx_state_entry nv30_state_vertprog = { - .validate = nv30_vertprog_validate, - .dirty = { - .pipe = NVFX_NEW_VERTPROG /*| NVFX_NEW_UCP*/, - .hw = NVFX_STATE_VERTPROG, - } -}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 6908506906..c8f3858c36 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -7,8 +7,7 @@ C_SOURCES = \ nv40_context.c \ nv40_fragtex.c \ nv40_screen.c \ - nv40_state.c \ - nv40_vertprog.c + nv40_state.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/nvfx diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h index 5ea2229fac..8dc87e426f 100644 --- a/src/gallium/drivers/nv40/nv40_context.h +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -5,9 +5,6 @@ /* nv40_fragtex.c */ extern void nv40_fragtex_bind(struct nvfx_context *); - -/* nv40_state.c and friends */ -extern struct nvfx_state_entry nv40_state_vertprog; extern struct nvfx_state_entry nv40_state_fragtex; /* nvfx_context.c */ diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c deleted file mode 100644 index 752cd0d1b3..0000000000 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ /dev/null @@ -1,1048 +0,0 @@ -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "util/u_inlines.h" - -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" - -#include "nv40_context.h" -#include "nvfx_state.h" - -/* TODO (at least...): - * 1. Indexed consts + ARL - * 3. NV_vp11, NV_vp2, NV_vp3 features - * - extra arith opcodes - * - branching - * - texture sampling - * - indexed attribs - * - indexed results - * 4. bugs - */ - -#define SWZ_X 0 -#define SWZ_Y 1 -#define SWZ_Z 2 -#define SWZ_W 3 -#define MASK_X 8 -#define MASK_Y 4 -#define MASK_Z 2 -#define MASK_W 1 -#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE 0 -#define DEF_CTEST 0 -#include "nv40_shader.h" - -#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nvfx_sr_neg((s)) -#define abs(s) nvfx_sr_abs((s)) - -#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) - -struct nv40_vpc { - struct nvfx_vertex_program *vp; - - struct nvfx_vertex_program_exec *vpi; - - unsigned r_temps; - unsigned r_temps_discard; - struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; - struct nvfx_sreg *r_address; - struct nvfx_sreg *r_temp; - - struct nvfx_sreg *imm; - unsigned nr_imm; - - unsigned hpos_idx; -}; - -static struct nvfx_sreg -temp(struct nv40_vpc *vpc) -{ - int idx = ffs(~vpc->r_temps) - 1; - - if (idx < 0) { - NOUVEAU_ERR("out of temps!!\n"); - assert(0); - return nvfx_sr(NVFXSR_TEMP, 0); - } - - vpc->r_temps |= (1 << idx); - vpc->r_temps_discard |= (1 << idx); - return nvfx_sr(NVFXSR_TEMP, idx); -} - -static INLINE void -release_temps(struct nv40_vpc *vpc) -{ - vpc->r_temps &= ~vpc->r_temps_discard; - vpc->r_temps_discard = 0; -} - -static struct nvfx_sreg -constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, float w) -{ - struct nvfx_vertex_program *vp = vpc->vp; - struct nvfx_vertex_program_data *vpd; - int idx; - - if (pipe >= 0) { - for (idx = 0; idx < vp->nr_consts; idx++) { - if (vp->consts[idx].index == pipe) - return nvfx_sr(NVFXSR_CONST, idx); - } - } - - idx = vp->nr_consts++; - vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); - vpd = &vp->consts[idx]; - - vpd->index = pipe; - vpd->value[0] = x; - vpd->value[1] = y; - vpd->value[2] = z; - vpd->value[3] = w; - return nvfx_sr(NVFXSR_CONST, idx); -} - -#define arith(cc,s,o,d,m,s0,s1,s2) \ - nv40_vp_arith((cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2)) - -static void -emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src) -{ - struct nvfx_vertex_program *vp = vpc->vp; - uint32_t sr = 0; - - switch (src.type) { - case NVFXSR_TEMP: - sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT); - sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT); - break; - case NVFXSR_INPUT: - sr |= (NV40_VP_SRC_REG_TYPE_INPUT << - NV40_VP_SRC_REG_TYPE_SHIFT); - vp->ir |= (1 << src.index); - hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT); - break; - case NVFXSR_CONST: - sr |= (NV40_VP_SRC_REG_TYPE_CONST << - NV40_VP_SRC_REG_TYPE_SHIFT); - assert(vpc->vpi->const_index == -1 || - vpc->vpi->const_index == src.index); - vpc->vpi->const_index = src.index; - break; - case NVFXSR_NONE: - sr |= (NV40_VP_SRC_REG_TYPE_INPUT << - NV40_VP_SRC_REG_TYPE_SHIFT); - break; - default: - assert(0); - } - - if (src.negate) - sr |= NV40_VP_SRC_NEGATE; - - if (src.abs) - hw[0] |= (1 << (21 + pos)); - - sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) | - (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) | - (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) | - (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT)); - - switch (pos) { - case 0: - hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >> - NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT; - hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) << - NV40_VP_INST_SRC0L_SHIFT; - break; - case 1: - hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT; - break; - case 2: - hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >> - NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT; - hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) << - NV40_VP_INST_SRC2L_SHIFT; - break; - default: - assert(0); - } -} - -static void -emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst) -{ - struct nvfx_vertex_program *vp = vpc->vp; - - switch (dst.type) { - case NVFXSR_TEMP: - hw[3] |= NV40_VP_INST_DEST_MASK; - if (slot == 0) { - hw[0] |= (dst.index << - NV40_VP_INST_VEC_DEST_TEMP_SHIFT); - } else { - hw[3] |= (dst.index << - NV40_VP_INST_SCA_DEST_TEMP_SHIFT); - } - break; - case NVFXSR_OUTPUT: - switch (dst.index) { - case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; - case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; - case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; - case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; - case NV40_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; - case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; - case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; - case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; - case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; - case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; - case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; - case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; - case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; - case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; - case NV40_VP_INST_DEST_CLIP(0): - vp->or |= (1 << 6); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0; - dst.index = NV40_VP_INST_DEST_FOGC; - break; - case NV40_VP_INST_DEST_CLIP(1): - vp->or |= (1 << 7); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1; - dst.index = NV40_VP_INST_DEST_FOGC; - break; - case NV40_VP_INST_DEST_CLIP(2): - vp->or |= (1 << 8); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2; - dst.index = NV40_VP_INST_DEST_FOGC; - break; - case NV40_VP_INST_DEST_CLIP(3): - vp->or |= (1 << 9); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3; - dst.index = NV40_VP_INST_DEST_PSZ; - break; - case NV40_VP_INST_DEST_CLIP(4): - vp->or |= (1 << 10); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4; - dst.index = NV40_VP_INST_DEST_PSZ; - break; - case NV40_VP_INST_DEST_CLIP(5): - vp->or |= (1 << 11); - vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5; - dst.index = NV40_VP_INST_DEST_PSZ; - break; - default: - break; - } - - hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); - if (slot == 0) { - hw[0] |= NV40_VP_INST_VEC_RESULT; - hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); - } else { - hw[3] |= NV40_VP_INST_SCA_RESULT; - hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; - } - break; - default: - assert(0); - } -} - -static void -nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, - struct nvfx_sreg dst, int mask, - struct nvfx_sreg s0, struct nvfx_sreg s1, - struct nvfx_sreg s2) -{ - struct nvfx_vertex_program *vp = vpc->vp; - uint32_t *hw; - - vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); - vpc->vpi = &vp->insns[vp->nr_insns - 1]; - memset(vpc->vpi, 0, sizeof(*vpc->vpi)); - vpc->vpi->const_index = -1; - - hw = vpc->vpi->data; - - hw[0] |= (NVFX_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); - hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) | - (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) | - (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) | - (3 << NV40_VP_INST_COND_SWZ_W_SHIFT)); - - if (slot == 0) { - hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); - hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; - hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); - } else { - hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); - hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); - hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); - } - - emit_dst(vpc, hw, slot, dst); - emit_src(vpc, hw, 0, s0); - emit_src(vpc, hw, 1, s1); - emit_src(vpc, hw, 2, s2); -} - -static INLINE struct nvfx_sreg -tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { - struct nvfx_sreg src; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index); - break; - case TGSI_FILE_CONSTANT: - src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); - break; - case TGSI_FILE_IMMEDIATE: - src = vpc->imm[fsrc->Register.Index]; - break; - case TGSI_FILE_TEMPORARY: - src = vpc->r_temp[fsrc->Register.Index]; - break; - default: - NOUVEAU_ERR("bad src file\n"); - break; - } - - src.abs = fsrc->Register.Absolute; - src.negate = fsrc->Register.Negate; - src.swz[0] = fsrc->Register.SwizzleX; - src.swz[1] = fsrc->Register.SwizzleY; - src.swz[2] = fsrc->Register.SwizzleZ; - src.swz[3] = fsrc->Register.SwizzleW; - return src; -} - -static INLINE struct nvfx_sreg -tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nvfx_sreg dst; - - switch (fdst->Register.File) { - case TGSI_FILE_OUTPUT: - dst = vpc->r_result[fdst->Register.Index]; - break; - case TGSI_FILE_TEMPORARY: - dst = vpc->r_temp[fdst->Register.Index]; - break; - case TGSI_FILE_ADDRESS: - dst = vpc->r_address[fdst->Register.Index]; - break; - default: - NOUVEAU_ERR("bad dst file\n"); - break; - } - - return dst; -} - -static INLINE int -tgsi_mask(uint tgsi) -{ - int mask = 0; - - if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; - if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; - if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; - if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; - return mask; -} - -static boolean -src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, - struct nvfx_sreg *src) -{ - const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - struct nvfx_sreg tgsi = tgsi_src(vpc, fsrc); - uint mask = 0; - uint c; - - for (c = 0; c < 4; c++) { - switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - mask |= tgsi_mask(1 << c); - break; - default: - assert(0); - } - } - - if (mask == MASK_ALL) - return TRUE; - - *src = temp(vpc); - - if (mask) - arith(vpc, VEC, MOV, *src, mask, tgsi, none, none); - - return FALSE; -} - -static boolean -nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, - const struct tgsi_full_instruction *finst) -{ - struct nvfx_sreg src[3], dst, tmp; - struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - int mask; - int ai = -1, ci = -1, ii = -1; - int i; - - if (finst->Instruction.Opcode == TGSI_OPCODE_END) - return TRUE; - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { - src[i] = tgsi_src(vpc, fsrc); - } - } - - for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *fsrc; - - fsrc = &finst->Src[i]; - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - case TGSI_FILE_CONSTANT: - case TGSI_FILE_TEMPORARY: - if (!src_native_swz(vpc, fsrc, &src[i])) - continue; - break; - default: - break; - } - - switch (fsrc->Register.File) { - case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->Register.Index) { - ai = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - case TGSI_FILE_CONSTANT: - if ((ci == -1 && ii == -1) || - ci == fsrc->Register.Index) { - ci = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - case TGSI_FILE_IMMEDIATE: - if ((ci == -1 && ii == -1) || - ii == fsrc->Register.Index) { - ii = fsrc->Register.Index; - src[i] = tgsi_src(vpc, fsrc); - } else { - src[i] = temp(vpc); - arith(vpc, VEC, MOV, src[i], MASK_ALL, - tgsi_src(vpc, fsrc), none, none); - } - break; - case TGSI_FILE_TEMPORARY: - /* handled above */ - break; - default: - NOUVEAU_ERR("bad src file\n"); - return FALSE; - } - } - - dst = tgsi_dst(vpc, &finst->Dst[0]); - mask = tgsi_mask(finst->Dst[0].Register.WriteMask); - - switch (finst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none); - break; - case TGSI_OPCODE_ADD: - arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]); - break; - case TGSI_OPCODE_ARL: - arith(vpc, VEC, ARL, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_DP3: - arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DP4: - arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DPH: - arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_DST: - arith(vpc, VEC, DST, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_EX2: - arith(vpc, SCA, EX2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_EXP: - arith(vpc, SCA, EXP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_FLR: - arith(vpc, VEC, FLR, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_FRC: - arith(vpc, VEC, FRC, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_LG2: - arith(vpc, SCA, LG2, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LIT: - arith(vpc, SCA, LIT, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_LOG: - arith(vpc, SCA, LOG, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_MAD: - arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]); - break; - case TGSI_OPCODE_MAX: - arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MIN: - arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_MOV: - arith(vpc, VEC, MOV, dst, mask, src[0], none, none); - break; - case TGSI_OPCODE_MUL: - arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_POW: - tmp = temp(vpc); - arith(vpc, SCA, LG2, tmp, MASK_X, none, none, - swz(src[0], X, X, X, X)); - arith(vpc, VEC, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), - swz(src[1], X, X, X, X), none); - arith(vpc, SCA, EX2, dst, mask, none, none, - swz(tmp, X, X, X, X)); - break; - case TGSI_OPCODE_RCP: - arith(vpc, SCA, RCP, dst, mask, none, none, src[0]); - break; - case TGSI_OPCODE_RET: - break; - case TGSI_OPCODE_RSQ: - arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0])); - break; - case TGSI_OPCODE_SGE: - arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SLT: - arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none); - break; - case TGSI_OPCODE_SUB: - arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1])); - break; - case TGSI_OPCODE_XPD: - tmp = temp(vpc); - arith(vpc, VEC, MUL, tmp, mask, - swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); - arith(vpc, VEC, MAD, dst, (mask & ~MASK_W), - swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), - neg(tmp)); - break; - default: - NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); - return FALSE; - } - - release_temps(vpc); - return TRUE; -} - -static boolean -nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, - const struct tgsi_full_declaration *fdec) -{ - unsigned idx = fdec->Range.First; - int hw; - - switch (fdec->Semantic.Name) { - case TGSI_SEMANTIC_POSITION: - hw = NV40_VP_INST_DEST_POS; - vpc->hpos_idx = idx; - break; - case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.Index == 0) { - hw = NV40_VP_INST_DEST_COL0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV40_VP_INST_DEST_COL1; - } else { - NOUVEAU_ERR("bad colour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_BCOLOR: - if (fdec->Semantic.Index == 0) { - hw = NV40_VP_INST_DEST_BFC0; - } else - if (fdec->Semantic.Index == 1) { - hw = NV40_VP_INST_DEST_BFC1; - } else { - NOUVEAU_ERR("bad bcolour semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_FOG: - hw = NV40_VP_INST_DEST_FOGC; - break; - case TGSI_SEMANTIC_PSIZE: - hw = NV40_VP_INST_DEST_PSZ; - break; - case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.Index <= 7) { - hw = NV40_VP_INST_DEST_TC(fdec->Semantic.Index); - } else { - NOUVEAU_ERR("bad generic semantic index\n"); - return FALSE; - } - break; - case TGSI_SEMANTIC_EDGEFLAG: - /* not really an error just a fallback */ - NOUVEAU_ERR("cannot handle edgeflag output\n"); - return FALSE; - default: - NOUVEAU_ERR("bad output semantic\n"); - return FALSE; - } - - vpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw); - return TRUE; -} - -static boolean -nv40_vertprog_prepare(struct nv40_vpc *vpc) -{ - struct tgsi_parse_context p; - int high_temp = -1, high_addr = -1, nr_imm = 0, i; - - tgsi_parse_init(&p, vpc->vp->pipe.tokens); - while (!tgsi_parse_end_of_tokens(&p)) { - const union tgsi_full_token *tok = &p.FullToken; - - tgsi_parse_token(&p); - switch(tok->Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - nr_imm++; - break; - case TGSI_TOKEN_TYPE_DECLARATION: - { - const struct tgsi_full_declaration *fdec; - - fdec = &p.FullToken.FullDeclaration; - switch (fdec->Declaration.File) { - case TGSI_FILE_TEMPORARY: - if (fdec->Range.Last > high_temp) { - high_temp = - fdec->Range.Last; - } - break; -#if 0 /* this would be nice.. except gallium doesn't track it */ - case TGSI_FILE_ADDRESS: - if (fdec->Range.Last > high_addr) { - high_addr = - fdec->Range.Last; - } - break; -#endif - case TGSI_FILE_OUTPUT: - if (!nv40_vertprog_parse_decl_output(vpc, fdec)) - return FALSE; - break; - default: - break; - } - } - break; -#if 1 /* yay, parse instructions looking for address regs instead */ - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - const struct tgsi_full_dst_register *fdst; - - finst = &p.FullToken.FullInstruction; - fdst = &finst->Dst[0]; - - if (fdst->Register.File == TGSI_FILE_ADDRESS) { - if (fdst->Register.Index > high_addr) - high_addr = fdst->Register.Index; - } - - } - break; -#endif - default: - break; - } - } - tgsi_parse_free(&p); - - if (nr_imm) { - vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg)); - assert(vpc->imm); - } - - if (++high_temp) { - vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); - for (i = 0; i < high_temp; i++) - vpc->r_temp[i] = temp(vpc); - } - - if (++high_addr) { - vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_sreg)); - for (i = 0; i < high_addr; i++) - vpc->r_address[i] = temp(vpc); - } - - vpc->r_temps_discard = 0; - return TRUE; -} - -static void -nv40_vertprog_translate(struct nvfx_context *nvfx, - struct nvfx_vertex_program *vp) -{ - struct tgsi_parse_context parse; - struct nv40_vpc *vpc = NULL; - struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); - int i; - - vpc = CALLOC(1, sizeof(struct nv40_vpc)); - if (!vpc) - return; - vpc->vp = vp; - - if (!nv40_vertprog_prepare(vpc)) { - FREE(vpc); - return; - } - - /* Redirect post-transform vertex position to a temp if user clip - * planes are enabled. We need to append code to the vtxprog - * to handle clip planes later. - */ - if (vp->ucp.nr) { - vpc->r_result[vpc->hpos_idx] = temp(vpc); - vpc->r_temps_discard = 0; - } - - tgsi_parse_init(&parse, vp->pipe.tokens); - - while (!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - const struct tgsi_full_immediate *imm; - - imm = &parse.FullToken.FullImmediate; - assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); - assert(imm->Immediate.NrTokens == 4 + 1); - vpc->imm[vpc->nr_imm++] = - constant(vpc, -1, - imm->u[0].Float, - imm->u[1].Float, - imm->u[2].Float, - imm->u[3].Float); - } - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - finst = &parse.FullToken.FullInstruction; - if (!nv40_vertprog_parse_instruction(vpc, finst)) - goto out_err; - } - break; - default: - break; - } - } - - /* Write out HPOS if it was redirected to a temp earlier */ - if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) { - struct nvfx_sreg hpos = nvfx_sr(NVFXSR_OUTPUT, - NV40_VP_INST_DEST_POS); - struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; - - arith(vpc, VEC, MOV, hpos, MASK_ALL, htmp, none, none); - } - - /* Insert code to handle user clip planes */ - for (i = 0; i < vp->ucp.nr; i++) { - struct nvfx_sreg cdst = nvfx_sr(NVFXSR_OUTPUT, - NV40_VP_INST_DEST_CLIP(i)); - struct nvfx_sreg ceqn = constant(vpc, -1, - nvfx->clip.ucp[i][0], - nvfx->clip.ucp[i][1], - nvfx->clip.ucp[i][2], - nvfx->clip.ucp[i][3]); - struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; - unsigned mask; - - switch (i) { - case 0: case 3: mask = MASK_Y; break; - case 1: case 4: mask = MASK_Z; break; - case 2: case 5: mask = MASK_W; break; - default: - NOUVEAU_ERR("invalid clip dist #%d\n", i); - goto out_err; - } - - arith(vpc, VEC, DP4, cdst, mask, htmp, ceqn, none); - } - - vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; - vp->translated = TRUE; -out_err: - tgsi_parse_free(&parse); - if (vpc->r_temp) - FREE(vpc->r_temp); - if (vpc->r_address) - FREE(vpc->r_address); - if (vpc->imm) - FREE(vpc->imm); - FREE(vpc); -} - -static boolean -nv40_vertprog_validate(struct nvfx_context *nvfx) -{ - struct pipe_screen *pscreen = nvfx->pipe.screen; - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - struct nvfx_vertex_program *vp; - struct pipe_buffer *constbuf; - boolean upload_code = FALSE, upload_data = FALSE; - int i; - - if (nvfx->render_mode == HW) { - vp = nvfx->vertprog; - constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; - - if ((nvfx->dirty & NVFX_NEW_UCP) || - memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) { - nv40_vertprog_destroy(nvfx, vp); - memcpy(&vp->ucp, &nvfx->clip, sizeof(vp->ucp)); - } - } else { - vp = nvfx->swtnl.vertprog; - constbuf = NULL; - } - - /* Translate TGSI shader into hw bytecode */ - if (vp->translated) - goto check_gpu_resources; - - nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG; - nv40_vertprog_translate(nvfx, vp); - if (!vp->translated) { - nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG; - return FALSE; - } - -check_gpu_resources: - /* Allocate hw vtxprog exec slots */ - if (!vp->exec) { - struct nouveau_resource *heap = nvfx->screen->vp_exec_heap; - struct nouveau_stateobj *so; - uint vplen = vp->nr_insns; - - if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) { - while (heap->next && heap->size < vplen) { - struct nvfx_vertex_program *evict; - - evict = heap->next->priv; - nouveau_resource_free(&evict->exec); - } - - if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) - assert(0); - } - - so = so_new(3, 4, 0); - so_method(so, eng3d, NV34TCL_VP_START_FROM_ID, 1); - so_data (so, vp->exec->start); - so_method(so, eng3d, NV40TCL_VP_ATTRIB_EN, 2); - so_data (so, vp->ir); - so_data (so, vp->or); - so_method(so, eng3d, NV34TCL_VP_CLIP_PLANES_ENABLE, 1); - so_data (so, vp->clip_ctrl); - so_ref(so, &vp->so); - so_ref(NULL, &so); - - upload_code = TRUE; - } - - /* Allocate hw vtxprog const slots */ - if (vp->nr_consts && !vp->data) { - struct nouveau_resource *heap = nvfx->screen->vp_data_heap; - - if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) { - while (heap->next && heap->size < vp->nr_consts) { - struct nvfx_vertex_program *evict; - - evict = heap->next->priv; - nouveau_resource_free(&evict->data); - } - - if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) - assert(0); - } - - /*XXX: handle this some day */ - assert(vp->data->start >= vp->data_start_min); - - upload_data = TRUE; - if (vp->data_start != vp->data->start) - upload_code = TRUE; - } - - /* If exec or data segments moved we need to patch the program to - * fixup offsets and register IDs. - */ - if (vp->exec_start != vp->exec->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->has_branch_offset) { - assert(0); - } - } - - vp->exec_start = vp->exec->start; - } - - if (vp->nr_consts && vp->data_start != vp->data->start) { - for (i = 0; i < vp->nr_insns; i++) { - struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; - - if (vpi->const_index >= 0) { - vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK; - vpi->data[1] |= - (vpi->const_index + vp->data->start) << - NV40_VP_INST_CONST_SRC_SHIFT; - - } - } - - vp->data_start = vp->data->start; - } - - /* Update + Upload constant values */ - if (vp->nr_consts) { - float *map = NULL; - - if (constbuf) { - map = pipe_buffer_map(pscreen, constbuf, - PIPE_BUFFER_USAGE_CPU_READ); - } - - for (i = 0; i < vp->nr_consts; i++) { - struct nvfx_vertex_program_data *vpd = &vp->consts[i]; - - if (vpd->index >= 0) { - if (!upload_data && - !memcmp(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float))) - continue; - memcpy(vpd->value, &map[vpd->index * 4], - 4 * sizeof(float)); - } - - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5); - OUT_RING (chan, i + vp->data->start); - OUT_RINGp (chan, (uint32_t *)vpd->value, 4); - } - - if (constbuf) - pscreen->buffer_unmap(pscreen, constbuf); - } - - /* Upload vtxprog */ - if (upload_code) { -#if 0 - for (i = 0; i < vp->nr_insns; i++) { - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); - NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); - } -#endif - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_FROM_ID, 1); - OUT_RING (chan, vp->exec->start); - for (i = 0; i < vp->nr_insns; i++) { - BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4); - OUT_RINGp (chan, vp->insns[i].data, 4); - } - } - - if (vp->so != nvfx->state.hw[NVFX_STATE_VERTPROG]) { - so_ref(vp->so, &nvfx->state.hw[NVFX_STATE_VERTPROG]); - return TRUE; - } - - return FALSE; -} - -void -nv40_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp) -{ - vp->translated = FALSE; - - if (vp->nr_insns) { - FREE(vp->insns); - vp->insns = NULL; - vp->nr_insns = 0; - } - - if (vp->nr_consts) { - FREE(vp->consts); - vp->consts = NULL; - vp->nr_consts = 0; - } - - nouveau_resource_free(&vp->exec); - vp->exec_start = 0; - nouveau_resource_free(&vp->data); - vp->data_start = 0; - vp->data_start_min = 0; - - vp->ir = vp->or = vp->clip_ctrl = 0; - so_ref(NULL, &vp->so); -} - -struct nvfx_state_entry nv40_state_vertprog = { - .validate = nv40_vertprog_validate, - .dirty = { - .pipe = NVFX_NEW_VERTPROG | NVFX_NEW_UCP, - .hw = NVFX_STATE_VERTPROG, - } -}; - diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile index 6c4ee17493..a665c7e263 100644 --- a/src/gallium/drivers/nvfx/Makefile +++ b/src/gallium/drivers/nvfx/Makefile @@ -20,6 +20,7 @@ C_SOURCES = \ nvfx_state_zsa.c \ nvfx_surface.c \ nvfx_transfer.c \ - nvfx_vbo.c + nvfx_vbo.c \ + nvfx_vertprog.c include ../../Makefile.template diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index ab1a1fbbe9..9a4b4631b5 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -190,6 +190,7 @@ extern struct nvfx_state_entry nvfx_state_scissor; extern struct nvfx_state_entry nvfx_state_sr; extern struct nvfx_state_entry nvfx_state_stipple; extern struct nvfx_state_entry nvfx_state_vbo; +extern struct nvfx_state_entry nvfx_state_vertprog; extern struct nvfx_state_entry nvfx_state_viewport; extern struct nvfx_state_entry nvfx_state_vtxfmt; extern struct nvfx_state_entry nvfx_state_zsa; @@ -234,8 +235,7 @@ extern void nvfx_draw_elements(struct pipe_context *pipe, unsigned count); /* nvfx_vertprog.c */ -extern void nv30_vertprog_destroy(struct nvfx_context *, - struct nvfx_vertex_program *); -extern void nv40_vertprog_destroy(struct nvfx_context *, +extern void nvfx_vertprog_destroy(struct nvfx_context *, struct nvfx_vertex_program *); + #endif diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index 76780dc4f5..7e138afc71 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -370,10 +370,7 @@ nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso) struct nvfx_vertex_program *vp = hwcso; draw_delete_vertex_shader(nvfx->draw, vp->draw); - if(!nvfx->is_nv4x) - nv30_vertprog_destroy(nvfx, vp); - else - nv40_vertprog_destroy(nvfx, vp); + nvfx_vertprog_destroy(nvfx, vp); FREE((void*)vp->pipe.tokens); FREE(vp); } diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 88b86f9fb5..fcbf831050 100644 --- a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -11,7 +11,7 @@ static struct nvfx_state_entry *name##_render_states[] = { \ &nvfx_state_stipple, \ &nvfx_state_fragprog, \ &nvxx##_state_fragtex, \ - &nvxx##_state_vertprog, \ + &nvfx_state_vertprog, \ &nvfx_state_blend, \ &nvfx_state_blend_colour, \ &nvfx_state_zsa, \ diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c new file mode 100644 index 0000000000..730361a982 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -0,0 +1,1108 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_util.h" + +#include "nvfx_context.h" +#include "nvfx_state.h" + +/* TODO (at least...): + * 1. Indexed consts + ARL + * 3. NV_vp11, NV_vp2, NV_vp3 features + * - extra arith opcodes + * - branching + * - texture sampling + * - indexed attribs + * - indexed results + * 4. bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv30/nv30_shader.h" +#include "nv40/nv40_shader.h" + +#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s)) + +#define NVFX_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) + +struct nvfx_vpc { + struct nvfx_vertex_program *vp; + + struct nvfx_vertex_program_exec *vpi; + + unsigned r_temps; + unsigned r_temps_discard; + struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nvfx_sreg *r_address; + struct nvfx_sreg *r_temp; + + struct nvfx_sreg *imm; + unsigned nr_imm; + + unsigned hpos_idx; +}; + +static struct nvfx_sreg +temp(struct nvfx_vpc *vpc) +{ + int idx = ffs(~vpc->r_temps) - 1; + + if (idx < 0) { + NOUVEAU_ERR("out of temps!!\n"); + assert(0); + return nvfx_sr(NVFXSR_TEMP, 0); + } + + vpc->r_temps |= (1 << idx); + vpc->r_temps_discard |= (1 << idx); + return nvfx_sr(NVFXSR_TEMP, idx); +} + +static INLINE void +release_temps(struct nvfx_vpc *vpc) +{ + vpc->r_temps &= ~vpc->r_temps_discard; + vpc->r_temps_discard = 0; +} + +static struct nvfx_sreg +constant(struct nvfx_vpc *vpc, int pipe, float x, float y, float z, float w) +{ + struct nvfx_vertex_program *vp = vpc->vp; + struct nvfx_vertex_program_data *vpd; + int idx; + + if (pipe >= 0) { + for (idx = 0; idx < vp->nr_consts; idx++) { + if (vp->consts[idx].index == pipe) + return nvfx_sr(NVFXSR_CONST, idx); + } + } + + idx = vp->nr_consts++; + vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); + vpd = &vp->consts[idx]; + + vpd->index = pipe; + vpd->value[0] = x; + vpd->value[1] = y; + vpd->value[2] = z; + vpd->value[3] = w; + return nvfx_sr(NVFXSR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nvfx_vp_arith(nvfx, (cc), NVFX_VP_INST_SLOT_##s, NVFX_VP_INST_##s##_OP_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, struct nvfx_sreg src) +{ + struct nvfx_vertex_program *vp = vpc->vp; + uint32_t sr = 0; + + switch (src.type) { + case NVFXSR_TEMP: + sr |= (NVFX_VP(SRC_REG_TYPE_TEMP) << NVFX_VP(SRC_REG_TYPE_SHIFT)); + sr |= (src.index << NVFX_VP(SRC_TEMP_SRC_SHIFT)); + break; + case NVFXSR_INPUT: + sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); + vp->ir |= (1 << src.index); + hw[1] |= (src.index << NVFX_VP(INST_INPUT_SRC_SHIFT)); + break; + case NVFXSR_CONST: + sr |= (NVFX_VP(SRC_REG_TYPE_CONST) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); + assert(vpc->vpi->const_index == -1 || + vpc->vpi->const_index == src.index); + vpc->vpi->const_index = src.index; + break; + case NVFXSR_NONE: + sr |= (NVFX_VP(SRC_REG_TYPE_INPUT) << + NVFX_VP(SRC_REG_TYPE_SHIFT)); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NVFX_VP(SRC_NEGATE); + + if (src.abs) + hw[0] |= (1 << (21 + pos)); + + sr |= ((src.swz[0] << NVFX_VP(SRC_SWZ_X_SHIFT)) | + (src.swz[1] << NVFX_VP(SRC_SWZ_Y_SHIFT)) | + (src.swz[2] << NVFX_VP(SRC_SWZ_Z_SHIFT)) | + (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT))); + + switch (pos) { + case 0: + hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >> + NVFX_VP(SRC0_HIGH_SHIFT)) << NVFX_VP(INST_SRC0H_SHIFT); + hw[2] |= (sr & NVFX_VP(SRC0_LOW_MASK)) << + NVFX_VP(INST_SRC0L_SHIFT); + break; + case 1: + hw[2] |= sr << NVFX_VP(INST_SRC1_SHIFT); + break; + case 2: + hw[2] |= ((sr & NVFX_VP(SRC2_HIGH_MASK)) >> + NVFX_VP(SRC2_HIGH_SHIFT)) << NVFX_VP(INST_SRC2H_SHIFT); + hw[3] |= (sr & NVFX_VP(SRC2_LOW_MASK)) << + NVFX_VP(INST_SRC2L_SHIFT); + break; + default: + assert(0); + } +} + +static void +emit_dst(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int slot, struct nvfx_sreg dst) +{ + struct nvfx_vertex_program *vp = vpc->vp; + + switch (dst.type) { + case NVFXSR_TEMP: + if(!nvfx->is_nv4x) + hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); + else { + hw[3] |= NV40_VP_INST_DEST_MASK; + if (slot == 0) { + hw[0] |= (dst.index << + NV40_VP_INST_VEC_DEST_TEMP_SHIFT); + } else { + hw[3] |= (dst.index << + NV40_VP_INST_SCA_DEST_TEMP_SHIFT); + } + } + break; + case NVFXSR_OUTPUT: + /* TODO: this may be wrong because on nv30 COL0 and BFC0 are swapped */ + switch (dst.index) { + case NVFX_VP_INST_DEST_CLIP(0): + vp->or |= (1 << 6); + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE0; + dst.index = NVFX_VP(INST_DEST_FOGC); + break; + case NVFX_VP_INST_DEST_CLIP(1): + vp->or |= (1 << 7); + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE1; + dst.index = NVFX_VP(INST_DEST_FOGC); + break; + case NVFX_VP_INST_DEST_CLIP(2): + vp->or |= (1 << 8); + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE2; + dst.index = NVFX_VP(INST_DEST_FOGC); + break; + case NVFX_VP_INST_DEST_CLIP(3): + vp->or |= (1 << 9); + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE3; + dst.index = NVFX_VP(INST_DEST_PSZ); + break; + case NVFX_VP_INST_DEST_CLIP(4): + vp->or |= (1 << 10); + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE4; + dst.index = NVFX_VP(INST_DEST_PSZ); + break; + case NVFX_VP_INST_DEST_CLIP(5): + vp->or |= (1 << 11); + vp->clip_ctrl |= NV34TCL_VP_CLIP_PLANES_ENABLE_PLANE5; + dst.index = NVFX_VP(INST_DEST_PSZ); + break; + default: + if(!nvfx->is_nv4x) { + switch (dst.index) { + case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV30_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; + case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + } + } else { + switch (dst.index) { + case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV40_VP_INST_DEST_FOGC: vp->or |= (1 << 4); break; + case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + } + } + break; + } + + if(!nvfx->is_nv4x) { + hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); + hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + + /*XXX: no way this is entirely correct, someone needs to + * figure out what exactly it is. + */ + hw[3] |= 0x800; + } else { + hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); + if (slot == 0) { + hw[0] |= NV40_VP_INST_VEC_RESULT; + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + } else { + hw[3] |= NV40_VP_INST_SCA_RESULT; + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + } + } + break; + default: + assert(0); + } +} + +static void +nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op, + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, + struct nvfx_sreg s2) +{ + struct nvfx_vertex_program *vp = vpc->vp; + uint32_t *hw; + + vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); + vpc->vpi = &vp->insns[vp->nr_insns - 1]; + memset(vpc->vpi, 0, sizeof(*vpc->vpi)); + vpc->vpi->const_index = -1; + + hw = vpc->vpi->data; + + hw[0] |= (NVFX_VP_INST_COND_TR << NVFX_VP(INST_COND_SHIFT)); + hw[0] |= ((0 << NVFX_VP(INST_COND_SWZ_X_SHIFT)) | + (1 << NVFX_VP(INST_COND_SWZ_Y_SHIFT)) | + (2 << NVFX_VP(INST_COND_SWZ_Z_SHIFT)) | + (3 << NVFX_VP(INST_COND_SWZ_W_SHIFT))); + + if(!nvfx->is_nv4x) { + hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); +// hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK); +// hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT)); + + if (dst.type == NVFXSR_OUTPUT) { + if (slot) + hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); + } else { + if (slot) + hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); + } + } else { + if (slot == 0) { + hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); + } else { + hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); + hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); + hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + } + } + + emit_dst(nvfx, vpc, hw, slot, dst); + emit_src(nvfx, vpc, hw, 0, s0); + emit_src(nvfx, vpc, hw, 1, s1); + emit_src(nvfx, vpc, hw, 2, s2); +} + +static INLINE struct nvfx_sreg +tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nvfx_sreg src; + + switch (fsrc->Register.File) { + case TGSI_FILE_INPUT: + src = nvfx_sr(NVFXSR_INPUT, fsrc->Register.Index); + break; + case TGSI_FILE_CONSTANT: + src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); + break; + case TGSI_FILE_IMMEDIATE: + src = vpc->imm[fsrc->Register.Index]; + break; + case TGSI_FILE_TEMPORARY: + src = vpc->r_temp[fsrc->Register.Index]; + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; + return src; +} + +static INLINE struct nvfx_sreg +tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + struct nvfx_sreg dst; + + switch (fdst->Register.File) { + case TGSI_FILE_OUTPUT: + dst = vpc->r_result[fdst->Register.Index]; + break; + case TGSI_FILE_TEMPORARY: + dst = vpc->r_temp[fdst->Register.Index]; + break; + case TGSI_FILE_ADDRESS: + dst = vpc->r_address[fdst->Register.Index]; + break; + default: + NOUVEAU_ERR("bad dst file\n"); + break; + } + + return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +src_native_swz(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc, + struct nvfx_sreg *src) +{ + const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_sreg tgsi = tgsi_src(vpc, fsrc); + uint mask = 0; + uint c; + + for (c = 0; c < 4; c++) { + switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) { + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: + mask |= tgsi_mask(1 << c); + break; + default: + assert(0); + } + } + + if (mask == MASK_ALL) + return TRUE; + + *src = temp(vpc); + + if (mask) + arith(vpc, VEC, MOV, *src, mask, tgsi, none, none); + + return FALSE; +} + +static boolean +nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, + const struct tgsi_full_instruction *finst) +{ + struct nvfx_sreg src[3], dst, tmp; + struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + int mask; + int ai = -1, ci = -1, ii = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(vpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->Src[i]; + + switch (fsrc->Register.File) { + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + if (!src_native_swz(nvfx, vpc, fsrc, &src[i])) + continue; + break; + default: + break; + } + + switch (fsrc->Register.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, VEC, MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + if ((ci == -1 && ii == -1) || + ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, VEC, MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_IMMEDIATE: + if ((ci == -1 && ii == -1) || + ii == fsrc->Register.Index) { + ii = fsrc->Register.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, VEC, MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(vpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(vpc, VEC, MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(vpc, VEC, ADD, dst, mask, src[0], none, src[1]); + break; + case TGSI_OPCODE_ARL: + arith(vpc, VEC, ARL, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(vpc, VEC, DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + arith(vpc, VEC, DPH, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DST: + arith(vpc, VEC, DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(vpc, SCA, EX2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_EXP: + arith(vpc, SCA, EXP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_FLR: + arith(vpc, VEC, FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(vpc, VEC, FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_LG2: + arith(vpc, SCA, LG2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LIT: + arith(vpc, SCA, LIT, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LOG: + arith(vpc, SCA, LOG, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_MAD: + arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(vpc, VEC, MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(vpc, VEC, MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(vpc, VEC, MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(vpc, VEC, MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(vpc); + arith(vpc, SCA, LG2, tmp, MASK_X, none, none, + swz(src[0], X, X, X, X)); + arith(vpc, VEC, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(vpc, SCA, EX2, dst, mask, none, none, + swz(tmp, X, X, X, X)); + break; + case TGSI_OPCODE_RCP: + arith(vpc, SCA, RCP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_RET: + break; + case TGSI_OPCODE_RSQ: + arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0])); + break; + case TGSI_OPCODE_SGE: + arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGT: + arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1])); + break; + case TGSI_OPCODE_XPD: + tmp = temp(vpc); + arith(vpc, VEC, MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(vpc, VEC, MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + release_temps(vpc); + return TRUE; +} + +static boolean +nvfx_vertprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, + const struct tgsi_full_declaration *fdec) +{ + unsigned idx = fdec->Range.First; + int hw; + + switch (fdec->Semantic.Name) { + case TGSI_SEMANTIC_POSITION: + hw = NVFX_VP(INST_DEST_POS); + vpc->hpos_idx = idx; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.Index == 0) { + hw = NVFX_VP(INST_DEST_COL0); + } else + if (fdec->Semantic.Index == 1) { + hw = NVFX_VP(INST_DEST_COL1); + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_BCOLOR: + if (fdec->Semantic.Index == 0) { + hw = NVFX_VP(INST_DEST_BFC0); + } else + if (fdec->Semantic.Index == 1) { + hw = NVFX_VP(INST_DEST_BFC1); + } else { + NOUVEAU_ERR("bad bcolour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NVFX_VP(INST_DEST_FOGC); + break; + case TGSI_SEMANTIC_PSIZE: + hw = NVFX_VP(INST_DEST_PSZ); + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.Index <= 7) { + hw = NVFX_VP(INST_DEST_TC(fdec->Semantic.Index)); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_EDGEFLAG: + /* not really an error just a fallback */ + NOUVEAU_ERR("cannot handle edgeflag output\n"); + return FALSE; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + vpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw); + return TRUE; +} + +static boolean +nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) +{ + struct tgsi_parse_context p; + int high_temp = -1, high_addr = -1, nr_imm = 0, i; + + tgsi_parse_init(&p, vpc->vp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + nr_imm++; + break; + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_TEMPORARY: + if (fdec->Range.Last > high_temp) { + high_temp = + fdec->Range.Last; + } + break; +#if 0 /* this would be nice.. except gallium doesn't track it */ + case TGSI_FILE_ADDRESS: + if (fdec->Range.Last > high_addr) { + high_addr = + fdec->Range.Last; + } + break; +#endif + case TGSI_FILE_OUTPUT: + if (!nvfx_vertprog_parse_decl_output(nvfx, vpc, fdec)) + return FALSE; + break; + default: + break; + } + } + break; +#if 1 /* yay, parse instructions looking for address regs instead */ + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + const struct tgsi_full_dst_register *fdst; + + finst = &p.FullToken.FullInstruction; + fdst = &finst->Dst[0]; + + if (fdst->Register.File == TGSI_FILE_ADDRESS) { + if (fdst->Register.Index > high_addr) + high_addr = fdst->Register.Index; + } + + } + break; +#endif + default: + break; + } + } + tgsi_parse_free(&p); + + if (nr_imm) { + vpc->imm = CALLOC(nr_imm, sizeof(struct nvfx_sreg)); + assert(vpc->imm); + } + + if (++high_temp) { + vpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); + for (i = 0; i < high_temp; i++) + vpc->r_temp[i] = temp(vpc); + } + + if (++high_addr) { + vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_sreg)); + for (i = 0; i < high_addr; i++) + vpc->r_address[i] = temp(vpc); + } + + vpc->r_temps_discard = 0; + return TRUE; +} + +static void +nvfx_vertprog_translate(struct nvfx_context *nvfx, + struct nvfx_vertex_program *vp) +{ + struct tgsi_parse_context parse; + struct nvfx_vpc *vpc = NULL; + struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + int i; + + vpc = CALLOC(1, sizeof(struct nvfx_vpc)); + if (!vpc) + return; + vpc->vp = vp; + + if (!nvfx_vertprog_prepare(nvfx, vpc)) { + FREE(vpc); + return; + } + + /* Redirect post-transform vertex position to a temp if user clip + * planes are enabled. We need to append code to the vtxprog + * to handle clip planes later. + */ + if (vp->ucp.nr) { + vpc->r_result[vpc->hpos_idx] = temp(vpc); + vpc->r_temps_discard = 0; + } + + tgsi_parse_init(&parse, vp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm; + + imm = &parse.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); + assert(imm->Immediate.NrTokens == 4 + 1); + vpc->imm[vpc->nr_imm++] = + constant(vpc, -1, + imm->u[0].Float, + imm->u[1].Float, + imm->u[2].Float, + imm->u[3].Float); + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + finst = &parse.FullToken.FullInstruction; + if (!nvfx_vertprog_parse_instruction(nvfx, vpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + /* Write out HPOS if it was redirected to a temp earlier */ + if (vpc->r_result[vpc->hpos_idx].type != NVFXSR_OUTPUT) { + struct nvfx_sreg hpos = nvfx_sr(NVFXSR_OUTPUT, + NVFX_VP(INST_DEST_POS)); + struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; + + arith(vpc, VEC, MOV, hpos, MASK_ALL, htmp, none, none); + } + + /* Insert code to handle user clip planes */ + for (i = 0; i < vp->ucp.nr; i++) { + struct nvfx_sreg cdst = nvfx_sr(NVFXSR_OUTPUT, + NVFX_VP_INST_DEST_CLIP(i)); + struct nvfx_sreg ceqn = constant(vpc, -1, + nvfx->clip.ucp[i][0], + nvfx->clip.ucp[i][1], + nvfx->clip.ucp[i][2], + nvfx->clip.ucp[i][3]); + struct nvfx_sreg htmp = vpc->r_result[vpc->hpos_idx]; + unsigned mask; + + switch (i) { + case 0: case 3: mask = MASK_Y; break; + case 1: case 4: mask = MASK_Z; break; + case 2: case 5: mask = MASK_W; break; + default: + NOUVEAU_ERR("invalid clip dist #%d\n", i); + goto out_err; + } + + arith(vpc, VEC, DP4, cdst, mask, htmp, ceqn, none); + } + + vp->insns[vp->nr_insns - 1].data[3] |= NVFX_VP_INST_LAST; + vp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + if (vpc->r_temp) + FREE(vpc->r_temp); + if (vpc->r_address) + FREE(vpc->r_address); + if (vpc->imm) + FREE(vpc->imm); + FREE(vpc); +} + +static boolean +nvfx_vertprog_validate(struct nvfx_context *nvfx) +{ + struct pipe_screen *pscreen = nvfx->pipe.screen; + struct nvfx_screen *screen = nvfx->screen; + struct nouveau_channel *chan = screen->base.channel; + struct nouveau_grobj *eng3d = screen->eng3d; + struct nvfx_vertex_program *vp; + struct pipe_buffer *constbuf; + boolean upload_code = FALSE, upload_data = FALSE; + int i; + + if (nvfx->render_mode == HW) { + vp = nvfx->vertprog; + constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; + + if ((nvfx->dirty & NVFX_NEW_UCP) || + memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) { + nvfx_vertprog_destroy(nvfx, vp); + memcpy(&vp->ucp, &nvfx->clip, sizeof(vp->ucp)); + } + } else { + vp = nvfx->swtnl.vertprog; + constbuf = NULL; + } + + /* Translate TGSI shader into hw bytecode */ + if (vp->translated) + goto check_gpu_resources; + + nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG; + nvfx_vertprog_translate(nvfx, vp); + if (!vp->translated) { + nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG; + return FALSE; + } + +check_gpu_resources: + /* Allocate hw vtxprog exec slots */ + if (!vp->exec) { + struct nouveau_resource *heap = nvfx->screen->vp_exec_heap; + struct nouveau_stateobj *so; + uint vplen = vp->nr_insns; + + if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) { + while (heap->next && heap->size < vplen) { + struct nvfx_vertex_program *evict; + + evict = heap->next->priv; + nouveau_resource_free(&evict->exec); + } + + if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) + assert(0); + } + + so = so_new(3, 4, 0); + so_method(so, eng3d, NV34TCL_VP_START_FROM_ID, 1); + so_data (so, vp->exec->start); + if(nvfx->is_nv4x) { + so_method(so, eng3d, NV40TCL_VP_ATTRIB_EN, 2); + so_data (so, vp->ir); + so_data (so, vp->or); + } + so_method(so, eng3d, NV34TCL_VP_CLIP_PLANES_ENABLE, 1); + so_data (so, vp->clip_ctrl); + so_ref(so, &vp->so); + so_ref(NULL, &so); + + upload_code = TRUE; + } + + /* Allocate hw vtxprog const slots */ + if (vp->nr_consts && !vp->data) { + struct nouveau_resource *heap = nvfx->screen->vp_data_heap; + + if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) { + while (heap->next && heap->size < vp->nr_consts) { + struct nvfx_vertex_program *evict; + + evict = heap->next->priv; + nouveau_resource_free(&evict->data); + } + + if (nouveau_resource_alloc(heap, vp->nr_consts, vp, &vp->data)) + assert(0); + } + + /*XXX: handle this some day */ + assert(vp->data->start >= vp->data_start_min); + + upload_data = TRUE; + if (vp->data_start != vp->data->start) + upload_code = TRUE; + } + + /* If exec or data segments moved we need to patch the program to + * fixup offsets and register IDs. + */ + if (vp->exec_start != vp->exec->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->has_branch_offset) { + assert(0); + } + } + + vp->exec_start = vp->exec->start; + } + + if (vp->nr_consts && vp->data_start != vp->data->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nvfx_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->const_index >= 0) { + vpi->data[1] &= ~NVFX_VP(INST_CONST_SRC_MASK); + vpi->data[1] |= + (vpi->const_index + vp->data->start) << + NVFX_VP(INST_CONST_SRC_SHIFT); + + } + } + + vp->data_start = vp->data->start; + } + + /* Update + Upload constant values */ + if (vp->nr_consts) { + float *map = NULL; + + if (constbuf) { + map = pipe_buffer_map(pscreen, constbuf, + PIPE_BUFFER_USAGE_CPU_READ); + } + + for (i = 0; i < vp->nr_consts; i++) { + struct nvfx_vertex_program_data *vpd = &vp->consts[i]; + + if (vpd->index >= 0) { + if (!upload_data && + !memcmp(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float))) + continue; + memcpy(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float)); + } + + BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (chan, i + vp->data->start); + OUT_RINGp (chan, (uint32_t *)vpd->value, 4); + } + + if (constbuf) + pipe_buffer_unmap(pscreen, constbuf); + } + + /* Upload vtxprog */ + if (upload_code) { +#if 0 + for (i = 0; i < vp->nr_insns; i++) { + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); + } +#endif + BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (chan, vp->exec->start); + for (i = 0; i < vp->nr_insns; i++) { + BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (chan, vp->insns[i].data, 4); + } + } + + if (vp->so != nvfx->state.hw[NVFX_STATE_VERTPROG]) { + so_ref(vp->so, &nvfx->state.hw[NVFX_STATE_VERTPROG]); + return TRUE; + } + + return FALSE; +} + +void +nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp) +{ + vp->translated = FALSE; + + if (vp->nr_insns) { + FREE(vp->insns); + vp->insns = NULL; + vp->nr_insns = 0; + } + + if (vp->nr_consts) { + FREE(vp->consts); + vp->consts = NULL; + vp->nr_consts = 0; + } + + nouveau_resource_free(&vp->exec); + vp->exec_start = 0; + nouveau_resource_free(&vp->data); + vp->data_start = 0; + vp->data_start_min = 0; + + vp->ir = vp->or = vp->clip_ctrl = 0; + so_ref(NULL, &vp->so); +} + +struct nvfx_state_entry nvfx_state_vertprog = { + .validate = nvfx_vertprog_validate, + .dirty = { + .pipe = NVFX_NEW_VERTPROG | NVFX_NEW_UCP, + .hw = NVFX_STATE_VERTPROG, + } +}; -- cgit v1.2.3 From 7d210fa05f286eb19398ac2f8c8f631f6f83c859 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Wed, 24 Feb 2010 15:08:48 +0100 Subject: nv30, nv40: partially non-trivially unify sampler state in nv[34]0_state.c Many things, like texture wrap modes and min/mag filters are common. Some others, like annisotropy and lod settings, are not. --- src/gallium/drivers/nv30/Makefile | 3 +- src/gallium/drivers/nv30/nv30_fragtex.c | 30 +++++ src/gallium/drivers/nv30/nv30_state.c | 179 ------------------------------ src/gallium/drivers/nv40/Makefile | 3 +- src/gallium/drivers/nv40/nv40_fragtex.c | 46 ++++++++ src/gallium/drivers/nv40/nv40_state.c | 190 -------------------------------- src/gallium/drivers/nvfx/nvfx_context.h | 10 +- src/gallium/drivers/nvfx/nvfx_state.c | 49 +++++--- src/gallium/drivers/nvfx/nvfx_state.h | 8 -- src/gallium/drivers/nvfx/nvfx_tex.h | 133 ++++++++++++++++++++++ 10 files changed, 252 insertions(+), 399 deletions(-) delete mode 100644 src/gallium/drivers/nv30/nv30_state.c delete mode 100644 src/gallium/drivers/nv40/nv40_state.c create mode 100644 src/gallium/drivers/nvfx/nvfx_tex.h (limited to 'src/gallium/drivers/nvfx/nvfx_state.c') diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile index 0a20de9e7a..70741101d6 100644 --- a/src/gallium/drivers/nv30/Makefile +++ b/src/gallium/drivers/nv30/Makefile @@ -4,8 +4,7 @@ include $(TOP)/configs/current LIBNAME = nv30 C_SOURCES = \ - nv30_fragtex.c \ - nv30_state.c + nv30_fragtex.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/nvfx diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c index 52f7c52a9b..081362d150 100644 --- a/src/gallium/drivers/nv30/nv30_fragtex.c +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -2,6 +2,36 @@ #include "nvfx_context.h" #include "nouveau/nouveau_util.h" +#include "nvfx_tex.h" + +void +nv30_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso) +{ + if (cso->max_anisotropy >= 8) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 4) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; + } else + if (cso->max_anisotropy >= 2) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; + } + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; + } +} #define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ { \ diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c deleted file mode 100644 index bf96f7cf35..0000000000 --- a/src/gallium/drivers/nv30/nv30_state.c +++ /dev/null @@ -1,179 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" - -#include "tgsi/tgsi_parse.h" - -#include "nv30_context.h" -#include "nvfx_state.h" - -static INLINE unsigned -wrap_mode(unsigned wrap) { - unsigned ret; - - switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: - ret = NV34TCL_TX_WRAP_S_REPEAT; - break; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_CLAMP: - ret = NV34TCL_TX_WRAP_S_CLAMP; - break; -/* case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP; - break;*/ - default: - NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV34TCL_TX_WRAP_S_REPEAT; - break; - } - - return ret >> NV34TCL_TX_WRAP_S_SHIFT; -} - -void * -nv30_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso) -{ - struct nvfx_sampler_state *ps; - uint32_t filter = 0; - - ps = MALLOC(sizeof(struct nvfx_sampler_state)); - - ps->fmt = 0; - /* TODO: Not all RECTs formats have this bit set, bits 15-8 of format - are the tx format to use. We should store normalized coord flag - in sampler state structure, and set appropriate format in - nvxx_fragtex_build() - */ - /*NV34TCL_TX_FORMAT_RECT*/ - /*if (!cso->normalized_coords) { - ps->fmt |= (1<<14) ; - }*/ - - ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | - (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | - (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT)); - - ps->en = 0; - - if (cso->max_anisotropy >= 8) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 4) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; - } else - if (cso->max_anisotropy >= 2) { - ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; - } - - switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; - break; - case PIPE_TEX_FILTER_NEAREST: - default: - filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; - break; - } - - switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; - break; - } - break; - case PIPE_TEX_FILTER_NEAREST: - default: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; - break; - } - break; - } - - ps->filt = filter; - - { - float limit; - - limit = CLAMP(cso->lod_bias, -16.0, 15.0); - ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; - - limit = CLAMP(cso->max_lod, 0.0, 15.0); - ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; - - limit = CLAMP(cso->min_lod, 0.0, 15.0); - ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; - } - - if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - switch (cso->compare_func) { - case PIPE_FUNC_NEVER: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER; - break; - case PIPE_FUNC_GREATER: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER; - break; - case PIPE_FUNC_EQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL; - break; - case PIPE_FUNC_GEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL; - break; - case PIPE_FUNC_LESS: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS; - break; - case PIPE_FUNC_NOTEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; - break; - case PIPE_FUNC_LEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL; - break; - case PIPE_FUNC_ALWAYS: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS; - break; - default: - break; - } - } - - ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | - (float_to_ubyte(cso->border_color[0]) << 16) | - (float_to_ubyte(cso->border_color[1]) << 8) | - (float_to_ubyte(cso->border_color[2]) << 0)); - - return (void *)ps; -} - diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile index 6e764512cf..8b69f9432d 100644 --- a/src/gallium/drivers/nv40/Makefile +++ b/src/gallium/drivers/nv40/Makefile @@ -4,8 +4,7 @@ include $(TOP)/configs/current LIBNAME = nv40 C_SOURCES = \ - nv40_fragtex.c \ - nv40_state.c + nv40_fragtex.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/nvfx diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c index 7615095025..5889b5e40d 100644 --- a/src/gallium/drivers/nv40/nv40_fragtex.c +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -1,5 +1,51 @@ #include "util/u_format.h" #include "nvfx_context.h" +#include "nvfx_tex.h" + +void +nv40_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso) +{ + if (cso->max_anisotropy >= 2) { + /* no idea, binary driver sets it, works without it.. meh.. */ + ps->wrap |= (1 << 5); + + if (cso->max_anisotropy >= 16) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; + } else + if (cso->max_anisotropy >= 12) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; + } else + if (cso->max_anisotropy >= 10) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; + } else + if (cso->max_anisotropy >= 8) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 6) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; + } else + if (cso->max_anisotropy >= 4) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; + } else { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; + } + } + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 7; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 19; + } +} #define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \ { \ diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c deleted file mode 100644 index 4d4098bc42..0000000000 --- a/src/gallium/drivers/nv40/nv40_state.c +++ /dev/null @@ -1,190 +0,0 @@ -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" - -#include "draw/draw_context.h" - -#include "tgsi/tgsi_parse.h" - -#include "nv40_context.h" -#include "nvfx_state.h" - -static INLINE unsigned -wrap_mode(unsigned wrap) { - unsigned ret; - - switch (wrap) { - case PIPE_TEX_WRAP_REPEAT: - ret = NV34TCL_TX_WRAP_S_REPEAT; - break; - case PIPE_TEX_WRAP_MIRROR_REPEAT: - ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; - break; - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_CLAMP: - ret = NV34TCL_TX_WRAP_S_CLAMP; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; - break; - case PIPE_TEX_WRAP_MIRROR_CLAMP: - ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; - break; - default: - NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - ret = NV34TCL_TX_WRAP_S_REPEAT; - break; - } - - return ret >> NV34TCL_TX_WRAP_S_SHIFT; -} - -void * -nv40_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso) -{ - struct nvfx_sampler_state *ps; - uint32_t filter = 0; - - ps = MALLOC(sizeof(struct nvfx_sampler_state)); - - ps->fmt = 0; - if (!cso->normalized_coords) - ps->fmt |= NV40TCL_TEX_FORMAT_RECT; - - ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | - (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | - (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT)); - - ps->en = 0; - if (cso->max_anisotropy >= 2) { - /* no idea, binary driver sets it, works without it.. meh.. */ - ps->wrap |= (1 << 5); - - if (cso->max_anisotropy >= 16) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; - } else - if (cso->max_anisotropy >= 12) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; - } else - if (cso->max_anisotropy >= 10) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; - } else - if (cso->max_anisotropy >= 8) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; - } else - if (cso->max_anisotropy >= 6) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; - } else - if (cso->max_anisotropy >= 4) { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; - } else { - ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; - } - } - - switch (cso->mag_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; - break; - case PIPE_TEX_FILTER_NEAREST: - default: - filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; - break; - } - - switch (cso->min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; - break; - } - break; - case PIPE_TEX_FILTER_NEAREST: - default: - switch (cso->min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; - break; - case PIPE_TEX_MIPFILTER_NONE: - default: - filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; - break; - } - break; - } - - ps->filt = filter; - - { - float limit; - - limit = CLAMP(cso->lod_bias, -16.0, 15.0); - ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; - - limit = CLAMP(cso->max_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 7; - - limit = CLAMP(cso->min_lod, 0.0, 15.0); - ps->en |= (int)(limit * 256.0) << 19; - } - - - if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - switch (cso->compare_func) { - case PIPE_FUNC_NEVER: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER; - break; - case PIPE_FUNC_GREATER: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER; - break; - case PIPE_FUNC_EQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL; - break; - case PIPE_FUNC_GEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL; - break; - case PIPE_FUNC_LESS: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS; - break; - case PIPE_FUNC_NOTEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; - break; - case PIPE_FUNC_LEQUAL: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL; - break; - case PIPE_FUNC_ALWAYS: - ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS; - break; - default: - break; - } - } - - ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | - (float_to_ubyte(cso->border_color[0]) << 16) | - (float_to_ubyte(cso->border_color[1]) << 8) | - (float_to_ubyte(cso->border_color[2]) << 0)); - - return (void *)ps; -} - diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index e538904298..5eed8a560e 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -219,13 +219,19 @@ extern void nvfx_fragprog_destroy(struct nvfx_context *, struct nvfx_fragment_program *); /* nv30_fragtex.c */ -extern void nv30_init_sampler_functions(struct nvfx_context *nvfx); +extern void +nv30_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso); extern void nv30_fragtex_bind(struct nvfx_context *); extern struct nouveau_stateobj * nv30_fragtex_build(struct nvfx_context *nvfx, int unit); /* nv40_fragtex.c */ -extern void nv40_init_sampler_functions(struct nvfx_context *nvfx); +extern void +nv40_sampler_state_init(struct pipe_context *pipe, + struct nvfx_sampler_state *ps, + const struct pipe_sampler_state *cso); extern void nv40_fragtex_bind(struct nvfx_context *); extern struct nouveau_stateobj * nv40_fragtex_build(struct nvfx_context *nvfx, int unit); diff --git a/src/gallium/drivers/nvfx/nvfx_state.c b/src/gallium/drivers/nvfx/nvfx_state.c index 7e138afc71..88a9d01c50 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.c +++ b/src/gallium/drivers/nvfx/nvfx_state.c @@ -8,6 +8,7 @@ #include "nvfx_context.h" #include "nvfx_state.h" +#include "nvfx_tex.h" static void * nvfx_blend_state_create(struct pipe_context *pipe, @@ -82,8 +83,35 @@ nvfx_blend_state_delete(struct pipe_context *pipe, void *hwcso) FREE(bso); } +static void * +nvfx_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_sampler_state *ps; + + ps = MALLOC(sizeof(struct nvfx_sampler_state)); + + /* on nv30, we use this as an internal flag */ + ps->fmt = cso->normalized_coords ? 0 : NV40TCL_TEX_FORMAT_RECT; + ps->en = 0; + ps->filt = nvfx_tex_filter(cso); + ps->wrap = (nvfx_tex_wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | + (nvfx_tex_wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | + (nvfx_tex_wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT) | + nvfx_tex_wrap_compare_mode(cso); + ps->bcol = nvfx_tex_border_color(cso->border_color); + + if(nvfx->is_nv4x) + nv40_sampler_state_init(pipe, ps, cso); + else + nv30_sampler_state_init(pipe, ps, cso); + + return (void *)ps; +} + static void -nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +nvfx_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) { struct nvfx_context *nvfx = nvfx_context(pipe); unsigned unit; @@ -103,7 +131,7 @@ nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) } static void -nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +nvfx_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { FREE(hwcso); } @@ -543,14 +571,6 @@ nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso) /*nvfx->draw_dirty |= NVFX_NEW_ARRAYS;*/ } -void * -nv30_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso); - -void * -nv40_sampler_state_create(struct pipe_context *pipe, - const struct pipe_sampler_state *cso); - void nvfx_init_state_functions(struct nvfx_context *nvfx) { @@ -558,12 +578,9 @@ nvfx_init_state_functions(struct nvfx_context *nvfx) nvfx->pipe.bind_blend_state = nvfx_blend_state_bind; nvfx->pipe.delete_blend_state = nvfx_blend_state_delete; - if(nvfx->is_nv4x) - nvfx->pipe.create_sampler_state = nv40_sampler_state_create; - else - nvfx->pipe.create_sampler_state = nv30_sampler_state_create; - nvfx->pipe.bind_fragment_sampler_states = nv40_sampler_state_bind; - nvfx->pipe.delete_sampler_state = nv40_sampler_state_delete; + nvfx->pipe.create_sampler_state = nvfx_sampler_state_create; + nvfx->pipe.bind_fragment_sampler_states = nvfx_sampler_state_bind; + nvfx->pipe.delete_sampler_state = nvfx_sampler_state_delete; nvfx->pipe.set_fragment_sampler_textures = nvfx_set_sampler_texture; nvfx->pipe.create_rasterizer_state = nvfx_rasterizer_state_create; diff --git a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h index b243b1020f..e585246879 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.h +++ b/src/gallium/drivers/nvfx/nvfx_state.h @@ -4,14 +4,6 @@ #include "pipe/p_state.h" #include "tgsi/tgsi_scan.h" -struct nvfx_sampler_state { - uint32_t fmt; - uint32_t wrap; - uint32_t en; - uint32_t filt; - uint32_t bcol; -}; - struct nvfx_vertex_program_exec { uint32_t data[4]; boolean has_branch_offset; diff --git a/src/gallium/drivers/nvfx/nvfx_tex.h b/src/gallium/drivers/nvfx/nvfx_tex.h new file mode 100644 index 0000000000..69187a79e7 --- /dev/null +++ b/src/gallium/drivers/nvfx/nvfx_tex.h @@ -0,0 +1,133 @@ +#ifndef NVFX_TEX_H_ +#define NVFX_TEX_H_ + +static inline unsigned +nvfx_tex_wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV34TCL_TX_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV34TCL_TX_WRAP_S_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; + break; + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV34TCL_TX_WRAP_S_REPEAT; + break; + } + + return ret >> NV34TCL_TX_WRAP_S_SHIFT; +} + +static inline unsigned +nvfx_tex_wrap_compare_mode(const struct pipe_sampler_state* cso) +{ + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + return NV34TCL_TX_WRAP_RCOMP_NEVER; + case PIPE_FUNC_GREATER: + return NV34TCL_TX_WRAP_RCOMP_GREATER; + case PIPE_FUNC_EQUAL: + return NV34TCL_TX_WRAP_RCOMP_EQUAL; + case PIPE_FUNC_GEQUAL: + return NV34TCL_TX_WRAP_RCOMP_GEQUAL; + case PIPE_FUNC_LESS: + return NV34TCL_TX_WRAP_RCOMP_LESS; + case PIPE_FUNC_NOTEQUAL: + return NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; + case PIPE_FUNC_LEQUAL: + return NV34TCL_TX_WRAP_RCOMP_LEQUAL; + case PIPE_FUNC_ALWAYS: + return NV34TCL_TX_WRAP_RCOMP_ALWAYS; + default: + break; + } + } + return 0; +} + +static inline unsigned nvfx_tex_filter(const struct pipe_sampler_state* cso) +{ + unsigned filter = 0; + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; + break; + } + break; + } + return filter; +} + +static inline unsigned nvfx_tex_border_color(const float* border_color) +{ + return ((float_to_ubyte(border_color[3]) << 24) | + (float_to_ubyte(border_color[0]) << 16) | + (float_to_ubyte(border_color[1]) << 8) | + (float_to_ubyte(border_color[2]) << 0)); +} + +struct nvfx_sampler_state { + uint32_t fmt; + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +#endif /* NVFX_TEX_H_ */ -- cgit v1.2.3