From 1ba10e5ccf5cd0c990922e982e1e9bc6be48a5e4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 09:44:16 +0100 Subject: draw: add aos vertex shader varient --- src/gallium/auxiliary/draw/draw_vs_aos.h | 181 +++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 src/gallium/auxiliary/draw/draw_vs_aos.h (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h new file mode 100644 index 0000000000..1d8a055a90 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -0,0 +1,181 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef DRAW_VS_AOS_H +#define DRAW_VS_AOS_H + + +struct tgsi_token; +struct x86_function; + +#include "pipe/p_state.h" +#include "rtasm/rtasm_x86sse.h" + + + + + +#define X 0 +#define Y 1 +#define Z 2 +#define W 3 + +#define MAX_INPUTS PIPE_MAX_ATTRIBS +#define MAX_OUTPUTS PIPE_MAX_ATTRIBS +#define MAX_TEMPS PIPE_MAX_ATTRIBS /* say */ +#define MAX_CONSTANTS PIPE_MAX_ATTRIBS /* say */ +#define MAX_IMMEDIATES PIPE_MAX_ATTRIBS /* say */ +#define MAX_INTERNALS 4 + +#define AOS_FILE_INTERNAL TGSI_FILE_COUNT + +/* This is the temporary storage used by all the aos_sse vs varients. + * Create one per context and reuse by passing a pointer in at + * vs_varient creation?? + */ +struct aos_machine { + float input [MAX_INPUTS ][4]; + float output [MAX_OUTPUTS ][4]; + float temp [MAX_TEMPS ][4]; + float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */ + float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */ + float internal [MAX_INTERNALS ][4]; + + unsigned fpu_round_nearest; + unsigned fpu_round_neg_inf; + + struct { + const void *input_ptr; + unsigned input_stride; + + unsigned output_offset; + } attrib[PIPE_MAX_ATTRIBS]; +}; + + + + +struct aos_compilation { + struct x86_function *func; + struct draw_vs_varient_aos_sse *vaos; + + unsigned insn_counter; + unsigned num_immediates; + + struct { + unsigned idx:16; + unsigned file:8; + unsigned dirty:8; + unsigned last_used; + } xmm[8]; + + + boolean input_fetched[PIPE_MAX_ATTRIBS]; + unsigned output_last_write[PIPE_MAX_ATTRIBS]; + + boolean have_sse2; + boolean error; + short fpucntl; + + /* these are actually known values, but putting them in a struct + * like this is helpful to keep them in sync across the file. + */ + struct x86_reg tmp_EAX; + struct x86_reg idx_EBX; /* either start+i or &elt[i] */ + struct x86_reg outbuf_ECX; + struct x86_reg machine_EDX; + struct x86_reg count_ESI; /* decrements to zero */ +}; + +struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp ); +void aos_release_xmm_reg( struct aos_compilation *cp, unsigned idx ); + +void aos_adopt_xmm_reg( struct aos_compilation *cp, + struct x86_reg reg, + unsigned file, + unsigned idx, + unsigned dirty ); + +struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, + unsigned file, + unsigned idx ); + +boolean aos_fetch_inputs( struct aos_compilation *cp, + boolean linear ); + +boolean aos_emit_outputs( struct aos_compilation *cp ); + + +#define IMM_ONES 0 /* 1, 1,1,1 */ +#define IMM_NEGS 1 /* 1,-1,0,0 */ +#define IMM_IDENTITY 2 /* 0, 0,0,1 */ +#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */ +#define IMM_255 4 /* 255, 255, 255, 255 */ + +struct x86_reg aos_get_internal( struct aos_compilation *cp, + unsigned imm ); + + +#define ERROR(cp, msg) \ +do { \ + debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, msg); \ + cp->error = 1; \ + assert(0); \ +} while (0) + + + + + + +struct draw_vs_varient_aos_sse { + struct draw_vs_varient base; + struct draw_context *draw; + +#if 0 + struct { + const void *ptr; + unsigned stride; + } attrib[PIPE_MAX_ATTRIBS]; +#endif + + struct aos_machine *machine; /* XXX: temporarily unshared */ + + vsv_run_linear_func gen_run_linear; + vsv_run_elts_func gen_run_elts; + + + struct x86_function func[2]; +}; + + + +#endif + -- cgit v1.2.3 From 889473b3f5a216bd753c357974d6bae29fe3c41d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 20:28:56 +0100 Subject: draw: add viewport to varient state --- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 9 +++- src/gallium/auxiliary/draw/draw_vs.h | 8 +++- src/gallium/auxiliary/draw/draw_vs_aos.c | 50 ++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs_aos.h | 9 +++- src/gallium/auxiliary/draw/draw_vs_sse.c | 4 +- src/gallium/auxiliary/draw/draw_vs_varient.c | 10 +++++ 6 files changed, 84 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 74945dcfe9..984fbb6767 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -95,10 +95,14 @@ static void fse_prepare( struct draw_pt_middle_end *middle, + fse->key.output_stride = vinfo->size * 4; fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */ num_vs_inputs); /* inputs - fetch from api format */ - fse->key.output_stride = vinfo->size * 4; + fse->key.viewport = 1; + fse->key.clip = 0; + fse->key.pad = 0; + memset(fse->key.element, 0, fse->key.nr_elements * sizeof(fse->key.element[0])); @@ -211,6 +215,9 @@ static void fse_prepare( struct draw_pt_middle_end *middle, fse->active->set_constants( fse->active, (const float (*)[4])draw->pt.user.constants ); + fse->active->set_viewport( fse->active, + &draw->viewport ); + //return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 5a8d0da06d..ff3e19b2a8 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -58,7 +58,10 @@ struct draw_vs_element { struct draw_vs_varient_key { unsigned output_stride; - unsigned nr_elements; + unsigned nr_elements:16; + unsigned viewport:1; + unsigned clip:1; + unsigned pad:14; struct draw_vs_element element[PIPE_MAX_ATTRIBS]; }; @@ -88,6 +91,9 @@ struct draw_vs_varient { void (*set_constants)( struct draw_vs_varient *, const float (*constants)[4] ); + void (*set_viewport)( struct draw_vs_varient *, + const struct pipe_viewport_state * ); + void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader, unsigned start, unsigned count, diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 620f5e3592..b8e66e8b78 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1401,6 +1401,37 @@ emit_instruction( struct aos_compilation *cp, } } + +static boolean emit_viewport( struct aos_compilation *cp ) +{ + struct x86_reg pos = aos_get_shader_reg(cp, + TGSI_FILE_OUTPUT, + 0); + + struct x86_reg scale = x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, scale)); + + struct x86_reg translate = x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, translate)); + + if (pos.file != file_XMM) { + struct x86_reg dst = aos_get_xmm_reg(cp); + sse_movups(cp->func, dst, pos); + pos = dst; + } + + sse_mulps(cp->func, pos, scale); + sse_addps(cp->func, pos, translate); + + aos_adopt_xmm_reg( cp, + pos, + TGSI_FILE_OUTPUT, + 0, + TRUE ); + return TRUE; +} + + static boolean note_immediate( struct aos_compilation *cp, struct tgsi_full_immediate *imm ) { @@ -1540,6 +1571,10 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, if (cp.error) goto fail; + if (cp.vaos->base.key.viewport) { + emit_viewport(&cp); + } + /* Emit output... TODO: do this eagerly after the last write to a * given output. */ @@ -1665,11 +1700,25 @@ static void vaos_set_constants( struct draw_vs_varient *varient, } +static void vaos_set_viewport( struct draw_vs_varient *varient, + const struct pipe_viewport_state *viewport ) +{ + struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + + memcpy(vaos->machine->scale, viewport->scale, 4 * sizeof(float)); + memcpy(vaos->machine->translate, viewport->translate, 4 * sizeof(float)); +} + + + static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, const struct draw_vs_varient_key *key ) { struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse); + if (key->clip) + return NULL; + if (!vaos) goto fail; @@ -1677,6 +1726,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, vaos->base.vs = vs; vaos->base.set_input = vaos_set_buffer; vaos->base.set_constants = vaos_set_constants; + vaos->base.set_viewport = vaos_set_viewport; vaos->base.destroy = vaos_destroy; vaos->base.run_linear = vaos_run_linear; vaos->base.run_elts = vaos_run_elts; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 1d8a055a90..16fef6451c 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -68,8 +68,13 @@ struct aos_machine { float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */ float internal [MAX_INTERNALS ][4]; - unsigned fpu_round_nearest; - unsigned fpu_round_neg_inf; + float scale[4]; /* viewport */ + float translate[4]; /* viewport */ + + ushort fpu_round_nearest; + ushort fpu_round_neg_inf; + ushort fpu_restore; + ushort fpucntl; /* one of FPU_* above */ struct { const void *input_ptr; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 0581c3042f..7781782ae8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -156,8 +156,8 @@ draw_create_vs_sse(struct draw_context *draw, tgsi_scan_shader(templ->tokens, &vs->base.info); vs->base.draw = draw; - vs->base.create_varient = draw_vs_varient_generic; -// vs->base.create_varient = draw_vs_varient_aos_sse; + vs->base.create_varient = draw_vs_varient_aos_sse; +// vs->base.create_varient = draw_vs_varient_generic; vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index d27b0f6187..f6f621a748 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -167,6 +167,12 @@ static void vsvg_run_linear( struct draw_vs_varient *varient, + +static void vsvg_set_viewport( struct draw_vs_varient *varient, + const struct pipe_viewport_state *viewport ) +{ +} + static void vsvg_destroy( struct draw_vs_varient *varient ) { FREE(varient); @@ -179,6 +185,9 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, unsigned i; struct translate_key fetch, emit; + if (key->viewport || key->clip) + return NULL; + struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic ); if (vsvg == NULL) return NULL; @@ -187,6 +196,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, vsvg->base.vs = vs; vsvg->base.set_input = vsvg_set_input; vsvg->base.set_constants = vsvg_set_constants; + vsvg->base.set_viewport = vsvg_set_viewport; vsvg->base.run_elts = vsvg_run_elts; vsvg->base.run_linear = vsvg_run_linear; vsvg->base.destroy = vsvg_destroy; -- cgit v1.2.3 From 194a7be28f6eed502f2475d9a637cb3610ca75f6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 20:31:08 +0100 Subject: draw: fix vs aos internal/machine state --- src/gallium/auxiliary/draw/draw_vs_aos.c | 59 ++++++++++++++++++++++++++++++-- src/gallium/auxiliary/draw/draw_vs_aos.h | 9 +++-- 2 files changed, 63 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index b8e66e8b78..67761f881d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -83,7 +83,7 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, return x86_make_disp(ptr, Offset(struct aos_machine, constant[idx])); case AOS_FILE_INTERNAL: - return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx])); + return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx])); default: ERROR(cp, "unknown reg file"); @@ -97,9 +97,63 @@ struct x86_reg aos_get_internal( struct aos_compilation *cp, { return get_reg_ptr( cp, AOS_FILE_INTERNAL, - imm + 1 ); + imm ); +} + +#define X87_CW_EXCEPTION_INV_OP (1<<0) +#define X87_CW_EXCEPTION_DENORM_OP (1<<1) +#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2) +#define X87_CW_EXCEPTION_OVERFLOW (1<<3) +#define X87_CW_EXCEPTION_UNDERFLOW (1<<4) +#define X87_CW_EXCEPTION_PRECISION (1<<5) +#define X87_CW_PRECISION_SINGLE (0<<8) +#define X87_CW_PRECISION_RESERVED (1<<8) +#define X87_CW_PRECISION_DOUBLE (2<<8) +#define X87_CW_PRECISION_DOUBLE_EXT (3<<8) +#define X87_CW_PRECISION_MASK (3<<8) +#define X87_CW_ROUND_NEAREST (0<<10) +#define X87_CW_ROUND_DOWN (1<<10) +#define X87_CW_ROUND_UP (2<<10) +#define X87_CW_ROUND_ZERO (3<<10) +#define X87_CW_ROUND_MASK (3<<10) +#define X87_CW_INFINITY (1<<12) + +static void init_internals( struct aos_machine *machine ) +{ + float inv = 1.0f/255.0f; + float f255 = 255.0f; + + ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f); + ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f); + ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f); + ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv); + ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255); + + + machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP | + X87_CW_EXCEPTION_DENORM_OP | + X87_CW_EXCEPTION_ZERO_DIVIDE | + X87_CW_EXCEPTION_OVERFLOW | + X87_CW_EXCEPTION_UNDERFLOW | + X87_CW_EXCEPTION_PRECISION | + (1<<6) | + X87_CW_ROUND_NEAREST | + X87_CW_PRECISION_DOUBLE_EXT); + + assert(machine->fpu_rnd_nearest == 0x37f); + + machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP | + X87_CW_EXCEPTION_DENORM_OP | + X87_CW_EXCEPTION_ZERO_DIVIDE | + X87_CW_EXCEPTION_OVERFLOW | + X87_CW_EXCEPTION_UNDERFLOW | + X87_CW_EXCEPTION_PRECISION | + (1<<6) | + X87_CW_ROUND_DOWN | + X87_CW_PRECISION_DOUBLE_EXT); } + static void spill( struct aos_compilation *cp, unsigned idx ) { if (!cp->xmm[idx].dirty || @@ -1736,6 +1790,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, goto fail; memset(vaos->machine, 0, sizeof(struct aos_machine)); + init_internals(vaos->machine); tgsi_dump(vs->state.tokens, 0); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 16fef6451c..c2afd4e9a0 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -52,10 +52,13 @@ struct x86_function; #define MAX_TEMPS PIPE_MAX_ATTRIBS /* say */ #define MAX_CONSTANTS PIPE_MAX_ATTRIBS /* say */ #define MAX_IMMEDIATES PIPE_MAX_ATTRIBS /* say */ -#define MAX_INTERNALS 4 +#define MAX_INTERNALS 8 #define AOS_FILE_INTERNAL TGSI_FILE_COUNT +#define FPU_RND_NEG 1 +#define FPU_RND_NEAREST 2 + /* This is the temporary storage used by all the aos_sse vs varients. * Create one per context and reuse by passing a pointer in at * vs_varient creation?? @@ -71,8 +74,8 @@ struct aos_machine { float scale[4]; /* viewport */ float translate[4]; /* viewport */ - ushort fpu_round_nearest; - ushort fpu_round_neg_inf; + ushort fpu_rnd_nearest; + ushort fpu_rnd_neg_inf; ushort fpu_restore; ushort fpucntl; /* one of FPU_* above */ -- cgit v1.2.3 From c684ffa02d8d43ee04b99ee63ccd1adb66e81c1a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 22 May 2008 13:41:49 +0100 Subject: draw: clean up internal immediates in aos sse --- src/gallium/auxiliary/draw/draw_vs_aos.c | 64 +++++++++++++++++++++-------- src/gallium/auxiliary/draw/draw_vs_aos.h | 5 ++- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 10 ++--- 3 files changed, 55 insertions(+), 24 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index fde92c7226..0b8600696a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -44,12 +44,6 @@ #ifdef PIPE_ARCH_X86 -#define DISASSEM 0 - - - - - static INLINE boolean eq( struct x86_reg a, struct x86_reg b ) { @@ -92,13 +86,6 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, } -struct x86_reg aos_get_internal( struct aos_compilation *cp, - unsigned imm ) -{ - return get_reg_ptr( cp, - AOS_FILE_INTERNAL, - imm ); -} #define X87_CW_EXCEPTION_INV_OP (1<<0) #define X87_CW_EXCEPTION_DENORM_OP (1<<1) @@ -123,6 +110,9 @@ static void init_internals( struct aos_machine *machine ) float inv = 1.0f/255.0f; float f255 = 255.0f; + ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f); + *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff; + ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f); ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f); ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f); @@ -337,6 +327,39 @@ struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, +static struct x86_reg aos_get_shader_reg_xmm( struct aos_compilation *cp, + unsigned file, + unsigned idx ) +{ + struct x86_reg reg = aos_get_shader_reg( cp, file, idx ); + + if (reg.file != file_XMM) { + struct x86_reg tmp = aos_get_xmm_reg(cp); + sse_movups(cp->func, tmp, reg); + aos_adopt_xmm_reg( cp, tmp, file, idx, FALSE ); + reg = tmp; + } + + return reg; +} + + + +struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp, + unsigned imm ) +{ + return aos_get_shader_reg_xmm( cp, AOS_FILE_INTERNAL, imm ); +} + + +struct x86_reg aos_get_internal( struct aos_compilation *cp, + unsigned imm ) +{ + return aos_get_shader_reg( cp, AOS_FILE_INTERNAL, imm ); +} + + + /* Emulate pshufd insn in regular SSE, if necessary: @@ -461,15 +484,15 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, arg0 = dst; } - if (negs) { - struct x86_reg imm_negs = aos_get_internal(cp, IMM_NEGS); + if (negs && negs != 0xf) { + struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ); struct x86_reg tmp = aos_get_xmm_reg(cp); /* Load 1,-1,0,0 * Use neg as arg to pshufd * Multiply */ - emit_pshufd(cp, tmp, imm_negs, + emit_pshufd(cp, tmp, imm_swz, SHUF((negs & 1) ? 1 : 0, (negs & 2) ? 1 : 0, (negs & 4) ? 1 : 0, @@ -479,12 +502,17 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, aos_release_xmm_reg(cp, tmp.idx); arg0 = dst; } + else if (negs) { + struct x86_reg imm_negs = aos_get_internal_xmm(cp, IMM_NEGS); + sse_mulps(cp->func, dst, imm_negs); + arg0 = dst; + } + if (abs && abs != 0xf) { ERROR(cp, "unsupported partial abs"); } - - if (abs) { + else if (abs) { struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); struct x86_reg tmp = aos_get_xmm_reg(cp); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index c2afd4e9a0..efdc9a38f4 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -142,13 +142,16 @@ boolean aos_emit_outputs( struct aos_compilation *cp ); #define IMM_ONES 0 /* 1, 1,1,1 */ -#define IMM_NEGS 1 /* 1,-1,0,0 */ +#define IMM_SWZ 1 /* 1,-1,0, 0xffffffff */ #define IMM_IDENTITY 2 /* 0, 0,0,1 */ #define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */ #define IMM_255 4 /* 255, 255, 255, 255 */ +#define IMM_NEGS 5 /* -1,-1,-1,-1 */ struct x86_reg aos_get_internal( struct aos_compilation *cp, unsigned imm ); +struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp, + unsigned imm ); #define ERROR(cp, msg) \ diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 72b2b3d11d..0dda9df97d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -54,7 +54,7 @@ static void emit_load_R32G32B32( struct aos_compilation *cp, struct x86_reg src_ptr ) { sse_movss(cp->func, data, x86_make_disp(src_ptr, 8)); - sse_shufps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) ); + sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) ); sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) ); sse_movlps(cp->func, data, src_ptr); } @@ -63,7 +63,7 @@ static void emit_load_R32G32( struct aos_compilation *cp, struct x86_reg data, struct x86_reg src_ptr ) { - sse_movups(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) ); + sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) ); sse_movlps(cp->func, data, src_ptr); } @@ -73,7 +73,7 @@ static void emit_load_R32( struct aos_compilation *cp, struct x86_reg src_ptr ) { sse_movss(cp->func, data, src_ptr); - sse_orps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) ); + sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) ); } @@ -82,8 +82,8 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp, struct x86_reg src_ptr ) { sse_movss(cp->func, data, src_ptr); - sse2_punpcklbw(cp->func, data, aos_get_internal( cp, IMM_IDENTITY )); - sse2_punpcklbw(cp->func, data, aos_get_internal( cp, IMM_IDENTITY )); + sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY )); + sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY )); sse2_cvtdq2ps(cp->func, data, data); sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255)); } -- cgit v1.2.3 From 6172f1295cf812108d8ceba15a83ba87880360d3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 24 May 2008 13:22:29 +0100 Subject: draw: add a debug-print which can be called from inside generated shaders --- src/gallium/auxiliary/draw/draw_vs_aos.c | 67 ++++++++++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs_aos.h | 1 + 2 files changed, 68 insertions(+) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index b0c3ac49d2..aa119f242e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -752,7 +752,63 @@ static void x87_emit_ex2( struct aos_compilation *cp ) } +static void PIPE_CDECL print_reg( const char *msg, + const float *reg ) +{ + debug_printf("%s: %f %f %f %f\n", msg, reg[0], reg[1], reg[2], reg[3]); +} + +static void emit_print( struct aos_compilation *cp, + const char *message, /* must point to a static string! */ + unsigned file, + unsigned idx ) +{ + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + struct x86_reg arg = get_reg_ptr( cp, file, idx ); + unsigned i; + + /* There shouldn't be anything on the x87 stack. Can add this + * capacity later if need be. + */ + assert(cp->func->x87_stack == 0); + + /* For absolute correctness, need to spill/invalidate all XMM regs + * too. We're obviously not concerned about performance on this + * debug path, so here goes: + */ + for (i = 0; i < 8; i++) { + if (cp->xmm[i].dirty) + spill(cp, i); + aos_release_xmm_reg(cp, i); + } + + /* Push caller-save (ie scratch) regs. + */ + x86_cdecl_caller_push_regs( cp->func ); + + + /* Push the arguments: + */ + x86_lea( cp->func, ecx, arg ); + x86_push( cp->func, ecx ); + x86_push_imm32( cp->func, (int)message ); + + /* Call the helper. Could call debug_printf directly, but + * print_reg is a nice place to put a breakpoint if need be. + */ + x86_mov_reg_imm( cp->func, ecx, (int)print_reg ); + x86_call( cp->func, ecx ); + x86_pop( cp->func, ecx ); + x86_pop( cp->func, ecx ); + + /* Pop caller-save regs + */ + x86_cdecl_caller_pop_regs( cp->func ); + + /* Done... + */ +} /** * The traditional instructions. All operate on internal registers @@ -1798,6 +1854,17 @@ static void vaos_set_constants( struct draw_vs_varient *varient, memcpy(vaos->machine->constant, constants, (vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1) * 4 * sizeof(float)); + +#if 0 + unsigned i; + for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++) + debug_printf("state %d: %f %f %f %f\n", + i, + constants[i][0], + constants[i][1], + constants[i][2], + constants[i][3]); +#endif } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index efdc9a38f4..a0680ec63d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -96,6 +96,7 @@ struct aos_compilation { unsigned insn_counter; unsigned num_immediates; + unsigned count; struct { unsigned idx:16; -- cgit v1.2.3 From 86e529ad90411d21bca3d70984b2db202e7a0cd6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 24 May 2008 16:31:11 +0100 Subject: draw: use lookup tables to avoid calling pow() in LIT opcode --- src/gallium/auxiliary/draw/draw_vs_aos.c | 251 ++++++++++++++++++++++++++++++- src/gallium/auxiliary/draw/draw_vs_aos.h | 27 ++++ 2 files changed, 272 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index aa119f242e..1fbb7088ca 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -105,8 +105,31 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, #define X87_CW_ROUND_MASK (3<<10) #define X87_CW_INFINITY (1<<12) +static void do_populate_lut( struct shine_tab *tab, + float unclamped_exponent ) +{ + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon)); + unsigned i; + + tab->exponent = unclamped_exponent; /* for later comparison */ + + tab->values[0] = 0; + if (exponent == 0) { + for (i = 1; i < 258; i++) { + tab->values[i] = 1.0; + } + } + else { + for (i = 1; i < 258; i++) { + tab->values[i] = powf((float)i * epsilon, exponent); + } + } +} + static void init_internals( struct aos_machine *machine ) { + unsigned i; float inv = 1.0f/255.0f; float f255 = 255.0f; @@ -141,6 +164,9 @@ static void init_internals( struct aos_machine *machine ) (1<<6) | X87_CW_ROUND_DOWN | X87_CW_PRECISION_DOUBLE_EXT); + + for (i = 0; i < MAX_SHINE_TAB; i++) + do_populate_lut( &machine->shine_tab[i], 1.0f ); } @@ -1132,26 +1158,231 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } +static PIPE_CDECL void do_lit( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + if (in[0] > 0) + { + if (in[1] <= 0.0) + { + result[0] = 1.0F; + result[1] = in[0]; + result[2] = 1.0; + result[3] = 1.0F; + } + else + { + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); + result[0] = 1.0F; + result[1] = in[0]; + result[2] = powf(in[1], exponent); + result[3] = 1.0; + } + } + else + { + result[0] = 1.0F; + result[1] = 0.0; + result[2] = 0.0; + result[3] = 1.0F; + } +} + + +static PIPE_CDECL void do_lit_lut( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + if (in[0] > 0) + { + if (in[1] <= 0.0) + { + result[0] = 1.0F; + result[1] = in[0]; + result[2] = 1.0; + result[3] = 1.0F; + return; + } + + if (machine->lit_info[count].shine_tab->exponent != in[3]) { + machine->lit_info[count].func = do_lit; + goto no_luck; + } + + if (in[1] <= 1.0) + { + const float *tab = machine->lit_info[count].shine_tab->values; + float f = in[1] * 256; + int k = (int)f; + float frac = f - (float)k; + + result[0] = 1.0F; + result[1] = in[0]; + result[2] = tab[k] + frac*(tab[k+1]-tab[k]); + result[3] = 1.0; + return; + } + + no_luck: + { + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); + result[0] = 1.0F; + result[1] = in[0]; + result[2] = powf(in[1], exponent); + result[3] = 1.0; + } + } + else + { + result[0] = 1.0F; + result[1] = 0.0; + result[2] = 0.0; + result[3] = 1.0F; + } +} + + + +static void PIPE_CDECL populate_lut( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + unsigned i, tab; + + /* Search for an existing table for this value. Note that without + * static analysis we don't really know if in[3] will be constant, + * but it usually is... + */ + for (tab = 0; tab < 4; tab++) { + if (machine->shine_tab[tab].exponent == in[3]) { + goto found; + } + } + + for (tab = 0, i = 1; i < 4; i++) { + if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used) + tab = i; + } + + if (machine->shine_tab[tab].last_used == machine->now) { + /* No unused tables (this is not a ffvertex program...). Just + * call pow each time: + */ + machine->lit_info[count].func = do_lit; + machine->lit_info[count].func( machine, result, in, count ); + return; + } + else { + do_populate_lut( &machine->shine_tab[tab], in[3] ); + } + + found: + machine->shine_tab[tab].last_used = machine->now; + machine->lit_info[count].shine_tab = &machine->shine_tab[tab]; + machine->lit_info[count].func = do_lit_lut; + machine->lit_info[count].func( machine, result, in, count ); +} + + + static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + unsigned lit_count = cp->lit_count++; + struct x86_reg result, arg0; + unsigned i; + +#if 1 + /* For absolute correctness, need to spill/invalidate all XMM regs + * too. + */ + for (i = 0; i < 8; i++) { + if (cp->xmm[i].dirty) + spill(cp, i); + aos_release_xmm_reg(cp, i); + } +#endif + + if (writemask != TGSI_WRITEMASK_XYZW) + result = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, tmp[0])); + else + result = get_dst_ptr(cp, &op->FullDstRegisters[0]); + + + arg0 = fetch_src( cp, &op->FullSrcRegisters[0] ); + if (arg0.file == file_XMM) { + struct x86_reg tmp = x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, tmp[1])); + sse_movaps( cp->func, tmp, arg0 ); + arg0 = tmp; + } + + + + /* Push caller-save (ie scratch) regs. + */ + x86_cdecl_caller_push_regs( cp->func ); + + /* Push the arguments: + */ + x86_push_imm32( cp->func, lit_count ); + + x86_lea( cp->func, ecx, arg0 ); + x86_push( cp->func, ecx ); + x86_lea( cp->func, ecx, result ); + x86_push( cp->func, ecx ); + x86_push( cp->func, cp->machine_EDX ); + if (lit_count < MAX_LIT_INFO) { + x86_mov( cp->func, ecx, x86_make_disp( cp->machine_EDX, + Offset(struct aos_machine, lit_info) + + lit_count * sizeof(struct lit_info) + + Offset(struct lit_info, func))); + } + else { + x86_mov_reg_imm( cp->func, ecx, (int)do_lit ); + } + + x86_call( cp->func, ecx ); + + x86_pop( cp->func, ecx ); /* fixme... */ + x86_pop( cp->func, ecx ); + x86_pop( cp->func, ecx ); + x86_pop( cp->func, ecx ); + + x86_cdecl_caller_pop_regs( cp->func ); + + if (writemask != TGSI_WRITEMASK_XYZW) { + store_dest( cp, + &op->FullDstRegisters[0], + get_xmm_clone( cp, result ) ); + } + + return TRUE; +} + + +static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; if (writemask & TGSI_WRITEMASK_YZ) { struct x86_reg st1 = x86_make_reg(file_x87, 1); struct x86_reg st2 = x86_make_reg(file_x87, 2); - - - /* a1' = a1 <= 0 ? 1 : a1; - * - * Note: use 1.0 to avoid passing zero to */ x87_fldz(cp->func); /* 1 0 */ #if 1 @@ -1865,6 +2096,14 @@ static void vaos_set_constants( struct draw_vs_varient *varient, constants[i][2], constants[i][3]); #endif + + { + unsigned i; + for (i = 0; i < MAX_LIT_INFO; i++) { + vaos->machine->lit_info[i].func = populate_lut; + vaos->machine->now++; + } + } } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index a0680ec63d..c08c73d4bc 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -59,6 +59,25 @@ struct x86_function; #define FPU_RND_NEG 1 #define FPU_RND_NEAREST 2 +struct aos_machine; +typedef void PIPE_CDECL (*lit_func)( struct aos_machine *, + float *result, + const float *in, + unsigned count ); +struct shine_tab { + float exponent; + float values[258]; + unsigned last_used; +}; + +struct lit_info { + lit_func func; + struct shine_tab *shine_tab; +}; + +#define MAX_SHINE_TAB 4 +#define MAX_LIT_INFO 16 + /* This is the temporary storage used by all the aos_sse vs varients. * Create one per context and reuse by passing a pointer in at * vs_varient creation?? @@ -74,6 +93,13 @@ struct aos_machine { float scale[4]; /* viewport */ float translate[4]; /* viewport */ + float tmp[2][4]; /* scratch space for LIT */ + + struct shine_tab shine_tab[MAX_SHINE_TAB]; + struct lit_info lit_info[MAX_LIT_INFO]; + unsigned now; + + ushort fpu_rnd_nearest; ushort fpu_rnd_neg_inf; ushort fpu_restore; @@ -97,6 +123,7 @@ struct aos_compilation { unsigned insn_counter; unsigned num_immediates; unsigned count; + unsigned lit_count; struct { unsigned idx:16; -- cgit v1.2.3 From 351eca365c0ba488000c3826d5093de6170381e4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 26 May 2008 11:03:00 +0100 Subject: draw: extend precision in RSQ opcode --- src/gallium/auxiliary/draw/draw_vs_aos.c | 48 ++++++++++++++++++-------------- src/gallium/auxiliary/draw/draw_vs_aos.h | 1 + 2 files changed, 28 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 916203c66b..1622358ae1 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -140,7 +140,8 @@ static void init_internals( struct aos_machine *machine ) ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f); ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f); ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv); - ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255); + ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255); + ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f); machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP | @@ -1561,35 +1562,40 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst * * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a) * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)] + * or: + * x1 = rsqrtps(a) * [1.5 - .5 * a * rsqrtps(a) * rsqrtps(a)] + * * * See: http://softwarecommunity.intel.com/articles/eng/1818.htm */ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg dst = aos_get_xmm_reg(cp); - if (1) { - sse_rsqrtss(cp->func, dst, arg0); + if (0) { + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg r = aos_get_xmm_reg(cp); + sse_rsqrtss(cp->func, r, arg0); + store_scalar_dest(cp, &op->FullDstRegisters[0], r); + return TRUE; } else { -#if 0 - /* Extend precision here... - */ - sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); - sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); - - sse_rsqrtss( func, tmp1, src ); /* rsqrtss(a) */ - sse_mulss( func, src, tmp1 ); /* a * rsqrtss(a) */ - sse_mulss( func, dst, tmp1 ); /* .5 * rsqrtss(a) */ - sse_mulss( func, src, tmp1 ); /* a * rsqrtss(a) * rsqrtss(a) */ - sse_subss( func, tmp0, src ); /* 3.0 - (a * rsqrtss(a) * rsqrtss(a)) */ - sse_mulss( func, dst, tmp0 ); /* .5 * r * (3.0 - (a * r * r)) */ -#endif - } + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg r = aos_get_xmm_reg(cp); - store_scalar_dest(cp, &op->FullDstRegisters[0], dst); - return TRUE; + struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ ); + struct x86_reg one_point_five = x86_make_disp( neg_half, 4 ); + struct x86_reg src = get_xmm_writable( cp, arg0 ); + + sse_rsqrtss( cp->func, r, src ); /* rsqrtss(a) */ + sse_mulss( cp->func, src, neg_half ); /* -.5 * a */ + sse_mulss( cp->func, src, r ); /* -.5 * a * r */ + sse_mulss( cp->func, src, r ); /* -.5 * a * r * r */ + sse_addss( cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */ + sse_mulss( cp->func, r, src ); /* r * (1.5 - .5 * a * r * r) */ + + store_scalar_dest(cp, &op->FullDstRegisters[0], r); + return TRUE; + } } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index c08c73d4bc..fffe2e4658 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -175,6 +175,7 @@ boolean aos_emit_outputs( struct aos_compilation *cp ); #define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */ #define IMM_255 4 /* 255, 255, 255, 255 */ #define IMM_NEGS 5 /* -1,-1,-1,-1 */ +#define IMM_RSQ 6 /* -.5,1.5,_,_ */ struct x86_reg aos_get_internal( struct aos_compilation *cp, unsigned imm ); -- cgit v1.2.3 From 7b85ea19de09d4e7e077ca147528e90e52683690 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 May 2008 19:01:57 +0100 Subject: draw: support psize in vs_varient paths Preserve the vinfo "EMIT_*" format descriptors in the varient key, and deal with PSIZE directly in each implementation. --- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 35 ++------------------ src/gallium/auxiliary/draw/draw_vertex.h | 21 ++++++++++++ src/gallium/auxiliary/draw/draw_vs_aos.c | 3 ++ src/gallium/auxiliary/draw/draw_vs_aos.h | 1 + src/gallium/auxiliary/draw/draw_vs_aos_io.c | 37 ++++++++++++++-------- src/gallium/auxiliary/draw/draw_vs_varient.c | 24 +++++++++++--- 6 files changed, 71 insertions(+), 50 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 85d0bdfcab..729c7db999 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -77,7 +77,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle, unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs; const struct vertex_info *vinfo; unsigned i; - boolean need_psize = 0; if (!draw->render->set_primitive( draw->render, @@ -123,34 +122,24 @@ static void fse_prepare( struct draw_pt_middle_end *middle, for (i = 0; i < vinfo->num_attribs; i++) { unsigned emit_sz = 0; - unsigned output_format = PIPE_FORMAT_NONE; - unsigned vs_output = vinfo->src_index[i]; switch (vinfo->emit[i]) { case EMIT_4F: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit_sz = 4 * sizeof(float); break; case EMIT_3F: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; emit_sz = 3 * sizeof(float); break; case EMIT_2F: - output_format = PIPE_FORMAT_R32G32_FLOAT; emit_sz = 2 * sizeof(float); break; case EMIT_1F: - output_format = PIPE_FORMAT_R32_FLOAT; emit_sz = 1 * sizeof(float); break; case EMIT_1F_PSIZE: - need_psize = 1; - output_format = PIPE_FORMAT_R32_FLOAT; emit_sz = 1 * sizeof(float); - vs_output = vinfo->num_attribs + 1; break; case EMIT_4UB: - output_format = PIPE_FORMAT_B8G8R8A8_UNORM; emit_sz = 4 * sizeof(ubyte); break; default: @@ -162,33 +151,15 @@ static void fse_prepare( struct draw_pt_middle_end *middle, * numbers, not to positions in the hw vertex description -- * that's handled by the output_offset field. */ - fse->key.element[vs_output].out.format = output_format; - fse->key.element[vs_output].out.vs_output = vs_output; - fse->key.element[vs_output].out.offset = dst_offset; + fse->key.element[i].out.format = vinfo->emit[i]; + fse->key.element[i].out.vs_output = vinfo->src_index[i]; + fse->key.element[i].out.offset = dst_offset; dst_offset += emit_sz; assert(fse->key.output_stride >= dst_offset); } } - /* To make psize work, really need to tell the vertex shader to - * copy that value from input->output. For 'translate' this was - * implicit for all elements. - */ -#if 0 - if (need_psize) { - unsigned input = num_vs_inputs + 1; - const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; - fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT; - fse->key.element[i].input_buffer = 0; //nr_buffers + 1; - fse->key.element[i].input_offset = 0; - - fse->key.nr_inputs += 1; - fse->key.nr_elements = MAX2(fse->key.nr_inputs, - fse->key.nr_outputs); - - } -#endif /* Would normally look up a vertex shader and peruse its list of * varients somehow. We omitted that step and put all the diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index 6d8bac5138..16c65c4317 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -109,4 +109,25 @@ extern void draw_compute_vertex_size(struct vertex_info *vinfo); void draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data); + +static INLINE unsigned draw_translate_vinfo_format(unsigned format ) +{ + switch (format) { + case EMIT_1F: + case EMIT_1F_PSIZE: + return PIPE_FORMAT_R32_FLOAT; + case EMIT_2F: + return PIPE_FORMAT_R32G32_FLOAT; + case EMIT_3F: + return PIPE_FORMAT_R32G32B32_FLOAT; + case EMIT_4F: + return PIPE_FORMAT_R32G32B32A32_FLOAT; + case EMIT_4UB: + return PIPE_FORMAT_R8G8B8A8_UNORM; + default: + return PIPE_FORMAT_NONE; + } +} + + #endif /* DRAW_VERTEX_H */ diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 1c63677e6e..d3770b2c53 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -2126,6 +2126,7 @@ static void vaos_run_elts( struct draw_vs_varient *varient, { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; vaos->gen_run_elts( varient, elts, count, @@ -2139,6 +2140,7 @@ static void vaos_run_linear( struct draw_vs_varient *varient, { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; vaos->gen_run_linear( varient, start, count, @@ -2204,6 +2206,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, vaos->base.run_linear = vaos_run_linear; vaos->base.run_elts = vaos_run_elts; + vaos->draw = vs->draw; vaos->machine = align_malloc( sizeof(struct aos_machine), 16 ); if (!vaos->machine) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index fffe2e4658..b47413ff43 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -176,6 +176,7 @@ boolean aos_emit_outputs( struct aos_compilation *cp ); #define IMM_255 4 /* 255, 255, 255, 255 */ #define IMM_NEGS 5 /* -1,-1,-1,-1 */ #define IMM_RSQ 6 /* -.5,1.5,_,_ */ +#define IMM_PSIZE 7 /* not really an immediate - updated each run */ struct x86_reg aos_get_internal( struct aos_compilation *cp, unsigned imm ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index cebfaf6474..836110f382 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -33,6 +33,7 @@ #include "tgsi/exec/tgsi_exec.h" #include "draw_vs.h" #include "draw_vs_aos.h" +#include "draw_vertex.h" #include "rtasm/rtasm_x86sse.h" @@ -249,24 +250,27 @@ static boolean emit_output( struct aos_compilation *cp, unsigned format ) { switch (format) { - case PIPE_FORMAT_R32_FLOAT: + case EMIT_1F: + case EMIT_1F_PSIZE: emit_store_R32(cp, ptr, dataXMM); break; - case PIPE_FORMAT_R32G32_FLOAT: + case EMIT_2F: emit_store_R32G32(cp, ptr, dataXMM); break; - case PIPE_FORMAT_R32G32B32_FLOAT: + case EMIT_3F: emit_store_R32G32B32(cp, ptr, dataXMM); break; - case PIPE_FORMAT_R32G32B32A32_FLOAT: + case EMIT_4F: emit_store_R32G32B32A32(cp, ptr, dataXMM); break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); - emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); - break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); + case EMIT_4UB: + if (1) { + emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); + emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); + } + else { + emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); + } break; default: ERROR(cp, "unhandled output format"); @@ -287,9 +291,16 @@ boolean aos_emit_outputs( struct aos_compilation *cp ) unsigned offset = cp->vaos->base.key.element[i].out.offset; unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output; - struct x86_reg data = aos_get_shader_reg( cp, - TGSI_FILE_OUTPUT, - vs_output ); + struct x86_reg data; + + if (format == EMIT_1F_PSIZE) { + data = aos_get_internal_xmm( cp, IMM_PSIZE ); + } + else { + data = aos_get_shader_reg( cp, + TGSI_FILE_OUTPUT, + vs_output ); + } if (data.file != file_XMM) { struct x86_reg tmp = aos_get_xmm_reg( cp ); diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index dab46e8eed..119a3a04b5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -231,6 +231,10 @@ static void vsvg_run_linear( struct draw_vs_varient *varient, output_buffer, vsvg->base.key.output_stride ); + vsvg->emit->set_buffer( vsvg->emit, + 1, + &vsvg->draw->rasterizer->point_size, + 0); vsvg->emit->run( vsvg->emit, 0, count, @@ -293,11 +297,21 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, emit.nr_elements = key->nr_outputs; emit.output_stride = key->output_stride; for (i = 0; i < key->nr_outputs; i++) { - emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - emit.element[i].input_buffer = 0; - emit.element[i].input_offset = i * 4 * sizeof(float); - emit.element[i].output_format = key->element[i].out.format; - emit.element[i].output_offset = key->element[i].out.offset; + if (key->element[i].out.format != EMIT_1F_PSIZE) + { + emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit.element[i].input_buffer = 0; + emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); + emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format); + emit.element[i].output_offset = key->element[i].out.offset; + } + else { + emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; + emit.element[i].input_buffer = 1; + emit.element[i].input_offset = 0; + emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT; + emit.element[i].output_offset = key->element[i].out.offset; + } } vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch ); -- cgit v1.2.3 From 364f75d9dbc2b8e19c884b9cc74676ab6cbee60d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 28 May 2008 22:19:58 +0900 Subject: draw: Use PIPE_CDECL. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 20 ++++++++++---------- src/gallium/auxiliary/draw/draw_vs_aos.h | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index d3770b2c53..0cd82ff599 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1189,7 +1189,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } -static PIPE_CDECL void do_lit( struct aos_machine *machine, +static void PIPE_CDECL do_lit( struct aos_machine *machine, float *result, const float *in, unsigned count ) @@ -1223,7 +1223,7 @@ static PIPE_CDECL void do_lit( struct aos_machine *machine, } -static PIPE_CDECL void do_lit_lut( struct aos_machine *machine, +static void PIPE_CDECL do_lit_lut( struct aos_machine *machine, float *result, const float *in, unsigned count ) @@ -2119,10 +2119,10 @@ static void vaos_destroy( struct draw_vs_varient *varient ) FREE(vaos); } -static void vaos_run_elts( struct draw_vs_varient *varient, - const unsigned *elts, - unsigned count, - void *output_buffer ) +static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, + const unsigned *elts, + unsigned count, + void *output_buffer ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; @@ -2133,10 +2133,10 @@ static void vaos_run_elts( struct draw_vs_varient *varient, output_buffer ); } -static void vaos_run_linear( struct draw_vs_varient *varient, - unsigned start, - unsigned count, - void *output_buffer ) +static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, + unsigned start, + unsigned count, + void *output_buffer ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index b47413ff43..837b32794f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -60,7 +60,7 @@ struct x86_function; #define FPU_RND_NEAREST 2 struct aos_machine; -typedef void PIPE_CDECL (*lit_func)( struct aos_machine *, +typedef void (PIPE_CDECL *lit_func)( struct aos_machine *, float *result, const float *in, unsigned count ); -- cgit v1.2.3 From 728d1f7f43b6db9f4f42c2d16ba223c492d1147d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 28 May 2008 23:54:18 +0100 Subject: draw: enable FSE by default --- src/gallium/auxiliary/draw/draw_pt.c | 11 ++- src/gallium/auxiliary/draw/draw_vs.h | 4 ++ src/gallium/auxiliary/draw/draw_vs_aos.c | 105 +++++++++++++++++++++------- src/gallium/auxiliary/draw/draw_vs_aos.h | 34 +++++---- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 25 ++++--- src/gallium/auxiliary/draw/draw_vs_sse.c | 14 ++-- 6 files changed, 129 insertions(+), 64 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 75f44d503e..d48c6c220d 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -75,7 +75,7 @@ draw_pt_arrays(struct draw_context *draw, if (opt == 0) middle = draw->pt.middle.fetch_emit; - else if (opt == PT_SHADE && draw->pt.test_fse) + else if (opt == PT_SHADE) middle = draw->pt.middle.fetch_shade_emit; else middle = draw->pt.middle.general; @@ -118,12 +118,9 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_emit) return FALSE; - if (draw->pt.test_fse) { - draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw ); - if (!draw->pt.middle.fetch_shade_emit) - return FALSE; - } - + draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw ); + if (!draw->pt.middle.fetch_shade_emit) + return FALSE; draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); if (!draw->pt.middle.general) diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 01171bc23d..7aa0415baf 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -123,6 +123,10 @@ struct draw_vertex_shader { struct tgsi_shader_info info; + /* Extracted from shader: + */ + const float (*immediates)[4]; + /* */ struct draw_vs_varient *varient[16]; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 0cd82ff599..9056785e7a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -66,6 +66,37 @@ static INLINE boolean eq( struct x86_reg a, a.disp == b.disp); } +struct x86_reg aos_get_x86( struct aos_compilation *cp, + unsigned value ) +{ + if (cp->ebp != value) { + unsigned offset; + + switch (value) { + case X86_IMMEDIATES: + offset = Offset(struct aos_machine, immediates); + break; + case X86_CONSTANTS: + offset = Offset(struct aos_machine, constants); + break; + case X86_ATTRIBS: + offset = Offset(struct aos_machine, attrib); + break; + default: + assert(0); + offset = 0; + } + + x86_mov(cp->func, cp->temp_EBP, + x86_make_disp(cp->machine_EDX, offset)); + /* x86_deref(x86_make_disp(cp->machine_EDX, offset))); */ + + cp->ebp = value; + } + + return cp->temp_EBP; +} + static struct x86_reg get_reg_ptr(struct aos_compilation *cp, unsigned file, @@ -83,15 +114,15 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, case TGSI_FILE_TEMPORARY: return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx])); - case TGSI_FILE_IMMEDIATE: - return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx])); - - case TGSI_FILE_CONSTANT: - return x86_make_disp(ptr, Offset(struct aos_machine, constant[idx])); - case AOS_FILE_INTERNAL: return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx])); + case TGSI_FILE_IMMEDIATE: + return x86_make_disp(aos_get_x86(cp, X86_IMMEDIATES), idx * 4 * sizeof(float)); + + case TGSI_FILE_CONSTANT: + return x86_make_disp(aos_get_x86(cp, X86_CONSTANTS), idx * 4 * sizeof(float)); + default: ERROR(cp, "unknown reg file"); return x86_make_reg(0,0); @@ -1865,6 +1896,7 @@ static boolean emit_rhw_viewport( struct aos_compilation *cp ) } +#if 0 static boolean note_immediate( struct aos_compilation *cp, struct tgsi_full_immediate *imm ) { @@ -1877,6 +1909,7 @@ static boolean note_immediate( struct aos_compilation *cp, return TRUE; } +#endif @@ -1939,6 +1972,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, cp.outbuf_ECX = x86_make_reg(file_REG32, reg_CX); cp.machine_EDX = x86_make_reg(file_REG32, reg_DX); cp.count_ESI = x86_make_reg(file_REG32, reg_SI); + cp.temp_EBP = x86_make_reg(file_REG32, reg_BP); x86_init_func(cp.func); @@ -1946,6 +1980,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, x86_push(cp.func, cp.idx_EBX); x86_push(cp.func, cp.count_ESI); + x86_push(cp.func, cp.temp_EBP); /* Load arguments into regs: @@ -1988,8 +2023,10 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, switch (parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: +#if 0 if (!note_immediate( &cp, &parse.FullToken.FullImmediate )) goto fail; +#endif break; case TGSI_TOKEN_TYPE_INSTRUCTION: @@ -2072,6 +2109,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, if (cp.func->need_emms) mmx_emms(cp.func); + x86_pop(cp.func, cp.temp_EBP); x86_pop(cp.func, cp.count_ESI); x86_pop(cp.func, cp.idx_EBX); @@ -2098,26 +2136,14 @@ static void vaos_set_buffer( struct draw_vs_varient *varient, for (i = 0; i < vaos->base.key.nr_inputs; i++) { if (vaos->base.key.element[i].in.buffer == buf) { - vaos->machine->attrib[i].input_ptr = ((char *)ptr + - vaos->base.key.element[i].in.offset); - vaos->machine->attrib[i].input_stride = stride; + vaos->attrib[i].input_ptr = ((char *)ptr + + vaos->base.key.element[i].in.offset); + vaos->attrib[i].input_stride = stride; } } } -static void vaos_destroy( struct draw_vs_varient *varient ) -{ - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - - if (vaos->machine) - align_free( vaos->machine ); - - x86_release_func( &vaos->func[0] ); - x86_release_func( &vaos->func[1] ); - - FREE(vaos); -} static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, const unsigned *elts, @@ -2127,6 +2153,10 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; + vaos->machine->constants = vaos->draw->pt.user.constants; + vaos->machine->immediates = vaos->base.vs->immediates; + vaos->machine->attrib = vaos->attrib; + vaos->gen_run_elts( varient, elts, count, @@ -2141,6 +2171,10 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; + vaos->machine->constants = vaos->draw->pt.user.constants; + vaos->machine->immediates = vaos->base.vs->immediates; + vaos->machine->attrib = vaos->attrib; + vaos->gen_run_linear( varient, start, count, @@ -2153,10 +2187,6 @@ static void vaos_set_constants( struct draw_vs_varient *varient, { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - memcpy(vaos->machine->constant, - constants, - (vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1) * 4 * sizeof(float)); - #if 0 unsigned i; for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++) @@ -2187,6 +2217,21 @@ static void vaos_set_viewport( struct draw_vs_varient *varient, memcpy(vaos->machine->translate, viewport->translate, 4 * sizeof(float)); } +static void vaos_destroy( struct draw_vs_varient *varient ) +{ + struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + + if (vaos->machine) + align_free( vaos->machine ); + + FREE( vaos->attrib ); + + x86_release_func( &vaos->func[0] ); + x86_release_func( &vaos->func[1] ); + + FREE(vaos); +} + static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, @@ -2207,6 +2252,11 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, vaos->base.run_elts = vaos_run_elts; vaos->draw = vs->draw; + + vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) ); + if (!vaos->attrib) + goto fail; + vaos->machine = align_malloc( sizeof(struct aos_machine), 16 ); if (!vaos->machine) goto fail; @@ -2233,7 +2283,10 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, return &vaos->base; fail: - if (vaos->machine) + if (vaos && vaos->attrib) + FREE(vaos->attrib); + + if (vaos && vaos->machine) align_free( vaos->machine ); if (vaos) diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 837b32794f..295d2cb3fe 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -78,6 +78,14 @@ struct lit_info { #define MAX_SHINE_TAB 4 #define MAX_LIT_INFO 16 +struct aos_attrib { + const void *input_ptr; + unsigned input_stride; +}; + + + + /* This is the temporary storage used by all the aos_sse vs varients. * Create one per context and reuse by passing a pointer in at * vs_varient creation?? @@ -86,8 +94,6 @@ struct aos_machine { float input [MAX_INPUTS ][4]; float output [MAX_OUTPUTS ][4]; float temp [MAX_TEMPS ][4]; - float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */ - float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */ float internal [MAX_INTERNALS ][4]; float scale[4]; /* viewport */ @@ -105,12 +111,10 @@ struct aos_machine { ushort fpu_restore; ushort fpucntl; /* one of FPU_* above */ - struct { - const void *input_ptr; - unsigned input_stride; + const float (*immediates)[4]; /* points to shader data */ + const float (*constants)[4]; /* points to draw data */ - unsigned output_offset; - } attrib[PIPE_MAX_ATTRIBS]; + const struct aos_attrib *attrib; /* points to ? */ }; @@ -132,6 +136,7 @@ struct aos_compilation { unsigned last_used; } xmm[8]; + unsigned ebp; /* one of X86_* */ boolean input_fetched[PIPE_MAX_ATTRIBS]; unsigned output_last_write[PIPE_MAX_ATTRIBS]; @@ -148,6 +153,7 @@ struct aos_compilation { struct x86_reg outbuf_ECX; struct x86_reg machine_EDX; struct x86_reg count_ESI; /* decrements to zero */ + struct x86_reg temp_EBP; }; struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp ); @@ -192,20 +198,20 @@ do { \ } while (0) +#define X86_NULL 0 +#define X86_IMMEDIATES 1 +#define X86_CONSTANTS 2 +#define X86_ATTRIBS 3 - +struct x86_reg aos_get_x86( struct aos_compilation *cp, + unsigned value ); struct draw_vs_varient_aos_sse { struct draw_vs_varient base; struct draw_context *draw; -#if 0 - struct { - const void *ptr; - unsigned stride; - } attrib[PIPE_MAX_ATTRIBS]; -#endif + struct aos_attrib *attrib; struct aos_machine *machine; /* XXX: temporarily unshared */ diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 836110f382..45e2092209 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -91,25 +91,25 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp, -static void get_src_ptr( struct x86_function *func, +static void get_src_ptr( struct aos_compilation *cp, struct x86_reg src, - struct x86_reg machine, struct x86_reg elt, unsigned a ) { - struct x86_reg input_ptr = - x86_make_disp(machine, - Offset(struct aos_machine, attrib[a].input_ptr)); + struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, X86_ATTRIBS ), + a * sizeof(struct aos_attrib)); - struct x86_reg input_stride = - x86_make_disp(machine, - Offset(struct aos_machine, attrib[a].input_stride)); + struct x86_reg input_ptr = x86_make_disp(attrib, + Offset(struct aos_attrib, input_ptr)); + + struct x86_reg input_stride = x86_make_disp(attrib, + Offset(struct aos_attrib, input_stride)); /* Calculate pointer to current attrib: */ - x86_mov(func, src, input_stride); - x86_imul(func, src, elt); - x86_add(func, src, input_ptr); + x86_mov(cp->func, src, input_stride); + x86_imul(cp->func, src, elt); + x86_add(cp->func, src, input_ptr); } @@ -134,9 +134,8 @@ static boolean load_input( struct aos_compilation *cp, /* Figure out source pointer address: */ - get_src_ptr(cp->func, + get_src_ptr(cp, src, - cp->machine_EDX, linear ? cp->idx_EBX : x86_deref(cp->idx_EBX), idx); diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 7781782ae8..24f619a278 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -68,8 +68,6 @@ struct draw_sse_vertex_shader { codegen_function func; struct tgsi_exec_machine *machine; - - float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; }; @@ -107,7 +105,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base, machine->Outputs, (float (*)[4])constants, machine->Temps, - shader->immediates, + (float (*)[4])shader->base.immediates, input, base->info.num_inputs, input_stride, @@ -130,6 +128,8 @@ vs_sse_delete( struct draw_vertex_shader *base ) x86_release_func( &shader->sse2_program ); + align_free(shader->base.immediates); + FREE( (void*) shader->base.state.tokens ); FREE( shader ); } @@ -161,12 +161,18 @@ draw_create_vs_sse(struct draw_context *draw, vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; + + vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * + sizeof(float), 16); + vs->machine = &draw->vs.machine; x86_init_func( &vs->sse2_program ); if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, - &vs->sse2_program, vs->immediates, TRUE )) + &vs->sse2_program, + (float (*)[4])vs->base.immediates, + TRUE )) goto fail; vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); -- cgit v1.2.3 From 62628c4d3d497cbca73fde869c9069fa90e6453e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 29 May 2008 00:17:53 +0100 Subject: draw: share machine --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/SConscript | 1 + src/gallium/auxiliary/draw/draw_context.c | 3 + src/gallium/auxiliary/draw/draw_private.h | 9 + .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 6 - src/gallium/auxiliary/draw/draw_vs.c | 43 ++- src/gallium/auxiliary/draw/draw_vs.h | 24 +- src/gallium/auxiliary/draw/draw_vs_aos.c | 282 ++----------------- src/gallium/auxiliary/draw/draw_vs_aos.h | 25 +- src/gallium/auxiliary/draw/draw_vs_aos_machine.c | 297 +++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs_varient.c | 49 +--- 11 files changed, 412 insertions(+), 328 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_vs_aos_machine.c (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 9a88ecc070..f2e36a89e9 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -37,6 +37,7 @@ C_SOURCES = \ draw_vs_varient.c \ draw_vs_aos.c \ draw_vs_aos_io.c \ + draw_vs_aos_machine.c \ draw_vs_exec.c \ draw_vs_llvm.c \ draw_vs_sse.c diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 26919a2298..925e668f22 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -36,6 +36,7 @@ draw = env.ConvenienceLibrary( 'draw_vs.c', 'draw_vs_aos.c', 'draw_vs_aos_io.c', + 'draw_vs_aos_machine.c', 'draw_vs_exec.c', 'draw_vs_llvm.c', 'draw_vs_sse.c', diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 2242074965..8509baf865 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -174,6 +174,8 @@ void draw_set_viewport_state( struct draw_context *draw, viewport->translate[1] == 0.0f && viewport->translate[2] == 0.0f && viewport->translate[3] == 0.0f); + + draw_vs_set_viewport( draw, viewport ); } @@ -218,6 +220,7 @@ draw_set_mapped_constant_buffer(struct draw_context *draw, const void *buffer) { draw->pt.user.constants = buffer; + draw_vs_set_constants( draw, (const float (*)[4])buffer ); } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index c095bf3d7b..4cbccc8b5b 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -183,6 +183,10 @@ struct draw_context */ struct gallivm_cpu_engine *engine; + /* Here's another one: + */ + struct aos_machine *aos_machine; + struct translate *fetch; struct translate_cache *fetch_cache; @@ -215,6 +219,11 @@ struct draw_context boolean draw_vs_init( struct draw_context *draw ); void draw_vs_destroy( struct draw_context *draw ); +void draw_vs_set_viewport( struct draw_context *, + const struct pipe_viewport_state * ); + +void draw_vs_set_constants( struct draw_context *, + const float (*constants)[4] ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 729c7db999..5265a13160 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -189,12 +189,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle, draw->pt.vertex_buffer[buf].pitch ); } - fse->active->set_constants( fse->active, - (const float (*)[4])draw->pt.user.constants ); - - fse->active->set_viewport( fse->active, - &draw->viewport ); - //return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 9b899d404e..a8b6d0c90d 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -41,6 +41,22 @@ + +void draw_vs_set_constants( struct draw_context *draw, + const float (*constants)[4] ) +{ + draw_vs_aos_machine_constants( draw->vs.aos_machine, constants ); +} + + +void draw_vs_set_viewport( struct draw_context *draw, + const struct pipe_viewport_state *viewport ) +{ + draw_vs_aos_machine_viewport( draw->vs.aos_machine, viewport ); +} + + + struct draw_vertex_shader * draw_create_vertex_shader(struct draw_context *draw, const struct pipe_shader_state *shader) @@ -83,6 +99,13 @@ void draw_delete_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *dvs) { + unsigned i; + + for (i = 0; i < dvs->nr_varients; i++) + dvs->varient[i]->destroy( dvs->varient[i] ); + + dvs->nr_varients = 0; + dvs->delete( dvs ); } @@ -110,6 +133,10 @@ draw_vs_init( struct draw_context *draw ) draw->vs.fetch_cache = translate_cache_create(); if (!draw->vs.fetch_cache) return FALSE; + + draw->vs.aos_machine = draw_vs_aos_machine(); + if (!draw->vs.aos_machine) + return FALSE; return TRUE; } @@ -129,6 +156,9 @@ draw_vs_destroy( struct draw_context *draw ) if (draw->vs.emit_cache) translate_cache_destroy(draw->vs.emit_cache); + if (draw->vs.aos_machine) + draw_vs_aos_machine_destroy(draw->vs.aos_machine); + tgsi_exec_machine_free_data(&draw->vs.machine); } @@ -153,10 +183,17 @@ draw_vs_lookup_varient( struct draw_vertex_shader *vs, if (varient == NULL) return NULL; - /* Add it to our list: + /* Add it to our list, could be smarter: */ - assert(vs->nr_varients < Elements(vs->varient)); - vs->varient[vs->nr_varients++] = varient; + if (vs->nr_varients < Elements(vs->varient)) { + vs->varient[vs->nr_varients++] = varient; + } + else { + vs->last_varient++; + vs->last_varient %= Elements(vs->varient); + vs->varient[vs->last_varient]->destroy(vs->varient[vs->last_varient]); + vs->varient[vs->last_varient] = varient; + } /* Done */ diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 7aa0415baf..08c6de8ba8 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -70,16 +70,6 @@ struct draw_vs_varient_key { struct draw_vs_varient; -typedef void (PIPE_CDECL *vsv_run_elts_func)( struct draw_vs_varient *, - const unsigned *elts, - unsigned count, - void *output_buffer); - -typedef void (PIPE_CDECL *vsv_run_linear_func)( struct draw_vs_varient *, - unsigned start, - unsigned count, - void *output_buffer); - struct draw_vs_varient { struct draw_vs_varient_key key; @@ -91,12 +81,6 @@ struct draw_vs_varient { const void *ptr, unsigned stride ); - void (*set_constants)( struct draw_vs_varient *, - const float (*constants)[4] ); - - void (*set_viewport)( struct draw_vs_varient *, - const struct pipe_viewport_state * ); - void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader, unsigned start, unsigned count, @@ -131,6 +115,7 @@ struct draw_vertex_shader { */ struct draw_vs_varient *varient[16]; unsigned nr_varients; + unsigned last_varient; struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader, const struct draw_vs_varient_key *key ); @@ -217,7 +202,14 @@ static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key } +struct aos_machine *draw_vs_aos_machine( void ); +void draw_vs_aos_machine_destroy( struct aos_machine *machine ); + +void draw_vs_aos_machine_constants( struct aos_machine *machine, + const float (*constants)[4] ); +void draw_vs_aos_machine_viewport( struct aos_machine *machine, + const struct pipe_viewport_state *viewport ); #define MAX_TGSI_VERTICES 4 diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 9056785e7a..b5e4e1e7b1 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -149,70 +149,7 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, #define X87_CW_ROUND_MASK (3<<10) #define X87_CW_INFINITY (1<<12) -static void do_populate_lut( struct shine_tab *tab, - float unclamped_exponent ) -{ - const float epsilon = 1.0F / 256.0F; - float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon)); - unsigned i; - tab->exponent = unclamped_exponent; /* for later comparison */ - - tab->values[0] = 0; - if (exponent == 0) { - for (i = 1; i < 258; i++) { - tab->values[i] = 1.0; - } - } - else { - for (i = 1; i < 258; i++) { - tab->values[i] = powf((float)i * epsilon, exponent); - } - } -} - -static void init_internals( struct aos_machine *machine ) -{ - unsigned i; - float inv = 1.0f/255.0f; - float f255 = 255.0f; - - ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f); - *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff; - - ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f); - ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f); - ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f); - ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv); - ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255); - ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f); - - - machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP | - X87_CW_EXCEPTION_DENORM_OP | - X87_CW_EXCEPTION_ZERO_DIVIDE | - X87_CW_EXCEPTION_OVERFLOW | - X87_CW_EXCEPTION_UNDERFLOW | - X87_CW_EXCEPTION_PRECISION | - (1<<6) | - X87_CW_ROUND_NEAREST | - X87_CW_PRECISION_DOUBLE_EXT); - - assert(machine->fpu_rnd_nearest == 0x37f); - - machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP | - X87_CW_EXCEPTION_DENORM_OP | - X87_CW_EXCEPTION_ZERO_DIVIDE | - X87_CW_EXCEPTION_OVERFLOW | - X87_CW_EXCEPTION_UNDERFLOW | - X87_CW_EXCEPTION_PRECISION | - (1<<6) | - X87_CW_ROUND_DOWN | - X87_CW_PRECISION_DOUBLE_EXT); - - for (i = 0; i < MAX_SHINE_TAB; i++) - do_populate_lut( &machine->shine_tab[i], 1.0f ); -} static void spill( struct aos_compilation *cp, unsigned idx ) @@ -1220,136 +1157,6 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } -static void PIPE_CDECL do_lit( struct aos_machine *machine, - float *result, - const float *in, - unsigned count ) -{ - if (in[0] > 0) - { - if (in[1] <= 0.0) - { - result[0] = 1.0F; - result[1] = in[0]; - result[2] = 1.0; - result[3] = 1.0F; - } - else - { - const float epsilon = 1.0F / 256.0F; - float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); - result[0] = 1.0F; - result[1] = in[0]; - result[2] = powf(in[1], exponent); - result[3] = 1.0; - } - } - else - { - result[0] = 1.0F; - result[1] = 0.0; - result[2] = 0.0; - result[3] = 1.0F; - } -} - - -static void PIPE_CDECL do_lit_lut( struct aos_machine *machine, - float *result, - const float *in, - unsigned count ) -{ - if (in[0] > 0) - { - if (in[1] <= 0.0) - { - result[0] = 1.0F; - result[1] = in[0]; - result[2] = 1.0; - result[3] = 1.0F; - return; - } - - if (machine->lit_info[count].shine_tab->exponent != in[3]) { - machine->lit_info[count].func = do_lit; - goto no_luck; - } - - if (in[1] <= 1.0) - { - const float *tab = machine->lit_info[count].shine_tab->values; - float f = in[1] * 256; - int k = (int)f; - float frac = f - (float)k; - - result[0] = 1.0F; - result[1] = in[0]; - result[2] = tab[k] + frac*(tab[k+1]-tab[k]); - result[3] = 1.0; - return; - } - - no_luck: - { - const float epsilon = 1.0F / 256.0F; - float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); - result[0] = 1.0F; - result[1] = in[0]; - result[2] = powf(in[1], exponent); - result[3] = 1.0; - } - } - else - { - result[0] = 1.0F; - result[1] = 0.0; - result[2] = 0.0; - result[3] = 1.0F; - } -} - - - -static void PIPE_CDECL populate_lut( struct aos_machine *machine, - float *result, - const float *in, - unsigned count ) -{ - unsigned i, tab; - - /* Search for an existing table for this value. Note that without - * static analysis we don't really know if in[3] will be constant, - * but it usually is... - */ - for (tab = 0; tab < 4; tab++) { - if (machine->shine_tab[tab].exponent == in[3]) { - goto found; - } - } - - for (tab = 0, i = 1; i < 4; i++) { - if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used) - tab = i; - } - - if (machine->shine_tab[tab].last_used == machine->now) { - /* No unused tables (this is not a ffvertex program...). Just - * call pow each time: - */ - machine->lit_info[count].func = do_lit; - machine->lit_info[count].func( machine, result, in, count ); - return; - } - else { - do_populate_lut( &machine->shine_tab[tab], in[3] ); - } - - found: - machine->shine_tab[tab].last_used = machine->now; - machine->lit_info[count].shine_tab = &machine->shine_tab[tab]; - machine->lit_info[count].func = do_lit_lut; - machine->lit_info[count].func( machine, result, in, count ); -} @@ -1413,7 +1220,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst Offset(struct lit_info, func))); } else { - x86_mov_reg_imm( cp->func, ecx, (int)do_lit ); + x86_mov_reg_imm( cp->func, ecx, (int)aos_do_lit ); } x86_call( cp->func, ecx ); @@ -1434,7 +1241,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } - +#if 0 static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); @@ -1495,6 +1302,7 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu return TRUE; } +#endif @@ -1945,7 +1753,7 @@ static void find_last_write_outputs( struct aos_compilation *cp ) } -#define ARG_VARIENT 1 +#define ARG_MACHINE 1 #define ARG_START_ELTS 2 #define ARG_COUNT 3 #define ARG_OUTBUF 4 @@ -1985,7 +1793,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, /* Load arguments into regs: */ - x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_VARIENT)); + x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_MACHINE)); x86_mov(cp.func, cp.idx_EBX, x86_fn_arg(cp.func, ARG_START_ELTS)); x86_mov(cp.func, cp.count_ESI, x86_fn_arg(cp.func, ARG_COUNT)); x86_mov(cp.func, cp.outbuf_ECX, x86_fn_arg(cp.func, ARG_OUTBUF)); @@ -1997,11 +1805,6 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, x86_cmp(cp.func, cp.count_ESI, cp.tmp_EAX); fixup = x86_jcc_forward(cp.func, cc_E); - /* Dig out the machine pointer from inside the varient arg - */ - x86_mov(cp.func, cp.machine_EDX, - x86_make_disp(cp.machine_EDX, - Offset( struct draw_vs_varient_aos_sse, machine ))); save_fpu_state( &cp ); set_fpu_round_nearest( &cp ); @@ -2151,13 +1954,14 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, void *output_buffer ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct aos_machine *machine = vaos->draw->vs.aos_machine; - vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; - vaos->machine->constants = vaos->draw->pt.user.constants; - vaos->machine->immediates = vaos->base.vs->immediates; - vaos->machine->attrib = vaos->attrib; + machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; + machine->constants = (const float (*)[4])vaos->draw->pt.user.constants; + machine->immediates = vaos->base.vs->immediates; + machine->attrib = vaos->attrib; - vaos->gen_run_elts( varient, + vaos->gen_run_elts( machine, elts, count, output_buffer ); @@ -2169,61 +1973,25 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, void *output_buffer ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct aos_machine *machine = vaos->draw->vs.aos_machine; - vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; - vaos->machine->constants = vaos->draw->pt.user.constants; - vaos->machine->immediates = vaos->base.vs->immediates; - vaos->machine->attrib = vaos->attrib; + machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; + machine->constants = (const float (*)[4])vaos->draw->pt.user.constants; + machine->immediates = vaos->base.vs->immediates; + machine->attrib = vaos->attrib; - vaos->gen_run_linear( varient, + vaos->gen_run_linear( machine, start, count, output_buffer ); } -static void vaos_set_constants( struct draw_vs_varient *varient, - const float (*constants)[4] ) -{ - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - -#if 0 - unsigned i; - for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++) - debug_printf("state %d: %f %f %f %f\n", - i, - constants[i][0], - constants[i][1], - constants[i][2], - constants[i][3]); -#endif - - { - unsigned i; - for (i = 0; i < MAX_LIT_INFO; i++) { - vaos->machine->lit_info[i].func = populate_lut; - vaos->machine->now++; - } - } -} - - -static void vaos_set_viewport( struct draw_vs_varient *varient, - const struct pipe_viewport_state *viewport ) -{ - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - - memcpy(vaos->machine->scale, viewport->scale, 4 * sizeof(float)); - memcpy(vaos->machine->translate, viewport->translate, 4 * sizeof(float)); -} static void vaos_destroy( struct draw_vs_varient *varient ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - if (vaos->machine) - align_free( vaos->machine ); - FREE( vaos->attrib ); x86_release_func( &vaos->func[0] ); @@ -2245,8 +2013,6 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, vaos->base.key = *key; vaos->base.vs = vs; vaos->base.set_input = vaos_set_buffer; - vaos->base.set_constants = vaos_set_constants; - vaos->base.set_viewport = vaos_set_viewport; vaos->base.destroy = vaos_destroy; vaos->base.run_linear = vaos_run_linear; vaos->base.run_elts = vaos_run_elts; @@ -2257,13 +2023,6 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, if (!vaos->attrib) goto fail; - vaos->machine = align_malloc( sizeof(struct aos_machine), 16 ); - if (!vaos->machine) - goto fail; - - memset(vaos->machine, 0, sizeof(struct aos_machine)); - init_internals(vaos->machine); - tgsi_dump(vs->state.tokens, 0); if (!build_vertex_program( vaos, TRUE )) @@ -2272,11 +2031,11 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, if (!build_vertex_program( vaos, FALSE )) goto fail; - vaos->gen_run_linear = (vsv_run_linear_func)x86_get_func(&vaos->func[0]); + vaos->gen_run_linear = (vaos_run_linear_func)x86_get_func(&vaos->func[0]); if (!vaos->gen_run_linear) goto fail; - vaos->gen_run_elts = (vsv_run_elts_func)x86_get_func(&vaos->func[1]); + vaos->gen_run_elts = (vaos_run_elts_func)x86_get_func(&vaos->func[1]); if (!vaos->gen_run_elts) goto fail; @@ -2286,9 +2045,6 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, if (vaos && vaos->attrib) FREE(vaos->attrib); - if (vaos && vaos->machine) - align_free( vaos->machine ); - if (vaos) x86_release_func( &vaos->func[0] ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 295d2cb3fe..89a9174151 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -60,10 +60,16 @@ struct x86_function; #define FPU_RND_NEAREST 2 struct aos_machine; -typedef void (PIPE_CDECL *lit_func)( struct aos_machine *, +typedef void PIPE_CDECL (*lit_func)( struct aos_machine *, float *result, const float *in, unsigned count ); + +PIPE_CDECL void aos_do_lit( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ); + struct shine_tab { float exponent; float values[258]; @@ -207,16 +213,25 @@ struct x86_reg aos_get_x86( struct aos_compilation *cp, unsigned value ); +typedef void (PIPE_CDECL *vaos_run_elts_func)( struct aos_machine *, + const unsigned *elts, + unsigned count, + void *output_buffer); + +typedef void (PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *, + unsigned start, + unsigned count, + void *output_buffer); + + struct draw_vs_varient_aos_sse { struct draw_vs_varient base; struct draw_context *draw; struct aos_attrib *attrib; - struct aos_machine *machine; /* XXX: temporarily unshared */ - - vsv_run_linear_func gen_run_linear; - vsv_run_elts_func gen_run_elts; + vaos_run_linear_func gen_run_linear; + vaos_run_elts_func gen_run_elts; struct x86_function func[2]; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c new file mode 100644 index 0000000000..53e999b191 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -0,0 +1,297 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" +#include "tgsi/exec/tgsi_exec.h" +#include "draw_vs.h" +#include "draw_vs_aos.h" +#include "draw_vertex.h" + +#include "rtasm/rtasm_x86sse.h" + + +#define X87_CW_EXCEPTION_INV_OP (1<<0) +#define X87_CW_EXCEPTION_DENORM_OP (1<<1) +#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2) +#define X87_CW_EXCEPTION_OVERFLOW (1<<3) +#define X87_CW_EXCEPTION_UNDERFLOW (1<<4) +#define X87_CW_EXCEPTION_PRECISION (1<<5) +#define X87_CW_PRECISION_SINGLE (0<<8) +#define X87_CW_PRECISION_RESERVED (1<<8) +#define X87_CW_PRECISION_DOUBLE (2<<8) +#define X87_CW_PRECISION_DOUBLE_EXT (3<<8) +#define X87_CW_PRECISION_MASK (3<<8) +#define X87_CW_ROUND_NEAREST (0<<10) +#define X87_CW_ROUND_DOWN (1<<10) +#define X87_CW_ROUND_UP (2<<10) +#define X87_CW_ROUND_ZERO (3<<10) +#define X87_CW_ROUND_MASK (3<<10) +#define X87_CW_INFINITY (1<<12) + + +PIPE_CDECL void aos_do_lit( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + if (in[0] > 0) + { + if (in[1] <= 0.0) + { + result[0] = 1.0F; + result[1] = in[0]; + result[2] = 1.0; + result[3] = 1.0F; + } + else + { + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); + result[0] = 1.0F; + result[1] = in[0]; + result[2] = powf(in[1], exponent); + result[3] = 1.0; + } + } + else + { + result[0] = 1.0F; + result[1] = 0.0; + result[2] = 0.0; + result[3] = 1.0F; + } +} + + +static PIPE_CDECL void do_lit_lut( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + if (in[0] > 0) + { + if (in[1] <= 0.0) + { + result[0] = 1.0F; + result[1] = in[0]; + result[2] = 1.0; + result[3] = 1.0F; + return; + } + + if (machine->lit_info[count].shine_tab->exponent != in[3]) { + machine->lit_info[count].func = aos_do_lit; + goto no_luck; + } + + if (in[1] <= 1.0) + { + const float *tab = machine->lit_info[count].shine_tab->values; + float f = in[1] * 256; + int k = (int)f; + float frac = f - (float)k; + + result[0] = 1.0F; + result[1] = in[0]; + result[2] = tab[k] + frac*(tab[k+1]-tab[k]); + result[3] = 1.0; + return; + } + + no_luck: + { + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); + result[0] = 1.0F; + result[1] = in[0]; + result[2] = powf(in[1], exponent); + result[3] = 1.0; + } + } + else + { + result[0] = 1.0F; + result[1] = 0.0; + result[2] = 0.0; + result[3] = 1.0F; + } +} + + +static void do_populate_lut( struct shine_tab *tab, + float unclamped_exponent ) +{ + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon)); + unsigned i; + + tab->exponent = unclamped_exponent; /* for later comparison */ + + tab->values[0] = 0; + if (exponent == 0) { + for (i = 1; i < 258; i++) { + tab->values[i] = 1.0; + } + } + else { + for (i = 1; i < 258; i++) { + tab->values[i] = powf((float)i * epsilon, exponent); + } + } +} + + + + +static void PIPE_CDECL populate_lut( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + unsigned i, tab; + + /* Search for an existing table for this value. Note that without + * static analysis we don't really know if in[3] will be constant, + * but it usually is... + */ + for (tab = 0; tab < 4; tab++) { + if (machine->shine_tab[tab].exponent == in[3]) { + goto found; + } + } + + for (tab = 0, i = 1; i < 4; i++) { + if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used) + tab = i; + } + + if (machine->shine_tab[tab].last_used == machine->now) { + /* No unused tables (this is not a ffvertex program...). Just + * call pow each time: + */ + machine->lit_info[count].func = aos_do_lit; + machine->lit_info[count].func( machine, result, in, count ); + return; + } + else { + do_populate_lut( &machine->shine_tab[tab], in[3] ); + } + + found: + machine->shine_tab[tab].last_used = machine->now; + machine->lit_info[count].shine_tab = &machine->shine_tab[tab]; + machine->lit_info[count].func = do_lit_lut; + machine->lit_info[count].func( machine, result, in, count ); +} + + +void draw_vs_aos_machine_constants( struct aos_machine *machine, + const float (*constants)[4] ) +{ + machine->constants = constants; + + { + unsigned i; + for (i = 0; i < MAX_LIT_INFO; i++) { + machine->lit_info[i].func = populate_lut; + machine->now++; + } + } +} + + +void draw_vs_aos_machine_viewport( struct aos_machine *machine, + const struct pipe_viewport_state *viewport ) +{ + memcpy(machine->scale, viewport->scale, 4 * sizeof(float)); + memcpy(machine->translate, viewport->translate, 4 * sizeof(float)); +} + + + +void draw_vs_aos_machine_destroy( struct aos_machine *machine ) +{ + align_free(machine); +} + +struct aos_machine *draw_vs_aos_machine( void ) +{ + struct aos_machine *machine; + unsigned i; + float inv = 1.0f/255.0f; + float f255 = 255.0f; + + machine = align_malloc(sizeof(struct aos_machine), 16); + if (!machine) + return NULL; + + memset(machine, 0, sizeof(*machine)); + + ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f); + *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff; + + ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f); + ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f); + ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f); + ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv); + ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255); + ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f); + + + machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP | + X87_CW_EXCEPTION_DENORM_OP | + X87_CW_EXCEPTION_ZERO_DIVIDE | + X87_CW_EXCEPTION_OVERFLOW | + X87_CW_EXCEPTION_UNDERFLOW | + X87_CW_EXCEPTION_PRECISION | + (1<<6) | + X87_CW_ROUND_NEAREST | + X87_CW_PRECISION_DOUBLE_EXT); + + assert(machine->fpu_rnd_nearest == 0x37f); + + machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP | + X87_CW_EXCEPTION_DENORM_OP | + X87_CW_EXCEPTION_ZERO_DIVIDE | + X87_CW_EXCEPTION_OVERFLOW | + X87_CW_EXCEPTION_UNDERFLOW | + X87_CW_EXCEPTION_PRECISION | + (1<<6) | + X87_CW_ROUND_DOWN | + X87_CW_PRECISION_DOUBLE_EXT); + + for (i = 0; i < MAX_SHINE_TAB; i++) + do_populate_lut( &machine->shine_tab[i], 1.0f ); + + return machine; +} + + diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 784ae41205..18cb06e374 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -44,8 +44,6 @@ struct draw_vs_varient_generic { struct draw_vs_varient base; - struct pipe_viewport_state viewport; - struct draw_vertex_shader *shader; struct draw_context *draw; @@ -57,21 +55,11 @@ struct draw_vs_varient_generic { */ struct translate *fetch; struct translate *emit; - - const float (*constants)[4]; }; -static void vsvg_set_constants( struct draw_vs_varient *varient, - const float (*constants)[4] ) -{ - struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; - - vsvg->constants = constants; -} - static void vsvg_set_input( struct draw_vs_varient *varient, unsigned buffer, @@ -94,8 +82,8 @@ static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg, void *output_buffer ) { char *ptr = (char *)output_buffer; - const float *scale = vsvg->viewport.scale; - const float *trans = vsvg->viewport.translate; + const float *scale = vsvg->base.vs->draw->viewport.scale; + const float *trans = vsvg->base.vs->draw->viewport.translate; unsigned stride = vsvg->base.key.output_stride; unsigned j; @@ -115,8 +103,8 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg, void *output_buffer ) { char *ptr = (char *)output_buffer; - const float *scale = vsvg->viewport.scale; - const float *trans = vsvg->viewport.translate; + const float *scale = vsvg->base.vs->draw->viewport.scale; + const float *trans = vsvg->base.vs->draw->viewport.translate; unsigned stride = vsvg->base.key.output_stride; unsigned j; @@ -130,10 +118,10 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg, } -static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, - const unsigned *elts, - unsigned count, - void *output_buffer ) +static void vsvg_run_elts( struct draw_vs_varient *varient, + const unsigned *elts, + unsigned count, + void *output_buffer) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; @@ -150,7 +138,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, output_buffer, output_buffer, - vsvg->constants, + (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, count, vsvg->base.key.output_stride, vsvg->base.key.output_stride); @@ -186,10 +174,10 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, } -static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, - unsigned start, - unsigned count, - void *output_buffer ) +static void vsvg_run_linear( struct draw_vs_varient *varient, + unsigned start, + unsigned count, + void *output_buffer ) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; @@ -206,7 +194,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, output_buffer, output_buffer, - vsvg->constants, + (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, count, vsvg->base.key.output_stride, vsvg->base.key.output_stride); @@ -245,13 +233,6 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, -static void vsvg_set_viewport( struct draw_vs_varient *varient, - const struct pipe_viewport_state *viewport ) -{ - struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; - - vsvg->viewport = *viewport; -} static void vsvg_destroy( struct draw_vs_varient *varient ) { @@ -272,8 +253,6 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, vsvg->base.key = *key; vsvg->base.vs = vs; vsvg->base.set_input = vsvg_set_input; - vsvg->base.set_constants = vsvg_set_constants; - vsvg->base.set_viewport = vsvg_set_viewport; vsvg->base.run_elts = vsvg_run_elts; vsvg->base.run_linear = vsvg_run_linear; vsvg->base.destroy = vsvg_destroy; -- cgit v1.2.3 From 6945bcb89370501e0a218bc656e68e30e4dadcda Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 29 May 2008 18:28:02 +0900 Subject: draw: Put PIPE_CDECL in the right places. MSVC seems picky about this. --- src/gallium/auxiliary/draw/draw_vs_aos.h | 4 ++-- src/gallium/auxiliary/draw/draw_vs_aos_machine.c | 4 ++-- src/gallium/auxiliary/draw/draw_vs_varient.c | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 89a9174151..665b425e68 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -60,12 +60,12 @@ struct x86_function; #define FPU_RND_NEAREST 2 struct aos_machine; -typedef void PIPE_CDECL (*lit_func)( struct aos_machine *, +typedef void (PIPE_CDECL *lit_func)( struct aos_machine *, float *result, const float *in, unsigned count ); -PIPE_CDECL void aos_do_lit( struct aos_machine *machine, +void PIPE_CDECL aos_do_lit( struct aos_machine *machine, float *result, const float *in, unsigned count ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c index 53e999b191..b7864e9f2f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -57,7 +57,7 @@ #define X87_CW_INFINITY (1<<12) -PIPE_CDECL void aos_do_lit( struct aos_machine *machine, +void PIPE_CDECL aos_do_lit( struct aos_machine *machine, float *result, const float *in, unsigned count ) @@ -91,7 +91,7 @@ PIPE_CDECL void aos_do_lit( struct aos_machine *machine, } -static PIPE_CDECL void do_lit_lut( struct aos_machine *machine, +static void PIPE_CDECL do_lit_lut( struct aos_machine *machine, float *result, const float *in, unsigned count ) diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 18cb06e374..4a155251b5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -118,10 +118,10 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg, } -static void vsvg_run_elts( struct draw_vs_varient *varient, - const unsigned *elts, - unsigned count, - void *output_buffer) +static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, + const unsigned *elts, + unsigned count, + void *output_buffer) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; @@ -174,10 +174,10 @@ static void vsvg_run_elts( struct draw_vs_varient *varient, } -static void vsvg_run_linear( struct draw_vs_varient *varient, - unsigned start, - unsigned count, - void *output_buffer ) +static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, + unsigned start, + unsigned count, + void *output_buffer ) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; -- cgit v1.2.3 From 8f887b4252208e60e7e86217ec3b72fb639a4e82 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 29 May 2008 13:26:01 +0100 Subject: draw: michal's patch for calling powf... teapot still not quite right --- src/gallium/auxiliary/draw/draw_vs_aos.c | 46 ++++++++++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs_aos.h | 1 + 2 files changed, 47 insertions(+) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 55cabb6df9..725f36b502 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1372,11 +1372,20 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } +/* A wrapper for powf(). + * Makes sure it is cdecl and operates on floats. + */ +static float PIPE_CDECL _powerf( float x, float y ) +{ + return powf( x, y ); +} + /* Really not sufficient -- need to check for conditions that could * generate inf/nan values, which will slow things down hugely. */ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { +#if 0 x87_fld_src(cp, &op->FullSrcRegisters[1], 0); /* a1.x */ x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0.x a1.x */ x87_fyl2x(cp->func); /* a1*log2(a0) */ @@ -1384,6 +1393,42 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst x87_emit_ex2( cp ); /* 2^(a1*log2(a0)) */ x87_fstp_dest4(cp, &op->FullDstRegisters[0]); +#else + uint i; + + /* For absolute correctness, need to spill/invalidate all XMM regs + * too. + */ + for (i = 0; i < 8; i++) { + if (cp->xmm[i].dirty) + spill(cp, i); + aos_release_xmm_reg(cp, i); + } + + /* Push caller-save (ie scratch) regs. + */ + x86_cdecl_caller_push_regs( cp->func ); + + x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -8) ); + + x87_fld_src( cp, &op->FullSrcRegisters[1], 0 ); + x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 4 ) ); + x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); + x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); + + x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _powerf ); + x86_call( cp->func, cp->tmp_EAX ); + + x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 8) ); + + x86_cdecl_caller_pop_regs( cp->func ); + + /* Note retval on x87 stack: + */ + cp->func->x87_stack++; + + x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); +#endif return TRUE; } @@ -1781,6 +1826,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, cp.machine_EDX = x86_make_reg(file_REG32, reg_DX); cp.count_ESI = x86_make_reg(file_REG32, reg_SI); cp.temp_EBP = x86_make_reg(file_REG32, reg_BP); + cp.stack_ESP = x86_make_reg( file_REG32, reg_SP ); x86_init_func(cp.func); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 665b425e68..fb6d43d32e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -160,6 +160,7 @@ struct aos_compilation { struct x86_reg machine_EDX; struct x86_reg count_ESI; /* decrements to zero */ struct x86_reg temp_EBP; + struct x86_reg stack_ESP; }; struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp ); -- cgit v1.2.3 From f27c7729a98937a761eacceabdfd03f9d694d257 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 6 Jun 2008 13:29:59 +0900 Subject: draw: Compile draw_vs_aos only on x86. --- src/gallium/auxiliary/draw/draw_vs_aos.h | 4 ++++ src/gallium/auxiliary/draw/draw_vs_aos_machine.c | 6 ++++++ 2 files changed, 10 insertions(+) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index fb6d43d32e..f6f6af2dd9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -31,6 +31,9 @@ #ifndef DRAW_VS_AOS_H #define DRAW_VS_AOS_H +#include "pipe/p_config.h" + +#ifdef PIPE_ARCH_X86 struct tgsi_token; struct x86_function; @@ -239,6 +242,7 @@ struct draw_vs_varient_aos_sse { }; +#endif #endif diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c index b7864e9f2f..dcb6c2c8d4 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -26,6 +26,10 @@ **************************************************************************/ +#include "pipe/p_config.h" + +#ifdef PIPE_ARCH_X86 + #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/util/tgsi_parse.h" @@ -295,3 +299,5 @@ struct aos_machine *draw_vs_aos_machine( void ) } +#endif + -- cgit v1.2.3 From bd9264210097d08073a4ea3619ca25db56245280 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 11 Jun 2008 09:36:00 +0100 Subject: draw: remove another debug assert on failover to generic vs varient --- src/gallium/auxiliary/draw/draw_vs_aos.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index f6f6af2dd9..66944a4e33 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -202,9 +202,8 @@ struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp, #define ERROR(cp, msg) \ do { \ - debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, msg); \ + if (0) debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, msg); \ cp->error = 1; \ - assert(0); \ } while (0) -- cgit v1.2.3 From 08f1b8ac709105d42ec34f8b8a81421e3b0fbc81 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 12 Jun 2008 16:01:05 -0600 Subject: gallium: fix SSE codegen for instructions that use both a CONSTANT and IMMEDIATE Fixes codegen for instructions like MUL dst, CONST[0], IMM[0]; the two operands would up getting aliased in the x86/sse code. Fixes glean/vertProg1/fogparams test. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 26 +++++++++++++++++++------- src/gallium/auxiliary/draw/draw_vs_aos.h | 3 ++- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 5d4a8b38c8..388dd3fbee 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -67,19 +67,30 @@ static INLINE boolean eq( struct x86_reg a, } struct x86_reg aos_get_x86( struct aos_compilation *cp, + unsigned which_reg, /* quick hack */ unsigned value ) { - if (cp->ebp != value) { + struct x86_reg reg; + + if (which_reg == 0) + reg = cp->temp_EBP; + else + reg = cp->tmp_EAX; + + if (cp->x86_reg[which_reg] != value) { unsigned offset; switch (value) { case X86_IMMEDIATES: + assert(which_reg == 0); offset = Offset(struct aos_machine, immediates); break; case X86_CONSTANTS: + assert(which_reg == 1); offset = Offset(struct aos_machine, constants); break; case X86_ATTRIBS: + assert(which_reg == 0); offset = Offset(struct aos_machine, attrib); break; default: @@ -87,14 +98,14 @@ struct x86_reg aos_get_x86( struct aos_compilation *cp, offset = 0; } - x86_mov(cp->func, cp->temp_EBP, + + x86_mov(cp->func, reg, x86_make_disp(cp->machine_EDX, offset)); - /* x86_deref(x86_make_disp(cp->machine_EDX, offset))); */ - cp->ebp = value; + cp->x86_reg[which_reg] = value; } - return cp->temp_EBP; + return reg; } @@ -118,10 +129,10 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx])); case TGSI_FILE_IMMEDIATE: - return x86_make_disp(aos_get_x86(cp, X86_IMMEDIATES), idx * 4 * sizeof(float)); + return x86_make_disp(aos_get_x86(cp, 0, X86_IMMEDIATES), idx * 4 * sizeof(float)); case TGSI_FILE_CONSTANT: - return x86_make_disp(aos_get_x86(cp, X86_CONSTANTS), idx * 4 * sizeof(float)); + return x86_make_disp(aos_get_x86(cp, 1, X86_CONSTANTS), idx * 4 * sizeof(float)); default: ERROR(cp, "unknown reg file"); @@ -1413,6 +1424,7 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); + /* tmp_EAX has been pushed & will be restored below */ x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _powerf ); x86_call( cp->func, cp->tmp_EAX ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 66944a4e33..64e021ff6b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -145,7 +145,7 @@ struct aos_compilation { unsigned last_used; } xmm[8]; - unsigned ebp; /* one of X86_* */ + unsigned x86_reg[2]; /* one of X86_* */ boolean input_fetched[PIPE_MAX_ATTRIBS]; unsigned output_last_write[PIPE_MAX_ATTRIBS]; @@ -213,6 +213,7 @@ do { \ #define X86_ATTRIBS 3 struct x86_reg aos_get_x86( struct aos_compilation *cp, + unsigned which_reg, unsigned value ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index b720185709..6b92811870 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -96,7 +96,7 @@ static void get_src_ptr( struct aos_compilation *cp, struct x86_reg elt, unsigned a ) { - struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, X86_ATTRIBS ), + struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, 0, X86_ATTRIBS ), a * sizeof(struct aos_attrib)); struct x86_reg input_ptr = x86_make_disp(attrib, -- cgit v1.2.3 From feea0c9d958bc1645b09b288cd4d4756d0d6e61a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 2 Sep 2008 18:04:08 -0600 Subject: gallium: fix out of bounds array errors in SSE execution 1. #define MAX_INPUTS/OUTPUTS/TEMPS/etc with better values. 2. Add assertions in aos_get_x86() to check register file indexes 3. Assert that constant regs haven't changed after running SSE code. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 11 +++++++++++ src/gallium/auxiliary/draw/draw_vs_aos.h | 10 +++++----- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 760fcb389f..a556477a76 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -32,6 +32,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "pipe/p_shader_tokens.h" +#include "pipe/p_debug.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" @@ -119,21 +120,26 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, switch (file) { case TGSI_FILE_INPUT: + assert(idx < MAX_INPUTS); return x86_make_disp(ptr, Offset(struct aos_machine, input[idx])); case TGSI_FILE_OUTPUT: return x86_make_disp(ptr, Offset(struct aos_machine, output[idx])); case TGSI_FILE_TEMPORARY: + assert(idx < MAX_TEMPS); return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx])); case AOS_FILE_INTERNAL: + assert(idx < MAX_INTERNALS); return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx])); case TGSI_FILE_IMMEDIATE: + assert(idx < MAX_IMMEDIATES); /* just a sanity check */ return x86_make_disp(aos_get_x86(cp, 0, X86_IMMEDIATES), idx * 4 * sizeof(float)); case TGSI_FILE_CONSTANT: + assert(idx < MAX_CONSTANTS); /* just a sanity check */ return x86_make_disp(aos_get_x86(cp, 1, X86_CONSTANTS), idx * 4 * sizeof(float)); default: @@ -2108,6 +2114,11 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, start, count, output_buffer ); + + /* Sanity spot checks to make sure we didn't trash our constants */ + assert(machine->internal[IMM_ONES][0] == 1.0f); + assert(machine->internal[IMM_IDENTITY][0] == 0.0f); + assert(machine->internal[IMM_NEGS][0] == -1.0f); } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 64e021ff6b..7fe6f79db0 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -51,11 +51,11 @@ struct x86_function; #define W 3 #define MAX_INPUTS PIPE_MAX_ATTRIBS -#define MAX_OUTPUTS PIPE_MAX_ATTRIBS -#define MAX_TEMPS PIPE_MAX_ATTRIBS /* say */ -#define MAX_CONSTANTS PIPE_MAX_ATTRIBS /* say */ -#define MAX_IMMEDIATES PIPE_MAX_ATTRIBS /* say */ -#define MAX_INTERNALS 8 +#define MAX_OUTPUTS PIPE_MAX_SHADER_OUTPUTS +#define MAX_TEMPS TGSI_EXEC_NUM_TEMPS +#define MAX_CONSTANTS 1024 /** only used for sanity checking */ +#define MAX_IMMEDIATES 1024 /** only used for sanity checking */ +#define MAX_INTERNALS 8 /** see IMM_x values below */ #define AOS_FILE_INTERNAL TGSI_FILE_COUNT -- cgit v1.2.3 From a15699c3f54edb5d5b42960e7568e587b752e407 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 1 Oct 2008 13:34:38 +0100 Subject: draw: add streamlined paths for fetching linear verts --- src/gallium/auxiliary/draw/draw_vs_aos.c | 44 +++++---- src/gallium/auxiliary/draw/draw_vs_aos.h | 19 ++-- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 137 +++++++++++++++++++++------- 3 files changed, 134 insertions(+), 66 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index a556477a76..4c794e0e23 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -92,9 +92,9 @@ struct x86_reg aos_get_x86( struct aos_compilation *cp, assert(which_reg == 1); offset = Offset(struct aos_machine, constants); break; - case X86_ATTRIBS: + case X86_BUFFERS: assert(which_reg == 0); - offset = Offset(struct aos_machine, attrib); + offset = Offset(struct aos_machine, buffer); break; default: assert(0); @@ -1939,6 +1939,8 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, save_fpu_state( &cp ); set_fpu_round_nearest( &cp ); + aos_init_inputs( &cp, linear ); + /* Note address for loop jump */ label = x86_get_label(cp.func); @@ -2018,13 +2020,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, /* Incr index */ - if (linear) { - x86_inc(cp.func, cp.idx_EBX); - } - else { - x86_lea(cp.func, cp.idx_EBX, x86_make_disp(cp.idx_EBX, 4)); - } - + aos_incr_inputs( &cp, linear ); } /* decr count, loop if not zero */ @@ -2065,14 +2061,10 @@ static void vaos_set_buffer( struct draw_vs_varient *varient, unsigned stride ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - unsigned i; - for (i = 0; i < vaos->base.key.nr_inputs; i++) { - if (vaos->base.key.element[i].in.buffer == buf) { - vaos->attrib[i].input_ptr = ((char *)ptr + - vaos->base.key.element[i].in.offset); - vaos->attrib[i].input_stride = stride; - } + if (buf < vaos->nr_vb) { + vaos->buffer[buf].base_ptr = (char *)ptr; + vaos->buffer[buf].stride = stride; } } @@ -2089,7 +2081,7 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; - machine->attrib = vaos->attrib; + machine->buffer = vaos->buffer; vaos->gen_run_elts( machine, elts, @@ -2108,7 +2100,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; - machine->attrib = vaos->attrib; + machine->buffer = vaos->buffer; vaos->gen_run_linear( machine, start, @@ -2127,7 +2119,7 @@ static void vaos_destroy( struct draw_vs_varient *varient ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - FREE( vaos->attrib ); + FREE( vaos->buffer ); x86_release_func( &vaos->func[0] ); x86_release_func( &vaos->func[1] ); @@ -2140,6 +2132,7 @@ static void vaos_destroy( struct draw_vs_varient *varient ) static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, const struct draw_vs_varient_key *key ) { + unsigned i; struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse); if (!vaos) @@ -2154,10 +2147,15 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, vaos->draw = vs->draw; - vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) ); - if (!vaos->attrib) + for (i = 0; i < key->nr_inputs; i++) + vaos->nr_vb = MAX2( vaos->nr_vb, key->element[i].in.buffer + 1 ); + + vaos->buffer = MALLOC( vaos->nr_vb * sizeof(vaos->buffer[0]) ); + if (!vaos->buffer) goto fail; + debug_printf("nr_vb: %d\n", vaos->nr_vb); + #if 0 tgsi_dump(vs->state.tokens, 0); #endif @@ -2179,8 +2177,8 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, return &vaos->base; fail: - if (vaos && vaos->attrib) - FREE(vaos->attrib); + if (vaos && vaos->buffer) + FREE(vaos->buffer); if (vaos) x86_release_func( &vaos->func[0] ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 7fe6f79db0..306392e5d6 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -87,9 +87,10 @@ struct lit_info { #define MAX_SHINE_TAB 4 #define MAX_LIT_INFO 16 -struct aos_attrib { - const void *input_ptr; - unsigned input_stride; +struct aos_buffer { + const void *base_ptr; + unsigned stride; + void *ptr; /* updated per vertex */ }; @@ -123,7 +124,7 @@ struct aos_machine { const float (*immediates)[4]; /* points to shader data */ const float (*constants)[4]; /* points to draw data */ - const struct aos_attrib *attrib; /* points to ? */ + const struct aos_buffer *buffer; /* points to ? */ }; @@ -179,8 +180,9 @@ struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, unsigned file, unsigned idx ); -boolean aos_fetch_inputs( struct aos_compilation *cp, - boolean linear ); +boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ); +boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ); +boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ); boolean aos_emit_outputs( struct aos_compilation *cp ); @@ -210,7 +212,7 @@ do { \ #define X86_NULL 0 #define X86_IMMEDIATES 1 #define X86_CONSTANTS 2 -#define X86_ATTRIBS 3 +#define X86_BUFFERS 3 struct x86_reg aos_get_x86( struct aos_compilation *cp, unsigned which_reg, @@ -232,7 +234,8 @@ struct draw_vs_varient_aos_sse { struct draw_vs_varient base; struct draw_context *draw; - struct aos_attrib *attrib; + struct aos_buffer *buffer; + unsigned nr_vb; vaos_run_linear_func gen_run_linear; vaos_run_elts_func gen_run_elts; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 26297c74f8..8e08b9285f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -95,28 +95,6 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp, -static void get_src_ptr( struct aos_compilation *cp, - struct x86_reg src, - struct x86_reg elt, - unsigned a ) -{ - struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, 0, X86_ATTRIBS ), - a * sizeof(struct aos_attrib)); - - struct x86_reg input_ptr = x86_make_disp(attrib, - Offset(struct aos_attrib, input_ptr)); - - struct x86_reg input_stride = x86_make_disp(attrib, - Offset(struct aos_attrib, input_stride)); - - /* Calculate pointer to current attrib: - */ - x86_mov(cp->func, src, input_stride); - x86_imul(cp->func, src, elt); - x86_add(cp->func, src, input_ptr); -} - - /* Extended swizzles? Maybe later. */ static void emit_swizzle( struct aos_compilation *cp, @@ -128,22 +106,44 @@ static void emit_swizzle( struct aos_compilation *cp, } + +static boolean get_buffer_ptr( struct aos_compilation *cp, + unsigned buf_idx, + struct x86_reg elt, + struct x86_reg ptr) +{ + struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), + buf_idx * sizeof(struct aos_buffer)); + + struct x86_reg buf_base_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, base_ptr)); + + struct x86_reg buf_stride = x86_make_disp(buf, + Offset(struct aos_buffer, stride)); + + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_stride); + x86_imul(cp->func, ptr, elt); + x86_add(cp->func, ptr, buf_base_ptr); + + return TRUE; +} + + + + static boolean load_input( struct aos_compilation *cp, unsigned idx, - boolean linear ) + struct x86_reg bufptr ) { unsigned format = cp->vaos->base.key.element[idx].in.format; - struct x86_reg src = cp->tmp_EAX; + unsigned offset = cp->vaos->base.key.element[idx].in.offset; struct x86_reg dataXMM = aos_get_xmm_reg(cp); /* Figure out source pointer address: */ - get_src_ptr(cp, - src, - linear ? cp->idx_EBX : x86_deref(cp->idx_EBX), - idx); - - src = x86_deref(src); + struct x86_reg src = x86_make_disp(bufptr, offset); aos_adopt_xmm_reg( cp, dataXMM, @@ -179,20 +179,87 @@ static boolean load_input( struct aos_compilation *cp, return TRUE; } - -boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) +static boolean load_inputs( struct aos_compilation *cp, + unsigned buffer, + struct x86_reg ptr ) { unsigned i; - + for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) { - if (!load_input( cp, i, linear )) + if (cp->vaos->base.key.element[i].in.buffer == buffer) { + + if (!load_input( cp, i, ptr )) + return FALSE; + + cp->insn_counter++; + } + } + + return TRUE; +} + +boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ) +{ + if (linear && cp->vaos->nr_vb == 1) { + + struct x86_reg elt = cp->idx_EBX; + struct x86_reg ptr = cp->tmp_EAX; + + if (!get_buffer_ptr( cp, 0, elt, ptr )) return FALSE; - cp->insn_counter++; + + /* In the linear, single buffer case, keep the buffer pointer + * instead of the index number. + */ + x86_mov( cp->func, elt, ptr ); + } + + return TRUE; +} + +boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) +{ + if (linear && cp->vaos->nr_vb == 1) { + + load_inputs( cp, 0, cp->idx_EBX ); + + } + else { + struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX); + unsigned j; + + for (j = 0; j < cp->vaos->nr_vb; j++) { + struct x86_reg ptr = cp->tmp_EAX; + + if (!get_buffer_ptr( cp, j, elt, ptr )) + return FALSE; + + cp->insn_counter++; + + if (!load_inputs( cp, j, ptr )) + return FALSE; + } } return TRUE; } +boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ) +{ + if (linear && cp->vaos->nr_vb == 1) { + struct x86_reg stride = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), + (0 * sizeof(struct aos_buffer) + + Offset(struct aos_buffer, stride))); + + x86_add(cp->func, cp->idx_EBX, stride); + } + else if (linear) { + x86_inc(cp->func, cp->idx_EBX); + } + else { + x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4)); + } +} -- cgit v1.2.3 From 21f98ad30aaeab5085d12278830f485e61b47cc1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 1 Oct 2008 18:40:01 +0100 Subject: draw: don't keep refetching constant inputs --- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 37 +++--- src/gallium/auxiliary/draw/draw_vs.h | 4 +- src/gallium/auxiliary/draw/draw_vs_aos.c | 26 ++++- src/gallium/auxiliary/draw/draw_vs_aos.h | 2 + src/gallium/auxiliary/draw/draw_vs_aos_io.c | 127 +++++++++++++++------ src/gallium/auxiliary/draw/draw_vs_varient.c | 10 +- 6 files changed, 144 insertions(+), 62 deletions(-) (limited to 'src/gallium/auxiliary/draw/draw_vs_aos.h') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 73fc70c1bc..a0e08dd10a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -79,6 +79,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs; const struct vertex_info *vinfo; unsigned i; + unsigned nr_vbs = 0; if (!draw->render->set_primitive( draw->render, @@ -102,7 +103,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, fse->key.viewport = !draw->identity_viewport; fse->key.clip = !draw->bypass_clipping; - fse->key.pad = 0; + fse->key.const_vbuffers = 0; memset(fse->key.element, 0, fse->key.nr_elements * sizeof(fse->key.element[0])); @@ -116,9 +117,16 @@ static void fse_prepare( struct draw_pt_middle_end *middle, */ fse->key.element[i].in.buffer = src->vertex_buffer_index; fse->key.element[i].in.offset = src->src_offset; + nr_vbs = MAX2(nr_vbs, src->vertex_buffer_index + 1); } + for (i = 0; i < 5 && i < nr_vbs; i++) { + if (draw->pt.vertex_buffer[i].pitch == 0) + fse->key.const_vbuffers |= (1<key.const_vbuffers); + { unsigned dst_offset = 0; @@ -162,13 +170,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, } } - - /* Would normally look up a vertex shader and peruse its list of - * varients somehow. We omitted that step and put all the - * hardcoded "shaders" into an array. We're just making the - * assumption that this happens to be a matching shader... ie - * you're running isosurf, aren't you? - */ + fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader, &fse->key ); @@ -177,18 +179,17 @@ static void fse_prepare( struct draw_pt_middle_end *middle, return ; } + if (0) debug_printf("%s: found const_vbuffers: %x\n", __FUNCTION__, + fse->active->key.const_vbuffers); + /* Now set buffer pointers: */ - for (i = 0; i < num_vs_inputs; i++) { - unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index; - - fse->active->set_input( fse->active, - i, - - ((const ubyte *) draw->pt.user.vbuffer[buf] + - draw->pt.vertex_buffer[buf].buffer_offset), - - draw->pt.vertex_buffer[buf].pitch ); + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + fse->active->set_buffer( fse->active, + i, + ((const ubyte *) draw->pt.user.vbuffer[i] + + draw->pt.vertex_buffer[i].buffer_offset), + draw->pt.vertex_buffer[i].pitch ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 45992d1986..68c24abad3 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -64,7 +64,7 @@ struct draw_vs_varient_key { unsigned nr_outputs:8; unsigned viewport:1; unsigned clip:1; - unsigned pad:5; + unsigned const_vbuffers:5; struct draw_varient_element element[PIPE_MAX_ATTRIBS]; }; @@ -76,7 +76,7 @@ struct draw_vs_varient { struct draw_vertex_shader *vs; - void (*set_input)( struct draw_vs_varient *, + void (*set_buffer)( struct draw_vs_varient *, unsigned i, const void *ptr, unsigned stride ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 4c794e0e23..87232865e2 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -196,6 +196,18 @@ static void spill( struct aos_compilation *cp, unsigned idx ) } +void aos_spill_all( struct aos_compilation *cp ) +{ + unsigned i; + + for (i = 0; i < 8; i++) { + if (cp->xmm[i].dirty) + spill(cp, i); + aos_release_xmm_reg(cp, i); + } +} + + static struct x86_reg get_xmm_writable( struct aos_compilation *cp, struct x86_reg reg ) { @@ -1941,6 +1953,9 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, aos_init_inputs( &cp, linear ); + cp.x86_reg[0] = 0; + cp.x86_reg[1] = 0; + /* Note address for loop jump */ label = x86_get_label(cp.func); @@ -2066,6 +2081,8 @@ static void vaos_set_buffer( struct draw_vs_varient *varient, vaos->buffer[buf].base_ptr = (char *)ptr; vaos->buffer[buf].stride = stride; } + + if (0) debug_printf("%s %d/%d: %p %d\n", __FUNCTION__, buf, vaos->nr_vb, ptr, stride); } @@ -2078,6 +2095,8 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; struct aos_machine *machine = vaos->draw->vs.aos_machine; + if (0) debug_printf("%s %d\n", __FUNCTION__, count); + machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; @@ -2097,6 +2116,9 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; struct aos_machine *machine = vaos->draw->vs.aos_machine; + if (0) debug_printf("%s %d %d const: %x\n", __FUNCTION__, start, count, + vaos->base.key.const_vbuffers); + machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; @@ -2140,7 +2162,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, vaos->base.key = *key; vaos->base.vs = vs; - vaos->base.set_input = vaos_set_buffer; + vaos->base.set_buffer = vaos_set_buffer; vaos->base.destroy = vaos_destroy; vaos->base.run_linear = vaos_run_linear; vaos->base.run_elts = vaos_run_elts; @@ -2154,7 +2176,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, if (!vaos->buffer) goto fail; - debug_printf("nr_vb: %d\n", vaos->nr_vb); + debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers); #if 0 tgsi_dump(vs->state.tokens, 0); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 306392e5d6..264387517b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -176,6 +176,8 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp, unsigned idx, unsigned dirty ); +void aos_spill_all( struct aos_compilation *cp ); + struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, unsigned file, unsigned idx ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 8e08b9285f..b0c51d7fa1 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -108,29 +108,45 @@ static void emit_swizzle( struct aos_compilation *cp, static boolean get_buffer_ptr( struct aos_compilation *cp, - unsigned buf_idx, - struct x86_reg elt, - struct x86_reg ptr) + boolean linear, + unsigned buf_idx, + struct x86_reg elt, + struct x86_reg ptr) { struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), buf_idx * sizeof(struct aos_buffer)); - struct x86_reg buf_base_ptr = x86_make_disp(buf, - Offset(struct aos_buffer, base_ptr)); - struct x86_reg buf_stride = x86_make_disp(buf, Offset(struct aos_buffer, stride)); + if (linear) { + struct x86_reg buf_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, ptr)); - /* Calculate pointer to current attrib: - */ - x86_mov(cp->func, ptr, buf_stride); - x86_imul(cp->func, ptr, elt); - x86_add(cp->func, ptr, buf_base_ptr); - return TRUE; -} + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_ptr); + x86_mov(cp->func, elt, buf_stride); + x86_add(cp->func, elt, ptr); + sse_prefetchnta(cp->func, x86_deref(elt)); + x86_mov(cp->func, buf_ptr, elt); + } + else { + struct x86_reg buf_base_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, base_ptr)); + + + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_stride); + x86_imul(cp->func, ptr, elt); + x86_add(cp->func, ptr, buf_base_ptr); + } + cp->insn_counter++; + return TRUE; +} static boolean load_input( struct aos_compilation *cp, @@ -200,18 +216,57 @@ static boolean load_inputs( struct aos_compilation *cp, boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ) { - if (linear && cp->vaos->nr_vb == 1) { + unsigned i; + for (i = 0; i < cp->vaos->nr_vb; i++) { + struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), + i * sizeof(struct aos_buffer)); - struct x86_reg elt = cp->idx_EBX; - struct x86_reg ptr = cp->tmp_EAX; + struct x86_reg buf_base_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, base_ptr)); - if (!get_buffer_ptr( cp, 0, elt, ptr )) - return FALSE; + if (cp->vaos->base.key.const_vbuffers & (1<tmp_EAX; - /* In the linear, single buffer case, keep the buffer pointer - * instead of the index number. - */ - x86_mov( cp->func, elt, ptr ); + x86_mov(cp->func, ptr, buf_base_ptr); + + /* Load all inputs for this constant vertex buffer + */ + load_inputs( cp, i, x86_deref(ptr) ); + + /* Then just force them out to aos_machine.input[] + */ + aos_spill_all( cp ); + + } + else if (linear) { + + struct x86_reg elt = cp->idx_EBX; + struct x86_reg ptr = cp->tmp_EAX; + + struct x86_reg buf_stride = x86_make_disp(buf, + Offset(struct aos_buffer, stride)); + + struct x86_reg buf_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, ptr)); + + + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_stride); + x86_imul(cp->func, ptr, elt); + x86_add(cp->func, ptr, buf_base_ptr); + + + /* In the linear case, keep the buffer pointer instead of the + * index number. + */ + if (cp->vaos->nr_vb == 1) + x86_mov( cp->func, elt, ptr ); + else + x86_mov( cp->func, buf_ptr, ptr ); + + cp->insn_counter++; + } } return TRUE; @@ -219,23 +274,22 @@ boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ) boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) { - if (linear && cp->vaos->nr_vb == 1) { - - load_inputs( cp, 0, cp->idx_EBX ); + unsigned j; - } - else { - struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX); - unsigned j; - - for (j = 0; j < cp->vaos->nr_vb; j++) { + for (j = 0; j < cp->vaos->nr_vb; j++) { + if (cp->vaos->base.key.const_vbuffers & (1<vaos->nr_vb == 1) { + load_inputs( cp, 0, cp->idx_EBX ); + } + else { + struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX); struct x86_reg ptr = cp->tmp_EAX; - if (!get_buffer_ptr( cp, j, elt, ptr )) + if (!get_buffer_ptr( cp, linear, j, elt, ptr )) return FALSE; - cp->insn_counter++; - if (!load_inputs( cp, j, ptr )) return FALSE; } @@ -252,13 +306,16 @@ boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ) Offset(struct aos_buffer, stride))); x86_add(cp->func, cp->idx_EBX, stride); + sse_prefetchnta(cp->func, x86_deref(cp->idx_EBX)); } else if (linear) { - x86_inc(cp->func, cp->idx_EBX); + /* Nothing to do */ } else { x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4)); } + + return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 4daf05dae7..7ee567d478 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -64,10 +64,10 @@ struct draw_vs_varient_generic { -static void vsvg_set_input( struct draw_vs_varient *varient, - unsigned buffer, - const void *ptr, - unsigned stride ) +static void vsvg_set_buffer( struct draw_vs_varient *varient, + unsigned buffer, + const void *ptr, + unsigned stride ) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; @@ -265,7 +265,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, vsvg->base.key = *key; vsvg->base.vs = vs; - vsvg->base.set_input = vsvg_set_input; + vsvg->base.set_buffer = vsvg_set_buffer; vsvg->base.run_elts = vsvg_run_elts; vsvg->base.run_linear = vsvg_run_linear; vsvg->base.destroy = vsvg_destroy; -- cgit v1.2.3