From 665605234d2aed2baa22fa621fa02478b2c08a4d Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Fri, 6 Jun 2008 21:13:12 +0200 Subject: r300: Remove unused variable r300_tex_obj::dirty_state and related defines This variable was only ever written to, but nobody used its value. --- src/mesa/drivers/dri/r300/r300_context.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_context.h') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index a5ec5ee46e..b33dfeafdd 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -179,13 +179,6 @@ struct r300_tex_obj { GLuint bufAddr; /* Offset to start of locally shared texture block */ - GLuint dirty_state; /* Flags (1 per texunit) for - whether or not this texobj - has dirty hardware state - (pp_*) that needs to be - brought into the - texunit. */ - drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; /* Six, for the cube faces */ -- cgit v1.2.3 From cb1687660844d42f929e11a2261c0eeb3fe859be Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 7 Jun 2008 21:07:28 +0200 Subject: r300: Some cleanups in depth and stencil state handling This also fixes a bug with Zfunc set to GL_NEVER in glean/paths. --- src/mesa/drivers/dri/r300/r300_context.h | 2 -- src/mesa/drivers/dri/r300/r300_ioctl.c | 2 +- src/mesa/drivers/dri/r300/r300_state.c | 37 ++++++-------------------------- 3 files changed, 8 insertions(+), 33 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_context.h') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index b33dfeafdd..3ac87d173b 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -574,9 +574,7 @@ struct r300_depthbuffer_state { }; struct r300_stencilbuffer_state { - GLuint clear; GLboolean hw_stencil; - }; /* Vertex shader state */ diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index fbe6920745..6af23300f2 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -148,7 +148,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) e32(t1); e32(t2); e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) | - (r300->state.stencil.clear & R300_STENCILREF_MASK)); + (ctx->Stencil.Clear & R300_STENCILREF_MASK)); } cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__); diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 7602f12d81..b36ca7aef8 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -525,24 +525,15 @@ static void r300SetDepthState(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); R300_STATECHANGE(r300, zs); - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE; // XXX - r300->hw.zs.cmd[R300_ZS_CNTL_1] &= - ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT); + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE|R300_STENCIL_FRONT_BACK; + r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT); - if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) { + if (ctx->Depth.Test) { + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE; if (ctx->Depth.Mask) - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= - R300_Z_ENABLE | R300_Z_WRITE_ENABLE | R300_STENCIL_FRONT_BACK; // XXX - else - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE | R300_STENCIL_FRONT_BACK; // XXX - + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_WRITE_ENABLE; r300->hw.zs.cmd[R300_ZS_CNTL_1] |= - translate_func(ctx->Depth. - Func) << R300_Z_FUNC_SHIFT; - } else { - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK; // XXX - r300->hw.zs.cmd[R300_ZS_CNTL_1] |= - translate_func(GL_NEVER) << R300_Z_FUNC_SHIFT; + translate_func(ctx->Depth.Func) << R300_Z_FUNC_SHIFT; } r300SetEarlyZState(ctx); @@ -925,7 +916,7 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, GLuint flag; R300_STATECHANGE(rmesa, zs); - + rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK; rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~((R300_ZS_MASK << R300_S_FRONT_FUNC_SHIFT) | (R300_ZS_MASK << @@ -1000,17 +991,6 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, } } -static void r300ClearStencil(GLcontext * ctx, GLint s) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - - rmesa->state.stencil.clear = - ((GLuint) (ctx->Stencil.Clear & R300_STENCILREF_MASK) | - (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT) | - ((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << - R300_STENCILMASK_SHIFT)); -} - /* ============================================================= * Window position and viewport transformation */ @@ -2637,12 +2617,10 @@ void r300InitState(r300ContextPtr r300) case 16: r300->state.depth.scale = 1.0 / (GLfloat) 0xffff; depth_fmt = R300_DEPTHFORMAT_16BIT_INT_Z; - r300->state.stencil.clear = 0x00000000; break; case 24: r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff; depth_fmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; - r300->state.stencil.clear = 0x00ff0000; break; default: fprintf(stderr, "Error: Unsupported depth %d... exiting\n", @@ -2706,7 +2684,6 @@ void r300InitStateFuncs(struct dd_function_table *functions) functions->ShadeModel = r300ShadeModel; /* Stencil related */ - functions->ClearStencil = r300ClearStencil; functions->StencilFuncSeparate = r300StencilFuncSeparate; functions->StencilMaskSeparate = r300StencilMaskSeparate; functions->StencilOpSeparate = r300StencilOpSeparate; -- cgit v1.2.3 From b7669e4a8637a9680bcef0d0db82ae5e1984741c Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Fri, 13 Jun 2008 20:49:52 +0200 Subject: r300/r500: Separate fragprog compiler structures r500 code still used r300_pfs_compile_state, which contains some fields that really only make sense on r3xx type hardware. In order to allow both fragprog implementations to go forward without disturbing each other, I've pushed this structure down into the respective r[3|5]00_fragprog.c --- src/mesa/drivers/dri/r300/r300_context.h | 83 ++------------------------ src/mesa/drivers/dri/r300/r300_fragprog.c | 92 ++++++++++++++++++++++++++--- src/mesa/drivers/dri/r300/r500_fragprog.c | 98 +++++++++++++++++++++++++++---- 3 files changed, 176 insertions(+), 97 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_context.h') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 3ac87d173b..2d51bad52e 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -654,83 +654,7 @@ struct r300_vertex_program_cont { #define PFS_NUM_TEMP_REGS 32 #define PFS_NUM_CONST_REGS 16 -/* Mapping Mesa registers to R300 temporaries */ -struct reg_acc { - int reg; /* Assigned hw temp */ - unsigned int refcount; /* Number of uses by mesa program */ -}; - -/** - * Describe the current lifetime information for an R300 temporary - */ -struct reg_lifetime { - /* Index of the first slot where this register is free in the sense - that it can be used as a new destination register. - This is -1 if the register has been assigned to a Mesa register - and the last access to the register has not yet been emitted */ - int free; - - /* Index of the first slot where this register is currently reserved. - This is used to stop e.g. a scalar operation from being moved - before the allocation time of a register that was first allocated - for a vector operation. */ - int reserved; - - /* Index of the first slot in which the register can be used as a - source without losing the value that is written by the last - emitted instruction that writes to the register */ - int vector_valid; - int scalar_valid; - - /* Index to the slot where the register was last read. - This is also the first slot in which the register may be written again */ - int vector_lastread; - int scalar_lastread; -}; - -/** - * Store usage information about an ALU instruction slot during the - * compilation of a fragment program. - */ -#define SLOT_SRC_VECTOR (1<<0) -#define SLOT_SRC_SCALAR (1<<3) -#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR) -#define SLOT_OP_VECTOR (1<<16) -#define SLOT_OP_SCALAR (1<<17) -#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) - -struct r300_pfs_compile_slot { - /* Bitmask indicating which parts of the slot are used, using SLOT_ constants - defined above */ - unsigned int used; - - /* Selected sources */ - int vsrc[3]; - int ssrc[3]; -}; - -/** - * Store information during compilation of fragment programs. - */ -struct r300_pfs_compile_state { - int nrslots; /* number of ALU slots used so far */ - - /* Track which (parts of) slots are already filled with instructions */ - struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST]; - - /* Track the validity of R300 temporaries */ - struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; - - /* Used to map Mesa's inputs/temps onto hardware temps */ - int temp_in_use; - struct reg_acc temps[PFS_NUM_TEMP_REGS]; - struct reg_acc inputs[32]; /* don't actually need 32... */ - - /* Track usage of hardware temps, for register allocation, - * indirection detection, etc. */ - GLuint used_in_node; - GLuint dest_in_node; -}; +struct r300_pfs_compile_state; /** * Store everything about a fragment program that is needed @@ -789,13 +713,15 @@ struct r300_fragment_program { GLuint optimization; }; +struct r500_pfs_compile_state; + struct r500_fragment_program { struct gl_fragment_program mesa_program; GLcontext *ctx; GLboolean translated; GLboolean error; - struct r300_pfs_compile_state *cs; + struct r500_pfs_compile_state *cs; struct { GLuint inst0; @@ -840,7 +766,6 @@ struct r300_state { struct r300_texture_state texture; int sw_tcl_inputs[VERT_ATTRIB_MAX]; struct r300_vertex_shader_state vertex_shader; - struct r300_pfs_compile_state pfs_compile; struct r300_dma_region aos[R300_MAX_AOS_ARRAYS]; int aos_count; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 54b80d20a1..df8ab47b0f 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -52,6 +52,85 @@ #include "r300_reg.h" #include "r300_state.h" +/* Mapping Mesa registers to R300 temporaries */ +struct reg_acc { + int reg; /* Assigned hw temp */ + unsigned int refcount; /* Number of uses by mesa program */ +}; + +/** + * Describe the current lifetime information for an R300 temporary + */ +struct reg_lifetime { + /* Index of the first slot where this register is free in the sense + that it can be used as a new destination register. + This is -1 if the register has been assigned to a Mesa register + and the last access to the register has not yet been emitted */ + int free; + + /* Index of the first slot where this register is currently reserved. + This is used to stop e.g. a scalar operation from being moved + before the allocation time of a register that was first allocated + for a vector operation. */ + int reserved; + + /* Index of the first slot in which the register can be used as a + source without losing the value that is written by the last + emitted instruction that writes to the register */ + int vector_valid; + int scalar_valid; + + /* Index to the slot where the register was last read. + This is also the first slot in which the register may be written again */ + int vector_lastread; + int scalar_lastread; +}; + +/** + * Store usage information about an ALU instruction slot during the + * compilation of a fragment program. + */ +#define SLOT_SRC_VECTOR (1<<0) +#define SLOT_SRC_SCALAR (1<<3) +#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR) +#define SLOT_OP_VECTOR (1<<16) +#define SLOT_OP_SCALAR (1<<17) +#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) + +struct r300_pfs_compile_slot { + /* Bitmask indicating which parts of the slot are used, using SLOT_ constants + defined above */ + unsigned int used; + + /* Selected sources */ + int vsrc[3]; + int ssrc[3]; +}; + +/** + * Store information during compilation of fragment programs. + */ +struct r300_pfs_compile_state { + int nrslots; /* number of ALU slots used so far */ + + /* Track which (parts of) slots are already filled with instructions */ + struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST]; + + /* Track the validity of R300 temporaries */ + struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; + + /* Used to map Mesa's inputs/temps onto hardware temps */ + int temp_in_use; + struct reg_acc temps[PFS_NUM_TEMP_REGS]; + struct reg_acc inputs[32]; /* don't actually need 32... */ + + /* Track usage of hardware temps, for register allocation, + * indirection detection, etc. */ + GLuint used_in_node; + GLuint dest_in_node; +}; + + /* * Usefull macros and values */ @@ -2093,7 +2172,7 @@ static void insert_wpos(struct gl_program *prog) */ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) { - struct r300_pfs_compile_state *cs = NULL; + COMPILE_STATE; struct gl_fragment_program *mp = &fp->mesa_program; struct prog_instruction *fpi; GLuint InputsRead = mp->Base.InputsRead; @@ -2105,7 +2184,6 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) driQueryOptioni(&r300->radeon.optionCache, "fp_optimization"); fp->translated = GL_FALSE; fp->error = GL_FALSE; - fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile); fp->WritesDepth = GL_FALSE; fp->tex.length = 0; fp->cur_node = 0; @@ -2227,13 +2305,11 @@ static void update_params(struct r300_fragment_program *fp) void r300TranslateFragmentShader(r300ContextPtr r300, struct r300_fragment_program *fp) { - - struct r300_pfs_compile_state *cs = NULL; - if (!fp->translated) { + struct r300_pfs_compile_state cs; + fp->cs = &cs; init_program(r300, fp); - cs = fp->cs; if (parse_program(fp) == GL_FALSE) { dump_program(fp); @@ -2242,11 +2318,11 @@ void r300TranslateFragmentShader(r300ContextPtr r300, /* Finish off */ fp->node[fp->cur_node].alu_end = - cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; + cs.nrslots - fp->node[fp->cur_node].alu_offset - 1; if (fp->node[fp->cur_node].tex_end < 0) fp->node[fp->cur_node].tex_end = 0; fp->alu_offset = 0; - fp->alu_end = cs->nrslots - 1; + fp->alu_end = cs.nrslots - 1; fp->tex_offset = 0; fp->tex_end = fp->tex.length ? fp->tex.length - 1 : 0; assert(fp->node[fp->cur_node].alu_end >= 0); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 26513e74ae..70e45f3ea8 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -57,6 +57,85 @@ #include "r300_reg.h" #include "r300_state.h" +/* Mapping Mesa registers to R500 temporaries */ +struct reg_acc { + int reg; /* Assigned hw temp */ + unsigned int refcount; /* Number of uses by mesa program */ +}; + +/** + * Describe the current lifetime information for an R300 temporary + */ +struct reg_lifetime { + /* Index of the first slot where this register is free in the sense + that it can be used as a new destination register. + This is -1 if the register has been assigned to a Mesa register + and the last access to the register has not yet been emitted */ + int free; + + /* Index of the first slot where this register is currently reserved. + This is used to stop e.g. a scalar operation from being moved + before the allocation time of a register that was first allocated + for a vector operation. */ + int reserved; + + /* Index of the first slot in which the register can be used as a + source without losing the value that is written by the last + emitted instruction that writes to the register */ + int vector_valid; + int scalar_valid; + + /* Index to the slot where the register was last read. + This is also the first slot in which the register may be written again */ + int vector_lastread; + int scalar_lastread; +}; + +/** + * Store usage information about an ALU instruction slot during the + * compilation of a fragment program. + */ +#define SLOT_SRC_VECTOR (1<<0) +#define SLOT_SRC_SCALAR (1<<3) +#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR) +#define SLOT_OP_VECTOR (1<<16) +#define SLOT_OP_SCALAR (1<<17) +#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) + +struct r500_pfs_compile_slot { + /* Bitmask indicating which parts of the slot are used, using SLOT_ constants + defined above */ + unsigned int used; + + /* Selected sources */ + int vsrc[3]; + int ssrc[3]; +}; + +/** + * Store information during compilation of fragment programs. + */ +struct r500_pfs_compile_state { + int nrslots; /* number of ALU slots used so far */ + + /* Track which (parts of) slots are already filled with instructions */ + struct r500_pfs_compile_slot slot[PFS_MAX_ALU_INST]; + + /* Track the validity of R300 temporaries */ + struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; + + /* Used to map Mesa's inputs/temps onto hardware temps */ + int temp_in_use; + struct reg_acc temps[PFS_NUM_TEMP_REGS]; + struct reg_acc inputs[32]; /* don't actually need 32... */ + + /* Track usage of hardware temps, for register allocation, + * indirection detection, etc. */ + GLuint used_in_node; + GLuint dest_in_node; +}; + + /* * Useful macros and values */ @@ -66,7 +145,7 @@ fp->error = GL_TRUE; \ } while(0) -#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs +#define COMPILE_STATE struct r500_pfs_compile_state *cs = fp->cs #define R500_US_NUM_TEMP_REGS 128 #define R500_US_NUM_CONST_REGS 256 @@ -293,7 +372,7 @@ static void emit_tex(struct r500_fragment_program *fp, fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit) | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - + if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) fp->inst[counter].inst1 |= R500_TEX_UNSCALED; @@ -1227,7 +1306,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) { - struct r300_pfs_compile_state *cs = NULL; + struct r500_pfs_compile_state *cs = fp->cs; struct gl_fragment_program *mp = &fp->mesa_program; struct prog_instruction *fpi; GLuint InputsRead = mp->Base.InputsRead; @@ -1239,7 +1318,6 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) driQueryOptioni(&r300->radeon.optionCache, "fp_optimization"); fp->translated = GL_FALSE; fp->error = GL_FALSE; - fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile); fp->const_nr = 0; /* Size of pixel stack, plus 1. */ fp->max_temp_idx = 1; @@ -1420,23 +1498,21 @@ static void dumb_shader(struct r500_fragment_program *fp) void r500TranslateFragmentShader(r300ContextPtr r300, struct r500_fragment_program *fp) { - - struct r300_pfs_compile_state *cs = NULL; - if (!fp->translated) { + struct r500_pfs_compile_state cs; + fp->cs = &cs; init_program(r300, fp); - cs = fp->cs; if (parse_program(fp) == GL_FALSE) { ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n"); dumb_shader(fp); fp->inst_offset = 0; - fp->inst_end = cs->nrslots - 1; + fp->inst_end = cs.nrslots - 1; return; } fp->inst_offset = 0; - fp->inst_end = cs->nrslots - 1; + fp->inst_end = cs.nrslots - 1; fp->translated = GL_TRUE; if (RADEON_DEBUG & DEBUG_PIXEL) { @@ -1449,6 +1525,8 @@ void r500TranslateFragmentShader(r300ContextPtr r300, r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); + + fp->cs = 0; } update_params(fp); -- cgit v1.2.3 From 4ba1c7d84826aaa07114872560cab3a428949499 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Fri, 13 Jun 2008 21:43:09 +0200 Subject: r300_fragprog: Refactoring and cleanup Refactor so that r300_pfs_compile_state "owns"/holds a pointer to r300_fragment_program instead of the other way round. This is more natural from an object orientation point of view. Move the compiled hardware state into r300_fragment_program_code, in anticipation of on-the-fly program recompilation based on external OpenGL state. --- src/mesa/drivers/dri/r300/r300_context.h | 25 +- src/mesa/drivers/dri/r300/r300_fragprog.c | 717 +++++++++++++++--------------- src/mesa/drivers/dri/r300/r300_shader.c | 1 - src/mesa/drivers/dri/r300/r300_state.c | 73 +-- 4 files changed, 417 insertions(+), 399 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_context.h') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 2d51bad52e..05efb813e5 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -657,17 +657,9 @@ struct r300_vertex_program_cont { struct r300_pfs_compile_state; /** - * Store everything about a fragment program that is needed - * to render with that program. + * Stores an R300 fragment program in its compiled-to-hardware form. */ -struct r300_fragment_program { - struct gl_fragment_program mesa_program; - - GLcontext *ctx; - GLboolean translated; - GLboolean error; - struct r300_pfs_compile_state *cs; - +struct r300_fragment_program_code { struct { int length; GLuint inst[PFS_MAX_TEX_INST]; @@ -708,6 +700,19 @@ struct r300_fragment_program { int const_nr; int max_temp_idx; +}; + +/** + * Store everything about a fragment program that is needed + * to render with that program. + */ +struct r300_fragment_program { + struct gl_fragment_program mesa_program; + + GLboolean translated; + GLboolean error; + + struct r300_fragment_program_code code; GLboolean WritesDepth; GLuint optimization; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 90fd9a7cfe..ba88b88f45 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -111,6 +111,9 @@ struct r300_pfs_compile_slot { * Store information during compilation of fragment programs. */ struct r300_pfs_compile_state { + r300ContextPtr r300; + struct r300_fragment_program *fp; + int nrslots; /* number of ALU slots used so far */ /* Track which (parts of) slots are already filled with instructions */ @@ -141,7 +144,10 @@ struct r300_pfs_compile_state { } while(0) #define PFS_INVAL 0xFFFFFFFF -#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs +#define COMPILE_STATE \ + struct r300_fragment_program *fp = cs->fp; \ + struct r300_fragment_program_code *code = &fp->code; \ + (void)code #define SWIZZLE_XYZ 0 #define SWIZZLE_XXX 1 @@ -155,7 +161,7 @@ struct r300_pfs_compile_state { #define SWIZZLE_000 9 #define SWIZZLE_HHH 10 -#define swizzle(r, x, y, z, w) do_swizzle(fp, r, \ +#define swizzle(r, x, y, z, w) do_swizzle(cs, r, \ ((SWIZZLE_##x<<0)| \ (SWIZZLE_##y<<3)| \ (SWIZZLE_##z<<6)| \ @@ -380,15 +386,16 @@ static const GLuint pfs_zero = REG(REG_TYPE_CONST, /* * Common functions prototypes */ -static void dump_program(struct r300_fragment_program *fp); -static void emit_arith(struct r300_fragment_program *fp, int op, +static void dump_program(struct r300_fragment_program *fp, + struct r300_fragment_program_code *code); +static void emit_arith(struct r300_pfs_compile_state *cs, int op, GLuint dest, int mask, GLuint src0, GLuint src1, GLuint src2, int flags); /** * Get an R300 temporary that can be written to in the given slot. */ -static int get_hw_temp(struct r300_fragment_program *fp, int slot) +static int get_hw_temp(struct r300_pfs_compile_state *cs, int slot) { COMPILE_STATE; int r; @@ -418,8 +425,8 @@ static int get_hw_temp(struct r300_fragment_program *fp, int slot) cs->hwtemps[r].vector_valid = 0; cs->hwtemps[r].scalar_valid = 0; - if (r > fp->max_temp_idx) - fp->max_temp_idx = r; + if (r > fp->code.max_temp_idx) + fp->code.max_temp_idx = r; return r; } @@ -427,7 +434,7 @@ static int get_hw_temp(struct r300_fragment_program *fp, int slot) /** * Get an R300 temporary that will act as a TEX destination register. */ -static int get_hw_temp_tex(struct r300_fragment_program *fp) +static int get_hw_temp_tex(struct r300_pfs_compile_state *cs) { COMPILE_STATE; int r; @@ -442,7 +449,7 @@ static int get_hw_temp_tex(struct r300_fragment_program *fp) } if (r >= PFS_NUM_TEMP_REGS) - return get_hw_temp(fp, 0); /* Will cause an indirection */ + return get_hw_temp(cs, 0); /* Will cause an indirection */ cs->hwtemps[r].reserved = cs->hwtemps[r].free; cs->hwtemps[r].free = -1; @@ -454,8 +461,8 @@ static int get_hw_temp_tex(struct r300_fragment_program *fp) cs->hwtemps[r].vector_valid = cs->nrslots; cs->hwtemps[r].scalar_valid = cs->nrslots; - if (r > fp->max_temp_idx) - fp->max_temp_idx = r; + if (r > code->max_temp_idx) + code->max_temp_idx = r; return r; } @@ -463,10 +470,8 @@ static int get_hw_temp_tex(struct r300_fragment_program *fp) /** * Mark the given hardware register as free. */ -static void free_hw_temp(struct r300_fragment_program *fp, int idx) +static void free_hw_temp(struct r300_pfs_compile_state *cs, int idx) { - COMPILE_STATE; - // Be very careful here. Consider sequences like // MAD r0, r1,r2,r3 // TEX r4, ... @@ -482,7 +487,7 @@ static void free_hw_temp(struct r300_fragment_program *fp, int idx) /** * Create a new Mesa temporary register. */ -static GLuint get_temp_reg(struct r300_fragment_program *fp) +static GLuint get_temp_reg(struct r300_pfs_compile_state *cs) { COMPILE_STATE; GLuint r = undef; @@ -508,7 +513,7 @@ static GLuint get_temp_reg(struct r300_fragment_program *fp) * Create a new Mesa temporary register that will act as the destination * register for a texture read. */ -static GLuint get_temp_reg_tex(struct r300_fragment_program *fp) +static GLuint get_temp_reg_tex(struct r300_pfs_compile_state *cs) { COMPILE_STATE; GLuint r = undef; @@ -522,7 +527,7 @@ static GLuint get_temp_reg_tex(struct r300_fragment_program *fp) cs->temp_in_use |= (1 << --index); cs->temps[index].refcount = 0xFFFFFFFF; - cs->temps[index].reg = get_hw_temp_tex(fp); + cs->temps[index].reg = get_hw_temp_tex(cs); REG_SET_TYPE(r, REG_TYPE_TEMP); REG_SET_INDEX(r, index); @@ -533,20 +538,19 @@ static GLuint get_temp_reg_tex(struct r300_fragment_program *fp) /** * Free a Mesa temporary and the associated R300 temporary. */ -static void free_temp(struct r300_fragment_program *fp, GLuint r) +static void free_temp(struct r300_pfs_compile_state *cs, GLuint r) { - COMPILE_STATE; GLuint index = REG_GET_INDEX(r); if (!(cs->temp_in_use & (1 << index))) return; if (REG_GET_TYPE(r) == REG_TYPE_TEMP) { - free_hw_temp(fp, cs->temps[index].reg); + free_hw_temp(cs, cs->temps[index].reg); cs->temps[index].reg = -1; cs->temp_in_use &= ~(1 << index); } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) { - free_hw_temp(fp, cs->inputs[index].reg); + free_hw_temp(cs, cs->inputs[index].reg); cs->inputs[index].reg = -1; } } @@ -560,25 +564,26 @@ static void free_temp(struct r300_fragment_program *fp, GLuint r) * of the fragment program (actually, up until the next time the fragment * program is translated). */ -static GLuint emit_const4fv(struct r300_fragment_program *fp, +static GLuint emit_const4fv(struct r300_pfs_compile_state *cs, const GLfloat * cp) { + COMPILE_STATE; GLuint reg = undef; int index; - for (index = 0; index < fp->const_nr; ++index) { - if (fp->constant[index] == cp) + for (index = 0; index < code->const_nr; ++index) { + if (code->constant[index] == cp) break; } - if (index >= fp->const_nr) { + if (index >= code->const_nr) { if (index >= PFS_NUM_CONST_REGS) { ERROR("Out of hw constants!\n"); return reg; } - fp->const_nr++; - fp->constant[index] = cp; + code->const_nr++; + code->constant[index] = cp; } REG_SET_TYPE(reg, REG_TYPE_CONST); @@ -609,9 +614,11 @@ static inline GLuint absolute(GLuint r) return r; } -static int swz_native(struct r300_fragment_program *fp, +static int swz_native(struct r300_pfs_compile_state *cs, GLuint src, GLuint * r, GLuint arbneg) { + COMPILE_STATE; + /* Native swizzle, handle negation */ src = (src & ~REG_NEGS_MASK) | (((arbneg >> 3) & 1) << REG_NEGS_SHIFT); @@ -623,13 +630,13 @@ static int swz_native(struct r300_fragment_program *fp, *r = src; } else { if (!REG_GET_VALID(*r)) - *r = get_temp_reg(fp); + *r = get_temp_reg(cs); src |= REG_NEGV_MASK; - emit_arith(fp, + emit_arith(cs, PFS_OP_MAD, *r, arbneg & 0x7, keep(src), pfs_one, pfs_zero, 0); src = src & ~REG_NEGV_MASK; - emit_arith(fp, + emit_arith(cs, PFS_OP_MAD, *r, (arbneg ^ 0x7) | WRITEMASK_W, @@ -639,15 +646,16 @@ static int swz_native(struct r300_fragment_program *fp, return 3; } -static int swz_emit_partial(struct r300_fragment_program *fp, +static int swz_emit_partial(struct r300_pfs_compile_state *cs, GLuint src, GLuint * r, int mask, int mc, GLuint arbneg) { + COMPILE_STATE; GLuint tmp; GLuint wmask = 0; if (!REG_GET_VALID(*r)) - *r = get_temp_reg(fp); + *r = get_temp_reg(cs); /* A partial match, VSWZ/mask define what parts of the * desired swizzle we match @@ -661,7 +669,7 @@ static int swz_emit_partial(struct r300_fragment_program *fp, if (tmp) { tmp = tmp ^ s_mask[mask].mask; if (tmp) { - emit_arith(fp, + emit_arith(cs, PFS_OP_MAD, *r, arbneg & s_mask[mask].mask, @@ -672,7 +680,7 @@ static int swz_emit_partial(struct r300_fragment_program *fp, } else { REG_SET_NO_USE(src, GL_FALSE); } - emit_arith(fp, + emit_arith(cs, PFS_OP_MAD, *r, tmp | wmask, src, pfs_one, pfs_zero, 0); } else { @@ -681,7 +689,7 @@ static int swz_emit_partial(struct r300_fragment_program *fp, } else { REG_SET_NO_USE(src, GL_FALSE); } - emit_arith(fp, + emit_arith(cs, PFS_OP_MAD, *r, (arbneg & s_mask[mask].mask) | wmask, @@ -693,7 +701,7 @@ static int swz_emit_partial(struct r300_fragment_program *fp, } else { REG_SET_NO_USE(src, GL_FALSE); } - emit_arith(fp, PFS_OP_MAD, + emit_arith(cs, PFS_OP_MAD, *r, s_mask[mask].mask | wmask, src, pfs_one, pfs_zero, 0); @@ -702,9 +710,10 @@ static int swz_emit_partial(struct r300_fragment_program *fp, return s_mask[mask].count; } -static GLuint do_swizzle(struct r300_fragment_program *fp, +static GLuint do_swizzle(struct r300_pfs_compile_state *cs, GLuint src, GLuint arbswz, GLuint arbneg) { + COMPILE_STATE; GLuint r = undef; GLuint vswz; int c_mask = 0; @@ -759,10 +768,10 @@ static GLuint do_swizzle(struct r300_fragment_program *fp, if (chash == (arbswz & s_mask[c_mask].hash)) { if (s_mask[c_mask].count == 3) { - v_match += swz_native(fp, + v_match += swz_native(cs, src, &r, arbneg); } else { - v_match += swz_emit_partial(fp, + v_match += swz_emit_partial(cs, src, &r, c_mask, @@ -787,9 +796,10 @@ static GLuint do_swizzle(struct r300_fragment_program *fp, return r; } -static GLuint t_src(struct r300_fragment_program *fp, +static GLuint t_src(struct r300_pfs_compile_state *cs, struct prog_src_register fpsrc) { + COMPILE_STATE; GLuint r = undef; switch (fpsrc.File) { @@ -804,19 +814,18 @@ static GLuint t_src(struct r300_fragment_program *fp, REG_SET_TYPE(r, REG_TYPE_INPUT); break; case PROGRAM_LOCAL_PARAM: - r = emit_const4fv(fp, + r = emit_const4fv(cs, fp->mesa_program.Base.LocalParams[fpsrc. Index]); break; case PROGRAM_ENV_PARAM: - r = emit_const4fv(fp, - fp->ctx->FragmentProgram.Parameters[fpsrc. - Index]); + r = emit_const4fv(cs, + cs->r300->radeon.glCtx->FragmentProgram.Parameters[fpsrc.Index]); break; case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: case PROGRAM_CONSTANT: - r = emit_const4fv(fp, + r = emit_const4fv(cs, fp->mesa_program.Base.Parameters-> ParameterValues[fpsrc.Index]); break; @@ -827,11 +836,11 @@ static GLuint t_src(struct r300_fragment_program *fp, /* no point swizzling ONE/ZERO/HALF constants... */ if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO) - r = do_swizzle(fp, r, fpsrc.Swizzle, fpsrc.NegateBase); + r = do_swizzle(cs, r, fpsrc.Swizzle, fpsrc.NegateBase); return r; } -static GLuint t_scalar_src(struct r300_fragment_program *fp, +static GLuint t_scalar_src(struct r300_pfs_compile_state *cs, struct prog_src_register fpsrc) { struct prog_src_register src = fpsrc; @@ -839,12 +848,13 @@ static GLuint t_scalar_src(struct r300_fragment_program *fp, src.Swizzle = ((sc << 0) | (sc << 3) | (sc << 6) | (sc << 9)); - return t_src(fp, src); + return t_src(cs, src); } -static GLuint t_dst(struct r300_fragment_program *fp, +static GLuint t_dst(struct r300_pfs_compile_state *cs, struct prog_dst_register dest) { + COMPILE_STATE; GLuint r = undef; switch (dest.File) { @@ -871,7 +881,7 @@ static GLuint t_dst(struct r300_fragment_program *fp, } } -static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex) +static int t_hw_src(struct r300_pfs_compile_state *cs, GLuint src, GLboolean tex) { COMPILE_STATE; int idx; @@ -883,18 +893,18 @@ static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex) * hasn't been written to. Undefined results. */ if (cs->temps[index].reg == -1) - cs->temps[index].reg = get_hw_temp(fp, cs->nrslots); + cs->temps[index].reg = get_hw_temp(cs, cs->nrslots); idx = cs->temps[index].reg; if (!REG_GET_NO_USE(src) && (--cs->temps[index].refcount == 0)) - free_temp(fp, src); + free_temp(cs, src); break; case REG_TYPE_INPUT: idx = cs->inputs[index].reg; if (!REG_GET_NO_USE(src) && (--cs->inputs[index].refcount == 0)) - free_hw_temp(fp, cs->inputs[index].reg); + free_hw_temp(cs, cs->inputs[index].reg); break; case REG_TYPE_CONST: return (index | SRC_CONST); @@ -909,7 +919,7 @@ static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex) return idx; } -static int t_hw_dst(struct r300_fragment_program *fp, +static int t_hw_dst(struct r300_pfs_compile_state *cs, GLuint dest, GLboolean tex, int slot) { COMPILE_STATE; @@ -921,15 +931,15 @@ static int t_hw_dst(struct r300_fragment_program *fp, case REG_TYPE_TEMP: if (cs->temps[REG_GET_INDEX(dest)].reg == -1) { if (!tex) { - cs->temps[index].reg = get_hw_temp(fp, slot); + cs->temps[index].reg = get_hw_temp(cs, slot); } else { - cs->temps[index].reg = get_hw_temp_tex(fp); + cs->temps[index].reg = get_hw_temp_tex(cs); } } idx = cs->temps[index].reg; if (!REG_GET_NO_USE(dest) && (--cs->temps[index].refcount == 0)) - free_temp(fp, dest); + free_temp(cs, dest); cs->dest_in_node |= (1 << idx); cs->used_in_node |= (1 << idx); @@ -937,13 +947,11 @@ static int t_hw_dst(struct r300_fragment_program *fp, case REG_TYPE_OUTPUT: switch (index) { case FRAG_RESULT_COLR: - fp->node[fp->cur_node].flags |= - R300_RGBA_OUT; + code->node[code->cur_node].flags |= R300_RGBA_OUT; break; case FRAG_RESULT_DEPR: fp->WritesDepth = GL_TRUE; - fp->node[fp->cur_node].flags |= - R300_W_OUT; + code->node[code->cur_node].flags |= R300_W_OUT; break; } return index; @@ -956,7 +964,7 @@ static int t_hw_dst(struct r300_fragment_program *fp, return idx; } -static void emit_nop(struct r300_fragment_program *fp) +static void emit_nop(struct r300_pfs_compile_state *cs) { COMPILE_STATE; @@ -965,18 +973,18 @@ static void emit_nop(struct r300_fragment_program *fp) return; } - fp->alu.inst[cs->nrslots].inst0 = NOP_INST0; - fp->alu.inst[cs->nrslots].inst1 = NOP_INST1; - fp->alu.inst[cs->nrslots].inst2 = NOP_INST2; - fp->alu.inst[cs->nrslots].inst3 = NOP_INST3; + code->alu.inst[cs->nrslots].inst0 = NOP_INST0; + code->alu.inst[cs->nrslots].inst1 = NOP_INST1; + code->alu.inst[cs->nrslots].inst2 = NOP_INST2; + code->alu.inst[cs->nrslots].inst3 = NOP_INST3; cs->nrslots++; } -static void emit_tex(struct r300_fragment_program *fp, +static void emit_tex(struct r300_pfs_compile_state *cs, struct prog_instruction *fpi, int opcode) { COMPILE_STATE; - GLuint coord = t_src(fp, fpi->SrcReg[0]); + GLuint coord = t_src(cs, fpi->SrcReg[0]); GLuint dest = undef, rdest = undef; GLuint din, uin; int unit = fpi->TexSrcUnit; @@ -1001,15 +1009,15 @@ static void emit_tex(struct r300_fragment_program *fp, tokens[2] = unit; factor_index = - _mesa_add_state_reference(fp->mesa_program.Base. + _mesa_add_state_reference(cs->fp->mesa_program.Base. Parameters, tokens); factorreg = - emit_const4fv(fp, - fp->mesa_program.Base.Parameters-> + emit_const4fv(cs, + cs->fp->mesa_program.Base.Parameters-> ParameterValues[factor_index]); - tempreg = keep(get_temp_reg(fp)); + tempreg = keep(get_temp_reg(cs)); - emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, + emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, coord, factorreg, pfs_zero, 0); coord = tempreg; @@ -1022,8 +1030,8 @@ static void emit_tex(struct r300_fragment_program *fp, REG_GET_SSWZ(coord) != SWIZZLE_W || coord & (REG_NEGV_MASK | REG_NEGS_MASK | REG_ABS_MASK)) { assert(tempreg == 0); - tempreg = keep(get_temp_reg(fp)); - emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, + tempreg = keep(get_temp_reg(cs)); + emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, coord, pfs_one, pfs_zero, 0); coord = tempreg; } @@ -1033,30 +1041,30 @@ static void emit_tex(struct r300_fragment_program *fp, din = cs->dest_in_node; /* Resolve source/dest to hardware registers */ - hwsrc = t_hw_src(fp, coord, GL_TRUE); + hwsrc = t_hw_src(cs, coord, GL_TRUE); if (opcode != R300_TEX_OP_KIL) { - dest = t_dst(fp, fpi->DstReg); + dest = t_dst(cs, fpi->DstReg); /* r300 doesn't seem to be able to do TEX->output reg */ if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { rdest = dest; - dest = get_temp_reg_tex(fp); + dest = get_temp_reg_tex(cs); } else if (fpi->DstReg.WriteMask != WRITEMASK_XYZW) { /* in case write mask isn't XYZW */ rdest = dest; - dest = get_temp_reg_tex(fp); + dest = get_temp_reg_tex(cs); } hwdest = - t_hw_dst(fp, dest, GL_TRUE, - fp->node[fp->cur_node].alu_offset); + t_hw_dst(cs, dest, GL_TRUE, + code->node[code->cur_node].alu_offset); /* Use a temp that hasn't been used in this node, rather * than causing an indirection */ if (uin & (1 << hwdest)) { - free_hw_temp(fp, hwdest); - hwdest = get_hw_temp_tex(fp); + free_hw_temp(cs, hwdest); + hwdest = get_hw_temp_tex(cs); cs->temps[REG_GET_INDEX(dest)].reg = hwdest; } } else { @@ -1071,32 +1079,32 @@ static void emit_tex(struct r300_fragment_program *fp, (din & (1 << hwsrc))) || (uin & (1 << hwdest))) { /* Finish off current node */ - if (fp->node[fp->cur_node].alu_offset == cs->nrslots) - emit_nop(fp); + if (code->node[code->cur_node].alu_offset == cs->nrslots) + emit_nop(cs); - fp->node[fp->cur_node].alu_end = - cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; - assert(fp->node[fp->cur_node].alu_end >= 0); + code->node[code->cur_node].alu_end = + cs->nrslots - code->node[code->cur_node].alu_offset - 1; + assert(code->node[code->cur_node].alu_end >= 0); - if (++fp->cur_node >= PFS_MAX_TEX_INDIRECT) { + if (++code->cur_node >= PFS_MAX_TEX_INDIRECT) { ERROR("too many levels of texture indirection\n"); return; } /* Start new node */ - fp->node[fp->cur_node].tex_offset = fp->tex.length; - fp->node[fp->cur_node].alu_offset = cs->nrslots; - fp->node[fp->cur_node].tex_end = -1; - fp->node[fp->cur_node].alu_end = -1; - fp->node[fp->cur_node].flags = 0; + code->node[code->cur_node].tex_offset = code->tex.length; + code->node[code->cur_node].alu_offset = cs->nrslots; + code->node[code->cur_node].tex_end = -1; + code->node[code->cur_node].alu_end = -1; + code->node[code->cur_node].flags = 0; cs->used_in_node = 0; cs->dest_in_node = 0; } - if (fp->cur_node == 0) - fp->first_node_has_tex = 1; + if (code->cur_node == 0) + code->first_node_has_tex = 1; - fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_SRC_ADDR_SHIFT) + code->tex.inst[code->tex.length++] = 0 | (hwsrc << R300_SRC_ADDR_SHIFT) | (hwdest << R300_DST_ADDR_SHIFT) | (unit << R300_TEX_ID_SHIFT) | (opcode << R300_TEX_INST_SHIFT); @@ -1105,25 +1113,25 @@ static void emit_tex(struct r300_fragment_program *fp, if (REG_GET_TYPE(coord) != REG_TYPE_CONST) cs->used_in_node |= (1 << hwsrc); - fp->node[fp->cur_node].tex_end++; + code->node[code->cur_node].tex_end++; /* Copy from temp to output if needed */ if (REG_GET_VALID(rdest)) { - emit_arith(fp, PFS_OP_MAD, rdest, fpi->DstReg.WriteMask, dest, + emit_arith(cs, PFS_OP_MAD, rdest, fpi->DstReg.WriteMask, dest, pfs_one, pfs_zero, 0); - free_temp(fp, dest); + free_temp(cs, dest); } /* Free temp register */ if (tempreg != 0) - free_temp(fp, tempreg); + free_temp(cs, tempreg); } /** * Returns the first slot where we could possibly allow writing to dest, * according to register allocation. */ -static int get_earliest_allowed_write(struct r300_fragment_program *fp, +static int get_earliest_allowed_write(struct r300_pfs_compile_state *cs, GLuint dest, int mask) { COMPILE_STATE; @@ -1173,7 +1181,7 @@ static int get_earliest_allowed_write(struct r300_fragment_program *fp, * * @return the index of the slot */ -static int find_and_prepare_slot(struct r300_fragment_program *fp, +static int find_and_prepare_slot(struct r300_pfs_compile_state *cs, GLboolean emit_vop, GLboolean emit_sop, int argc, GLuint * src, GLuint dest, int mask) @@ -1198,10 +1206,10 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, if (emit_sop) used |= SLOT_OP_SCALAR; - pos = get_earliest_allowed_write(fp, dest, mask); + pos = get_earliest_allowed_write(cs, dest, mask); - if (fp->node[fp->cur_node].alu_offset > pos) - pos = fp->node[fp->cur_node].alu_offset; + if (code->node[code->cur_node].alu_offset > pos) + pos = code->node[code->cur_node].alu_offset; for (i = 0; i < argc; ++i) { if (!REG_GET_BUILTIN(src[i])) { if (emit_vop) @@ -1210,7 +1218,7 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i; } - hwsrc[i] = t_hw_src(fp, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */ + hwsrc[i] = t_hw_src(cs, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */ regnr = hwsrc[i] & 31; if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { @@ -1236,10 +1244,10 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, return -1; } - fp->alu.inst[pos].inst0 = NOP_INST0; - fp->alu.inst[pos].inst1 = NOP_INST1; - fp->alu.inst[pos].inst2 = NOP_INST2; - fp->alu.inst[pos].inst3 = NOP_INST3; + fp->code.alu.inst[pos].inst0 = NOP_INST0; + fp->code.alu.inst[pos].inst1 = NOP_INST1; + fp->code.alu.inst[pos].inst2 = NOP_INST2; + fp->code.alu.inst[pos].inst3 = NOP_INST3; cs->nrslots++; } @@ -1316,14 +1324,14 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, } // Emit the source fetch code - fp->alu.inst[pos].inst1 &= ~R300_ALU_SRC_MASK; - fp->alu.inst[pos].inst1 |= + code->alu.inst[pos].inst1 &= ~R300_ALU_SRC_MASK; + code->alu.inst[pos].inst1 |= ((cs->slot[pos].vsrc[0] << R300_ALU_SRC0C_SHIFT) | (cs->slot[pos].vsrc[1] << R300_ALU_SRC1C_SHIFT) | (cs->slot[pos].vsrc[2] << R300_ALU_SRC2C_SHIFT)); - fp->alu.inst[pos].inst3 &= ~R300_ALU_SRC_MASK; - fp->alu.inst[pos].inst3 |= + code->alu.inst[pos].inst3 &= ~R300_ALU_SRC_MASK; + code->alu.inst[pos].inst3 |= ((cs->slot[pos].ssrc[0] << R300_ALU_SRC0A_SHIFT) | (cs->slot[pos].ssrc[1] << R300_ALU_SRC1A_SHIFT) | (cs->slot[pos].ssrc[2] << R300_ALU_SRC2A_SHIFT)); @@ -1349,10 +1357,10 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, } } - fp->alu.inst[pos].inst0 &= + code->alu.inst[pos].inst0 &= ~(R300_ALU_ARG0C_MASK | R300_ALU_ARG1C_MASK | R300_ALU_ARG2C_MASK); - fp->alu.inst[pos].inst0 |= + code->alu.inst[pos].inst0 |= (swz[0] << R300_ALU_ARG0C_SHIFT) | (swz[1] << R300_ALU_ARG1C_SHIFT) | (swz[2] << R300_ALU_ARG2C_SHIFT); @@ -1378,10 +1386,10 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, } } - fp->alu.inst[pos].inst2 &= + code->alu.inst[pos].inst2 &= ~(R300_ALU_ARG0A_MASK | R300_ALU_ARG1A_MASK | R300_ALU_ARG2A_MASK); - fp->alu.inst[pos].inst2 |= + code->alu.inst[pos].inst2 |= (swz[0] << R300_ALU_ARG0A_SHIFT) | (swz[1] << R300_ALU_ARG1A_SHIFT) | (swz[2] << R300_ALU_ARG2A_SHIFT); @@ -1393,7 +1401,7 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, /** * Append an ALU instruction to the instruction list. */ -static void emit_arith(struct r300_fragment_program *fp, +static void emit_arith(struct r300_pfs_compile_state *cs, int op, GLuint dest, int mask, @@ -1427,12 +1435,12 @@ static void emit_arith(struct r300_fragment_program *fp, emit_sop = GL_TRUE; pos = - find_and_prepare_slot(fp, emit_vop, emit_sop, argc, src, dest, + find_and_prepare_slot(cs, emit_vop, emit_sop, argc, src, dest, mask); if (pos < 0) return; - hwdest = t_hw_dst(fp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ + hwdest = t_hw_dst(cs, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ if (flags & PFS_FLAG_SAT) { vop |= R300_ALU_OUTC_CLAMP; @@ -1441,19 +1449,19 @@ static void emit_arith(struct r300_fragment_program *fp, /* Throw the pieces together and get ALU/1 */ if (emit_vop) { - fp->alu.inst[pos].inst0 |= vop; + code->alu.inst[pos].inst0 |= vop; - fp->alu.inst[pos].inst1 |= hwdest << R300_ALU_DSTC_SHIFT; + code->alu.inst[pos].inst1 |= hwdest << R300_ALU_DSTC_SHIFT; if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - fp->alu.inst[pos].inst1 |= + code->alu.inst[pos].inst1 |= (mask & WRITEMASK_XYZ) << R300_ALU_DSTC_OUTPUT_MASK_SHIFT; } else assert(0); } else { - fp->alu.inst[pos].inst1 |= + code->alu.inst[pos].inst1 |= (mask & WRITEMASK_XYZ) << R300_ALU_DSTC_REG_MASK_SHIFT; @@ -1463,22 +1471,22 @@ static void emit_arith(struct r300_fragment_program *fp, /* And now ALU/3 */ if (emit_sop) { - fp->alu.inst[pos].inst2 |= sop; + code->alu.inst[pos].inst2 |= sop; if (mask & WRITEMASK_W) { if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - fp->alu.inst[pos].inst3 |= + code->alu.inst[pos].inst3 |= (hwdest << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_OUTPUT; } else if (REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { - fp->alu.inst[pos].inst3 |= + code->alu.inst[pos].inst3 |= R300_ALU_DSTA_DEPTH; } else assert(0); } else { - fp->alu.inst[pos].inst3 |= + code->alu.inst[pos].inst3 |= (hwdest << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_REG; @@ -1548,7 +1556,7 @@ static GLfloat SinCosConsts[2][4] = { static const GLfloat LitConst[4] = { 127.999999, 127.999999, 127.999999, -127.999999 }; -static void emit_lit(struct r300_fragment_program *fp, +static void emit_lit(struct r300_pfs_compile_state *cs, GLuint dest, int mask, GLuint src, int flags) { COMPILE_STATE; @@ -1556,7 +1564,7 @@ static void emit_lit(struct r300_fragment_program *fp, int needTemporary; GLuint temp; - cnst = emit_const4fv(fp, LitConst); + cnst = emit_const4fv(cs, LitConst); needTemporary = 0; if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { @@ -1568,7 +1576,7 @@ static void emit_lit(struct r300_fragment_program *fp, } if (needTemporary) { - temp = keep(get_temp_reg(fp)); + temp = keep(get_temp_reg(cs)); } else { temp = keep(dest); } @@ -1579,48 +1587,49 @@ static void emit_lit(struct r300_fragment_program *fp, // so swizzling between the two parts can create fake dependencies. // First slot - emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_XY, + emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_XY, keep(src), pfs_zero, undef, 0); - emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0); + emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0); // Second slot - emit_arith(fp, PFS_OP_MIN, temp, WRITEMASK_Z, + emit_arith(cs, PFS_OP_MIN, temp, WRITEMASK_Z, swizzle(temp, W, W, W, W), cnst, undef, 0); - emit_arith(fp, PFS_OP_LG2, temp, WRITEMASK_W, + emit_arith(cs, PFS_OP_LG2, temp, WRITEMASK_W, swizzle(temp, Y, Y, Y, Y), undef, undef, 0); // Third slot // If desired, we saturate the y result here. // This does not affect the use as a condition variable in the CMP later - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, + emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W, temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0); - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_Y, + emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_Y, swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags); // Fourth slot - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_X, + emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_X, pfs_one, pfs_one, pfs_zero, 0); - emit_arith(fp, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0); + emit_arith(cs, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0); // Fifth slot - emit_arith(fp, PFS_OP_CMP, temp, WRITEMASK_Z, + emit_arith(cs, PFS_OP_CMP, temp, WRITEMASK_Z, pfs_zero, swizzle(temp, W, W, W, W), negate(swizzle(temp, Y, Y, Y, Y)), flags); - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, + emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, pfs_zero, 0); if (needTemporary) { - emit_arith(fp, PFS_OP_MAD, dest, mask, + emit_arith(cs, PFS_OP_MAD, dest, mask, temp, pfs_one, pfs_zero, flags); - free_temp(fp, temp); + free_temp(cs, temp); } else { // Decrease refcount of the destination - t_hw_dst(fp, dest, GL_FALSE, cs->nrslots); + t_hw_dst(cs, dest, GL_FALSE, cs->nrslots); } } -static GLboolean parse_program(struct r300_fragment_program *fp) +static GLboolean parse_program(struct r300_pfs_compile_state *cs) { + COMPILE_STATE; struct gl_fragment_program *mp = &fp->mesa_program; const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; @@ -1640,30 +1649,30 @@ static GLboolean parse_program(struct r300_fragment_program *fp) flags = 0; if (fpi->Opcode != OPCODE_KIL) { - dest = t_dst(fp, fpi->DstReg); + dest = t_dst(cs, fpi->DstReg); mask = fpi->DstReg.WriteMask; } switch (fpi->Opcode) { case OPCODE_ABS: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_MAD, dest, mask, absolute(src[0]), pfs_one, pfs_zero, flags); break; case OPCODE_ADD: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], pfs_one, src[1], flags); break; case OPCODE_CMP: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - src[2] = t_src(fp, fpi->SrcReg[2]); + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + src[2] = t_src(cs, fpi->SrcReg[2]); /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c * r300 - if src2.c < 0.0 ? src1.c : src0.c */ - emit_arith(fp, PFS_OP_CMP, dest, mask, + emit_arith(cs, PFS_OP_CMP, dest, mask, src[2], src[1], src[0], flags); break; case OPCODE_COS: @@ -1675,196 +1684,196 @@ static GLboolean parse_program(struct r300_fragment_program *fp) * x = (x*2*PI)-PI * result = sin(x) */ - temp[0] = get_temp_reg(fp); - const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); - const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + temp[0] = get_temp_reg(cs); + const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); + const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); /* add 0.5*PI and do range reduction */ - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, swizzle(src[0], X, X, X, X), swizzle(const_sin[1], Z, Z, Z, Z), swizzle(const_sin[1], X, X, X, X), 0); - emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, + emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X, swizzle(temp[0], X, X, X, X), undef, undef, 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI 0); /* SIN */ - emit_arith(fp, PFS_OP_MAD, temp[0], + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], Z, Z, Z, Z), const_sin[0], pfs_zero, 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, swizzle(temp[0], Y, Y, Y, Y), absolute(swizzle(temp[0], Z, Z, Z, Z)), swizzle(temp[0], X, X, X, X), 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y, swizzle(temp[0], X, X, X, X), absolute(swizzle(temp[0], X, X, X, X)), negate(swizzle(temp[0], X, X, X, X)), 0); - emit_arith(fp, PFS_OP_MAD, dest, mask, + emit_arith(cs, PFS_OP_MAD, dest, mask, swizzle(temp[0], Y, Y, Y, Y), swizzle(const_sin[0], W, W, W, W), swizzle(temp[0], X, X, X, X), flags); - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; case OPCODE_DP3: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_DP3, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_DP3, dest, mask, src[0], src[1], undef, flags); break; case OPCODE_DP4: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_DP4, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_DP4, dest, mask, src[0], src[1], undef, flags); break; case OPCODE_DPH: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); /* src0.xyz1 -> temp * DP4 dest, temp, src1 */ - emit_arith(fp, PFS_OP_DP4, dest, mask, + emit_arith(cs, PFS_OP_DP4, dest, mask, swizzle(src[0], X, Y, Z, ONE), src[1], undef, flags); break; case OPCODE_DST: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); /* dest.y = src0.y * src1.y */ if (mask & WRITEMASK_Y) - emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Y, + emit_arith(cs, PFS_OP_MAD, dest, WRITEMASK_Y, keep(src[0]), keep(src[1]), pfs_zero, flags); /* dest.z = src0.z */ if (mask & WRITEMASK_Z) - emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Z, + emit_arith(cs, PFS_OP_MAD, dest, WRITEMASK_Z, src[0], pfs_one, pfs_zero, flags); /* result.x = 1.0 * result.w = src1.w */ if (mask & WRITEMASK_XW) { REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat */ - emit_arith(fp, PFS_OP_MAD, dest, + emit_arith(cs, PFS_OP_MAD, dest, mask & WRITEMASK_XW, src[1], pfs_one, pfs_zero, flags); } break; case OPCODE_EX2: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_EX2, dest, mask, + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_EX2, dest, mask, src[0], undef, undef, flags); break; case OPCODE_FLR: - src[0] = t_src(fp, fpi->SrcReg[0]); - temp[0] = get_temp_reg(fp); + src[0] = t_src(cs, fpi->SrcReg[0]); + temp[0] = get_temp_reg(cs); /* FRC temp, src0 * MAD dest, src0, 1.0, -temp */ - emit_arith(fp, PFS_OP_FRC, temp[0], mask, + emit_arith(cs, PFS_OP_FRC, temp[0], mask, keep(src[0]), undef, undef, 0); - emit_arith(fp, PFS_OP_MAD, dest, mask, + emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], pfs_one, negate(temp[0]), flags); - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; case OPCODE_FRC: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_FRC, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_FRC, dest, mask, src[0], undef, undef, flags); break; case OPCODE_KIL: - emit_tex(fp, fpi, R300_TEX_OP_KIL); + emit_tex(cs, fpi, R300_TEX_OP_KIL); break; case OPCODE_LG2: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_LG2, dest, mask, + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_LG2, dest, mask, src[0], undef, undef, flags); break; case OPCODE_LIT: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_lit(fp, dest, mask, src[0], flags); + src[0] = t_src(cs, fpi->SrcReg[0]); + emit_lit(cs, dest, mask, src[0], flags); break; case OPCODE_LRP: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - src[2] = t_src(fp, fpi->SrcReg[2]); + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + src[2] = t_src(cs, fpi->SrcReg[2]); /* result = tmp0tmp1 + (1 - tmp0)tmp2 * = tmp0tmp1 + tmp2 + (-tmp0)tmp2 * MAD temp, -tmp0, tmp2, tmp2 * MAD result, tmp0, tmp1, temp */ - temp[0] = get_temp_reg(fp); - emit_arith(fp, PFS_OP_MAD, temp[0], mask, + temp[0] = get_temp_reg(cs); + emit_arith(cs, PFS_OP_MAD, temp[0], mask, negate(keep(src[0])), keep(src[2]), src[2], 0); - emit_arith(fp, PFS_OP_MAD, dest, mask, + emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], src[1], temp[0], flags); - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; case OPCODE_MAD: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - src[2] = t_src(fp, fpi->SrcReg[2]); - emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + src[2] = t_src(cs, fpi->SrcReg[2]); + emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], src[1], src[2], flags); break; case OPCODE_MAX: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAX, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MAX, dest, mask, src[0], src[1], undef, flags); break; case OPCODE_MIN: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MIN, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MIN, dest, mask, src[0], src[1], undef, flags); break; case OPCODE_MOV: case OPCODE_SWZ: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], pfs_one, pfs_zero, flags); break; case OPCODE_MUL: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], src[1], pfs_zero, flags); break; case OPCODE_POW: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - src[1] = t_scalar_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); - emit_arith(fp, PFS_OP_LG2, temp[0], WRITEMASK_W, + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + src[1] = t_scalar_src(cs, fpi->SrcReg[1]); + temp[0] = get_temp_reg(cs); + emit_arith(cs, PFS_OP_LG2, temp[0], WRITEMASK_W, src[0], undef, undef, 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_W, temp[0], src[1], pfs_zero, 0); - emit_arith(fp, PFS_OP_EX2, dest, fpi->DstReg.WriteMask, + emit_arith(cs, PFS_OP_EX2, dest, fpi->DstReg.WriteMask, temp[0], undef, undef, 0); - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; case OPCODE_RCP: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_RCP, dest, mask, + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_RCP, dest, mask, src[0], undef, undef, flags); break; case OPCODE_RSQ: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_RSQ, dest, mask, + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_RSQ, dest, mask, absolute(src[0]), pfs_zero, pfs_zero, flags); break; case OPCODE_SCS: @@ -1875,51 +1884,51 @@ static GLboolean parse_program(struct r300_fragment_program *fp) * result.y = sin(x) (sin) * */ - temp[0] = get_temp_reg(fp); - temp[1] = get_temp_reg(fp); - const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); - const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + temp[0] = get_temp_reg(cs); + temp[1] = get_temp_reg(cs); + const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); + const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); /* x = -abs(x)+0.5*PI */ - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI pfs_half, negate(abs (swizzle(keep(src[0]), X, X, X, X))), 0); /* C*x (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_W, swizzle(const_sin[0], Y, Y, Y, Y), swizzle(keep(src[0]), X, X, X, X), pfs_zero, 0); /* B*x, C*x (cos) */ - emit_arith(fp, PFS_OP_MAD, temp[0], + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], Z, Z, Z, Z), const_sin[0], pfs_zero, 0); /* B*x (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, + emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W, swizzle(const_sin[0], X, X, X, X), keep(src[0]), pfs_zero, 0); /* y = B*x + C*x*abs(x) (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_Z, + emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_Z, absolute(src[0]), swizzle(temp[0], W, W, W, W), swizzle(temp[1], W, W, W, W), 0); /* y = B*x + C*x*abs(x) (cos) */ - emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, + emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W, swizzle(temp[0], Y, Y, Y, Y), absolute(swizzle(temp[0], Z, Z, Z, Z)), swizzle(temp[0], X, X, X, X), 0); /* y*abs(y) - y (cos), y*abs(y) - y (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[0], + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1], W, Z, Y, X), @@ -1927,26 +1936,26 @@ static GLboolean parse_program(struct r300_fragment_program *fp) negate(swizzle(temp[1], W, Z, Y, X)), 0); /* dest.xy = mad(temp.xy, P, temp2.wz) */ - emit_arith(fp, PFS_OP_MAD, dest, + emit_arith(cs, PFS_OP_MAD, dest, mask & (WRITEMASK_X | WRITEMASK_Y), temp[0], swizzle(const_sin[0], W, W, W, W), swizzle(temp[1], W, Z, Y, X), flags); - free_temp(fp, temp[0]); - free_temp(fp, temp[1]); + free_temp(cs, temp[0]); + free_temp(cs, temp[1]); break; case OPCODE_SGE: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + temp[0] = get_temp_reg(cs); /* temp = src0 - src1 * dest.c = (temp.c < 0.0) ? 0 : 1 */ - emit_arith(fp, PFS_OP_MAD, temp[0], mask, + emit_arith(cs, PFS_OP_MAD, temp[0], mask, src[0], pfs_one, negate(src[1]), 0); - emit_arith(fp, PFS_OP_CMP, dest, mask, + emit_arith(cs, PFS_OP_CMP, dest, mask, pfs_one, pfs_zero, temp[0], 0); - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; case OPCODE_SIN: /* @@ -1956,85 +1965,85 @@ static GLboolean parse_program(struct r300_fragment_program *fp) * itself squared. */ - temp[0] = get_temp_reg(fp); - const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); - const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + temp[0] = get_temp_reg(cs); + const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); + const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); /* do range reduction */ - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, swizzle(keep(src[0]), X, X, X, X), swizzle(const_sin[1], Z, Z, Z, Z), pfs_half, 0); - emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, + emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X, swizzle(temp[0], X, X, X, X), undef, undef, 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI 0); /* SIN */ - emit_arith(fp, PFS_OP_MAD, temp[0], + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], Z, Z, Z, Z), const_sin[0], pfs_zero, 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, swizzle(temp[0], Y, Y, Y, Y), absolute(swizzle(temp[0], Z, Z, Z, Z)), swizzle(temp[0], X, X, X, X), 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y, swizzle(temp[0], X, X, X, X), absolute(swizzle(temp[0], X, X, X, X)), negate(swizzle(temp[0], X, X, X, X)), 0); - emit_arith(fp, PFS_OP_MAD, dest, mask, + emit_arith(cs, PFS_OP_MAD, dest, mask, swizzle(temp[0], Y, Y, Y, Y), swizzle(const_sin[0], W, W, W, W), swizzle(temp[0], X, X, X, X), flags); - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; case OPCODE_SLT: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + temp[0] = get_temp_reg(cs); /* temp = src0 - src1 * dest.c = (temp.c < 0.0) ? 1 : 0 */ - emit_arith(fp, PFS_OP_MAD, temp[0], mask, + emit_arith(cs, PFS_OP_MAD, temp[0], mask, src[0], pfs_one, negate(src[1]), 0); - emit_arith(fp, PFS_OP_CMP, dest, mask, + emit_arith(cs, PFS_OP_CMP, dest, mask, pfs_zero, pfs_one, temp[0], 0); - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; case OPCODE_SUB: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], pfs_one, negate(src[1]), flags); break; case OPCODE_TEX: - emit_tex(fp, fpi, R300_TEX_OP_LD); + emit_tex(cs, fpi, R300_TEX_OP_LD); break; case OPCODE_TXB: - emit_tex(fp, fpi, R300_TEX_OP_TXB); + emit_tex(cs, fpi, R300_TEX_OP_TXB); break; case OPCODE_TXP: - emit_tex(fp, fpi, R300_TEX_OP_TXP); + emit_tex(cs, fpi, R300_TEX_OP_TXP); break; case OPCODE_XPD:{ - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + temp[0] = get_temp_reg(cs); /* temp = src0.zxy * src1.yzx */ - emit_arith(fp, PFS_OP_MAD, temp[0], + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_XYZ, swizzle(keep(src[0]), Z, X, Y, W), swizzle(keep(src[1]), Y, Z, X, W), @@ -2042,14 +2051,14 @@ static GLboolean parse_program(struct r300_fragment_program *fp) /* dest.xyz = src0.yzx * src1.zxy - temp * dest.w = undefined * */ - emit_arith(fp, PFS_OP_MAD, dest, + emit_arith(cs, PFS_OP_MAD, dest, mask & WRITEMASK_XYZ, swizzle(src[0], Y, Z, X, W), swizzle(src[1], Z, X, Y, W), negate(temp[0]), flags); /* cleanup */ - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; } default: @@ -2160,7 +2169,7 @@ static void insert_wpos(struct gl_program *prog) /* - Init structures * - Determine what hwregs each input corresponds to */ -static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) +static void init_program(struct r300_pfs_compile_state *cs) { COMPILE_STATE; struct gl_fragment_program *mp = &fp->mesa_program; @@ -2171,19 +2180,18 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) /* New compile, reset tracking data */ fp->optimization = - driQueryOptioni(&r300->radeon.optionCache, "fp_optimization"); + driQueryOptioni(&cs->r300->radeon.optionCache, "fp_optimization"); fp->translated = GL_FALSE; fp->error = GL_FALSE; fp->WritesDepth = GL_FALSE; - fp->tex.length = 0; - fp->cur_node = 0; - fp->first_node_has_tex = 0; - fp->const_nr = 0; - fp->max_temp_idx = 0; - fp->node[0].alu_end = -1; - fp->node[0].tex_end = -1; - - _mesa_memset(cs, 0, sizeof(*fp->cs)); + code->tex.length = 0; + code->cur_node = 0; + code->first_node_has_tex = 0; + code->const_nr = 0; + code->max_temp_idx = 0; + code->node[0].alu_end = -1; + code->node[0].tex_end = -1; + for (i = 0; i < PFS_MAX_ALU_INST; i++) { for (j = 0; j < 3; j++) { cs->slot[i].vsrc[j] = SRC_CONST; @@ -2200,11 +2208,11 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) */ /* Texcoords come first */ - for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { + for (i = 0; i < cs->r300->radeon.glCtx->Const.MaxTextureUnits; i++) { if (InputsRead & (FRAG_BIT_TEX0 << i)) { cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = - get_hw_temp(fp, 0); + get_hw_temp(cs, 0); } } InputsRead &= ~FRAG_BITS_TEX_ANY; @@ -2212,7 +2220,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) /* fragment position treated as a texcoord */ if (InputsRead & FRAG_BIT_WPOS) { cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; - cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0); + cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(cs, 0); insert_wpos(&mp->Base); } InputsRead &= ~FRAG_BIT_WPOS; @@ -2220,14 +2228,14 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) /* Then primary colour */ if (InputsRead & FRAG_BIT_COL0) { cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0); + cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(cs, 0); } InputsRead &= ~FRAG_BIT_COL0; /* Secondary color */ if (InputsRead & FRAG_BIT_COL1) { cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0); + cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(cs, 0); } InputsRead &= ~FRAG_BIT_COL1; @@ -2283,13 +2291,13 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) cs->temp_in_use = temps_used; } -static void update_params(struct r300_fragment_program *fp) +static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp) { struct gl_fragment_program *mp = &fp->mesa_program; /* Ask Mesa nicely to fill in ParameterValues for us */ if (mp->Base.Parameters) - _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters); + _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); } void r300TranslateFragmentShader(r300ContextPtr r300, @@ -2298,37 +2306,40 @@ void r300TranslateFragmentShader(r300ContextPtr r300, if (!fp->translated) { struct r300_pfs_compile_state cs; - fp->cs = &cs; - init_program(r300, fp); + _mesa_memset(&cs, 0, sizeof(cs)); + cs.r300 = r300; + cs.fp = fp; + init_program(&cs); - if (parse_program(fp) == GL_FALSE) { - dump_program(fp); + if (parse_program(&cs) == GL_FALSE) { + dump_program(fp, &fp->code); return; } /* Finish off */ - fp->node[fp->cur_node].alu_end = - cs.nrslots - fp->node[fp->cur_node].alu_offset - 1; - if (fp->node[fp->cur_node].tex_end < 0) - fp->node[fp->cur_node].tex_end = 0; - fp->alu_offset = 0; - fp->alu_end = cs.nrslots - 1; - fp->tex_offset = 0; - fp->tex_end = fp->tex.length ? fp->tex.length - 1 : 0; - assert(fp->node[fp->cur_node].alu_end >= 0); - assert(fp->alu_end >= 0); + fp->code.node[fp->code.cur_node].alu_end = + cs.nrslots - fp->code.node[fp->code.cur_node].alu_offset - 1; + if (fp->code.node[fp->code.cur_node].tex_end < 0) + fp->code.node[fp->code.cur_node].tex_end = 0; + fp->code.alu_offset = 0; + fp->code.alu_end = cs.nrslots - 1; + fp->code.tex_offset = 0; + fp->code.tex_end = fp->code.tex.length ? fp->code.tex.length - 1 : 0; + assert(fp->code.node[fp->code.cur_node].alu_end >= 0); + assert(fp->code.alu_end >= 0); fp->translated = GL_TRUE; if (RADEON_DEBUG & DEBUG_PIXEL) - dump_program(fp); - r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); + dump_program(fp, &fp->code); + r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); } - update_params(fp); + update_params(r300, fp); } /* just some random things... */ -static void dump_program(struct r300_fragment_program *fp) +static void dump_program(struct r300_fragment_program *fp, + struct r300_fragment_program_code *code) { int n, i, j; static int pc = 0; @@ -2343,21 +2354,21 @@ static void dump_program(struct r300_fragment_program *fp) fprintf(stderr, "Hardware program\n"); fprintf(stderr, "----------------\n"); - for (n = 0; n < (fp->cur_node + 1); n++) { + for (n = 0; n < (code->cur_node + 1); n++) { fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " "alu_end: %d, tex_end: %d\n", n, - fp->node[n].alu_offset, - fp->node[n].tex_offset, - fp->node[n].alu_end, fp->node[n].tex_end); + code->node[n].alu_offset, + code->node[n].tex_offset, + code->node[n].alu_end, code->node[n].tex_end); - if (fp->tex.length) { + if (code->tex.length) { fprintf(stderr, " TEX:\n"); - for (i = fp->node[n].tex_offset; - i <= fp->node[n].tex_offset + fp->node[n].tex_end; + for (i = code->node[n].tex_offset; + i <= code->node[n].tex_offset + code->node[n].tex_end; ++i) { const char *instr; - switch ((fp->tex. + switch ((code->tex. inst[i] >> R300_TEX_INST_SHIFT) & 15) { case R300_TEX_OP_LD: @@ -2379,20 +2390,20 @@ static void dump_program(struct r300_fragment_program *fp) fprintf(stderr, " %s t%i, %c%i, texture[%i] (%08x)\n", instr, - (fp->tex. + (code->tex. inst[i] >> R300_DST_ADDR_SHIFT) & 31, 't', - (fp->tex. + (code->tex. inst[i] >> R300_SRC_ADDR_SHIFT) & 31, - (fp->tex. + (code->tex. inst[i] & R300_TEX_ID_MASK) >> R300_TEX_ID_SHIFT, - fp->tex.inst[i]); + code->tex.inst[i]); } } - for (i = fp->node[n].alu_offset; - i <= fp->node[n].alu_offset + fp->node[n].alu_end; ++i) { + for (i = code->node[n].alu_offset; + i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) { char srcc[3][10], dstc[20]; char srca[3][10], dsta[20]; char argc[3][20]; @@ -2400,8 +2411,8 @@ static void dump_program(struct r300_fragment_program *fp) char flags[5], tmp[10]; for (j = 0; j < 3; ++j) { - int regc = fp->alu.inst[i].inst1 >> (j * 6); - int rega = fp->alu.inst[i].inst3 >> (j * 6); + int regc = code->alu.inst[i].inst1 >> (j * 6); + int rega = code->alu.inst[i].inst3 >> (j * 6); sprintf(srcc[j], "%c%i", (regc & 32) ? 'c' : 't', regc & 31); @@ -2411,46 +2422,46 @@ static void dump_program(struct r300_fragment_program *fp) dstc[0] = 0; sprintf(flags, "%s%s%s", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 & R300_ALU_DSTC_REG_X) ? "x" : "", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 & R300_ALU_DSTC_REG_Z) ? "z" : ""); if (flags[0] != 0) { sprintf(dstc, "t%i.%s ", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 >> R300_ALU_DSTC_SHIFT) & 31, flags); } sprintf(flags, "%s%s%s", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); if (flags[0] != 0) { sprintf(tmp, "o%i.%s", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 >> R300_ALU_DSTC_SHIFT) & 31, flags); strcat(dstc, tmp); } dsta[0] = 0; - if (fp->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { sprintf(dsta, "t%i.w ", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst3 >> R300_ALU_DSTA_SHIFT) & 31); } - if (fp->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { sprintf(tmp, "o%i.w ", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst3 >> R300_ALU_DSTA_SHIFT) & 31); strcat(dsta, tmp); } - if (fp->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { strcat(dsta, "Z"); } @@ -2458,12 +2469,12 @@ static void dump_program(struct r300_fragment_program *fp) "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" " w: %3s %3s %3s -> %-20s (%08x)\n", i, srcc[0], srcc[1], srcc[2], dstc, - fp->alu.inst[i].inst1, srca[0], srca[1], - srca[2], dsta, fp->alu.inst[i].inst3); + code->alu.inst[i].inst1, srca[0], srca[1], + srca[2], dsta, code->alu.inst[i].inst3); for (j = 0; j < 3; ++j) { - int regc = fp->alu.inst[i].inst0 >> (j * 7); - int rega = fp->alu.inst[i].inst2 >> (j * 7); + int regc = code->alu.inst[i].inst0 >> (j * 7); + int rega = code->alu.inst[i].inst2 >> (j * 7); int d; char buf[20]; @@ -2545,8 +2556,8 @@ static void dump_program(struct r300_fragment_program *fp) fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" " w: %8s %8s %8s op: %08x\n", argc[0], argc[1], argc[2], - fp->alu.inst[i].inst0, arga[0], arga[1], - arga[2], fp->alu.inst[i].inst2); + code->alu.inst[i].inst0, arga[0], arga[1], + arga[2], code->alu.inst[i].inst2); } } } diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 5c8fd8a5e5..f30fd986e0 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -28,7 +28,6 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, target, id); } else { r300_fp = CALLOC_STRUCT(r300_fragment_program); - r300_fp->ctx = ctx; return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, target, id); } diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index c3f1cc9e28..1dcf9e0cab 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1308,18 +1308,19 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) int i; struct r300_fragment_program *fp = (struct r300_fragment_program *) (char *)ctx->FragmentProgram._Current; + struct r300_fragment_program_code *code = &fp->code; R300_STATECHANGE(r300, fpt); - for (i = 0; i < fp->tex.length; i++) { + for (i = 0; i < code->tex.length; i++) { int unit; int opcode; unsigned long val; - unit = fp->tex.inst[i] >> R300_TEX_ID_SHIFT; + unit = code->tex.inst[i] >> R300_TEX_ID_SHIFT; unit &= 15; - val = fp->tex.inst[i]; + val = code->tex.inst[i]; val &= ~R300_TEX_ID_MASK; opcode = @@ -1341,7 +1342,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) } r300->hw.fpt.cmd[R300_FPT_CMD_0] = - cmdpacket0(R300_US_TEX_INST_0, fp->tex.length); + cmdpacket0(R300_US_TEX_INST_0, code->tex.length); } static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) @@ -2405,6 +2406,7 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) GLcontext *ctx = rmesa->radeon.glCtx; struct r300_fragment_program *fp = (struct r300_fragment_program *) (char *)ctx->FragmentProgram._Current; + struct r300_fragment_program_code *code; int i, k; if (!fp) /* should only happenen once, just after context is created */ @@ -2416,62 +2418,63 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) __FUNCTION__); return; } + code = &fp->code; r300SetupTextures(ctx); R300_STATECHANGE(rmesa, fpi[0]); - rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, fp->alu_end + 1); - for (i = 0; i <= fp->alu_end; i++) { - rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst0; + rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, code->alu_end + 1); + for (i = 0; i <= code->alu_end; i++) { + rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0; } R300_STATECHANGE(rmesa, fpi[1]); - rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, fp->alu_end + 1); - for (i = 0; i <= fp->alu_end; i++) { - rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst1; + rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, code->alu_end + 1); + for (i = 0; i <= code->alu_end; i++) { + rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1; } R300_STATECHANGE(rmesa, fpi[2]); - rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, fp->alu_end + 1); - for (i = 0; i <= fp->alu_end; i++) { - rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst2; + rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, code->alu_end + 1); + for (i = 0; i <= code->alu_end; i++) { + rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst2; } R300_STATECHANGE(rmesa, fpi[3]); - rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, fp->alu_end + 1); - for (i = 0; i <= fp->alu_end; i++) { - rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst3; + rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, code->alu_end + 1); + for (i = 0; i <= code->alu_end; i++) { + rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst3; } R300_STATECHANGE(rmesa, fp); - rmesa->hw.fp.cmd[R300_FP_CNTL0] = fp->cur_node | (fp->first_node_has_tex << 3); - rmesa->hw.fp.cmd[R300_FP_CNTL1] = fp->max_temp_idx; + rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->cur_node | (code->first_node_has_tex << 3); + rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->max_temp_idx; rmesa->hw.fp.cmd[R300_FP_CNTL2] = - (fp->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | - (fp->alu_end << R300_PFS_CNTL_ALU_END_SHIFT) | - (fp->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | - (fp->tex_end << R300_PFS_CNTL_TEX_END_SHIFT); + (code->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | + (code->alu_end << R300_PFS_CNTL_ALU_END_SHIFT) | + (code->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | + (code->tex_end << R300_PFS_CNTL_TEX_END_SHIFT); /* I just want to say, the way these nodes are stored.. weird.. */ - for (i = 0, k = (4 - (fp->cur_node + 1)); i < 4; i++, k++) { - if (i < (fp->cur_node + 1)) { + for (i = 0, k = (4 - (code->cur_node + 1)); i < 4; i++, k++) { + if (i < (code->cur_node + 1)) { rmesa->hw.fp.cmd[R300_FP_NODE0 + k] = - (fp->node[i].alu_offset << R300_ALU_START_SHIFT) | - (fp->node[i].alu_end << R300_ALU_SIZE_SHIFT) | - (fp->node[i].tex_offset << R300_TEX_START_SHIFT) | - (fp->node[i].tex_end << R300_TEX_SIZE_SHIFT) | - fp->node[i].flags; + (code->node[i].alu_offset << R300_ALU_START_SHIFT) | + (code->node[i].alu_end << R300_ALU_SIZE_SHIFT) | + (code->node[i].tex_offset << R300_TEX_START_SHIFT) | + (code->node[i].tex_end << R300_TEX_SIZE_SHIFT) | + code->node[i].flags; } else { rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0; } } R300_STATECHANGE(rmesa, fpp); - rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, fp->const_nr * 4); - for (i = 0; i < fp->const_nr; i++) { - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(fp->constant[i][0]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(fp->constant[i][1]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(fp->constant[i][2]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(fp->constant[i][3]); + rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4); + for (i = 0; i < code->const_nr; i++) { + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(code->constant[i][0]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(code->constant[i][1]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(code->constant[i][2]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(code->constant[i][3]); } } -- cgit v1.2.3 From e2aa45c2f9584ff76151a99b4fcd0ecb56260473 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Fri, 13 Jun 2008 22:09:37 +0200 Subject: r300: Do not include r300_fragprog.h from r300_context.h and other cleanups --- src/mesa/drivers/dri/r300/r300_context.h | 1 - src/mesa/drivers/dri/r300/r300_fragprog.c | 34 ++++++++----------------------- src/mesa/drivers/dri/r300/r300_ioctl.c | 1 + src/mesa/drivers/dri/r300/r300_render.c | 1 + src/mesa/drivers/dri/r300/r300_state.c | 1 + 5 files changed, 11 insertions(+), 27 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_context.h') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 05efb813e5..a9b3b061f4 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -73,7 +73,6 @@ typedef struct r300_context *r300ContextPtr; } #include "r300_vertprog.h" -#include "r300_fragprog.h" #include "r500_fragprog.h" /** diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 819615c141..9d7a8c6570 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -445,8 +445,8 @@ static int get_hw_temp(struct r300_pfs_compile_state *cs, int slot) cs->hwtemps[r].vector_valid = 0; cs->hwtemps[r].scalar_valid = 0; - if (r > fp->code.max_temp_idx) - fp->code.max_temp_idx = r; + if (r > code->max_temp_idx) + code->max_temp_idx = r; return r; } @@ -1029,11 +1029,11 @@ static void emit_tex(struct r300_pfs_compile_state *cs, tokens[2] = unit; factor_index = - _mesa_add_state_reference(cs->fp->mesa_program.Base. + _mesa_add_state_reference(fp->mesa_program.Base. Parameters, tokens); factorreg = emit_const4fv(cs, - cs->fp->mesa_program.Base.Parameters-> + fp->mesa_program.Base.Parameters-> ParameterValues[factor_index]); tempreg = keep(get_temp_reg(cs)); @@ -1264,10 +1264,10 @@ static int find_and_prepare_slot(struct r300_pfs_compile_state *cs, return -1; } - fp->code.alu.inst[pos].inst0 = NOP_INST0; - fp->code.alu.inst[pos].inst1 = NOP_INST1; - fp->code.alu.inst[pos].inst2 = NOP_INST2; - fp->code.alu.inst[pos].inst3 = NOP_INST3; + code->alu.inst[pos].inst0 = NOP_INST0; + code->alu.inst[pos].inst1 = NOP_INST1; + code->alu.inst[pos].inst2 = NOP_INST2; + code->alu.inst[pos].inst3 = NOP_INST3; cs->nrslots++; } @@ -1518,24 +1518,6 @@ static void emit_arith(struct r300_pfs_compile_state *cs, return; } -#if 0 -static GLuint get_attrib(struct r300_fragment_program *fp, GLuint attr) -{ - struct gl_fragment_program *mp = &fp->mesa_program; - GLuint r = undef; - - if (!(mp->Base.InputsRead & (1 << attr))) { - ERROR("Attribute %d was not provided!\n", attr); - return undef; - } - - REG_SET_TYPE(r, REG_TYPE_INPUT); - REG_SET_INDEX(r, attr); - REG_SET_VALID(r, GL_TRUE); - return r; -} -#endif - static GLfloat SinCosConsts[2][4] = { { 1.273239545, // 4/PI diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 6af23300f2..71821a01ea 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -54,6 +54,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_vertprog.h" #include "radeon_reg.h" #include "r300_emit.h" +#include "r300_fragprog.h" #include "vblank.h" diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index fc07105c56..8f74f9d785 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -74,6 +74,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_reg.h" #include "r300_tex.h" #include "r300_emit.h" +#include "r300_fragprog.h" extern int future_hw_tcl_on; /** diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 1dcf9e0cab..e82c3d9681 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -60,6 +60,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_state.h" #include "r300_reg.h" #include "r300_emit.h" +#include "r300_fragprog.h" #include "r300_tex.h" #include "drirenderbuffer.h" -- cgit v1.2.3 From a1c0c56d70308a562c90cc01982c89ed1396c830 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 14 Jun 2008 04:07:51 +0200 Subject: r300: Implement GL_ARB_shadow and GL_EXT_shadow_funcs --- src/mesa/drivers/dri/r300/r300_context.c | 2 + src/mesa/drivers/dri/r300/r300_context.h | 29 +++++++ src/mesa/drivers/dri/r300/r300_fragprog.c | 122 +++++++++++++++++++++++++++++- 3 files changed, 152 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r300/r300_context.h') diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 31cc00a081..063d4e575e 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -98,6 +98,7 @@ const struct dri_extension card_extensions[] = { {"GL_ARB_fragment_program", NULL}, {"GL_ARB_multisample", GL_ARB_multisample_functions}, {"GL_ARB_multitexture", NULL}, + {"GL_ARB_shadow", NULL}, {"GL_ARB_texture_border_clamp", NULL}, {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, {"GL_ARB_texture_cube_map", NULL}, @@ -116,6 +117,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions}, {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, + {"GL_EXT_shadow_funcs", NULL}, {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions}, {"GL_EXT_stencil_wrap", NULL}, {"GL_EXT_texture_edge_clamp", NULL}, diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index a9b3b061f4..ca22c5dd8a 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -655,6 +655,34 @@ struct r300_vertex_program_cont { struct r300_pfs_compile_state; + +/** + * Stores state that influences the compilation of a fragment program. + */ +struct r300_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. + */ + GLuint depth_texture_mode : 2; + + /** + * If the sampler is used as a shadow sampler, + * this field is (texture_compare_func - GL_NEVER). + * [e.g. if compare function is GL_LEQUAL, this field is 3] + * + * Otherwise, this field is 0. + */ + GLuint texture_compare_func : 3; + } unit[16]; +}; + + /** * Stores an R300 fragment program in its compiled-to-hardware form. */ @@ -711,6 +739,7 @@ struct r300_fragment_program { GLboolean translated; GLboolean error; + struct r300_fragment_program_external_state state; struct r300_fragment_program_code code; GLboolean WritesDepth; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 814ccd3eac..da2dedece8 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -81,6 +81,33 @@ static GLboolean transform_TEX( inst.Opcode != OPCODE_KIL) return GL_FALSE; + if (inst.Opcode != OPCODE_KIL && + compiler->fp->mesa_program.Base.ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg = inst.DstReg; + tgt->SrcReg[0].File = PROGRAM_BUILTIN; + tgt->SrcReg[0].Swizzle = SWIZZLE_0000; + tgt->SrcReg[1].File = PROGRAM_BUILTIN; + tgt->SrcReg[1].Swizzle = SWIZZLE_0000; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = comparefunc == GL_ALWAYS ? SWIZZLE_1111 : SWIZZLE_0000; + return GL_TRUE; + } + + int tempreg = radeonCompilerAllocateTemporary(context->compiler); + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; + } + + /* Hardware uses [0..1]x[0..1] range for rectangle textures * instead of [0..Width]x[0..Height]. * Add a scaling instruction. @@ -156,7 +183,51 @@ static GLboolean transform_TEX( context->dest->NumInstructions, 1); _mesa_copy_instructions(tgt, &inst, 1); - if (destredirect) { + if (inst.Opcode != OPCODE_KIL && + compiler->fp->mesa_program.Base.ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; + + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 2); + + tgt[0].Opcode = OPCODE_MAD; + tgt[0].DstReg = inst.DstReg; + tgt[0].DstReg.WriteMask = orig_inst->DstReg.WriteMask; + tgt[0].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[0].SrcReg[0].Index = inst.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + tgt[0].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; + tgt[0].SrcReg[1].File = PROGRAM_BUILTIN; + tgt[0].SrcReg[1].Swizzle = SWIZZLE_1111; + tgt[0].SrcReg[2] = inst.SrcReg[0]; + tgt[0].SrcReg[2].Swizzle = SWIZZLE_ZZZZ; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + tgt[0].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + else + tgt[0].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + + tgt[1].Opcode = OPCODE_CMP; + tgt[1].DstReg = orig_inst->DstReg; + tgt[1].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[0].Index = tgt[0].DstReg.Index; + tgt[1].SrcReg[1].File = PROGRAM_BUILTIN; + tgt[1].SrcReg[2].File = PROGRAM_BUILTIN; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + tgt[1].SrcReg[1].Swizzle = SWIZZLE_1111; + tgt[1].SrcReg[2].Swizzle = SWIZZLE_0000; + } else { + tgt[1].SrcReg[1].Swizzle = SWIZZLE_0000; + tgt[1].SrcReg[2].Swizzle = SWIZZLE_1111; + } + } else if (destredirect) { tgt = radeonClauseInsertInstructions(context->compiler, context->dest, context->dest->NumInstructions, 1); @@ -280,9 +351,58 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) } +static GLuint build_dtm(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return 0; + case GL_INTENSITY: return 1; + case GL_ALPHA: return 2; + } +} + +static GLuint build_func(GLuint comparefunc) +{ + return comparefunc - GL_NEVER; +} + + +/** + * Collect all external state that is relevant for compiling the given + * fragment program. + */ +static void build_state( + r300ContextPtr r300, + struct r300_fragment_program *fp, + struct r300_fragment_program_external_state *state) +{ + int unit; + + _mesa_bzero(state, sizeof(*state)); + + for(unit = 0; unit < 16; ++unit) { + if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; + + state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); + } + } +} + + void r300TranslateFragmentShader(r300ContextPtr r300, struct r300_fragment_program *fp) { + struct r300_fragment_program_external_state state; + + build_state(r300, fp, &state); + if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { + /* TODO: cache compiled programs */ + fp->translated = GL_FALSE; + _mesa_memcpy(&fp->state, &state, sizeof(state)); + } + if (!fp->translated) { struct r300_fragment_program_compiler compiler; -- cgit v1.2.3 From 0a341ef29657c1ead116c4acaca138551631de16 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 14 Jun 2008 07:03:08 -0700 Subject: r5xx: FP refactor, take one. Yes, I know it's massive. Imagine how I felt, auditing 3000 lines of code. --- src/mesa/drivers/dri/r300/Makefile | 2 +- src/mesa/drivers/dri/r300/r300_context.h | 51 +- src/mesa/drivers/dri/r300/r300_state.c | 49 +- src/mesa/drivers/dri/r300/r500_fragprog.c | 1801 ++++++----------------------- src/mesa/drivers/dri/r300/r500_fragprog.h | 13 + 5 files changed, 426 insertions(+), 1490 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_context.h') diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 7cd5647064..7b8f5f1384 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -28,7 +28,6 @@ DRIVER_SOURCES = \ radeon_span.c \ radeon_state.c \ r300_mem.c \ - \ r300_context.c \ r300_ioctl.c \ r300_cmdbuf.c \ @@ -42,6 +41,7 @@ DRIVER_SOURCES = \ r300_fragprog.c \ r300_fragprog_emit.c \ r500_fragprog.c \ + r500_fragprog_emit.c \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index ca22c5dd8a..1a90f5cabb 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -748,14 +748,30 @@ struct r300_fragment_program { struct r500_pfs_compile_state; -struct r500_fragment_program { - struct gl_fragment_program mesa_program; +struct r500_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. + */ + GLuint depth_texture_mode : 2; - GLcontext *ctx; - GLboolean translated; - GLboolean error; - struct r500_pfs_compile_state *cs; + /** + * If the sampler is used as a shadow sampler, + * this field is (texture_compare_func - GL_NEVER). + * [e.g. if compare function is GL_LEQUAL, this field is 3] + * + * Otherwise, this field is 0. + */ + GLuint texture_compare_func : 3; + } unit[16]; +}; +struct r500_fragment_program_code { struct { GLuint inst0; GLuint inst1; @@ -772,17 +788,28 @@ struct r500_fragment_program { int inst_end; /* Hardware constants. - * Contains a pointer to the value. The destination of the pointer - * is supposed to be updated when GL state changes. - * Typically, this is either a pointer into - * gl_program_parameter_list::ParameterValues, or a pointer to a - * global constant (e.g. for sin/cos-approximation) - */ + * Contains a pointer to the value. The destination of the pointer + * is supposed to be updated when GL state changes. + * Typically, this is either a pointer into + * gl_program_parameter_list::ParameterValues, or a pointer to a + * global constant (e.g. for sin/cos-approximation) + */ const GLfloat *constant[PFS_NUM_CONST_REGS]; int const_nr; int max_temp_idx; +}; +struct r500_fragment_program { + struct gl_fragment_program mesa_program; + + GLcontext *ctx; + GLboolean translated; + GLboolean error; + + struct r500_fragment_program_external_state state; + struct r500_fragment_program_code code; + GLboolean writes_depth; GLuint optimization; diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index e82c3d9681..a86e4bc344 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1351,14 +1351,15 @@ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) int i; struct r500_fragment_program *fp = (struct r500_fragment_program *) (char *)ctx->FragmentProgram._Current; + struct r500_fragment_program_code *code = &fp->code; /* find all the texture instructions and relocate the texture units */ - for (i = 0; i < fp->inst_end + 1; i++) { - if ((fp->inst[i].inst0 & 0x3) == R500_INST_TYPE_TEX) { + for (i = 0; i < code->inst_end + 1; i++) { + if ((code->inst[i].inst0 & 0x3) == R500_INST_TYPE_TEX) { uint32_t val; int unit, opcode, new_unit; - val = fp->inst[i].inst1; + val = code->inst[i].inst1; unit = (val >> 16) & 0xf; @@ -1375,7 +1376,7 @@ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) } } val |= R500_TEX_ID(new_unit); - fp->inst[i].inst1 = val; + code->inst[i].inst1 = val; } } } @@ -2499,6 +2500,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) struct r500_fragment_program *fp = (struct r500_fragment_program *) (char *)ctx->FragmentProgram._Current; int i; + struct r500_fragment_program_code *code; if (!fp) /* should only happenen once, just after context is created */ return; @@ -2512,42 +2514,43 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) __FUNCTION__); return; } + code = &fp->code; r300SetupTextures(ctx); R300_STATECHANGE(rmesa, fp); - rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = fp->max_temp_idx; + rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx; rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] = - R500_US_CODE_START_ADDR(fp->inst_offset) | - R500_US_CODE_END_ADDR(fp->inst_end); + R500_US_CODE_START_ADDR(code->inst_offset) | + R500_US_CODE_END_ADDR(code->inst_end); rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] = - R500_US_CODE_RANGE_ADDR(fp->inst_offset) | - R500_US_CODE_RANGE_SIZE(fp->inst_end); + R500_US_CODE_RANGE_ADDR(code->inst_offset) | + R500_US_CODE_RANGE_SIZE(code->inst_end); rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] = R500_US_CODE_OFFSET_ADDR(0); /* FIXME when we add flow control */ R300_STATECHANGE(rmesa, r500fp); /* Emit our shader... */ - for (i = 0; i < fp->inst_end+1; i++) { - rmesa->hw.r500fp.cmd[i*6+1] = fp->inst[i].inst0; - rmesa->hw.r500fp.cmd[i*6+2] = fp->inst[i].inst1; - rmesa->hw.r500fp.cmd[i*6+3] = fp->inst[i].inst2; - rmesa->hw.r500fp.cmd[i*6+4] = fp->inst[i].inst3; - rmesa->hw.r500fp.cmd[i*6+5] = fp->inst[i].inst4; - rmesa->hw.r500fp.cmd[i*6+6] = fp->inst[i].inst5; + for (i = 0; i < code->inst_end+1; i++) { + rmesa->hw.r500fp.cmd[i*6+1] = code->inst[i].inst0; + rmesa->hw.r500fp.cmd[i*6+2] = code->inst[i].inst1; + rmesa->hw.r500fp.cmd[i*6+3] = code->inst[i].inst2; + rmesa->hw.r500fp.cmd[i*6+4] = code->inst[i].inst3; + rmesa->hw.r500fp.cmd[i*6+5] = code->inst[i].inst4; + rmesa->hw.r500fp.cmd[i*6+6] = code->inst[i].inst5; } - bump_r500fp_count(rmesa->hw.r500fp.cmd, (fp->inst_end + 1) * 6); + bump_r500fp_count(rmesa->hw.r500fp.cmd, (code->inst_end + 1) * 6); R300_STATECHANGE(rmesa, r500fp_const); - for (i = 0; i < fp->const_nr; i++) { - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(fp->constant[i][0]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(fp->constant[i][1]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(fp->constant[i][2]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(fp->constant[i][3]); + for (i = 0; i < code->const_nr; i++) { + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(code->constant[i][0]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(code->constant[i][1]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(code->constant[i][2]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(code->constant[i][3]); } - bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->const_nr * 4); + bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4); } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 70e45f3ea8..197036008a 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -1,8 +1,5 @@ /* - * Copyright (C) 2005 Ben Skeggs. - * * Copyright 2008 Corbin Simpson - * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. * * All Rights Reserved. * @@ -28,1491 +25,391 @@ * */ -/** - * \file - * - * \author Ben Skeggs - * - * \author Jerome Glisse - * - * \author Corbin Simpson - * - * \todo Depth write, WPOS/FOGC inputs - * - * \todo FogOption - * - * \todo Verify results of opcodes for accuracy, I've only checked them in - * specific cases. - */ - -#include "glheader.h" -#include "macros.h" -#include "enums.h" -#include "shader/prog_instruction.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" - -#include "r300_context.h" #include "r500_fragprog.h" -#include "r300_reg.h" -#include "r300_state.h" - -/* Mapping Mesa registers to R500 temporaries */ -struct reg_acc { - int reg; /* Assigned hw temp */ - unsigned int refcount; /* Number of uses by mesa program */ -}; -/** - * Describe the current lifetime information for an R300 temporary - */ -struct reg_lifetime { - /* Index of the first slot where this register is free in the sense - that it can be used as a new destination register. - This is -1 if the register has been assigned to a Mesa register - and the last access to the register has not yet been emitted */ - int free; - - /* Index of the first slot where this register is currently reserved. - This is used to stop e.g. a scalar operation from being moved - before the allocation time of a register that was first allocated - for a vector operation. */ - int reserved; - - /* Index of the first slot in which the register can be used as a - source without losing the value that is written by the last - emitted instruction that writes to the register */ - int vector_valid; - int scalar_valid; - - /* Index to the slot where the register was last read. - This is also the first slot in which the register may be written again */ - int vector_lastread; - int scalar_lastread; -}; - -/** - * Store usage information about an ALU instruction slot during the - * compilation of a fragment program. - */ -#define SLOT_SRC_VECTOR (1<<0) -#define SLOT_SRC_SCALAR (1<<3) -#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR) -#define SLOT_OP_VECTOR (1<<16) -#define SLOT_OP_SCALAR (1<<17) -#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) - -struct r500_pfs_compile_slot { - /* Bitmask indicating which parts of the slot are used, using SLOT_ constants - defined above */ - unsigned int used; - - /* Selected sources */ - int vsrc[3]; - int ssrc[3]; -}; +static void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} /** - * Store information during compilation of fragment programs. + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + * + * \todo If/when r5xx uses the radeon_program architecture, this can probably + * be reused. */ -struct r500_pfs_compile_state { - int nrslots; /* number of ALU slots used so far */ - - /* Track which (parts of) slots are already filled with instructions */ - struct r500_pfs_compile_slot slot[PFS_MAX_ALU_INST]; - - /* Track the validity of R300 temporaries */ - struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; - - /* Used to map Mesa's inputs/temps onto hardware temps */ - int temp_in_use; - struct reg_acc temps[PFS_NUM_TEMP_REGS]; - struct reg_acc inputs[32]; /* don't actually need 32... */ - - /* Track usage of hardware temps, for register allocation, - * indirection detection, etc. */ - GLuint used_in_node; - GLuint dest_in_node; -}; +static GLboolean transform_TEX( + struct radeon_program_transform_context* context, + struct prog_instruction* orig_inst, void* data) +{ + struct r500_fragment_program_compiler *compiler = + (struct r500_fragment_program_compiler*)data; + struct prog_instruction inst = *orig_inst; + struct prog_instruction* tgt; + GLboolean destredirect = GL_FALSE; + + if (inst.Opcode != OPCODE_TEX && + inst.Opcode != OPCODE_TXB && + inst.Opcode != OPCODE_TXP && + inst.Opcode != OPCODE_KIL) + return GL_FALSE; + if (inst.Opcode != OPCODE_KIL && + compiler->fp->mesa_program.Base.ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg = inst.DstReg; + tgt->SrcReg[0].File = PROGRAM_BUILTIN; + tgt->SrcReg[0].Swizzle = SWIZZLE_0000; + tgt->SrcReg[1].File = PROGRAM_BUILTIN; + tgt->SrcReg[1].Swizzle = SWIZZLE_0000; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = comparefunc == GL_ALWAYS ? SWIZZLE_1111 : SWIZZLE_0000; + return GL_TRUE; + } -/* - * Useful macros and values - */ -#define ERROR(fmt, args...) do { \ - fprintf(stderr, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ - fp->error = GL_TRUE; \ - } while(0) - -#define COMPILE_STATE struct r500_pfs_compile_state *cs = fp->cs - -#define R500_US_NUM_TEMP_REGS 128 -#define R500_US_NUM_CONST_REGS 256 - -/* "Register" flags */ -#define REG_CONSTANT (1 << 8) -#define REG_SRC_REL (1 << 9) -#define REG_DEST_REL (1 << 7) - -/* Swizzle tools */ -#define R500_SWIZZLE_ZERO 4 -#define R500_SWIZZLE_HALF 5 -#define R500_SWIZZLE_ONE 6 -#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) -#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) -#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6)) -#define R500_SWIZ_MOD_NEG 1 -#define R500_SWIZ_MOD_ABS 2 -#define R500_SWIZ_MOD_NEG_ABS 3 -/* Swizzles for inst2 */ -#define MAKE_SWIZ_TEX_STRQ(x) (x << 8) -#define MAKE_SWIZ_TEX_RGBA(x) (x << 24) -/* Swizzles for inst3 */ -#define MAKE_SWIZ_RGB_A(x) (x << 2) -#define MAKE_SWIZ_RGB_B(x) (x << 15) -/* Swizzles for inst4 */ -#define MAKE_SWIZ_ALPHA_A(x) (x << 14) -#define MAKE_SWIZ_ALPHA_B(x) (x << 21) -/* Swizzle for inst5 */ -#define MAKE_SWIZ_RGBA_C(x) (x << 14) -#define MAKE_SWIZ_ALPHA_C(x) (x << 27) - -/* Writemasks */ -#define R500_WRITEMASK_G 0x2 -#define R500_WRITEMASK_B 0x4 -#define R500_WRITEMASK_RGB 0x7 -#define R500_WRITEMASK_A 0x8 -#define R500_WRITEMASK_AR 0x9 -#define R500_WRITEMASK_AG 0xA -#define R500_WRITEMASK_ARG 0xB -#define R500_WRITEMASK_AB 0xC -#define R500_WRITEMASK_ARGB 0xF - -/* 1/(2pi), needed for quick modulus in trig insts - * Thanks to glisse for pointing out how to do it! */ -static const GLfloat RCP_2PI[] = {0.15915494309189535, - 0.15915494309189535, - 0.15915494309189535, - 0.15915494309189535}; - -static const GLfloat LIT[] = {127.999999, - 127.999999, - 127.999999, - -127.999999}; - -static void dump_program(struct r500_fragment_program *fp); - -static inline GLuint make_rgb_swizzle(struct prog_src_register src) { - GLuint swiz = 0x0; - GLuint temp; - /* This could be optimized, but it should be plenty fast already. */ - int i; - for (i = 0; i < 3; i++) { - temp = GET_SWZ(src.Swizzle, i); - /* Fix SWIZZLE_ONE */ - if (temp == 5) temp++; - swiz |= temp << i*3; - } - if (src.NegateBase) - swiz |= (R500_SWIZ_MOD_NEG << 9); - return swiz; -} + int tempreg = radeonCompilerAllocateTemporary(context->compiler); -static inline GLuint make_rgba_swizzle(GLuint src) { - GLuint swiz = 0x0; - GLuint temp; - int i; - for (i = 0; i < 4; i++) { - temp = GET_SWZ(src, i); - /* Fix SWIZZLE_ONE */ - if (temp == 5) temp++; - swiz |= temp << i*3; + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; } - return swiz; -} -static inline GLuint make_alpha_swizzle(struct prog_src_register src) { - GLuint swiz = GET_SWZ(src.Swizzle, 3); - - if (swiz == 5) swiz++; - - if (src.NegateBase) - swiz |= (R500_SWIZ_MOD_NEG << 3); - - return swiz; -} -static inline GLuint make_sop_swizzle(struct prog_src_register src) { - GLuint swiz = GET_SWZ(src.Swizzle, 0); - - if (swiz == 5) swiz++; - return swiz; -} - -static inline GLuint make_strq_swizzle(struct prog_src_register src) { - GLuint swiz = 0x0, temp = 0x0; - int i; - for (i = 0; i < 4; i++) { - temp = GET_SWZ(src.Swizzle, i) & 0x3; - swiz |= temp << i*2; + /* Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + */ + if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) { + gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, + 0 + }; + + int tempreg = radeonCompilerAllocateTemporary(context->compiler); + int factor_index; + + tokens[2] = inst.TexSrcUnit; + factor_index = + _mesa_add_state_reference( + compiler->fp->mesa_program.Base.Parameters, tokens); + + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tempreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + tgt->SrcReg[1].File = PROGRAM_STATE_VAR; + tgt->SrcReg[1].Index = factor_index; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = SWIZZLE_0000; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tempreg; } - return swiz; -} -static int get_temp(struct r500_fragment_program *fp, int slot) { - - COMPILE_STATE; - - int r = fp->temp_reg_offset + cs->temp_in_use + slot; - - if (r > R500_US_NUM_TEMP_REGS) { - ERROR("Too many temporary registers requested, can't compile!\n"); + /* Texture operations do not support swizzles etc. in hardware, + * so emit an additional arithmetic operation if necessary. + */ + if (inst.SrcReg[0].Swizzle != SWIZZLE_NOOP || + inst.SrcReg[0].Abs || inst.SrcReg[0].NegateBase || inst.SrcReg[0].NegateAbs) { + int tempreg = radeonCompilerAllocateTemporary(context->compiler); + + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tempreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + tgt->SrcReg[1].File = PROGRAM_BUILTIN; + tgt->SrcReg[1].Swizzle = SWIZZLE_1111; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = SWIZZLE_0000; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tempreg; } - return r; -} - -/* Borrowed verbatim from r300_fragprog since it hasn't changed. */ -static GLuint emit_const4fv(struct r500_fragment_program *fp, - const GLfloat * cp) -{ - GLuint reg = 0x0; - int index; + if (inst.Opcode != OPCODE_KIL) { + if (inst.DstReg.File != PROGRAM_TEMPORARY || + inst.DstReg.WriteMask != WRITEMASK_XYZW) { + int tempreg = radeonCompilerAllocateTemporary(context->compiler); - for (index = 0; index < fp->const_nr; ++index) { - if (fp->constant[index] == cp) - break; + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; + destredirect = GL_TRUE; + } } - if (index >= fp->const_nr) { - if (index >= R500_US_NUM_CONST_REGS) { - ERROR("Out of hw constants!\n"); - return reg; + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + _mesa_copy_instructions(tgt, &inst, 1); + + if (inst.Opcode != OPCODE_KIL && + compiler->fp->mesa_program.Base.ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; + + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 2); + + tgt[0].Opcode = OPCODE_MAD; + tgt[0].DstReg = inst.DstReg; + tgt[0].DstReg.WriteMask = orig_inst->DstReg.WriteMask; + tgt[0].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[0].SrcReg[0].Index = inst.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + tgt[0].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; + tgt[0].SrcReg[1].File = PROGRAM_BUILTIN; + tgt[0].SrcReg[1].Swizzle = SWIZZLE_1111; + tgt[0].SrcReg[2] = inst.SrcReg[0]; + tgt[0].SrcReg[2].Swizzle = SWIZZLE_ZZZZ; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + tgt[0].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + else + tgt[0].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + + tgt[1].Opcode = OPCODE_CMP; + tgt[1].DstReg = orig_inst->DstReg; + tgt[1].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[0].Index = tgt[0].DstReg.Index; + tgt[1].SrcReg[1].File = PROGRAM_BUILTIN; + tgt[1].SrcReg[2].File = PROGRAM_BUILTIN; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + tgt[1].SrcReg[1].Swizzle = SWIZZLE_1111; + tgt[1].SrcReg[2].Swizzle = SWIZZLE_0000; + } else { + tgt[1].SrcReg[1].Swizzle = SWIZZLE_0000; + tgt[1].SrcReg[2].Swizzle = SWIZZLE_1111; } - - fp->const_nr++; - fp->constant[index] = cp; + } else if (destredirect) { + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg = orig_inst->DstReg; + tgt->SrcReg[0].File = PROGRAM_TEMPORARY; + tgt->SrcReg[0].Index = inst.DstReg.Index; + tgt->SrcReg[1].File = PROGRAM_BUILTIN; + tgt->SrcReg[1].Swizzle = SWIZZLE_1111; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = SWIZZLE_0000; } - reg = index | REG_CONSTANT; - return reg; -} - -static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) { - COMPILE_STATE; - GLuint reg; - switch (src.File) { - case PROGRAM_TEMPORARY: - reg = src.Index + fp->temp_reg_offset; - break; - case PROGRAM_INPUT: - reg = cs->inputs[src.Index].reg; - break; - case PROGRAM_LOCAL_PARAM: - reg = emit_const4fv(fp, - fp->mesa_program.Base.LocalParams[src. - Index]); - break; - case PROGRAM_ENV_PARAM: - reg = emit_const4fv(fp, - fp->ctx->FragmentProgram.Parameters[src. - Index]); - break; - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters-> - ParameterValues[src.Index]); - break; - default: - ERROR("Can't handle src.File %x\n", src.File); - reg = 0x0; - break; - } - return reg; + return GL_TRUE; } -static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) { - GLuint reg; - switch (dest.File) { - case PROGRAM_TEMPORARY: - reg = dest.Index + fp->temp_reg_offset; - break; - case PROGRAM_OUTPUT: - /* Eventually we may need to handle multiple - * rendering targets... */ - reg = dest.Index; - break; - default: - ERROR("Can't handle dest.File %x\n", dest.File); - reg = 0x0; - break; - } - return reg; -} -static void emit_tex(struct r500_fragment_program *fp, - struct prog_instruction *fpi, int dest, int counter) +static void update_params(r300ContextPtr r300, struct r500_fragment_program *fp) { - int hwsrc, hwdest; - GLuint mask; + struct gl_fragment_program *mp = &fp->mesa_program; - mask = fpi->DstReg.WriteMask << 11; - hwsrc = make_src(fp, fpi->SrcReg[0]); + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (mp->Base.Parameters) + _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); +} - if (fpi->DstReg.File == PROGRAM_OUTPUT) { - hwdest = get_temp(fp, 0); - } else { - hwdest = dest; - } - fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask - | R500_INST_TEX_SEM_WAIT; - - fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit) - | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - - if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) - fp->inst[counter].inst1 |= R500_TEX_UNSCALED; - - switch (fpi->Opcode) { - case OPCODE_KIL: - fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL; - break; - case OPCODE_TEX: - fp->inst[counter].inst1 |= R500_TEX_INST_LD; - break; - case OPCODE_TXB: - fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS; - break; - case OPCODE_TXP: - fp->inst[counter].inst1 |= R500_TEX_INST_PROJ; - break; - default: - ERROR("emit_tex can't handle opcode %x\n", fpi->Opcode); - } +/** + * Transform the program to support fragment.position. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the FRAG_ATTRIB_WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. + * + * \todo if/when r5xx supports the radeon_program architecture, this is a + * likely candidate for code sharing. + */ +static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler) +{ + GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; - fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc) - | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) - /* | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G - | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A */ - | R500_TEX_DST_ADDR(hwdest) - | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - - fp->inst[counter].inst3 = 0x0; - fp->inst[counter].inst4 = 0x0; - fp->inst[counter].inst5 = 0x0; - - if (fpi->DstReg.File == PROGRAM_OUTPUT) { - counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_OUT - | R500_INST_TEX_SEM_WAIT | (mask << 4); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_OMOD_DISABLE; - fp->inst[counter].inst4 = R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A) - | R500_ALPHA_OMOD_DISABLE; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - } -} + if (!(InputsRead & FRAG_BIT_WPOS)) + return; -static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) { - /* Ideally, we shouldn't have to explicitly clear memory here! */ - fp->inst[counter].inst0 = 0x0; - fp->inst[counter].inst1 = 0x0; - fp->inst[counter].inst2 = 0x0; - fp->inst[counter].inst3 = 0x0; - fp->inst[counter].inst4 = 0x0; - fp->inst[counter].inst5 = 0x0; - - if (fpi->DstReg.File == PROGRAM_OUTPUT) { - fp->inst[counter].inst0 = R500_INST_TYPE_OUT; - - if (fpi->DstReg.Index == FRAG_RESULT_COLR) - fp->inst[counter].inst0 |= (fpi->DstReg.WriteMask << 15); - - if (fpi->DstReg.Index == FRAG_RESULT_DEPR) { - fp->inst[counter].inst4 |= R500_ALPHA_W_OMASK; - /* Notify the state emission! */ - fp->writes_depth = GL_TRUE; + static gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 + }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + GLuint tempregi = radeonCompilerAllocateTemporary(&compiler->compiler); + + fpi = radeonClauseInsertInstructions(&compiler->compiler, &compiler->compiler.Clauses[0], 0, 3); + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(compiler->fp->mesa_program.Base.Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + for (; i < compiler->compiler.Clauses[0].NumInstructions; ++i) { + int reg; + for (reg = 0; reg < 3; reg++) { + if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && + fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { + fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[reg].Index = tempregi; + } } - } else { - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - /* pixel_mask */ - | (fpi->DstReg.WriteMask << 11); } - - fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT; } -static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, GLuint src_reg, GLuint swizzle, GLuint dest) { - /* The r3xx shader uses MAD to implement MOV. We are using CMP, since - * it is technically more accurate and recommended by ATI/AMD. */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); - /* (De)mangle the swizzle from Mesa to R500. */ - swizzle = make_rgba_swizzle(swizzle); - /* 0x1FF is 9 bits, size of an RGB swizzle. */ - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A((swizzle & 0x1ff)) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B((swizzle & 0x1ff)) - | R500_ALU_RGB_OMOD_DISABLE; - fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(GET_SWZ(swizzle, 3)) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(GET_SWZ(swizzle, 3)) - | R500_ALPHA_OMOD_DISABLE; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); -} - -static void emit_mad(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, int one, int two, int three) { - /* Note: This code was all Corbin's. Corbin is a rather hackish coder. - * If you can make it pretty or fast, please do so! */ - emit_alu(fp, counter, fpi); - /* Common MAD stuff */ - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(make_dest(fp, fpi->DstReg)); - fp->inst[counter].inst5 |= R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(make_dest(fp, fpi->DstReg)); - switch (one) { - case 0: - case 1: - case 2: - fp->inst[counter].inst1 |= R500_RGB_ADDR0(make_src(fp, fpi->SrcReg[one])); - fp->inst[counter].inst2 |= R500_ALPHA_ADDR0(make_src(fp, fpi->SrcReg[one])); - fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[one])); - fp->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0 - | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one])); - break; - case R500_SWIZZLE_ZERO: - fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO); - fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO); - break; - case R500_SWIZZLE_ONE: - fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE); - fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE); - break; - default: - ERROR("Bad src index in emit_mad: %d\n", one); - break; - } - switch (two) { - case 0: - case 1: - case 2: - fp->inst[counter].inst1 |= R500_RGB_ADDR1(make_src(fp, fpi->SrcReg[two])); - fp->inst[counter].inst2 |= R500_ALPHA_ADDR1(make_src(fp, fpi->SrcReg[two])); - fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[two])); - fp->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1 - | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two])); - break; - case R500_SWIZZLE_ZERO: - fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); - fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); - break; - case R500_SWIZZLE_ONE: - fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); - fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); - break; - default: - ERROR("Bad src index in emit_mad: %d\n", two); - break; - } - switch (three) { - case 0: - case 1: - case 2: - fp->inst[counter].inst1 |= R500_RGB_ADDR2(make_src(fp, fpi->SrcReg[three])); - fp->inst[counter].inst2 |= R500_ALPHA_ADDR2(make_src(fp, fpi->SrcReg[three])); - fp->inst[counter].inst5 |= R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[three])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three])); - break; - case R500_SWIZZLE_ZERO: - fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - break; - case R500_SWIZZLE_ONE: - fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE); - break; - default: - ERROR("Bad src index in emit_mad: %d\n", three); - break; - } -} -static void emit_sop(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, int opcode, GLuint src, GLuint swiz, GLuint dest) { - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src); - fp->inst[counter].inst4 |= R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(swiz); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP - | R500_ALU_RGBA_ADDRD(dest); - switch (opcode) { - case OPCODE_COS: - fp->inst[counter].inst4 |= R500_ALPHA_OP_COS; - break; - case OPCODE_EX2: - fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2; - break; - case OPCODE_LG2: - fp->inst[counter].inst4 |= R500_ALPHA_OP_LN2; - break; - case OPCODE_RCP: - fp->inst[counter].inst4 |= R500_ALPHA_OP_RCP; - break; - case OPCODE_RSQ: - fp->inst[counter].inst4 |= R500_ALPHA_OP_RSQ; - break; - case OPCODE_SIN: - fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN; - break; - default: - ERROR("Bad opcode in emit_sop: %d\n", opcode); - break; +static GLuint build_dtm(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return 0; + case GL_INTENSITY: return 1; + case GL_ALPHA: return 2; } } -static GLboolean parse_program(struct r500_fragment_program *fp) +static GLuint build_func(GLuint comparefunc) { - struct gl_fragment_program *mp = &fp->mesa_program; - const struct prog_instruction *inst = mp->Base.Instructions; - struct prog_instruction *fpi; - GLuint src[3], dest = 0; - int temp_swiz, counter = 0; + return comparefunc - GL_NEVER; +} - if (!inst || inst[0].Opcode == OPCODE_END) { - ERROR("The program is empty!\n"); - return GL_FALSE; - } - for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { +/** + * Collect all external state that is relevant for compiling the given + * fragment program. + */ +static void build_state( + r300ContextPtr r300, + struct r500_fragment_program *fp, + struct r500_fragment_program_external_state *state) +{ + int unit; - if (fpi->Opcode != OPCODE_KIL) { - dest = make_dest(fp, fpi->DstReg); - } + _mesa_bzero(state, sizeof(*state)); - switch (fpi->Opcode) { - case OPCODE_ABS: - emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); - fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS - | R500_ALU_RGB_MOD_B_ABS; - fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS - | R500_ALPHA_MOD_B_ABS; - break; - case OPCODE_ADD: - /* Variation on MAD: 1*src0+src1 */ - emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1); - break; - case OPCODE_CMP: - /* This inst's selects need to be swapped as follows: - * 0 -> C ; 1 -> B ; 2 -> A */ - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - src[2] = make_src(fp, fpi->SrcReg[2]); - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2]) - | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2]) - | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); - break; - case OPCODE_COS: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = emit_const4fv(fp, RCP_2PI); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(get_temp(fp, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)); - counter++; - emit_sop(fp, counter, fpi, OPCODE_COS, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_DP3: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_DP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3 - | R500_ALU_RGBA_ADDRD(dest); - break; - case OPCODE_DP4: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - /* Based on DP3 */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_DP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 - | R500_ALU_RGBA_ADDRD(dest); - break; - case OPCODE_DPH: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - /* Based on DP3 */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_DP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 - | R500_ALU_RGBA_ADDRD(dest); - break; - case OPCODE_DST: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - /* [1, src0.y*src1.y, src0.z, src1.w] - * So basically MUL with lotsa swizzling. */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | R500_ALU_RGB_SEL_B_SRC1; - /* Select [1, y, z, 1] */ - temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE; - fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz); - /* Select [1, y, 1, w] */ - temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6); - fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - break; - case OPCODE_EX2: - src[0] = make_src(fp, fpi->SrcReg[0]); - emit_sop(fp, counter, fpi, OPCODE_EX2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_FLR: - src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); - counter++; - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(get_temp(fp, 0)); - fp->inst[counter].inst3 = MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC1 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC1 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGBA_MOD_C_NEG; - break; - case OPCODE_FRC: - src[0] = make_src(fp, fpi->SrcReg[0]); - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | R500_ALU_RGBA_ADDRD(dest); - break; - case OPCODE_LG2: - src[0] = make_src(fp, fpi->SrcReg[0]); - emit_sop(fp, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_LIT: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = emit_const4fv(fp, LIT); - /* First inst: MAX temp, input, [0, 0, 0, -128] - * Write: RG, A */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARG << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAX - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); - counter++; - /* Second inst: MIN temp, temp, [x, x, x, 128] - * Write: A */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_A << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)) | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) | R500_ALPHA_ADDR1(src[1]); - /* fp->inst[counter].inst3; */ - fp->inst[counter].inst4 = R500_ALPHA_OP_MAX - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(dest); - counter++; - /* Third-fifth insts: POW temp, temp.y, temp.w - * Write: B */ - emit_sop(fp, counter, fpi, OPCODE_LG2, get_temp(fp, 0), SWIZZLE_Y, get_temp(fp, 1)); - fp->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11); - counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1)) - | R500_RGB_ADDR1(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1)) - | R500_ALPHA_ADDR1(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - emit_sop(fp, counter, fpi, OPCODE_EX2, get_temp(fp, 1), SWIZZLE_W, get_temp(fp, 0)); - fp->inst[counter].inst0 |= (R500_WRITEMASK_B << 11); - counter++; - /* Sixth inst: CMP dest, temp.xxxx, temp.[1, x, z, 1], temp.[1, x, 0, 1]; - * Write: ARGB - * This inst's selects need to be swapped as follows: - * 0 -> C ; 1 -> B ; 2 -> A */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | R500_ALU_RGB_R_SWIZ_A_1 - | R500_ALU_RGB_G_SWIZ_A_R - | R500_ALU_RGB_B_SWIZ_A_B - | R500_ALU_RGB_SEL_B_SRC0 - | R500_ALU_RGB_R_SWIZ_B_1 - | R500_ALU_RGB_G_SWIZ_B_R - | R500_ALU_RGB_B_SWIZ_B_0; - fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_1 - | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_1; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC0 - | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 - | R500_ALU_RGBA_R_SWIZ_R - | R500_ALU_RGBA_G_SWIZ_R - | R500_ALU_RGBA_B_SWIZ_R - | R500_ALU_RGBA_A_SWIZ_R; - break; - case OPCODE_LRP: - /* src0 * src1 + INV(src0) * src2 - * 1) MUL src0, src1, temp - * 2) PRE 1-src0; MAD srcp, src2, temp */ - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - src[2] = make_src(fp, fpi->SrcReg[2]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[2]) - | R500_RGB_ADDR2(get_temp(fp, 0)) - | R500_RGB_SRCP_OP_1_MINUS_RGB0; - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[2]) - | R500_ALPHA_ADDR2(get_temp(fp, 0)) - | R500_ALPHA_SRCP_OP_1_MINUS_A0; - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); - break; - case OPCODE_MAD: - emit_mad(fp, counter, fpi, 0, 1, 2); - break; - case OPCODE_MAX: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAX - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(dest); - break; - case OPCODE_MIN: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MIN - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN - | R500_ALU_RGBA_ADDRD(dest); - break; - case OPCODE_MOV: - emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); - break; - case OPCODE_MUL: - /* Variation on MAD: src0*src1+0 */ - emit_mad(fp, counter, fpi, 0, 1, R500_SWIZZLE_ZERO); - break; - case OPCODE_POW: - /* POW(a,b) = EX2(LN2(a)*b) */ - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - emit_sop(fp, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), get_temp(fp, 0)); - fp->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11); - counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 1)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - emit_sop(fp, counter, fpi, OPCODE_EX2, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_RCP: - src[0] = make_src(fp, fpi->SrcReg[0]); - emit_sop(fp, counter, fpi, OPCODE_RCP, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_RSQ: - src[0] = make_src(fp, fpi->SrcReg[0]); - emit_sop(fp, counter, fpi, OPCODE_RSQ, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_SCS: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = emit_const4fv(fp, RCP_2PI); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(get_temp(fp, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)); - counter++; - /* Do a cosine, then a sine, masking out the channels we want to protect. */ - /* Cosine only goes in R (x) channel. */ - fpi->DstReg.WriteMask = 0x1; - emit_sop(fp, counter, fpi, OPCODE_COS, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); - counter++; - /* Sine only goes in G (y) channel. */ - fpi->DstReg.WriteMask = 0x2; - emit_sop(fp, counter, fpi, OPCODE_SIN, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_SGE: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) - | R500_RGB_ADDR2(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) - | R500_ALPHA_ADDR2(src[1]); - fp->inst[counter].inst3 = /* 1 */ - MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_MOD_C_NEG - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_ALPHA_MOD_C_NEG; - counter++; - /* This inst's selects need to be swapped as follows: - * 0 -> C ; 1 -> B ; 2 -> A */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); - fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC0 - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 - | R500_ALU_RGBA_A_SWIZ_A; - break; - case OPCODE_SIN: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = emit_const4fv(fp, RCP_2PI); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(get_temp(fp, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)); - counter++; - emit_sop(fp, counter, fpi, OPCODE_SIN, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_SLT: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) - | R500_RGB_ADDR2(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) - | R500_ALPHA_ADDR2(src[1]); - fp->inst[counter].inst3 = /* 1 */ - MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_MOD_C_NEG - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_ALPHA_MOD_C_NEG; - counter++; - /* This inst's selects need to be swapped as follows: - * 0 -> C ; 1 -> B ; 2 -> A */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); - fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC0 - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 - | R500_ALU_RGBA_A_SWIZ_A; - break; - case OPCODE_SUB: - /* Variation on MAD: 1*src0-src1 */ - fpi->SrcReg[1].NegateBase = 0xF; /* NEG_XYZW */ - emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1); - break; - case OPCODE_SWZ: - /* TODO: The rarer negation masks! */ - emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); - break; - case OPCODE_XPD: - /* src0 * src1 - src1 * src0 - * 1) MUL temp.xyz, src0.yzx, src1.zxy - * 2) MAD src0.zxy, src1.yzx, -temp.xyz */ - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_RGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - /* Select [y, z, x] */ - temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]); - temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(temp_swiz); - /* Select [z, x, y] */ - temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]); - temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6); - fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(temp_swiz); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]) - | R500_RGB_ADDR2(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]) - | R500_ALPHA_ADDR2(get_temp(fp, 0)); - /* Select [z, x, y] */ - temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]); - temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(temp_swiz); - /* Select [y, z, x] */ - temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]); - temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6); - fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(temp_swiz); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SWIZ_A_1 - | R500_ALPHA_SWIZ_B_1; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) - | R500_ALU_RGBA_MOD_C_NEG - | R500_ALU_RGBA_A_SWIZ_0; - break; - case OPCODE_KIL: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - emit_tex(fp, fpi, dest, counter); - if (fpi->DstReg.File == PROGRAM_OUTPUT) - counter++; - break; - default: - ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode)); - break; - } + for(unit = 0; unit < 16; ++unit) { + if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; - /* Finishing touches */ - if (fpi->SaturateMode == SATURATE_ZERO_ONE) { - fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; + state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); } - - counter++; - - if (fp->error) - return GL_FALSE; - - } - - /* Finish him! (If it's an ALU/OUT instruction...) */ - if ((fp->inst[counter-1].inst0 & 0x3) == 1) { - fp->inst[counter-1].inst0 |= R500_INST_LAST; - } else { - /* We still need to put an output inst, right? */ - WARN_ONCE("Final FP instruction is not an OUT.\n"); } - - fp->cs->nrslots = counter; - - fp->max_temp_idx++; - - return GL_TRUE; } -static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) -{ - struct r500_pfs_compile_state *cs = fp->cs; - struct gl_fragment_program *mp = &fp->mesa_program; - struct prog_instruction *fpi; - GLuint InputsRead = mp->Base.InputsRead; - GLuint temps_used = 0; - int i, j; - - /* New compile, reset tracking data */ - fp->optimization = - driQueryOptioni(&r300->radeon.optionCache, "fp_optimization"); - fp->translated = GL_FALSE; - fp->error = GL_FALSE; - fp->const_nr = 0; - /* Size of pixel stack, plus 1. */ - fp->max_temp_idx = 1; - /* Temp register offset. */ - fp->temp_reg_offset = 0; - /* Whether or not we perform any depth writing. */ - fp->writes_depth = GL_FALSE; - - _mesa_memset(cs, 0, sizeof(*fp->cs)); - for (i = 0; i < PFS_MAX_ALU_INST; i++) { - for (j = 0; j < 3; j++) { - cs->slot[i].vsrc[j] = SRC_CONST; - cs->slot[i].ssrc[j] = SRC_CONST; - } - } - - /* Work out what temps the Mesa inputs correspond to, this must match - * what setup_rs_unit does, which shouldn't be a problem as rs_unit - * configures itself based on the fragprog's InputsRead - * - * NOTE: this depends on get_hw_temp() allocating registers in order, - * starting from register 0, so we're just going to do that instead. - */ +static void dump_program(struct r500_fragment_program_code *code); - /* Texcoords come first */ - for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; - cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = - fp->temp_reg_offset; - fp->temp_reg_offset++; - } - } - InputsRead &= ~FRAG_BITS_TEX_ANY; - - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) { - cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; - cs->inputs[FRAG_ATTRIB_WPOS].reg = - fp->temp_reg_offset; - fp->temp_reg_offset++; - } - InputsRead &= ~FRAG_BIT_WPOS; - - /* Then primary colour */ - if (InputsRead & FRAG_BIT_COL0) { - cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL0].reg = - fp->temp_reg_offset; - fp->temp_reg_offset++; - } - InputsRead &= ~FRAG_BIT_COL0; - - /* Secondary color */ - if (InputsRead & FRAG_BIT_COL1) { - cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL1].reg = - fp->temp_reg_offset; - fp->temp_reg_offset++; - } - InputsRead &= ~FRAG_BIT_COL1; - - /* Anything else */ - if (InputsRead) { - WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); - /* force read from hwreg 0 for now */ - for (i = 0; i < 32; i++) - if (InputsRead & (1 << i)) - cs->inputs[i].reg = 0; - } +void r500TranslateFragmentShader(r300ContextPtr r300, + struct r500_fragment_program *fp) +{ + struct r500_fragment_program_external_state state; - if (!mp->Base.Instructions) { - ERROR("No instructions found in program, going to go die now.\n"); - return; + build_state(r300, fp, &state); + if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { + /* TODO: cache compiled programs */ + fp->translated = GL_FALSE; + _mesa_memcpy(&fp->state, &state, sizeof(state)); } - for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { - for (i = 0; i < 3; i++) { - if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) { - if (fpi->SrcReg[i].Index >= temps_used) - temps_used = fpi->SrcReg[i].Index + 1; - } - } - } + if (!fp->translated) { + struct r500_fragment_program_compiler compiler; - cs->temp_in_use = temps_used + 1; + compiler.r300 = r300; + compiler.fp = fp; + compiler.code = &fp->code; - fp->max_temp_idx = fp->temp_reg_offset + cs->temp_in_use; + radeonCompilerInit(&compiler.compiler, r300->radeon.glCtx, &fp->mesa_program.Base); - if (RADEON_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "FP temp indices: fp->max_temp_idx: %d cs->temp_in_use: %d\n", fp->max_temp_idx, cs->temp_in_use); -} + insert_WPOS_trailer(&compiler); -static void update_params(struct r500_fragment_program *fp) -{ - struct gl_fragment_program *mp = &fp->mesa_program; + struct radeon_program_transformation transformations[1] = { + { &transform_TEX, &compiler } + }; + radeonClauseLocalTransform(&compiler.compiler, + &compiler.compiler.Clauses[0], + 1, transformations); - /* Ask Mesa nicely to fill in ParameterValues for us */ - if (mp->Base.Parameters) - _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters); -} - -static void dumb_shader(struct r500_fragment_program *fp) -{ - fp->inst[0].inst0 = R500_INST_TYPE_TEX - | R500_INST_TEX_SEM_WAIT - | R500_INST_RGB_WMASK_R - | R500_INST_RGB_WMASK_G - | R500_INST_RGB_WMASK_B - | R500_INST_ALPHA_WMASK - | R500_INST_RGB_CLAMP - | R500_INST_ALPHA_CLAMP; - fp->inst[0].inst1 = R500_TEX_ID(0) - | R500_TEX_INST_LD - | R500_TEX_SEM_ACQUIRE - | R500_TEX_IGNORE_UNCOVERED; - fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0) - | R500_TEX_SRC_S_SWIZ_R - | R500_TEX_SRC_T_SWIZ_G - | R500_TEX_DST_ADDR(0) - | R500_TEX_DST_R_SWIZ_R - | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B - | R500_TEX_DST_A_SWIZ_A; - fp->inst[0].inst3 = R500_DX_ADDR(0) - | R500_DX_S_SWIZ_R - | R500_DX_T_SWIZ_R - | R500_DX_R_SWIZ_R - | R500_DX_Q_SWIZ_R - | R500_DY_ADDR(0) - | R500_DY_S_SWIZ_R - | R500_DY_T_SWIZ_R - | R500_DY_R_SWIZ_R - | R500_DY_Q_SWIZ_R; - fp->inst[0].inst4 = 0x0; - fp->inst[0].inst5 = 0x0; - - fp->inst[1].inst0 = R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | - R500_INST_LAST | - R500_INST_RGB_OMASK_R | - R500_INST_RGB_OMASK_G | - R500_INST_RGB_OMASK_B | - R500_INST_ALPHA_OMASK; - fp->inst[1].inst1 = R500_RGB_ADDR0(0) | - R500_RGB_ADDR1(0) | - R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | - R500_RGB_ADDR2_CONST | - R500_RGB_SRCP_OP_1_MINUS_2RGB0; - fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) | - R500_ALPHA_ADDR1(0) | - R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | - R500_ALPHA_ADDR2_CONST | - R500_ALPHA_SRCP_OP_1_MINUS_2A0; - fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | - R500_ALU_RGB_R_SWIZ_B_1 | - R500_ALU_RGB_B_SWIZ_B_1 | - R500_ALU_RGB_G_SWIZ_B_1; - fp->inst[1].inst4 = R500_ALPHA_OP_MAD | - R500_ALPHA_SWIZ_A_A | - R500_ALPHA_SWIZ_B_1; - fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | - R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0; - - fp->cs->nrslots = 2; - fp->translated = GL_TRUE; -} + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler state after transformations:\n"); + radeonCompilerDump(&compiler.compiler); + } -void r500TranslateFragmentShader(r300ContextPtr r300, - struct r500_fragment_program *fp) -{ - if (!fp->translated) { - struct r500_pfs_compile_state cs; - fp->cs = &cs; + if (!r500FragmentProgramEmit(&compiler)) + fp->error = GL_TRUE; - init_program(r300, fp); + radeonCompilerCleanup(&compiler.compiler); - if (parse_program(fp) == GL_FALSE) { - ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n"); - dumb_shader(fp); - fp->inst_offset = 0; - fp->inst_end = cs.nrslots - 1; - return; - } - fp->inst_offset = 0; - fp->inst_end = cs.nrslots - 1; + r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); fp->translated = GL_TRUE; if (RADEON_DEBUG & DEBUG_PIXEL) { @@ -1520,16 +417,12 @@ void r500TranslateFragmentShader(r300ContextPtr r300, fprintf(stderr, "-------------\n"); _mesa_print_program(&fp->mesa_program.Base); fflush(stdout); - dump_program(fp); + dump_program(&fp->code); } - - r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); - - fp->cs = 0; } - update_params(fp); + update_params(r300, fp); } @@ -1630,7 +523,7 @@ static char *to_texop(int val) return NULL; } -static void dump_program(struct r500_fragment_program *fp) +static void dump_program(struct r500_fragment_program_code *code) { fprintf(stderr, "R500 Fragment Program:\n--------\n"); @@ -1640,18 +533,18 @@ static void dump_program(struct r500_fragment_program *fp) uint32_t inst0; char *str = NULL; - if (fp->const_nr) { + if (code->const_nr) { fprintf(stderr, "--------\nConstants:\n"); - for (n = 0; n < fp->const_nr; n++) { + for (n = 0; n < code->const_nr; n++) { fprintf(stderr, "Constant %d: %f %f\n\t %f %f\n", n, - fp->constant[n][0], fp->constant[n][1], fp->constant[n][2], - fp->constant[n][3]); + code->constant[n][0], code->constant[n][1], code->constant[n][2], + code->constant[n][3]); } fprintf(stderr, "--------\n"); } - for (n = 0; n < fp->inst_end+1; n++) { - inst0 = inst = fp->inst[n].inst0; + for (n = 0; n < code->inst_end+1; n++) { + inst0 = inst = code->inst[n].inst0; fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); switch(inst & 0x3) { case R500_INST_TYPE_ALU: str = "ALU"; break; @@ -1670,8 +563,8 @@ static void dump_program(struct r500_fragment_program *fp) switch(inst0 & 0x3) { case 0: case 1: - fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1); - inst = fp->inst[n].inst1; + fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); + inst = code->inst[n].inst1; fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", inst & 0xff, (inst & (1<<8)) ? 'c' : 't', @@ -1679,15 +572,15 @@ static void dump_program(struct r500_fragment_program *fp) (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', (inst >> 30)); - fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2); - inst = fp->inst[n].inst2; + fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", inst & 0xff, (inst & (1<<8)) ? 'c' : 't', (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', (inst >> 30)); - fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3); - inst = fp->inst[n].inst3; + fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); + inst = code->inst[n].inst3; fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), (inst >> 11) & 0x3, @@ -1695,16 +588,16 @@ static void dump_program(struct r500_fragment_program *fp) (inst >> 24) & 0x3); - fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4); - inst = fp->inst[n].inst4; + fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); + inst = code->inst[n].inst4; fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf), (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, (inst >> 31) & 0x1); - fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5); - inst = fp->inst[n].inst5; + fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); + inst = code->inst[n].inst5; fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), @@ -1714,11 +607,11 @@ static void dump_program(struct r500_fragment_program *fp) case 2: break; case 3: - inst = fp->inst[n].inst1; + inst = code->inst[n].inst1; fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); - inst = fp->inst[n].inst2; + inst = code->inst[n].inst2; fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, inst & 127, inst & (1<<7) ? "(rel)" : "", toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), @@ -1727,7 +620,7 @@ static void dump_program(struct r500_fragment_program *fp) toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); - fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", fp->inst[n].inst3); + fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); break; } fprintf(stderr,"\n"); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h index 5dd2def1c4..ff6a9002c1 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -36,10 +36,14 @@ #include "glheader.h" #include "macros.h" #include "enums.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" #include "shader/program.h" #include "shader/prog_instruction.h" #include "r300_context.h" +#include "r300_state.h" +#include "radeon_program.h" /* supported hw opcodes */ #define PFS_OP_MAD 0 @@ -76,4 +80,13 @@ struct r500_fragment_program; extern void r500TranslateFragmentShader(r300ContextPtr r300, struct r500_fragment_program *fp); +struct r500_fragment_program_compiler { + r300ContextPtr r300; + struct r500_fragment_program *fp; + struct r500_fragment_program_code *code; + struct radeon_compiler compiler; +}; + +extern GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler); + #endif -- cgit v1.2.3 From 588abd66966a672a93e87cd577802255193bebcd Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 16 Jun 2008 01:21:42 -0700 Subject: r300: Fix new incarnation of bug 3195. tests/bug_3195 doesn't render right, but at least it doesn't segfault this way. --- progs/demos/lodbias.c | 2 +- src/mesa/drivers/dri/r300/r300_context.h | 1 + src/mesa/drivers/dri/r300/r300_tex.c | 19 ++++++++++++++----- 3 files changed, 16 insertions(+), 6 deletions(-) (limited to 'src/mesa/drivers/dri/r300/r300_context.h') diff --git a/progs/demos/lodbias.c b/progs/demos/lodbias.c index c5a2a1b457..28215d46c0 100644 --- a/progs/demos/lodbias.c +++ b/progs/demos/lodbias.c @@ -40,7 +40,7 @@ static GLfloat Xrot = 0, Yrot = -30, Zrot = 0; static GLboolean Anim = GL_TRUE; -static GLint Bias = 0, BiasStepSign = +1; /* ints avoid fp precision problem */ +static GLint Bias = 4, BiasStepSign = +1; /* ints avoid fp precision problem */ static GLint BiasMin = -400, BiasMax = 400; diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 1a90f5cabb..6279a67ab1 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -925,6 +925,7 @@ struct r300_context { driTextureObject swapped; int texture_depth; float initialMaxAnisotropy; + float LODBias; /* Clientdata textures; */ diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index b672baca1b..a3d6f90ef6 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -1005,6 +1005,7 @@ static GLenum r300TexUnitTarget(struct gl_texture_unit *unit) { } else if (unit->Enabled & (TEXTURE_CUBE_BIT)) { return GL_TEXTURE_CUBE_MAP; } + return 0; } static void r300TexEnv(GLcontext * ctx, GLenum target, @@ -1037,13 +1038,17 @@ static void r300TexEnv(GLcontext * ctx, GLenum target, /* This next part feels quite hackish; * is there a cleaner way? */ - struct gl_texture_object *texObj; GLenum target = r300TexUnitTarget(&ctx->Texture.Unit[ctx->Texture.CurrentUnit]); - texObj = _mesa_select_tex_object(ctx, &ctx->Texture.Unit[ctx->Texture.CurrentUnit], target); - r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData; - texObj->LodBias = bias; + if (target) { + struct gl_texture_object *texObj = + _mesa_select_tex_object(ctx, &ctx->Texture.Unit[ctx->Texture.CurrentUnit], target); + r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData; + texObj->LodBias = bias; - r300SetTexLodBias(t, texObj->LodBias); + r300SetTexLodBias(t, texObj->LodBias); + } + + rmesa->LODBias = bias; break; } @@ -1175,6 +1180,10 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, return NULL; obj->MaxAnisotropy = rmesa->initialMaxAnisotropy; + /* Attempt to fill LOD bias, if previously set. + * Should start at 0.0, which won't affect the HW. */ + obj->LodBias = rmesa->LODBias; + r300AllocTexObj(obj); return obj; } -- cgit v1.2.3