From be66ff51ec98cf583044b3e53a49c41edd803134 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 23 Sep 2009 14:40:45 +0100 Subject: st/mesa: trim calculated userbuffer size In get_array_bounds we were previously defining a user buffer sized as (nr_vertices * stride). The trouble is that if the vertex data occupies less than stride bytes, the extra tailing (stride - size) bytes may extend outside the memory actually allocated by the app and caused a segfault. To fix this, define a the buffer bounds to be: ptr .. ptr + (nr-1)*stride + element_size --- src/mesa/state_tracker/st_draw.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 225541a30b..11699d5b1b 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -317,23 +317,29 @@ get_arrays_bounds(const struct st_vertex_program *vp, const GLubyte **low, const GLubyte **high) { const GLubyte *low_addr = NULL; + const GLubyte *high_addr = NULL; GLuint attr; - GLint stride; for (attr = 0; attr < vp->num_inputs; attr++) { const GLuint mesaAttr = vp->index_to_input[attr]; + const GLint stride = arrays[mesaAttr]->StrideB; const GLubyte *start = arrays[mesaAttr]->Ptr; - stride = arrays[mesaAttr]->StrideB; + const unsigned sz = (arrays[mesaAttr]->Size * + _mesa_sizeof_type(arrays[mesaAttr]->Type)); + const GLubyte *end = start + (max_index * stride) + sz; + if (attr == 0) { low_addr = start; + high_addr = end; } else { low_addr = MIN2(low_addr, start); + high_addr = MAX2(high_addr, end); } } *low = low_addr; - *high = low_addr + (max_index + 1) * stride; + *high = high_addr; } -- cgit v1.2.3 From ad935c3f4708417641dd3c257912ccce11485acc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Sep 2009 12:54:14 -0600 Subject: swrast: fix typo in partial derivatives parameter passing --- src/mesa/swrast/s_fragprog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c index b71fb9eae9..144f78b024 100644 --- a/src/mesa/swrast/s_fragprog.c +++ b/src/mesa/swrast/s_fragprog.c @@ -108,7 +108,7 @@ fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4], lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */ texdx[1], texdy[1], /* dt/dx, dt/dy */ - texdx[3], texdy[2], /* dq/dx, dq/dy */ + texdx[3], texdy[3], /* dq/dx, dq/dy */ texW, texH, texcoord[0], texcoord[1], texcoord[3], 1.0F / texcoord[3]) + lodBias; -- cgit v1.2.3 From 890f37d4d96471a5c3d8ae286dfc13ad18ff78e5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Sep 2009 13:34:30 -0600 Subject: mesa: don't bias LOD in shader interpreter; do it in swrast --- src/mesa/shader/prog_execute.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/mesa/shader/prog_execute.c b/src/mesa/shader/prog_execute.c index 68a59350a1..7cb463cd07 100644 --- a/src/mesa/shader/prog_execute.c +++ b/src/mesa/shader/prog_execute.c @@ -1527,17 +1527,12 @@ _mesa_execute_program(GLcontext * ctx, case OPCODE_TXB: /* GL_ARB_fragment_program only */ /* Texel lookup with LOD bias */ { - const GLuint unit = machine->Samplers[inst->TexSrcUnit]; - const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; GLfloat texcoord[4], color[4], lodBias; fetch_vector4(&inst->SrcReg[0], machine, texcoord); /* texcoord[3] is the bias to add to lambda */ - lodBias = texUnit->LodBias + texcoord[3]; - if (texUnit->_Current) { - lodBias += texUnit->_Current->LodBias; - } + lodBias = texcoord[3]; fetch_texel(ctx, machine, inst, texcoord, lodBias, color); -- cgit v1.2.3 From 2acd5de22651a3461c0576107c8e8fab1f01469a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Sep 2009 13:35:03 -0600 Subject: swrast: add lod bias when texture sampling Mostly fixes progs/demos/lodbias when MESA_TEX_PROG=1. But the LOD still seems off by -1 or so. May be an issue with the params passed to _swrast_compute_lambda() --- src/mesa/swrast/s_fragprog.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c index 144f78b024..b326acf3e3 100644 --- a/src/mesa/swrast/s_fragprog.c +++ b/src/mesa/swrast/s_fragprog.c @@ -89,6 +89,8 @@ fetch_texel_lod( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda, * Fetch a texel with the given partial derivatives to compute a level * of detail in the mipmap. * Called via machine->FetchTexelDeriv() + * \param lodBias the lod bias which may be specified by a TXB instruction, + * otherwise zero. */ static void fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4], @@ -96,7 +98,8 @@ fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lodBias, GLuint unit, GLfloat color[4] ) { SWcontext *swrast = SWRAST_CONTEXT(ctx); - const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current; + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + const struct gl_texture_object *texObj = texUnit->_Current; if (texObj) { const struct gl_texture_image *texImg = @@ -111,7 +114,9 @@ fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4], texdx[3], texdy[3], /* dq/dx, dq/dy */ texW, texH, texcoord[0], texcoord[1], texcoord[3], - 1.0F / texcoord[3]) + lodBias; + 1.0F / texcoord[3]); + + lambda += lodBias + texUnit->LodBias + texObj->LodBias; lambda = CLAMP(lambda, texObj->MinLod, texObj->MaxLod); -- cgit v1.2.3 From 84c7afd9e0f2df72d90dd82d38384c4f2f45173e Mon Sep 17 00:00:00 2001 From: Maciej Cencora Date: Sat, 19 Sep 2009 18:45:59 +0200 Subject: r300: fallback to software rendering if we are out of free texcoords Fixes #22741 --- src/mesa/drivers/dri/r300/r300_fragprog_common.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index 469c278b51..0bdc90b4a8 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -99,8 +99,8 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, { int i; + fp->wpos_attr = FRAG_ATTRIB_MAX; if (!(compiler->Base.Program.InputsRead & FRAG_BIT_WPOS)) { - fp->wpos_attr = FRAG_ATTRIB_MAX; return; } @@ -112,6 +112,13 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, } } + /* No free texcoord found, fall-back to software rendering */ + if (fp->wpos_attr == FRAG_ATTRIB_MAX) + { + compiler->Base.Error = 1; + return; + } + rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, fp->wpos_attr); } @@ -127,8 +134,8 @@ static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r struct prog_src_register src; int i; + fp->fog_attr = FRAG_ATTRIB_MAX; if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) { - fp->fog_attr = FRAG_ATTRIB_MAX; return; } @@ -140,6 +147,13 @@ static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r } } + /* No free texcoord found, fall-back to software rendering */ + if (fp->fog_attr == FRAG_ATTRIB_MAX) + { + compiler->Base.Error = 1; + return; + } + memset(&src, 0, sizeof(src)); src.File = PROGRAM_INPUT; src.Index = fp->fog_attr; -- cgit v1.2.3 From cbab3d7f2a77f187fb688593c17396d4967c75b5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 8 Sep 2009 16:03:25 -0400 Subject: r600: fix dri2 clipping --- src/mesa/drivers/dri/r600/r700_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index 1f4724e838..2a0b419256 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -1272,8 +1272,8 @@ void r700SetScissor(context_t *context) //--------------- if (context->radeon.radeonScreen->driScreen->dri2.enabled) { x1 = 0; y1 = 0; - x2 = rrb->base.Width - 1; - y2 = rrb->base.Height - 1; + x2 = rrb->base.Width; + y2 = rrb->base.Height; } else { x1 = rrb->dPriv->x; y1 = rrb->dPriv->y; -- cgit v1.2.3 From b1e417413f2da8aad1872fa009949da101156431 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 9 Sep 2009 15:02:16 +1000 Subject: r600: don't setup hardware state if TFP if we have a BO here it means TFP and we should have set it up already. tested by b0le on #radeon --- src/mesa/drivers/dri/r600/r600_texstate.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 49b603b65e..6436a5d7e9 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -565,6 +565,10 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex int firstlevel = t->mt ? t->mt->firstLevel : 0; GLuint uTexelPitch, row_align; + if ( t->bo ) { + return GL_TRUE; + } + firstImage = t->base.Image[0][firstlevel]; if (!t->image_override) { -- cgit v1.2.3 From 65b01d449cc594e1c7e1a44c5d87fdc698300e9a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Sep 2009 01:41:46 -0400 Subject: r600: fix ftp for dri1 We use t->bo for dri1 since r600 uses CS for dri1. --- src/mesa/drivers/dri/r600/r600_texstate.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 6436a5d7e9..f30dd11230 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -565,9 +565,10 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex int firstlevel = t->mt ? t->mt->firstLevel : 0; GLuint uTexelPitch, row_align; - if ( t->bo ) { - return GL_TRUE; - } + if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled && + t->image_override && + t->bo) + return; firstImage = t->base.Image[0][firstlevel]; -- cgit v1.2.3 From 6552a103f903a2b767464cd2d267f706a6baf7d5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Sep 2009 11:14:17 -0400 Subject: r600: check if textures are actually enabled before submission noticed by taiu on IRC. --- src/mesa/drivers/dri/r600/r600_texstate.c | 2 +- src/mesa/drivers/dri/r600/r700_chip.c | 118 ++++++++++++++++-------------- 2 files changed, 64 insertions(+), 56 deletions(-) diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index f30dd11230..bcb8d7c73d 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -69,7 +69,7 @@ void r600UpdateTextureState(GLcontext * ctx) for (unit = 0; unit < R700_MAX_TEXTURE_UNITS; unit++) { texUnit = &ctx->Texture.Unit[unit]; t = radeon_tex_obj(ctx->Texture.Unit[unit]._Current); - + r700->textures[unit] = NULL; if (texUnit->_ReallyEnabled) { if (!t) continue; diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 37bff56f5a..312cacffda 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -52,38 +52,40 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - radeonTexObj *t = r700->textures[i]; - if (t) { - if (!t->image_override) - bo = t->mt->bo; - else - bo = t->bo; - if (bo) { - - r700SyncSurf(context, bo, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, - 0, TC_ACTION_ENA_bit); - - BEGIN_BATCH_NO_AUTOSTATE(9 + 4); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - R600_OUT_BATCH(i * 7); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE3); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6); - R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2, - bo, - 0, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3, - bo, - r700->textures[i]->SQ_TEX_RESOURCE3, - RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); - END_BATCH(); - COMMIT_BATCH(); + if (ctx->Texture.Unit[i]._ReallyEnabled) { + radeonTexObj *t = r700->textures[i]; + if (t) { + if (!t->image_override) + bo = t->mt->bo; + else + bo = t->bo; + if (bo) { + + r700SyncSurf(context, bo, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, + 0, TC_ACTION_ENA_bit); + + BEGIN_BATCH_NO_AUTOSTATE(9 + 4); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); + R600_OUT_BATCH(i * 7); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE0); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE1); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE2); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE3); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE4); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE5); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6); + R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2, + bo, + 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3, + bo, + r700->textures[i]->SQ_TEX_RESOURCE3, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + COMMIT_BATCH(); + } } } } @@ -98,16 +100,18 @@ static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *at radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - radeonTexObj *t = r700->textures[i]; - if (t) { - BEGIN_BATCH_NO_AUTOSTATE(5); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); - R600_OUT_BATCH(i * 3); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1); - R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2); - END_BATCH(); - COMMIT_BATCH(); + if (ctx->Texture.Unit[i]._ReallyEnabled) { + radeonTexObj *t = r700->textures[i]; + if (t) { + BEGIN_BATCH_NO_AUTOSTATE(5); + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3)); + R600_OUT_BATCH(i * 3); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER0); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER1); + R600_OUT_BATCH(r700->textures[i]->SQ_TEX_SAMPLER2); + END_BATCH(); + COMMIT_BATCH(); + } } } } @@ -121,16 +125,18 @@ static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - radeonTexObj *t = r700->textures[i]; - if (t) { - BEGIN_BATCH_NO_AUTOSTATE(2 + 4); - R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4); - R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED); - R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN); - R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE); - R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA); - END_BATCH(); - COMMIT_BATCH(); + if (ctx->Texture.Unit[i]._ReallyEnabled) { + radeonTexObj *t = r700->textures[i]; + if (t) { + BEGIN_BATCH_NO_AUTOSTATE(2 + 4); + R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_RED); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE); + R600_OUT_BATCH(r700->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA); + END_BATCH(); + COMMIT_BATCH(); + } } } } @@ -1176,9 +1182,11 @@ static int check_tx(GLcontext *ctx, struct radeon_state_atom *atom) R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { - radeonTexObj *t = r700->textures[i]; - if (t) - count++; + if (ctx->Texture.Unit[i]._ReallyEnabled) { + radeonTexObj *t = r700->textures[i]; + if (t) + count++; + } } radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); return count * 31; -- cgit v1.2.3 From 9edd1a441c3c0c3f018ae561cd5711398ca56f95 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Fri, 11 Sep 2009 10:59:05 -0400 Subject: r600: enable caching of vertex programs --- src/mesa/drivers/dri/r600/r600_context.h | 3 + src/mesa/drivers/dri/r600/r700_chip.c | 10 ++- src/mesa/drivers/dri/r600/r700_oglprog.c | 36 ++++++----- src/mesa/drivers/dri/r600/r700_render.c | 9 +-- src/mesa/drivers/dri/r600/r700_vertprog.c | 103 ++++++++++++++++++++---------- src/mesa/drivers/dri/r600/r700_vertprog.h | 11 +++- 6 files changed, 110 insertions(+), 62 deletions(-) diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index 8ae05a301c..c59df7505a 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -51,6 +51,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r700_chip.h" #include "r600_tex.h" #include "r700_oglprog.h" +#include "r700_vertprog.h" struct r600_context; typedef struct r600_context context_t; @@ -155,6 +156,8 @@ struct r600_context { struct r600_hw_state atoms; + struct r700_vertex_program *selected_vp; + /* Vertex buffers */ GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 312cacffda..1b56059197 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -211,8 +211,7 @@ static void r700SetupVTXConstants(GLcontext * ctx, void r700SetupStreams(GLcontext *ctx) { context_t *context = R700_CONTEXT(ctx); - struct r700_vertex_program *vpc - = (struct r700_vertex_program *)ctx->VertexProgram._Current; + struct r700_vertex_program *vp = context->selected_vp; TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *vb = &tnl->vb; unsigned int i, j = 0; @@ -221,7 +220,7 @@ void r700SetupStreams(GLcontext *ctx) R600_STATECHANGE(context, vtx); for(i=0; imesa_program.Base.InputsRead & (1 << i)) { + if(vp->mesa_program->Base.InputsRead & (1 << i)) { rcommon_emit_vector(ctx, &context->radeon.tcl.aos[j], vb->AttribPtr[i]->data, @@ -237,8 +236,7 @@ void r700SetupStreams(GLcontext *ctx) static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) { context_t *context = R700_CONTEXT(ctx); - struct r700_vertex_program *vpc - = (struct r700_vertex_program *)ctx->VertexProgram._Current; + struct r700_vertex_program *vp = context->selected_vp; unsigned int i, j = 0; BATCH_LOCALS(&context->radeon); radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); @@ -258,7 +256,7 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) COMMIT_BATCH(); for(i=0; imesa_program.Base.InputsRead & (1 << i)) { + if(vp->mesa_program->Base.InputsRead & (1 << i)) { /* currently aos are packed */ r700SetupVTXConstants(ctx, i, diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c index 3c8c1fd7a3..5290ef31be 100644 --- a/src/mesa/drivers/dri/r600/r700_oglprog.c +++ b/src/mesa/drivers/dri/r600/r700_oglprog.c @@ -46,7 +46,7 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, { struct gl_program *pProgram = NULL; - struct r700_vertex_program *vp; + struct r700_vertex_program_cont *vpc; struct r700_fragment_program *fp; radeon_print(RADEON_SHADER, RADEON_VERBOSE, @@ -56,16 +56,11 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, { case GL_VERTEX_STATE_PROGRAM_NV: case GL_VERTEX_PROGRAM_ARB: - vp = CALLOC_STRUCT(r700_vertex_program); + vpc = CALLOC_STRUCT(r700_vertex_program_cont); pProgram = _mesa_init_vertex_program(ctx, - &vp->mesa_program, + &vpc->mesa_program, target, id); - vp->translated = GL_FALSE; - vp->loaded = GL_FALSE; - - vp->shaderbo = NULL; - break; case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: @@ -89,7 +84,8 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) { - struct r700_vertex_program * vp; + struct r700_vertex_program_cont * vpc; + struct r700_vertex_program *vp, *tmp; struct r700_fragment_program * fp; radeon_print(RADEON_SHADER, RADEON_VERBOSE, @@ -99,14 +95,20 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) { case GL_VERTEX_STATE_PROGRAM_NV: case GL_VERTEX_PROGRAM_ARB: - vp = (struct r700_vertex_program*)prog; - /* Release DMA region */ - - r600DeleteShader(ctx, vp->shaderbo); - - /* Clean up */ - Clean_Up_Assembler(&(vp->r700AsmCode)); - Clean_Up_Shader(&(vp->r700Shader)); + vpc = (struct r700_vertex_program_cont*)prog; + vp = vpc->progs; + while (vp) { + tmp = vp->next; + /* Release DMA region */ + + r600DeleteShader(ctx, vp->shaderbo); + + /* Clean up */ + Clean_Up_Assembler(&(vp->r700AsmCode)); + Clean_Up_Shader(&(vp->r700Shader)); + _mesa_free(vp); + vp = tmp; + } break; case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 3566bf3ca7..b1c3648ca5 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -319,14 +319,13 @@ static GLuint r700PredictRenderSize(GLcontext* ctx) { context_t *context = R700_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); - struct r700_vertex_program *vpc - = (struct r700_vertex_program *)ctx->VertexProgram._Current; + struct r700_vertex_program *vp = context->selected_vp; struct vertex_buffer *vb = &tnl->vb; GLboolean flushed; GLuint dwords, i; GLuint state_size; /* pre calculate aos count so state prediction works */ - context->radeon.tcl.aos_count = _mesa_bitcount(vpc->mesa_program.Base.InputsRead); + context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead); dwords = PRE_EMIT_STATE_BUFSZ; for (i = 0; i < vb->PrimitiveCount; i++) @@ -365,7 +364,6 @@ static GLboolean r700RunRender(GLcontext * ctx, /* mark vtx as dirty since it changes per-draw */ R600_STATECHANGE(context, vtx); - r700UpdateShaders(ctx); r700SetScissor(context); r700SetupVertexProgram(ctx); r700SetupFragmentProgram(ctx); @@ -427,7 +425,10 @@ static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/ /* TODO : sw fallback */ + /* Need shader bo's setup before bo check */ + r700UpdateShaders(ctx); /** + * Ensure all enabled and complete textures are uploaded along with any buffers being used. */ if(!r600ValidateBuffers(ctx)) diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index d107f99e7b..8c2b0071df 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -35,6 +35,7 @@ #include "main/mtypes.h" #include "tnl/t_context.h" +#include "shader/program.h" #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" @@ -258,28 +259,54 @@ GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, return GL_TRUE; } -GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp, - struct gl_vertex_program *mesa_vp) +struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, + struct gl_vertex_program *mesa_vp) { + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vp; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + unsigned int unBit; + unsigned int i; + + vp = _mesa_calloc(sizeof(*vp)); + vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base); + + for(i=0; imesa_program->Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */ + { + vp->aos_desc[i].size = vb->AttribPtr[i]->size; + vp->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/ + vp->aos_desc[i].type = GL_FLOAT; + } + } + + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) + { + vp->r700AsmCode.bR6xx = 1; + } + //Init_Program Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) ); Map_Vertex_Program( vp, mesa_vp ); if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, mesa_vp)) { - return GL_FALSE; + return NULL; } if(GL_FALSE == AssembleInstr(mesa_vp->Base.NumInstructions, &(mesa_vp->Base.Instructions[0]), &(vp->r700AsmCode)) ) { - return GL_FALSE; + return NULL; } if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), mesa_vp->Base.OutputsWritten) ) { - return GL_FALSE; + return NULL; } vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0 @@ -289,72 +316,82 @@ GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp, vp->translated = GL_TRUE; - return GL_TRUE; + return vp; } void r700SelectVertexShader(GLcontext *ctx) { context_t *context = R700_CONTEXT(ctx); - struct r700_vertex_program *vpc - = (struct r700_vertex_program *)ctx->VertexProgram._Current; + struct r700_vertex_program_cont *vpc; + struct r700_vertex_program *vp; TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *vb = &tnl->vb; unsigned int unBit; unsigned int i; + GLboolean match; + vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; + +#if 0 if (context->radeon.NewGLState & (_NEW_PROGRAM_CONSTANTS|_NEW_PROGRAM)) { vpc->needUpdateVF = 1; } +#endif - if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) + for (vp = vpc->progs; vp; vp = vp->next) { - vpc->r700AsmCode.bR6xx = 1; - } - + match = GL_TRUE; for(i=0; imesa_program.Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */ + if(vpc->mesa_program.Base.InputsRead & unBit) { - vpc->aos_desc[i].size = vb->AttribPtr[i]->size; - vpc->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/ - vpc->aos_desc[i].type = GL_FLOAT; + if (vp->aos_desc[i].size != vb->AttribPtr[i]->size) + match = GL_FALSE; + break; } } - - if(GL_FALSE == vpc->translated) { - r700TranslateVertexShader(vpc, &(vpc->mesa_program) ); + if (match) + { + context->selected_vp = vp; + return; } + } + + vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program) ); + if(!vp) + { + radeon_error("Failed to translate vertex shader. \n"); + return; + } + vp->next = vpc->progs; + vpc->progs = vp; + context->selected_vp = vp; + return; } void * r700GetActiveVpShaderBo(GLcontext * ctx) { - struct r700_vertex_program *vp - = (struct r700_vertex_program *)ctx->VertexProgram._Current; + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vp = context->selected_vp;; - return vp->shaderbo; + if (vp) + return vp->shaderbo; + else + return NULL; } GLboolean r700SetupVertexProgram(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); - struct r700_vertex_program *vp - = (struct r700_vertex_program *)ctx->VertexProgram._Current; + struct r700_vertex_program *vp = context->selected_vp; struct gl_program_parameter_list *paramList; unsigned int unNumParamData; unsigned int ui; - if (vp->needUpdateVF) - { - vp->loaded = GL_FALSE; - vp->r700Shader.bNeedsAssembly = GL_TRUE; - Process_Vertex_Program_Vfetch_Instructions(vp, &(vp->mesa_program)); - r600DeleteShader(ctx, vp->shaderbo); - } - if(GL_FALSE == vp->loaded) { if(vp->r700Shader.bNeedsAssembly == GL_TRUE) @@ -410,7 +447,7 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) */ /* sent out shader constants. */ - paramList = vp->mesa_program.Base.Parameters; + paramList = vp->mesa_program->Base.Parameters; if(NULL != paramList) { _mesa_load_state_parameters(ctx, paramList); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h index e2e65021fd..c48764c43b 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.h +++ b/src/mesa/drivers/dri/r600/r700_vertprog.h @@ -43,7 +43,7 @@ typedef struct ArrayDesc //TEMP struct r700_vertex_program { - struct gl_vertex_program mesa_program; /* Must be first */ + struct gl_vertex_program *mesa_program; /* Must be first */ struct r700_vertex_program *next; @@ -59,6 +59,13 @@ struct r700_vertex_program ArrayDesc aos_desc[VERT_ATTRIB_MAX]; }; +struct r700_vertex_program_cont +{ + struct gl_vertex_program mesa_program; + + struct r700_vertex_program *progs; +}; + //Internal unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm, struct gl_vertex_program *mesa_vp, @@ -74,7 +81,7 @@ void Map_Vertex_Program(struct r700_vertex_program *vp, GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); -GLboolean r700TranslateVertexShader(struct r700_vertex_program *vp, +struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, struct gl_vertex_program *mesa_vp); /* Interface */ -- cgit v1.2.3 From 7f5a958c80f0fcd7681d515fd1c1b8bc00524a7a Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Fri, 11 Sep 2009 15:59:55 -0400 Subject: r600: fix texcoords from constants with some minor updates from Richard. --- src/mesa/drivers/dri/r600/r700_assembler.c | 92 +++++++++++++++++------------- 1 file changed, 52 insertions(+), 40 deletions(-) diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 2d8480daaf..fda6692725 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -1149,41 +1149,49 @@ GLboolean tex_dst(r700_AssemblerBase *pAsm) GLboolean tex_src(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - + GLboolean bValidTexCoord = GL_FALSE; - switch (pILInst->SrcReg[0].File) - { + switch (pILInst->SrcReg[0].File) { + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + bValidTexCoord = GL_TRUE; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->aArgSubst[1]; + break; case PROGRAM_TEMPORARY: - bValidTexCoord = GL_TRUE; - - pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + pAsm->starting_temp_register_number; - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - - break; + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + + pAsm->starting_temp_register_number; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + break; case PROGRAM_INPUT: - switch (pILInst->SrcReg[0].Index) - { - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; - - pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; - } - break; + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + } + break; } if(GL_TRUE == bValidTexCoord) - { + { setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); } else @@ -1201,7 +1209,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1; pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1; pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1; - + return GL_TRUE; } @@ -2202,7 +2210,9 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - if( GL_TRUE == IsTex(pILInst->Opcode) ) + if( GL_TRUE == IsTex(pILInst->Opcode) && + /* handle const moves to temp register */ + !(pAsm->D.dst.opcode == SQ_OP2_INST_MOV) ) { if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) @@ -3374,28 +3384,30 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) case PROGRAM_ENV_PARAM: case PROGRAM_STATE_VAR: src_const = GL_TRUE; + break; case PROGRAM_TEMPORARY: case PROGRAM_INPUT: src_const = GL_FALSE; + break; } - if (GL_TRUE == src_const) + if (GL_TRUE == src_const) { - radeon_error("TODO: Texture coordinates from a constant register not supported.\n"); - return GL_FALSE; + if ( GL_FALSE == mov_temp(pAsm, 0) ) + return GL_FALSE; } - switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) + switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) { case OPCODE_TEX: - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; - case OPCODE_TXB: + case OPCODE_TXB: radeon_error("do not support TXB yet\n"); return GL_FALSE; break; - case OPCODE_TXP: - /* TODO : tex proj version : divid first 3 components by 4th */ + case OPCODE_TXP: + /* TODO : tex proj version : divid first 3 components by 4th */ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; default: @@ -3418,13 +3430,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { return GL_FALSE; } - + if( GL_FALSE == tex_src(pAsm) ) { return GL_FALSE; } - if ( GL_FALSE == next_ins(pAsm) ) + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } -- cgit v1.2.3 From 93a7ea6ba0d5700e18b28c23da226e055f7c2fa1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Sep 2009 17:08:26 -0400 Subject: r600: fix warning Noticed by rnoland on IRC. --- src/mesa/drivers/dri/r600/r700_assembler.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index fda6692725..efeccb25f1 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3387,6 +3387,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) break; case PROGRAM_TEMPORARY: case PROGRAM_INPUT: + default: src_const = GL_FALSE; break; } -- cgit v1.2.3 From 9437ac9bccd294bd5a8b838e7ca7597e5dc6d5b0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Sep 2009 18:05:15 -0400 Subject: r600: add span support for 1D tiles 1D tile span support for depth/stencil/color/textures Z and stencil buffers are always tiled, so this fixes sw access to Z and stencil buffers. color and textures are currently linear, but this adds span support when we implement 1D tiling. This fixes the text in progs/demos/engine and progs/tests/z* --- src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h | 2 + src/mesa/drivers/dri/r600/r700_chip.c | 2 +- src/mesa/drivers/dri/radeon/radeon_span.c | 220 +++++++++++++++++++++++++ 3 files changed, 223 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h b/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h index 9d5aa3c7e4..edd85b0fac 100644 --- a/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h +++ b/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h @@ -1366,6 +1366,7 @@ enum { DB_DEPTH_INFO__READ_SIZE_bit = 1 << 3, DB_DEPTH_INFO__ARRAY_MODE_mask = 0x0f << 15, DB_DEPTH_INFO__ARRAY_MODE_shift = 15, + ARRAY_1D_TILED_THIN1 = 0x02, ARRAY_2D_TILED_THIN1 = 0x04, TILE_SURFACE_ENABLE_bit = 1 << 25, TILE_COMPACT_bit = 1 << 26, @@ -1449,6 +1450,7 @@ enum { CB_COLOR0_INFO__ARRAY_MODE_shift = 8, ARRAY_LINEAR_GENERAL = 0x00, ARRAY_LINEAR_ALIGNED = 0x01, +/* ARRAY_1D_TILED_THIN1 = 0x02, */ /* ARRAY_2D_TILED_THIN1 = 0x04, */ NUMBER_TYPE_mask = 0x07 << 12, NUMBER_TYPE_shift = 12, diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 1b56059197..06d7e9c9ab 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -351,7 +351,7 @@ static void r700SetDepthTarget(context_t *context) SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16, DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask); } - SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_2D_TILED_THIN1, + SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_1D_TILED_THIN1, DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask); /* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* z buffer sie may much bigger than what need, so use actual used h. */ } diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index 4e100d854e..aa2035338c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -106,6 +106,142 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, } #endif +/* r600 tiling + * two main types: + * - 1D (akin to macro-linear/micro-tiled on older asics) + * - 2D (akin to macro-tiled/micro-tiled on older asics) + * only 1D tiling is implemented below + */ +#if defined(RADEON_COMMON_FOR_R600) +static GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, + GLint x, GLint y, GLint is_depth, GLint is_stencil) +{ + GLint element_bytes = rrb->cpp; + GLint num_samples = 1; + GLint tile_width = 8; + GLint tile_height = 8; + GLint tile_thickness = 1; + GLint pitch_elements = rrb->pitch / element_bytes; + GLint height = rrb->base.Height; + GLint z = 0; + GLint sample_number = 0; + /* */ + GLint tile_bytes; + GLint tiles_per_row; + GLint tiles_per_slice; + GLint slice_offset; + GLint tile_row_index; + GLint tile_column_index; + GLint tile_offset; + GLint pixel_number = 0; + GLint element_offset; + GLint offset = 0; + + tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples; + tiles_per_row = pitch_elements /tile_width; + tiles_per_slice = tiles_per_row * (height / tile_height); + slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes; + tile_row_index = y / tile_height; + tile_column_index = x / tile_width; + tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes; + + if (is_depth) { + GLint pixel_offset = 0; + + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0] + pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1] + pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1] + pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + switch (element_bytes) { + case 2: + pixel_offset = pixel_number * element_bytes * num_samples; + element_offset = pixel_offset + (sample_number * element_bytes); + break; + case 4: + /* stencil and depth data are stored separately within a tile. + * stencil is stored in a contiguous tile before the depth tile. + * stencil element is 1 byte, depth element is 3 bytes. + * stencil tile is 64 bytes. + */ + if (is_stencil) + pixel_offset = pixel_number * 1 * num_samples; + else + pixel_offset = (pixel_number * 3 * num_samples) + 64; + break; + } + element_offset = pixel_offset + (sample_number * element_bytes); + } else { + GLint sample_offset; + + switch (element_bytes) { + case 1: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2] + pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1] + pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + case 2: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2] + pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0] + pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + case 4: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0] + pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2] + pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + } + sample_offset = sample_number * (tile_bytes / num_samples); + element_offset = sample_offset + (pixel_number * element_bytes); + } + offset = slice_offset + tile_offset + element_offset; + return offset; +} + +/* depth buffers */ +static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0); + return &ptr[offset]; +} + +static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1); + return &ptr[offset]; +} + +static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; + GLint offset; + + if (rrb->has_surface || !(rrb->bo->flags & mask)) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + offset = r600_1d_tile_helper(rrb, x, y, 0, 0); + } + return &ptr[offset]; +} + +#endif + /* radeon tiling on r300-r500 has 4 states, macro-linear/micro-linear macro-linear/micro-tiled @@ -270,7 +406,11 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_RGB565 #define TAG2(x,y) radeon##x##_RGB565##y +#if defined(RADEON_COMMON_FOR_R600) +#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) +#else #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#endif #include "spantmp2.h" /* 16 bit, ARGB1555 color spanline and pixel functions @@ -280,7 +420,11 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_ARGB1555 #define TAG2(x,y) radeon##x##_ARGB1555##y +#if defined(RADEON_COMMON_FOR_R600) +#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) +#else #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#endif #include "spantmp2.h" /* 16 bit, RGBA4 color spanline and pixel functions @@ -290,7 +434,11 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_ARGB4444 #define TAG2(x,y) radeon##x##_ARGB4444##y +#if defined(RADEON_COMMON_FOR_R600) +#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) +#else #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#endif #include "spantmp2.h" /* 32 bit, xRGB8888 color spanline and pixel functions @@ -300,11 +448,19 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_xRGB8888 #define TAG2(x,y) radeon##x##_xRGB8888##y +#if defined(RADEON_COMMON_FOR_R600) +#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000)) +#define PUT_VALUE(_x, _y, d) { \ + GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \ + *_ptr = d; \ +} while (0) +#else #define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000)) #define PUT_VALUE(_x, _y, d) { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ *_ptr = d; \ } while (0) +#endif #include "spantmp2.h" /* 32 bit, ARGB8888 color spanline and pixel functions @@ -314,11 +470,19 @@ s8z24_to_z24s8(uint32_t val) #define TAG(x) radeon##x##_ARGB8888 #define TAG2(x,y) radeon##x##_ARGB8888##y +#if defined(RADEON_COMMON_FOR_R600) +#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))) +#define PUT_VALUE(_x, _y, d) { \ + GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \ + *_ptr = d; \ +} while (0) +#else #define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) #define PUT_VALUE(_x, _y, d) { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ *_ptr = d; \ } while (0) +#endif #include "spantmp2.h" /* ================================================================ @@ -342,6 +506,9 @@ s8z24_to_z24s8(uint32_t val) #if defined(RADEON_COMMON_FOR_R200) #define WRITE_DEPTH( _x, _y, d ) \ *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d +#elif defined(RADEON_COMMON_FOR_R600) +#define WRITE_DEPTH( _x, _y, d ) \ + *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d #else #define WRITE_DEPTH( _x, _y, d ) \ *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d @@ -350,6 +517,9 @@ s8z24_to_z24s8(uint32_t val) #if defined(RADEON_COMMON_FOR_R200) #define READ_DEPTH( d, _x, _y ) \ d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) +#elif defined(RADEON_COMMON_FOR_R600) +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) #else #define READ_DEPTH( d, _x, _y ) \ d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) @@ -374,6 +544,15 @@ do { \ tmp |= ((d << 8) & 0xffffff00); \ *_ptr = tmp; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R600) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + tmp &= 0xff000000; \ + tmp |= ((d) & 0x00ffffff); \ + *_ptr = tmp; \ +} while (0) #elif defined(RADEON_COMMON_FOR_R200) #define WRITE_DEPTH( _x, _y, d ) \ do { \ @@ -399,6 +578,11 @@ do { \ do { \ d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \ }while(0) +#elif defined(RADEON_COMMON_FOR_R600) +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \ + }while(0) #elif defined(RADEON_COMMON_FOR_R200) #define READ_DEPTH( d, _x, _y ) \ do { \ @@ -426,6 +610,20 @@ do { \ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ *_ptr = d; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R600) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + tmp &= 0xff000000; \ + tmp |= (((d) >> 8) & 0x00ffffff); \ + *_ptr = tmp; \ + _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \ + tmp = *_ptr; \ + tmp &= 0xffffff00; \ + tmp |= (d) & 0xff; \ + *_ptr = tmp; \ +} while (0) #elif defined(RADEON_COMMON_FOR_R200) #define WRITE_DEPTH( _x, _y, d ) \ do { \ @@ -447,6 +645,12 @@ do { \ do { \ d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \ }while(0) +#elif defined(RADEON_COMMON_FOR_R600) +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \ + d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff; \ + }while(0) #elif defined(RADEON_COMMON_FOR_R200) #define READ_DEPTH( d, _x, _y ) \ do { \ @@ -476,6 +680,15 @@ do { \ tmp |= (d) & 0xff; \ *_ptr = tmp; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R600) +#define WRITE_STENCIL( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \ + GLuint tmp = *_ptr; \ + tmp &= 0xffffff00; \ + tmp |= (d) & 0xff; \ + *_ptr = tmp; \ +} while (0) #elif defined(RADEON_COMMON_FOR_R200) #define WRITE_STENCIL( _x, _y, d ) \ do { \ @@ -503,6 +716,13 @@ do { \ GLuint tmp = *_ptr; \ d = tmp & 0x000000ff; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R600) +#define READ_STENCIL( d, _x, _y ) \ +do { \ + GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + d = tmp & 0x000000ff; \ +} while (0) #elif defined(RADEON_COMMON_FOR_R200) #define READ_STENCIL( d, _x, _y ) \ do { \ -- cgit v1.2.3 From 2cd2dc34ac93dd929ec8f01cf1f7f8dfa6b34d0d Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Tue, 15 Sep 2009 11:27:51 -0400 Subject: r600: support position_invariant programs --- src/mesa/drivers/dri/r600/r700_vertprog.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 8c2b0071df..9ee26286d9 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -42,6 +42,7 @@ #include "radeon_debug.h" #include "r600_context.h" #include "r600_cmdbuf.h" +#include "shader/programopt.c" #include "r700_debug.h" #include "r700_vertprog.h" @@ -272,6 +273,11 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, vp = _mesa_calloc(sizeof(*vp)); vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base); + if (mesa_vp->IsPositionInvariant) + { + _mesa_insert_mvp_code(ctx, vp->mesa_program); + } + for(i=0; ir700AsmCode), &(vp->r700Shader) ); - Map_Vertex_Program( vp, mesa_vp ); + Map_Vertex_Program( vp, vp->mesa_program ); - if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, mesa_vp)) + if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program)) { return NULL; } - if(GL_FALSE == AssembleInstr(mesa_vp->Base.NumInstructions, - &(mesa_vp->Base.Instructions[0]), + if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions, + &(vp->mesa_program->Base.Instructions[0]), &(vp->r700AsmCode)) ) { return NULL; } - if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), mesa_vp->Base.OutputsWritten) ) + if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) ) { return NULL; } @@ -329,23 +335,23 @@ void r700SelectVertexShader(GLcontext *ctx) unsigned int unBit; unsigned int i; GLboolean match; + GLbitfield InputsRead; vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; -#if 0 - if (context->radeon.NewGLState & (_NEW_PROGRAM_CONSTANTS|_NEW_PROGRAM)) + InputsRead = vpc->mesa_program.Base.InputsRead; + if (vpc->mesa_program.IsPositionInvariant) { - vpc->needUpdateVF = 1; - } -#endif - + InputsRead |= VERT_BIT_POS; + } + for (vp = vpc->progs; vp; vp = vp->next) { match = GL_TRUE; for(i=0; imesa_program.Base.InputsRead & unBit) + if(InputsRead & unBit) { if (vp->aos_desc[i].size != vb->AttribPtr[i]->size) match = GL_FALSE; -- cgit v1.2.3 From dbec27be856584bc5205c7eeeca2b7e98299d4cb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Sep 2009 16:58:37 -0400 Subject: r600: minor span cleanups --- src/mesa/drivers/dri/radeon/radeon_span.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index aa2035338c..9959da011e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -113,8 +113,8 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, * only 1D tiling is implemented below */ #if defined(RADEON_COMMON_FOR_R600) -static GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, - GLint x, GLint y, GLint is_depth, GLint is_stencil) +static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, + GLint x, GLint y, GLint is_depth, GLint is_stencil) { GLint element_bytes = rrb->cpp; GLint num_samples = 1; @@ -138,7 +138,7 @@ static GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, GLint offset = 0; tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples; - tiles_per_row = pitch_elements /tile_width; + tiles_per_row = pitch_elements / tile_width; tiles_per_slice = tiles_per_row * (height / tile_height); slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes; tile_row_index = y / tile_height; @@ -157,7 +157,6 @@ static GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, switch (element_bytes) { case 2: pixel_offset = pixel_number * element_bytes * num_samples; - element_offset = pixel_offset + (sample_number * element_bytes); break; case 4: /* stencil and depth data are stored separately within a tile. -- cgit v1.2.3 From ec14d59afa952b4e53ad268971098584686a6fca Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Sep 2009 17:12:03 -0400 Subject: radeon: don't build non-r600 span code on r600 --- src/mesa/drivers/dri/radeon/radeon_span.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index 9959da011e..d603f52df7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -239,7 +239,7 @@ static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb, return &ptr[offset]; } -#endif +#else /* radeon tiling on r300-r500 has 4 states, macro-linear/micro-linear @@ -332,7 +332,10 @@ static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb, return &ptr[offset]; } +#endif + #ifndef COMPILE_R300 +#ifndef COMPILE_R600 static uint32_t z24s8_to_s8z24(uint32_t val) { @@ -345,6 +348,7 @@ s8z24_to_z24s8(uint32_t val) return (val >> 24) | (val << 8); } #endif +#endif /* * Note that all information needed to access pixels in a renderbuffer -- cgit v1.2.3 From 095db818c6c7ed5706b5f31d17d0cb19c03cb67a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 19 Sep 2009 14:46:06 -0400 Subject: r600: fix polygon offset --- src/mesa/drivers/dri/r600/r700_state.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index 2a0b419256..d8190efe47 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -1122,20 +1122,25 @@ static void r700PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) // context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); GLfloat constant = units; + GLchar depth = 0; + + R600_STATECHANGE(context, poly); switch (ctx->Visual.depthBits) { case 16: constant *= 4.0; + depth = -16; break; case 24: constant *= 2.0; + depth = -24; break; } factor *= 12.0; - - R600_STATECHANGE(context, poly); - + SETfield(r700->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All, depth, + POLY_OFFSET_NEG_NUM_DB_BITS_shift, POLY_OFFSET_NEG_NUM_DB_BITS_mask); + //r700->PA_SU_POLY_OFFSET_CLAMP.f32All = constant; //??? r700->PA_SU_POLY_OFFSET_FRONT_SCALE.f32All = factor; r700->PA_SU_POLY_OFFSET_FRONT_OFFSET.f32All = constant; r700->PA_SU_POLY_OFFSET_BACK_SCALE.f32All = factor; -- cgit v1.2.3 From 48559c76056e09ca4f9e4f39e9008f6d32ecd5b0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 19 Sep 2009 15:18:42 -0400 Subject: r600: fix point sizes registers takes radius --- src/mesa/drivers/dri/r600/r700_state.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index d8190efe47..8571563149 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -837,9 +837,9 @@ static void r700PointSize(GLcontext * ctx, GLfloat size) size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize); /* format is 12.4 fixed point */ - SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 16), + SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 8.0), PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask); - SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 16), + SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 8.0), PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask); } @@ -854,11 +854,11 @@ static void r700PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * pa /* format is 12.4 fixed point */ switch (pname) { case GL_POINT_SIZE_MIN: - SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MinSize * 16.0), + SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MinSize * 8.0), MIN_SIZE_shift, MIN_SIZE_mask); break; case GL_POINT_SIZE_MAX: - SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MaxSize * 16.0), + SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MaxSize * 8.0), MAX_SIZE_shift, MAX_SIZE_mask); break; case GL_POINT_DISTANCE_ATTENUATION: -- cgit v1.2.3 From ed91d103477d563f73be3555d1022ec9af073467 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 21 Sep 2009 10:14:25 -0400 Subject: r600: fix some issues with LIT instruction - MUL_LIT is ALU.Trans instruction - some Trans instructions can take 3 arguments - don't clobber dst.x, use dst.z as temp, it'll get written correct value in last insn - respect source swizzles --- src/mesa/drivers/dri/r600/r700_assembler.c | 69 ++++++++++++++++-------------- 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index efeccb25f1..f46bc32201 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2024,7 +2024,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) return GL_FALSE; } - if (pAsm->D.dst.math == 0) + if (uNumSrc > 1) { // Process source 1 current_source_index = 1; @@ -2880,6 +2880,11 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + /* dst.y = max(src.x, 0.0) */ pAsm->D.dst.opcode = SQ_OP2_INST_MAX; pAsm->D.dst.rtype = dstType; @@ -2891,11 +2896,6 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_X; - pAsm->S[0].src.swizzley = SQ_SEL_X; - pAsm->S[0].src.swizzlez = SQ_SEL_X; - pAsm->S[0].src.swizzlew = SQ_SEL_X; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = tmp; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); @@ -2909,34 +2909,47 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* before: dst.w = log(src.y) - * after : dst.x = log(src.y) - * why change dest register is that dst.w has been initialized as 1 before - */ + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y); + + /* dst.z = log(src.y) */ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED; pAsm->D.dst.math = 1; pAsm->D.dst.rtype = dstType; pAsm->D.dst.reg = dstReg; - pAsm->D.dst.writex = 1; + pAsm->D.dst.writex = 0; pAsm->D.dst.writey = 0; - pAsm->D.dst.writez = 0; + pAsm->D.dst.writez = 1; pAsm->D.dst.writew = 0; pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_Y; - pAsm->S[0].src.swizzley = SQ_SEL_Y; - pAsm->S[0].src.swizzlez = SQ_SEL_Y; - pAsm->S[0].src.swizzlew = SQ_SEL_Y; if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } - /* before: tmp.x = amd MUL_LIT(src.w, dst.w, src.x ) */ - /* after : tmp.x = amd MUL_LIT(src.w, dst.x, src.x ) */ + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 2) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W); + + swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X); + + /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT; + pAsm->D.dst.math = 1; pAsm->D.dst.op3 = 1; pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; @@ -2948,29 +2961,19 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_W; - pAsm->S[0].src.swizzley = SQ_SEL_W; - pAsm->S[0].src.swizzlez = SQ_SEL_W; - pAsm->S[0].src.swizzlew = SQ_SEL_W; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = dstReg; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); noneg_PVSSRC(&(pAsm->S[1].src)); - pAsm->S[1].src.swizzlex = SQ_SEL_X; - pAsm->S[1].src.swizzley = SQ_SEL_X; - pAsm->S[1].src.swizzlez = SQ_SEL_X; - pAsm->S[1].src.swizzlew = SQ_SEL_X; + pAsm->S[1].src.swizzlex = SQ_SEL_Z; + pAsm->S[1].src.swizzley = SQ_SEL_Z; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_Z; pAsm->S[2].src.rtype = srcType; pAsm->S[2].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[2].src)); - pAsm->S[2].src.swizzlex = SQ_SEL_X; - pAsm->S[2].src.swizzley = SQ_SEL_X; - pAsm->S[2].src.swizzlez = SQ_SEL_X; - pAsm->S[2].src.swizzlew = SQ_SEL_X; if( GL_FALSE == next_ins(pAsm) ) { -- cgit v1.2.3 From 28308c92605229129a12a2273dda47c6a2ca4790 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 21 Sep 2009 16:30:14 -0400 Subject: r600: various cleanups - max texture size is 8k, but mesa doesn't support that at the moment. - attempt to set shader limits to what the hw actually supports - clean up some old r300 cruft - no need to explicitly disable irqs. This is fixed in the drm now. Signed-off-by: Alex Deucher --- src/mesa/drivers/dri/r600/r600_context.c | 43 +++++++++++----------- src/mesa/drivers/dri/r600/r600_context.h | 19 ---------- .../drivers/dri/radeon/radeon_common_context.c | 7 +--- 3 files changed, 24 insertions(+), 45 deletions(-) diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 251c124cbf..414c5aec59 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -281,8 +281,8 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, ctx->Const.MaxTextureMaxAnisotropy = 16.0; ctx->Const.MaxTextureLodBias = 16.0; - ctx->Const.MaxTextureLevels = 13; - ctx->Const.MaxTextureRectSize = 4096; + ctx->Const.MaxTextureLevels = 13; /* hw support 14 */ + ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */ ctx->Const.MinPointSize = 0x0001 / 8.0; ctx->Const.MinPointSizeAA = 0x0001 / 8.0; @@ -328,25 +328,26 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _tnl_allow_vertex_fog(ctx, GL_TRUE); /* currently bogus data */ - ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeInstructions = - VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ - ctx->Const.VertexProgram.MaxTemps = 32; - ctx->Const.VertexProgram.MaxNativeTemps = - /*VSF_MAX_FRAGMENT_TEMPS */ 32; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; - - ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS; - ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ - ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS; - ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST; - ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeInstructions = - PFS_MAX_ALU_INST + PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeTexIndirections = - PFS_MAX_TEX_INDIRECT; + ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */ + ctx->Const.VertexProgram.MaxNativeInstructions = 8192; + ctx->Const.VertexProgram.MaxNativeAttribs = 160; + ctx->Const.VertexProgram.MaxTemps = 256; /* 256 for reg-based constants, inline consts also supported */ + ctx->Const.VertexProgram.MaxNativeTemps = 256; + ctx->Const.VertexProgram.MaxNativeParameters = 256; /* ??? */ + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */ + + ctx->Const.FragmentProgram.MaxNativeTemps = 256; + ctx->Const.FragmentProgram.MaxNativeAttribs = 32; + ctx->Const.FragmentProgram.MaxNativeParameters = 256; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192; + /* 8 per clause on r6xx, 16 on rv670/r7xx */ + if ((screen->chip_family == CHIP_FAMILY_RV670) || + (screen->chip_family >= CHIP_FAMILY_RV770)) + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16; + else + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8; + ctx->Const.FragmentProgram.MaxNativeInstructions = 8192; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index c59df7505a..9397ecde81 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -86,29 +86,10 @@ extern int hw_tcl_on; #include "tnl_dd/t_dd_vertex.h" #undef TAG -#define PFS_MAX_ALU_INST 64 -#define PFS_MAX_TEX_INST 64 -#define PFS_MAX_TEX_INDIRECT 4 -#define PFS_NUM_TEMP_REGS 32 -#define PFS_NUM_CONST_REGS 16 - -#define R600_MAX_AOS_ARRAYS 16 - -#define REG_COORDS 0 -#define REG_COLOR0 1 -#define REG_TEX0 2 - #define R600_FALLBACK_NONE 0 #define R600_FALLBACK_TCL 1 #define R600_FALLBACK_RAST 2 -enum -{ - NO_SHIFT = 0, - LEFT_SHIFT = 1, - RIGHT_SHIFT = 2, -}; - struct r600_hw_state { struct radeon_state_atom sq; struct radeon_state_atom db; diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 71ee06d9a7..330721acee 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -227,11 +227,8 @@ GLboolean radeonInitContext(radeonContextPtr radeon, fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode"); radeon->iw.irq_seq = -1; radeon->irqsEmitted = 0; - if (IS_R600_CLASS(radeon->radeonScreen)) - radeon->do_irqs = 0; - else - radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && - radeon->radeonScreen->irq); + radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && + radeon->radeonScreen->irq); radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); -- cgit v1.2.3 From 639fb1472d09281a8df3792c9bcbc59cd4424688 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 21 Sep 2009 16:48:55 -0400 Subject: r600: fix typo in the last commit 128 gprs, 256 reg-based consts --- src/mesa/drivers/dri/r600/r600_context.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 414c5aec59..e0b77d4385 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -327,16 +327,16 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _tnl_allow_pixel_fog(ctx, GL_FALSE); _tnl_allow_vertex_fog(ctx, GL_TRUE); - /* currently bogus data */ + /* 256 for reg-based consts, inline consts also supported */ ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */ ctx->Const.VertexProgram.MaxNativeInstructions = 8192; ctx->Const.VertexProgram.MaxNativeAttribs = 160; - ctx->Const.VertexProgram.MaxTemps = 256; /* 256 for reg-based constants, inline consts also supported */ - ctx->Const.VertexProgram.MaxNativeTemps = 256; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* ??? */ + ctx->Const.VertexProgram.MaxTemps = 128; + ctx->Const.VertexProgram.MaxNativeTemps = 128; + ctx->Const.VertexProgram.MaxNativeParameters = 256; ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */ - ctx->Const.FragmentProgram.MaxNativeTemps = 256; + ctx->Const.FragmentProgram.MaxNativeTemps = 128; ctx->Const.FragmentProgram.MaxNativeAttribs = 32; ctx->Const.FragmentProgram.MaxNativeParameters = 256; ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192; -- cgit v1.2.3 From 2058dfaa47704abc62aa5aa9719013624f26764d Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Wed, 23 Sep 2009 14:20:59 +0300 Subject: r600: add support for CUBE textures, also TXP seems to work here ... --- src/mesa/drivers/dri/r600/r700_assembler.c | 306 ++++++++++++++++++++++++----- src/mesa/drivers/dri/r600/r700_assembler.h | 4 + 2 files changed, 263 insertions(+), 47 deletions(-) diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index f46bc32201..00eda544d4 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -213,7 +213,7 @@ GLboolean is_reduction_opcode(PVSDWORD* dest) { if (dest->dst.op3 == 0) { - if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE) ) + if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) ) { return GL_TRUE; } @@ -350,6 +350,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) case SQ_OP2_INST_PRED_SETNE: case SQ_OP2_INST_DOT4: case SQ_OP2_INST_DOT4_IEEE: + case SQ_OP2_INST_CUBE: return 2; case SQ_OP2_INST_MOV: @@ -469,6 +470,9 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->number_of_inputs = 0; + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; + return 0; } @@ -682,7 +686,7 @@ GLboolean add_tex_instruction(r700_AssemblerBase* pAsm, // If this clause constains any TEX instruction that is dependent on a previous instruction, // set the barrier bit - if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) ) + if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE ) { pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1; } @@ -1152,42 +1156,48 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) GLboolean bValidTexCoord = GL_FALSE; + if(pAsm->aArgSubst[1] >= 0) + { + bValidTexCoord = GL_TRUE; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->aArgSubst[1]; + } + else + { switch (pILInst->SrcReg[0].File) { - case PROGRAM_CONSTANT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_STATE_VAR: - bValidTexCoord = GL_TRUE; - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = pAsm->aArgSubst[1]; - break; - case PROGRAM_TEMPORARY: - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + - pAsm->starting_temp_register_number; - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - break; - case PROGRAM_INPUT: - switch (pILInst->SrcReg[0].Index) - { - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = - pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; - break; - } - break; + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + break; + case PROGRAM_TEMPORARY: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + + pAsm->starting_temp_register_number; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + break; + case PROGRAM_INPUT: + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + } + break; + } } if(GL_TRUE == bValidTexCoord) @@ -1955,7 +1965,9 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { is_single_scalar_operation = GL_FALSE; number_of_scalar_operations = 4; - + +/* current assembler doesn't do more than 1 register per source */ +#if 0 /* check read port, only very preliminary algorithm, not count in src0/1 same comp case and prev slot repeat case; also not count relative addressing. TODO: improve performance. */ @@ -1990,6 +2002,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { bSplitInst = GL_TRUE; } +#endif } contiguous_slots_needed = 0; @@ -2210,9 +2223,7 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - if( GL_TRUE == IsTex(pILInst->Opcode) && - /* handle const moves to temp register */ - !(pAsm->D.dst.opcode == SQ_OP2_INST_MOV) ) + if( GL_TRUE == pAsm->is_tex ) { if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) @@ -2256,7 +2267,8 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) pAsm->S[0].bits = 0; pAsm->S[1].bits = 0; pAsm->S[2].bits = 0; - + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; return GL_TRUE; } @@ -3379,7 +3391,10 @@ GLboolean assemble_STP(r700_AssemblerBase *pAsm) GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { GLboolean src_const; + GLboolean need_barrier = GL_FALSE; + checkop1(pAsm); + switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File) { case PROGRAM_CONSTANT: @@ -3399,20 +3414,18 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { if ( GL_FALSE == mov_temp(pAsm, 0) ) return GL_FALSE; + need_barrier = GL_TRUE; } switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) { case OPCODE_TEX: - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; case OPCODE_TXB: radeon_error("do not support TXB yet\n"); return GL_FALSE; break; case OPCODE_TXP: - /* TODO : tex proj version : divid first 3 components by 4th */ - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; default: radeon_error("Internal error: bad texture op (not TEX)\n"); @@ -3420,6 +3433,190 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) break; } + if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) + { + GLuint tmp = gethelpr(pAsm); + pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + pAsm->D.dst.math = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writew = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W); + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->aArgSubst[1] = tmp; + need_barrier = GL_TRUE; + } + + if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX ) + { + GLuint tmp1 = gethelpr(pAsm); + GLuint tmp2 = gethelpr(pAsm); + + /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ + pAsm->D.dst.opcode = SQ_OP2_INST_CUBE; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently + * have to do explicit instruction + */ + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writez = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[1].bits = pAsm->S[0].bits; + flipneg_PVSSRC(&(pAsm->S[1].src)); + + next_ins(pAsm); + + /* tmp1.z = RCP_e(|tmp1.z|) */ + pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + pAsm->D.dst.math = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writez = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + pAsm->S[0].src.swizzlex = SQ_SEL_Z; + + next_ins(pAsm); + + /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x + * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x + * muladd has no writemask, have to use another temp + * also no support for imm constants, so add 1 here + */ + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z); + setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); + pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[2].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1); + + next_ins(pAsm); + + /* ADD the remaining .5 */ + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5 + noswizzle_PVSSRC(&(pAsm->S[1].src)); + + next_ins(pAsm); + + /* tmp1.xy = temp2.xy */ + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + pAsm->aArgSubst[1] = tmp1; + need_barrier = GL_TRUE; + + } + + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + pAsm->is_tex = GL_TRUE; + if ( GL_TRUE == need_barrier ) + { + pAsm->need_tex_barrier = GL_TRUE; + } // Set src1 to tex unit id pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; @@ -3440,10 +3637,25 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) return GL_FALSE; } - if ( GL_FALSE == next_ins(pAsm) ) + if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) { - return GL_FALSE; + /* hopefully did swizzles before */ + noswizzle_PVSSRC(&(pAsm->S[0].src)); } + + if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX) + { + /* SAMPLE dst, tmp.yxwy, CUBE */ + pAsm->S[0].src.swizzlex = SQ_SEL_Y; + pAsm->S[0].src.swizzley = SQ_SEL_X; + pAsm->S[0].src.swizzlez = SQ_SEL_W; + pAsm->S[0].src.swizzlew = SQ_SEL_Y; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index f9c4d849c6..73bb8bac55 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -374,6 +374,10 @@ typedef struct r700_AssemblerBase struct prog_instruction * pILInst; GLuint uiCurInst; GLboolean bR6xx; + /* helper to decide which type of instruction to assemble */ + GLboolean is_tex; + /* we inserted helper intructions and need barrier on next TEX ins */ + GLboolean need_tex_barrier; } r700_AssemblerBase; //Internal use -- cgit v1.2.3 From e44c084be536c021985a8908db4300c764c63bbc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Sep 2009 15:44:02 -0600 Subject: glsl: fix missing initializers warning --- src/mesa/shader/slang/slang_builtin.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/shader/slang/slang_builtin.c b/src/mesa/shader/slang/slang_builtin.c index bef0f85653..e5809509c9 100644 --- a/src/mesa/shader/slang/slang_builtin.c +++ b/src/mesa/shader/slang/slang_builtin.c @@ -741,7 +741,7 @@ static const struct input_info vertInputs[] = { { "gl_MultiTexCoord5", VERT_ATTRIB_TEX5, GL_FLOAT_VEC4, SWIZZLE_NOOP }, { "gl_MultiTexCoord6", VERT_ATTRIB_TEX6, GL_FLOAT_VEC4, SWIZZLE_NOOP }, { "gl_MultiTexCoord7", VERT_ATTRIB_TEX7, GL_FLOAT_VEC4, SWIZZLE_NOOP }, - { NULL, 0, SWIZZLE_NOOP } + { NULL, 0, GL_NONE, SWIZZLE_NOOP } }; /** Predefined fragment shader inputs */ @@ -754,7 +754,7 @@ static const struct input_info fragInputs[] = { { "gl_FogFragCoord", FRAG_ATTRIB_FOGC, GL_FLOAT, SWIZZLE_XXXX }, { "gl_FrontFacing", FRAG_ATTRIB_FACE, GL_FLOAT, SWIZZLE_XXXX }, { "gl_PointCoord", FRAG_ATTRIB_PNTC, GL_FLOAT_VEC2, SWIZZLE_XYZW }, - { NULL, 0, SWIZZLE_NOOP } + { NULL, 0, GL_NONE, SWIZZLE_NOOP } }; -- cgit v1.2.3 From 00ddd4f9e9680132872f98f2d18b52dfc30c6f2f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Sep 2009 15:44:18 -0600 Subject: glsl: init var to silence warning --- src/mesa/shader/slang/slang_codegen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c index 349f432dec..703af9f874 100644 --- a/src/mesa/shader/slang/slang_codegen.c +++ b/src/mesa/shader/slang/slang_codegen.c @@ -2775,7 +2775,7 @@ _slang_gen_while(slang_assemble_ctx * A, slang_operation *oper) * body code (child[1]) */ slang_ir_node *loop, *breakIf, *body; - GLboolean isConst, constTrue; + GLboolean isConst, constTrue = GL_FALSE; if (!A->EmitContReturn) { /* We don't want to emit CONT instructions. If this while-loop has -- cgit v1.2.3 From a491e25b1fa8683f538ed0d67a6389f2cdf7e4bc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Sep 2009 15:44:37 -0600 Subject: mesa: added default case return to silence warning --- src/mesa/main/texenvprogram.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c index 2f3e47e69e..d7e77e759e 100644 --- a/src/mesa/main/texenvprogram.c +++ b/src/mesa/main/texenvprogram.c @@ -276,6 +276,7 @@ need_saturate( GLuint mode ) return GL_TRUE; default: assert(0); + return GL_FALSE; } } -- cgit v1.2.3 From a64d4516a0d6219dec0b5b0622215918469faecc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 24 Sep 2009 10:26:56 -0600 Subject: tgsi/sse: Pass the lodbias, not zero. More comments. This fixes the glean/glsl1 "texture2D(), with bias" test when using SSE. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 3cdf8b9f35..1e719940ec 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1431,11 +1431,11 @@ fetch_texel( struct tgsi_sampler **sampler, { float rgba[NUM_CHANNELS][QUAD_SIZE]; (*sampler)->get_samples(*sampler, - &store[0], - &store[4], - &store[8], - 0.0f, /*store[12], lodbias */ - rgba); + &store[0], /* s */ + &store[4], /* t */ + &store[8], /* r */ + store[12], /* lodbias */ + rgba); /* results */ memcpy( store, rgba, 16 * sizeof(float)); } -- cgit v1.2.3