diff options
Diffstat (limited to 'src/mesa/state_tracker')
23 files changed, 935 insertions, 1101 deletions
diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index 5d4d8eee02..77153889b6 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -39,6 +39,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" +#include "st_debug.h" #include "st_context.h" #include "st_atom.h" #include "st_atom_constbuf.h" @@ -75,7 +76,7 @@ void st_upload_constants( struct st_context *st, PIPE_BUFFER_USAGE_CONSTANT, paramBytes ); - if (0) { + if (ST_DEBUG & DEBUG_CONSTANTS) { debug_printf("%s(shader=%d, numParams=%d, stateFlags=0x%x)\n", __FUNCTION__, shader_type, params->NumParameters, params->StateFlags); diff --git a/src/mesa/state_tracker/st_atom_depth.c b/src/mesa/state_tracker/st_atom_depth.c index 0aa128f947..88b80a07fc 100644 --- a/src/mesa/state_tracker/st_atom_depth.c +++ b/src/mesa/state_tracker/st_atom_depth.c @@ -104,10 +104,6 @@ update_depth_stencil_alpha(struct st_context *st) dsa->depth.func = st_compare_func_to_pipe(ctx->Depth.Func); } - if (ctx->Query.CurrentOcclusionObject && - ctx->Query.CurrentOcclusionObject->Active) - dsa->depth.occlusion_count = 1; - if (ctx->Stencil.Enabled && ctx->DrawBuffer->Visual.stencilBits > 0) { dsa->stencil[0].enabled = 1; dsa->stencil[0].func = st_compare_func_to_pipe(ctx->Stencil.Function[0]); diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c index 5209a6a0c9..e18c0f6e0a 100644 --- a/src/mesa/state_tracker/st_atom_framebuffer.c +++ b/src/mesa/state_tracker/st_atom_framebuffer.c @@ -39,6 +39,7 @@ #include "pipe/p_context.h" #include "pipe/p_inlines.h" #include "cso_cache/cso_context.h" +#include "util/u_rect.h" @@ -162,10 +163,17 @@ update_framebuffer_state( struct st_context *st ) (void) st_get_framebuffer_surface(stfb, ST_SURFACE_FRONT_LEFT, &surf_front); (void) st_get_framebuffer_surface(stfb, ST_SURFACE_BACK_LEFT, &surf_back); - st->pipe->surface_copy(st->pipe, - surf_front, 0, 0, /* dest */ - surf_back, 0, 0, /* src */ - fb->Width, fb->Height); + if (st->pipe->surface_copy) { + st->pipe->surface_copy(st->pipe, + surf_front, 0, 0, /* dest */ + surf_back, 0, 0, /* src */ + fb->Width, fb->Height); + } else { + util_surface_copy(st->pipe, FALSE, + surf_front, 0, 0, + surf_back, 0, 0, + fb->Width, fb->Height); + } } /* we're assuming we'll really draw to the front buffer */ st->frontbuffer_status = FRONT_STATUS_DIRTY; diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index eff3666ca8..babfcc87b4 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -122,7 +122,8 @@ create_color_map_texture(GLcontext *ctx) const uint texSize = 256; /* simple, and usually perfect */ /* find an RGBA texture format */ - format = st_choose_format(pipe, GL_RGBA, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_SAMPLER); + format = st_choose_format(pipe->screen, GL_RGBA, + PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_SAMPLER); /* create texture for color map/table */ pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, format, 0, diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 95181578f6..a6b9765452 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -34,6 +34,7 @@ #include "main/image.h" #include "main/macros.h" +#include "st_debug.h" #include "st_context.h" #include "st_cb_accum.h" #include "st_cb_fbo.h" @@ -136,6 +137,9 @@ accum_accum(struct st_context *st, GLfloat value, GLubyte *data = acc_strb->data; GLfloat *buf; + if (ST_DEBUG & DEBUG_FALLBACK) + debug_printf("%s: fallback processing\n", __FUNCTION__); + color_trans = st_cond_flush_get_tex_transfer(st, color_strb->texture, 0, 0, 0, PIPE_TRANSFER_READ, xpos, ypos, @@ -181,6 +185,10 @@ accum_load(struct st_context *st, GLfloat value, GLubyte *data = acc_strb->data; GLfloat *buf; + + if (ST_DEBUG & DEBUG_FALLBACK) + debug_printf("%s: fallback processing\n", __FUNCTION__); + color_trans = st_cond_flush_get_tex_transfer(st, color_strb->texture, 0, 0, 0, PIPE_TRANSFER_READ, xpos, ypos, @@ -228,6 +236,9 @@ accum_return(GLcontext *ctx, GLfloat value, const GLubyte *data = acc_strb->data; GLfloat *buf; + if (ST_DEBUG & DEBUG_FALLBACK) + debug_printf("%s: fallback processing\n", __FUNCTION__); + buf = (GLfloat *) _mesa_malloc(width * height * 4 * sizeof(GLfloat)); if (!colormask[0] || !colormask[1] || !colormask[2] || !colormask[3]) @@ -241,7 +252,7 @@ accum_return(GLcontext *ctx, GLfloat value, xpos, ypos, width, height); - if (usage != PIPE_TRANSFER_WRITE) + if (usage & PIPE_TRANSFER_READ) pipe_get_tile_rgba(color_trans, 0, 0, width, height, buf); switch (acc_strb->format) { diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 902fb38d1a..a22fa68299 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -330,7 +330,18 @@ setup_bitmap_vertex_data(struct st_context *st, const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0); const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0); const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0); - const GLuint max_slots = 4096 / sizeof(st->bitmap.vertices); + + /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as + * no_flush) updates to buffers where we know there is no conflict + * with previous data. Currently using max_slots > 1 will cause + * synchronous rendering if the driver flushes its command buffers + * between one bitmap and the next. Our flush hook below isn't + * sufficient to catch this as the driver doesn't tell us when it + * flushes its own command buffers. Until this gets fixed, pay the + * price of allocating a new buffer for each bitmap cache-flush to + * avoid synchronous rendering. + */ + const GLuint max_slots = 1; /* 4096 / sizeof(st->bitmap.vertices); */ GLuint i; if (st->bitmap.vbuf_slot >= max_slots) { diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index c741940bcf..5626e25323 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -39,6 +39,7 @@ #include "shader/prog_print.h" #include "st_context.h" +#include "st_texture.h" #include "st_program.h" #include "st_cb_blit.h" #include "st_cb_fbo.h" @@ -110,17 +111,50 @@ st_BlitFramebuffer(GLcontext *ctx, } if (mask & GL_COLOR_BUFFER_BIT) { - struct st_renderbuffer *srcRb = - st_renderbuffer(readFB->_ColorReadBuffer); - struct st_renderbuffer *dstRb = - st_renderbuffer(drawFB->_ColorDrawBuffers[0]); - struct pipe_surface *srcSurf = srcRb->surface; - struct pipe_surface *dstSurf = dstRb->surface; - - util_blit_pixels(st->blit, - srcSurf, srcX0, srcY0, srcX1, srcY1, - dstSurf, dstX0, dstY0, dstX1, dstY1, - 0.0, pFilter); + struct gl_renderbuffer_attachment *srcAtt = + &readFB->Attachment[readFB->_ColorReadBufferIndex]; + + if(srcAtt->Type == GL_TEXTURE) { + struct pipe_screen *screen = ctx->st->pipe->screen; + const struct st_texture_object *srcObj = + st_texture_object(srcAtt->Texture); + struct st_renderbuffer *dstRb = + st_renderbuffer(drawFB->_ColorDrawBuffers[0]); + struct pipe_surface *srcSurf; + struct pipe_surface *dstSurf = dstRb->surface; + + if (!srcObj->pt) + return; + + srcSurf = screen->get_tex_surface(screen, + srcObj->pt, + srcAtt->CubeMapFace, + srcAtt->TextureLevel, + srcAtt->Zoffset, + PIPE_BUFFER_USAGE_GPU_READ); + if(!srcSurf) + return; + + util_blit_pixels(st->blit, + srcSurf, srcX0, srcY0, srcX1, srcY1, + dstSurf, dstX0, dstY0, dstX1, dstY1, + 0.0, pFilter); + + pipe_surface_reference(&srcSurf, NULL); + } + else { + struct st_renderbuffer *srcRb = + st_renderbuffer(readFB->_ColorReadBuffer); + struct st_renderbuffer *dstRb = + st_renderbuffer(drawFB->_ColorDrawBuffers[0]); + struct pipe_surface *srcSurf = srcRb->surface; + struct pipe_surface *dstSurf = dstRb->surface; + + util_blit_pixels(st->blit, + srcSurf, srcX0, srcY0, srcX1, srcY1, + dstSurf, dstX0, dstY0, dstX1, dstY1, + 0.0, pFilter); + } } if (mask & depthStencil) { diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 8a8c99f7e1..36510720a4 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -116,7 +116,18 @@ draw_quad(GLcontext *ctx, { struct st_context *st = ctx->st; struct pipe_context *pipe = st->pipe; - const GLuint max_slots = 1024 / sizeof(st->clear.vertices); + + /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as + * no_flush) updates to buffers where we know there is no conflict + * with previous data. Currently using max_slots > 1 will cause + * synchronous rendering if the driver flushes its command buffers + * between one bitmap and the next. Our flush hook below isn't + * sufficient to catch this as the driver doesn't tell us when it + * flushes its own command buffers. Until this gets fixed, pay the + * price of allocating a new buffer for each bitmap cache-flush to + * avoid synchronous rendering. + */ + const GLuint max_slots = 1; /* 1024 / sizeof(st->clear.vertices); */ GLuint i; if (st->clear.vbuf_slot >= max_slots) { diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index c655690603..be44577117 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -40,6 +40,7 @@ #include "shader/prog_parameter.h" #include "shader/prog_print.h" +#include "st_debug.h" #include "st_context.h" #include "st_atom.h" #include "st_atom_constbuf.h" @@ -61,6 +62,7 @@ #include "util/u_tile.h" #include "util/u_draw_quad.h" #include "util/u_math.h" +#include "util/u_rect.h" #include "shader/prog_instruction.h" #include "cso_cache/cso_context.h" @@ -98,7 +100,7 @@ is_passthrough_program(const struct gl_fragment_program *prog) static struct st_fragment_program * combined_drawpix_fragment_program(GLcontext *ctx) { - struct st_context *st = ctx->st; + struct st_context *st = st_context(ctx); struct st_fragment_program *stfp; if (st->pixel_xfer.program->serialNo == st->pixel_xfer.xfer_prog_sn @@ -445,8 +447,8 @@ draw_quad(GLcontext *ctx, GLfloat x0, GLfloat y0, GLfloat z, GLfloat x1, GLfloat y1, const GLfloat *color, GLboolean invertTex, GLfloat maxXcoord, GLfloat maxYcoord) { - struct st_context *st = ctx->st; - struct pipe_context *pipe = ctx->st->pipe; + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; GLfloat verts[4][3][4]; /* four verts, three attribs, XYZW */ /* setup vertex data */ @@ -540,9 +542,9 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, const GLfloat *color, GLboolean invertTex) { - struct st_context *st = ctx->st; - struct pipe_context *pipe = ctx->st->pipe; - struct cso_context *cso = ctx->st->cso_context; + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct cso_context *cso = st->cso_context; GLfloat x0, y0, x1, y1; GLsizei maxSize; @@ -652,7 +654,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels) { - struct st_context *st = ctx->st; + struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; struct pipe_screen *screen = pipe->screen; struct st_renderbuffer *strb; @@ -793,7 +795,7 @@ st_DrawPixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, { struct st_fragment_program *stfp; struct st_vertex_program *stvp; - struct st_context *st = ctx->st; + struct st_context *st = st_context(ctx); struct pipe_surface *ps; const GLfloat *color; @@ -811,21 +813,21 @@ st_DrawPixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, if (format == GL_DEPTH_COMPONENT) { ps = st->state.framebuffer.zsbuf; - stfp = make_fragment_shader_z(ctx->st); - stvp = st_make_passthrough_vertex_shader(ctx->st, GL_TRUE); + stfp = make_fragment_shader_z(st); + stvp = st_make_passthrough_vertex_shader(st, GL_TRUE); color = ctx->Current.RasterColor; } else { ps = st->state.framebuffer.cbufs[0]; stfp = combined_drawpix_fragment_program(ctx); - stvp = st_make_passthrough_vertex_shader(ctx->st, GL_FALSE); + stvp = st_make_passthrough_vertex_shader(st, GL_FALSE); color = NULL; } /* draw with textured quad */ { struct pipe_texture *pt - = make_texture(ctx->st, width, height, format, type, unpack, pixels); + = make_texture(st, width, height, format, type, unpack, pixels); if (pt) { draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2], width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY, @@ -942,7 +944,7 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, GLsizei width, GLsizei height, GLint dstx, GLint dsty, GLenum type) { - struct st_context *st = ctx->st; + struct st_context *st = st_context(ctx); struct pipe_context *pipe = st->pipe; struct pipe_screen *screen = pipe->screen; struct st_renderbuffer *rbRead; @@ -995,14 +997,14 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, rbRead = st_get_color_read_renderbuffer(ctx); color = NULL; stfp = combined_drawpix_fragment_program(ctx); - stvp = st_make_passthrough_vertex_shader(ctx->st, GL_FALSE); + stvp = st_make_passthrough_vertex_shader(st, GL_FALSE); } else { assert(type == GL_DEPTH); rbRead = st_renderbuffer(ctx->ReadBuffer->_DepthBuffer); color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0]; - stfp = make_fragment_shader_z(ctx->st); - stvp = st_make_passthrough_vertex_shader(ctx->st, GL_TRUE); + stfp = make_fragment_shader_z(st); + stvp = st_make_passthrough_vertex_shader(st, GL_TRUE); } srcFormat = rbRead->texture->format; @@ -1014,13 +1016,14 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, else { /* srcFormat can't be used as a texture format */ if (type == GL_DEPTH) { - texFormat = st_choose_format(pipe, GL_DEPTH_COMPONENT, PIPE_TEXTURE_2D, + texFormat = st_choose_format(screen, GL_DEPTH_COMPONENT, + PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_DEPTH_STENCIL); assert(texFormat != PIPE_FORMAT_NONE); /* XXX no depth texture formats??? */ } else { /* default color format */ - texFormat = st_choose_format(pipe, GL_RGBA, PIPE_TEXTURE_2D, + texFormat = st_choose_format(screen, GL_RGBA, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_SAMPLER); assert(texFormat != PIPE_FORMAT_NONE); } @@ -1059,7 +1062,7 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, assert(pth <= maxSize); } - pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, texFormat, 0, + pt = st_texture_create(st, PIPE_TEXTURE_2D, texFormat, 0, ptw, pth, 1, PIPE_TEXTURE_USAGE_SAMPLER); if (!pt) @@ -1073,11 +1076,19 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, PIPE_BUFFER_USAGE_GPU_READ); struct pipe_surface *psTex = screen->get_tex_surface(screen, pt, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE ); - pipe->surface_copy(pipe, - psTex, /* dest */ - 0, 0, /* destx/y */ - psRead, - srcx, srcy, width, height); + if (pipe->surface_copy) { + pipe->surface_copy(pipe, + psTex, /* dest */ + 0, 0, /* destx/y */ + psRead, + srcx, srcy, width, height); + } else { + util_surface_copy(pipe, FALSE, + psTex, + 0, 0, + psRead, + srcx, srcy, width, height); + } pipe_surface_reference(&psRead, NULL); pipe_surface_reference(&psTex, NULL); } @@ -1090,6 +1101,9 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, struct pipe_transfer *ptTex; enum pipe_transfer_usage transfer_usage; + if (ST_DEBUG & DEBUG_FALLBACK) + debug_printf("%s: fallback processing\n", __FUNCTION__); + if (type == GL_DEPTH && pf_is_depth_and_stencil(pt->format)) transfer_usage = PIPE_TRANSFER_READ_WRITE; else diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index e8399ded7b..73aa65955b 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -49,6 +49,7 @@ #include "st_public.h" #include "st_texture.h" +#include "util/u_rect.h" /** @@ -93,7 +94,7 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, if (strb->format != PIPE_FORMAT_NONE) format = strb->format; else - format = st_choose_renderbuffer_format(pipe, internalFormat); + format = st_choose_renderbuffer_format(pipe->screen, internalFormat); /* init renderbuffer fields */ strb->Base.Width = width; @@ -384,6 +385,7 @@ st_render_texture(GLcontext *ctx, rb->Width = texImage->Width2; rb->Height = texImage->Height2; + rb->_BaseFormat = texImage->_BaseFormat; /*printf("***** render to texture level %d: %d x %d\n", att->TextureLevel, rb->Width, rb->Height);*/ /*printf("***** pipe texture %d x %d\n", pt->width[0], pt->height[0]);*/ @@ -537,10 +539,17 @@ copy_back_to_front(struct st_context *st, (void) st_get_framebuffer_surface(stfb, backIndex, &surf_back); if (surf_front && surf_back) { - st->pipe->surface_copy(st->pipe, - surf_front, 0, 0, /* dest */ - surf_back, 0, 0, /* src */ - fb->Width, fb->Height); + if (st->pipe->surface_copy) { + st->pipe->surface_copy(st->pipe, + surf_front, 0, 0, /* dest */ + surf_back, 0, 0, /* src */ + fb->Width, fb->Height); + } else { + util_surface_copy(st->pipe, FALSE, + surf_front, 0, 0, + surf_back, 0, 0, + fb->Width, fb->Height); + } } } diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 4398ab2839..b2d5c39a3a 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -45,6 +45,7 @@ #include "st_context.h" #include "st_program.h" #include "st_atom_shader.h" +#include "st_mesa_to_tgsi.h" #include "st_cb_program.h" @@ -152,7 +153,7 @@ st_delete_program(GLcontext *ctx, struct gl_program *prog) } if (stvp->state.tokens) { - _mesa_free((void *) stvp->state.tokens); + st_free_tokens(stvp->state.tokens); stvp->state.tokens = NULL; } } @@ -167,7 +168,7 @@ st_delete_program(GLcontext *ctx, struct gl_program *prog) } if (stfp->state.tokens) { - _mesa_free((void *) stfp->state.tokens); + st_free_tokens(stfp->state.tokens); stfp->state.tokens = NULL; } @@ -214,7 +215,7 @@ static void st_program_string_notify( GLcontext *ctx, } if (stfp->state.tokens) { - _mesa_free((void *) stfp->state.tokens); + st_free_tokens(stfp->state.tokens); stfp->state.tokens = NULL; } @@ -242,7 +243,7 @@ static void st_program_string_notify( GLcontext *ctx, } if (stvp->state.tokens) { - _mesa_free((void *) stvp->state.tokens); + st_free_tokens(stvp->state.tokens); stvp->state.tokens = NULL; } diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 75424aa2e7..772bb3bb69 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -43,6 +43,7 @@ #include "pipe/p_inlines.h" #include "util/u_tile.h" +#include "st_debug.h" #include "st_context.h" #include "st_cb_bitmap.h" #include "st_cb_readpixels.h" @@ -416,6 +417,9 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, yStep = 1; } + if (ST_DEBUG & DEBUG_FALLBACK) + debug_printf("%s: fallback processing\n", __FUNCTION__); + /* * Copy pixels from pipe_transfer to user memory */ diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 50675b5896..a1953342b4 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -43,6 +43,7 @@ #include "main/texobj.h" #include "main/texstore.h" +#include "state_tracker/st_debug.h" #include "state_tracker/st_context.h" #include "state_tracker/st_cb_fbo.h" #include "state_tracker/st_cb_texture.h" @@ -328,10 +329,13 @@ guess_and_alloc_texture(struct st_context *st, stObj->base.MinFilter == GL_LINEAR || stImage->base._BaseFormat == GL_DEPTH_COMPONENT || stImage->base._BaseFormat == GL_DEPTH_STENCIL_EXT) && + !stObj->base.GenerateMipmap && stImage->level == firstLevel) { + /* only alloc space for a single mipmap level */ lastLevel = firstLevel; } else { + /* alloc space for a full mipmap */ GLuint l2width = util_logbase2(width); GLuint l2height = util_logbase2(height); GLuint l2depth = util_logbase2(depth); @@ -768,10 +772,6 @@ done: st_texture_image_unmap(ctx->st, stImage); texImage->Data = NULL; } - - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - ctx->Driver.GenerateMipmap(ctx, target, texObj); - } } @@ -904,6 +904,9 @@ decompress_with_blit(GLcontext * ctx, GLenum target, GLint level, GLvoid *dest = _mesa_image_address2d(&ctx->Pack, pixels, width, height, format, type, row, 0); + if (ST_DEBUG & DEBUG_FALLBACK) + debug_printf("%s: fallback format translation\n", __FUNCTION__); + /* get float[4] rgba row from surface */ pipe_get_tile_rgba(tex_xfer, 0, row, width, 1, rgba); @@ -1142,10 +1145,6 @@ done: st_texture_image_unmap(ctx->st, stImage); texImage->Data = NULL; } - - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - ctx->Driver.GenerateMipmap(ctx, target, texObj); - } } @@ -1299,6 +1298,9 @@ fallback_copy_texsubimage(GLcontext *ctx, GLenum target, GLint level, struct pipe_transfer *src_trans; GLvoid *texDest; enum pipe_transfer_usage transfer_usage; + + if (ST_DEBUG & DEBUG_FALLBACK) + debug_printf("%s: fallback processing\n", __FUNCTION__); assert(width <= MAX_WIDTH); @@ -1424,6 +1426,12 @@ compatible_src_dst_formats(const struct gl_renderbuffer *src, return TGSI_WRITEMASK_XYZ; /* A ==> 1.0 */ } else { + if (ST_DEBUG & DEBUG_FALLBACK) + debug_printf("%s failed for src %s, dst %s\n", + __FUNCTION__, + _mesa_lookup_enum_by_nr(srcFormat), + _mesa_lookup_enum_by_nr(dstLogicalFormat)); + /* Otherwise fail. */ return 0; @@ -1538,7 +1546,8 @@ st_copy_texsubimage(GLcontext *ctx, if (ctx->_ImageTransferState == 0x0) { - if (matching_base_formats && + if (pipe->surface_copy && + matching_base_formats && src_format == dest_format && !do_flip) { @@ -1609,10 +1618,6 @@ st_copy_texsubimage(GLcontext *ctx, destX, destY, destZ, srcX, srcY, width, height); } - - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - ctx->Driver.GenerateMipmap(ctx, target, texObj); - } } diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 8514b6b375..f0eddafd33 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -36,6 +36,7 @@ #include "shader/shader_api.h" #include "glapi/glapi.h" #include "st_public.h" +#include "st_debug.h" #include "st_context.h" #include "st_cb_accum.h" #include "st_cb_bitmap.h" @@ -113,6 +114,9 @@ st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe ) st->ctx = ctx; st->pipe = pipe; + /* XXX: this is one-off, per-screen init: */ + st_debug_init(); + /* state tracker needs the VBO module */ _vbo_CreateContext(ctx); @@ -332,7 +336,7 @@ void st_init_driver_functions(struct dd_function_table *functions) st_init_feedback_functions(functions); #endif st_init_program_functions(functions); -#if FEATURE_ARB_occlusion_query +#if FEATURE_queryobj st_init_query_functions(functions); #endif st_init_readpixels_functions(functions); diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index c7d26ce33c..3009cde9d5 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -41,6 +41,32 @@ +#ifdef DEBUG +int ST_DEBUG = 0; + +static const struct debug_named_value st_debug_flags[] = { + { "mesa", DEBUG_MESA }, + { "tgsi", DEBUG_TGSI }, + { "pipe", DEBUG_PIPE }, + { "tex", DEBUG_TEX }, + { "fallback", DEBUG_FALLBACK }, + { "screen", DEBUG_SCREEN }, + { "query", DEBUG_QUERY }, + {NULL, 0} +}; +#endif + + +void +st_debug_init(void) +{ +#ifdef DEBUG + ST_DEBUG = debug_get_flags_option("ST_DEBUG", st_debug_flags, 0 ); +#endif +} + + + /** * Print current state. May be called from inside gdb to see currently * bound vertex/fragment shaders and associated constants. @@ -68,3 +94,5 @@ st_print_current(void) if (st->fp->Base.Base.Parameters) _mesa_print_parameter_list(st->fp->Base.Base.Parameters); } + + diff --git a/src/mesa/state_tracker/st_debug.h b/src/mesa/state_tracker/st_debug.h index 49d752e1b2..4a060d7759 100644 --- a/src/mesa/state_tracker/st_debug.h +++ b/src/mesa/state_tracker/st_debug.h @@ -29,8 +29,44 @@ #ifndef ST_DEBUG_H #define ST_DEBUG_H +#include "pipe/p_compiler.h" +#include "util/u_debug.h" + extern void st_print_current(void); +#define DEBUG_MESA 0x1 +#define DEBUG_TGSI 0x2 +#define DEBUG_CONSTANTS 0x4 +#define DEBUG_PIPE 0x8 +#define DEBUG_TEX 0x10 +#define DEBUG_FALLBACK 0x20 +#define DEBUG_QUERY 0x40 +#define DEBUG_SCREEN 0x80 + +#ifdef DEBUG +extern int ST_DEBUG; +#define DBSTR(x) x +#else +#define ST_DEBUG 0 +#define DBSTR(x) "" +#endif + +void st_debug_init( void ); + +static INLINE void +ST_DBG( unsigned flag, const char *fmt, ... ) +{ + if (ST_DEBUG & flag) + { + va_list args; + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); + } +} + + #endif /* ST_DEBUG_H */ diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index dcb90a3107..3e0db37414 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -392,10 +392,9 @@ default_depth_format(struct pipe_screen *screen, * or PIPE_TEXTURE_USAGE_SAMPLER */ enum pipe_format -st_choose_format(struct pipe_context *pipe, GLenum internalFormat, +st_choose_format(struct pipe_screen *screen, GLenum internalFormat, enum pipe_texture_target target, unsigned tex_usage) { - struct pipe_screen *screen = pipe->screen; unsigned geom_flags = 0; switch (internalFormat) { @@ -618,14 +617,15 @@ is_depth_or_stencil_format(GLenum internalFormat) * Called by FBO code to choose a PIPE_FORMAT_ for drawing surfaces. */ enum pipe_format -st_choose_renderbuffer_format(struct pipe_context *pipe, GLenum internalFormat) +st_choose_renderbuffer_format(struct pipe_screen *screen, + GLenum internalFormat) { uint usage; if (is_depth_or_stencil_format(internalFormat)) usage = PIPE_TEXTURE_USAGE_DEPTH_STENCIL; else usage = PIPE_TEXTURE_USAGE_RENDER_TARGET; - return st_choose_format(pipe, internalFormat, PIPE_TEXTURE_2D, usage); + return st_choose_format(screen, internalFormat, PIPE_TEXTURE_2D, usage); } @@ -713,8 +713,8 @@ st_ChooseTextureFormat(GLcontext *ctx, GLint internalFormat, (void) format; (void) type; - pFormat = st_choose_format(ctx->st->pipe, internalFormat, PIPE_TEXTURE_2D, - PIPE_TEXTURE_USAGE_SAMPLER); + pFormat = st_choose_format(ctx->st->pipe->screen, internalFormat, + PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_SAMPLER); if (pFormat == PIPE_FORMAT_NONE) return NULL; diff --git a/src/mesa/state_tracker/st_format.h b/src/mesa/state_tracker/st_format.h index 9d9e02fe9b..e4a788c89b 100644 --- a/src/mesa/state_tracker/st_format.h +++ b/src/mesa/state_tracker/st_format.h @@ -64,11 +64,12 @@ st_mesa_format_to_pipe_format(GLuint mesaFormat); extern enum pipe_format -st_choose_format(struct pipe_context *pipe, GLenum internalFormat, +st_choose_format(struct pipe_screen *screen, GLenum internalFormat, enum pipe_texture_target target, unsigned tex_usage); extern enum pipe_format -st_choose_renderbuffer_format(struct pipe_context *pipe, GLenum internalFormat); +st_choose_renderbuffer_format(struct pipe_screen *screen, + GLenum internalFormat); extern const struct gl_texture_format * diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index f75b2348b8..16ca2771b0 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -42,6 +42,7 @@ #include "cso_cache/cso_cache.h" #include "cso_cache/cso_context.h" +#include "st_debug.h" #include "st_context.h" #include "st_draw.h" #include "st_gen_mipmap.h" @@ -113,6 +114,9 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, uint dstLevel; GLenum datatype; GLuint comps; + + if (ST_DEBUG & DEBUG_FALLBACK) + debug_printf("%s: fallback processing\n", __FUNCTION__); assert(target != GL_TEXTURE_3D); /* not done yet */ diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 2ab12d3cf3..3d6c215819 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -27,836 +27,654 @@ /* * \author - * Michal Krol + * Michal Krol, + * Keith Whitwell */ #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_build.h" -#include "tgsi/tgsi_util.h" -#include "tgsi/tgsi_dump.h" -#include "tgsi/tgsi_sanity.h" +#include "pipe/p_state.h" +#include "tgsi/tgsi_ureg.h" #include "st_mesa_to_tgsi.h" #include "shader/prog_instruction.h" #include "shader/prog_parameter.h" #include "shader/prog_print.h" #include "util/u_debug.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +struct label { + unsigned branch_target; + unsigned token; +}; + +struct st_translate { + struct ureg_program *ureg; + + struct ureg_dst temps[MAX_PROGRAM_TEMPS]; + struct ureg_src *constants; + struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; + struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; + struct ureg_dst address[1]; + struct ureg_src samplers[PIPE_MAX_SAMPLERS]; + + const GLuint *inputMapping; + const GLuint *outputMapping; + + /* For every instruction that contains a label (eg CALL), keep + * details so that we can go back afterwards and emit the correct + * tgsi instruction number for each label. + */ + struct label *labels; + unsigned labels_size; + unsigned labels_count; + + /* Keep a record of the tgsi instruction number that each mesa + * instruction starts at, will be used to fix up labels after + * translation. + */ + unsigned *insn; + unsigned insn_size; + unsigned insn_count; + + unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ + + boolean error; +}; + + +static unsigned *get_label( struct st_translate *t, + unsigned branch_target ) +{ + unsigned i; + + if (t->labels_count + 1 >= t->labels_size) { + unsigned old_size = t->labels_size; + t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); + t->labels = REALLOC( t->labels, + old_size * sizeof t->labels[0], + t->labels_size * sizeof t->labels[0] ); + if (t->labels == NULL) { + static unsigned dummy; + t->error = TRUE; + return &dummy; + } + } + + i = t->labels_count++; + t->labels[i].branch_target = branch_target; + return &t->labels[i].token; +} + + +static void set_insn_start( struct st_translate *t, + unsigned start ) +{ + if (t->insn_count + 1 >= t->insn_size) { + unsigned old_size = t->insn_size; + t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); + t->insn = REALLOC( t->insn, + old_size * sizeof t->insn[0], + t->insn_size * sizeof t->insn[0] ); + if (t->insn == NULL) { + t->error = TRUE; + return; + } + } + + t->insn[t->insn_count++] = start; +} + /* * Map mesa register file to TGSI register file. */ -static GLuint -map_register_file( - gl_register_file file, - GLuint index, - const GLuint immediateMapping[], - GLboolean indirectAccess ) +static struct ureg_dst +dst_register( struct st_translate *t, + gl_register_file file, + GLuint index ) { switch( file ) { case PROGRAM_UNDEFINED: - return TGSI_FILE_NULL; + return ureg_dst_undef(); + case PROGRAM_TEMPORARY: - return TGSI_FILE_TEMPORARY; - /*case PROGRAM_LOCAL_PARAM:*/ - /*case PROGRAM_ENV_PARAM:*/ - - /* Because of the longstanding problem with mesa arb shaders - * where constants, immediates and state variables are all - * bundled together as PROGRAM_STATE_VAR, we can't tell from the - * mesa register file whether this is a CONSTANT or an - * IMMEDIATE, hence we need all the other information. - */ - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - case PROGRAM_UNIFORM: - if (!indirectAccess && immediateMapping && immediateMapping[index] != ~0) - return TGSI_FILE_IMMEDIATE; - else - return TGSI_FILE_CONSTANT; - case PROGRAM_CONSTANT: - if (indirectAccess) - return TGSI_FILE_CONSTANT; - assert(immediateMapping[index] != ~0); - return TGSI_FILE_IMMEDIATE; - case PROGRAM_INPUT: - return TGSI_FILE_INPUT; + if (ureg_dst_is_undef(t->temps[index])) + t->temps[index] = ureg_DECL_temporary( t->ureg ); + + return t->temps[index]; + case PROGRAM_OUTPUT: - return TGSI_FILE_OUTPUT; + return t->outputs[t->outputMapping[index]]; + case PROGRAM_ADDRESS: - return TGSI_FILE_ADDRESS; + return t->address[index]; + default: - assert( 0 ); - return TGSI_FILE_NULL; + debug_assert( 0 ); + return ureg_dst_undef(); } } -/** - * Map mesa register file index to TGSI index. - * Take special care when processing input and output indices. - * \param file one of TGSI_FILE_x - * \param index the mesa register file index - * \param inputMapping maps Mesa input indexes to TGSI input indexes - * \param outputMapping maps Mesa output indexes to TGSI output indexes - */ -static GLuint -map_register_file_index( - GLuint procType, - GLuint file, - GLuint index, - GLuint *swizzle, - const GLuint inputMapping[], - const GLuint outputMapping[], - const GLuint immediateMapping[], - GLboolean indirectAccess ) + +static struct ureg_src +src_register( struct st_translate *t, + gl_register_file file, + GLuint index ) { switch( file ) { - case TGSI_FILE_INPUT: - /* inputs are mapped according to the user-defined map */ - return inputMapping[index]; + case PROGRAM_UNDEFINED: + return ureg_src_undef(); + + case PROGRAM_TEMPORARY: + if (ureg_dst_is_undef(t->temps[index])) + t->temps[index] = ureg_DECL_temporary( t->ureg ); + return ureg_src(t->temps[index]); + + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_UNIFORM: + case PROGRAM_CONSTANT: /* ie, immediate */ + return t->constants[index]; + + case PROGRAM_INPUT: + return t->inputs[t->inputMapping[index]]; - case TGSI_FILE_OUTPUT: - return outputMapping[index]; + case PROGRAM_OUTPUT: + return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ - case TGSI_FILE_IMMEDIATE: - if (indirectAccess) - return index; - assert(immediateMapping[index] != ~0); - return immediateMapping[index]; + case PROGRAM_ADDRESS: + return ureg_src(t->address[index]); default: - return index; + debug_assert( 0 ); + return ureg_src_undef(); } } -/* + +/** * Map mesa texture target to TGSI texture target. */ -static GLuint -map_texture_target( - GLuint textarget, - GLboolean shadow ) +static unsigned +translate_texture_target( GLuint textarget, + GLboolean shadow ) { - switch( textarget ) { - case TEXTURE_1D_INDEX: - if (shadow) - return TGSI_TEXTURE_SHADOW1D; - else - return TGSI_TEXTURE_1D; - case TEXTURE_2D_INDEX: - if (shadow) - return TGSI_TEXTURE_SHADOW2D; - else - return TGSI_TEXTURE_2D; - case TEXTURE_3D_INDEX: - return TGSI_TEXTURE_3D; - case TEXTURE_CUBE_INDEX: - return TGSI_TEXTURE_CUBE; - case TEXTURE_RECT_INDEX: - if (shadow) - return TGSI_TEXTURE_SHADOWRECT; - else - return TGSI_TEXTURE_RECT; - default: - assert( 0 ); + if (shadow) { + switch( textarget ) { + case TEXTURE_1D_INDEX: return TGSI_TEXTURE_SHADOW1D; + case TEXTURE_2D_INDEX: return TGSI_TEXTURE_SHADOW2D; + case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT; + default: break; + } } - return TGSI_TEXTURE_1D; -} - -static GLuint -convert_sat( - GLuint sat ) -{ - switch( sat ) { - case SATURATE_OFF: - return TGSI_SAT_NONE; - case SATURATE_ZERO_ONE: - return TGSI_SAT_ZERO_ONE; - case SATURATE_PLUS_MINUS_ONE: - return TGSI_SAT_MINUS_PLUS_ONE; + switch( textarget ) { + case TEXTURE_1D_INDEX: return TGSI_TEXTURE_1D; + case TEXTURE_2D_INDEX: return TGSI_TEXTURE_2D; + case TEXTURE_3D_INDEX: return TGSI_TEXTURE_3D; + case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE; + case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT; default: - assert( 0 ); - return TGSI_SAT_NONE; + debug_assert( 0 ); + return TGSI_TEXTURE_1D; } } -static GLuint -convert_writemask( - GLuint writemask ) + +static struct ureg_dst +translate_dst( struct st_translate *t, + const struct prog_dst_register *DstReg, + boolean saturate ) { - assert( WRITEMASK_X == TGSI_WRITEMASK_X ); - assert( WRITEMASK_Y == TGSI_WRITEMASK_Y ); - assert( WRITEMASK_Z == TGSI_WRITEMASK_Z ); - assert( WRITEMASK_W == TGSI_WRITEMASK_W ); - assert( (writemask & ~TGSI_WRITEMASK_XYZW) == 0 ); + struct ureg_dst dst = dst_register( t, + DstReg->File, + DstReg->Index ); + + dst = ureg_writemask( dst, + DstReg->WriteMask ); + + if (saturate) + dst = ureg_saturate( dst ); + + if (DstReg->RelAddr) + dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); - return writemask; + return dst; } -static struct tgsi_full_immediate -make_immediate(const float *value, uint size) + +static struct ureg_src +translate_src( struct st_translate *t, + const struct prog_src_register *SrcReg ) { - struct tgsi_full_immediate imm; - unsigned i; + struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); - imm = tgsi_default_full_immediate(); - imm.Immediate.NrTokens += size; - imm.Immediate.DataType = TGSI_IMM_FLOAT32; + src = ureg_swizzle( src, + GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3, + GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3, + GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3, + GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3); - for (i = 0; i < size; i++) - imm.u[i].Float = value[i]; + if (SrcReg->Negate == NEGATE_XYZW) + src = ureg_negate(src); - return imm; + if (SrcReg->Abs) + src = ureg_abs(src); + + if (SrcReg->RelAddr) + src = ureg_src_indirect( src, ureg_src(t->address[0])); + + return src; } -static void -compile_instruction( - const struct prog_instruction *inst, - struct tgsi_full_instruction *fullinst, - const GLuint inputMapping[], - const GLuint outputMapping[], - const GLuint immediateMapping[], - GLboolean indirectAccess, - GLuint preamble_size, - GLuint procType, - GLboolean *insideSubroutine, - GLint wposTemp) + +static struct ureg_src swizzle_4v( struct ureg_src src, + const unsigned *swz ) { - GLuint i; - struct tgsi_full_dst_register *fulldst; - struct tgsi_full_src_register *fullsrc; - - *fullinst = tgsi_default_full_instruction(); - - fullinst->Instruction.Saturate = convert_sat( inst->SaturateMode ); - fullinst->Instruction.NumDstRegs = _mesa_num_inst_dst_regs( inst->Opcode ); - fullinst->Instruction.NumSrcRegs = _mesa_num_inst_src_regs( inst->Opcode ); - - fulldst = &fullinst->FullDstRegisters[0]; - fulldst->DstRegister.File = map_register_file( inst->DstReg.File, 0, NULL, GL_FALSE ); - fulldst->DstRegister.Index = map_register_file_index( - procType, - fulldst->DstRegister.File, - inst->DstReg.Index, - NULL, - inputMapping, - outputMapping, - NULL, - GL_FALSE ); - fulldst->DstRegister.WriteMask = convert_writemask( inst->DstReg.WriteMask ); - if (inst->DstReg.RelAddr) { - fulldst->DstRegister.Indirect = 1; - fulldst->DstRegisterInd.File = TGSI_FILE_ADDRESS; - fulldst->DstRegisterInd.Index = 0; + return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] ); +} + + +/** + * Translate SWZ instructions into a single MAD. EG: + * + * SWZ dst, src.x-y10 + * + * becomes: + * + * MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0} + */ +static void emit_swz( struct st_translate *t, + struct ureg_dst dst, + const struct prog_src_register *SrcReg ) +{ + struct ureg_program *ureg = t->ureg; + struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); + + unsigned negate_mask = SrcReg->Negate; + + unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 | + (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 | + (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 | + (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3); + + unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 | + (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 | + (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 | + (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3); + + unsigned negative_one_mask = one_mask & negate_mask; + unsigned positive_one_mask = one_mask & ~negate_mask; + + struct ureg_src imm; + unsigned i; + unsigned mul_swizzle[4] = {0,0,0,0}; + unsigned add_swizzle[4] = {0,0,0,0}; + unsigned src_swizzle[4] = {0,0,0,0}; + boolean need_add = FALSE; + boolean need_mul = FALSE; + + if (dst.WriteMask == 0) + return; + + /* Is this just a MOV? + */ + if (zero_mask == 0 && + one_mask == 0 && + (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW)) + { + ureg_MOV( ureg, dst, translate_src( t, SrcReg )); + return; } - for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { - GLuint j; - GLuint swizzle = inst->SrcReg[i].Swizzle; +#define IMM_ZERO 0 +#define IMM_ONE 1 +#define IMM_NEG_ONE 2 - fullsrc = &fullinst->FullSrcRegisters[i]; + imm = ureg_imm3f( ureg, 0, 1, -1 ); - if (procType == TGSI_PROCESSOR_FRAGMENT && - inst->SrcReg[i].File == PROGRAM_INPUT && - inst->SrcReg[i].Index == FRAG_ATTRIB_WPOS) { - /* special case of INPUT[WPOS] */ - fullsrc->SrcRegister.File = TGSI_FILE_TEMPORARY; - fullsrc->SrcRegister.Index = wposTemp; - } - else { - /* any other src register */ - fullsrc->SrcRegister.File = map_register_file( - inst->SrcReg[i].File, - inst->SrcReg[i].Index, - immediateMapping, - indirectAccess ); - fullsrc->SrcRegister.Index = map_register_file_index( - procType, - fullsrc->SrcRegister.File, - inst->SrcReg[i].Index, - &swizzle, - inputMapping, - outputMapping, - immediateMapping, - indirectAccess ); - } + for (i = 0; i < 4; i++) { + unsigned bit = 1 << i; - /* swizzle (ext swizzle also depends on negation) */ - { - GLuint swz[4]; - GLboolean extended = (inst->SrcReg[i].Negate != NEGATE_NONE && - inst->SrcReg[i].Negate != NEGATE_XYZW); - for( j = 0; j < 4; j++ ) { - swz[j] = GET_SWZ( swizzle, j ); - if (swz[j] > SWIZZLE_W) - extended = GL_TRUE; + if (dst.WriteMask & bit) { + if (positive_one_mask & bit) { + mul_swizzle[i] = IMM_ZERO; + add_swizzle[i] = IMM_ONE; + need_add = TRUE; } - if (extended) { - for (j = 0; j < 4; j++) { - tgsi_util_set_src_register_extswizzle(&fullsrc->SrcRegisterExtSwz, - swz[j], j); - } + else if (negative_one_mask & bit) { + mul_swizzle[i] = IMM_ZERO; + add_swizzle[i] = IMM_NEG_ONE; + need_add = TRUE; + } + else if (zero_mask & bit) { + mul_swizzle[i] = IMM_ZERO; + add_swizzle[i] = IMM_ZERO; + need_add = TRUE; } else { - for (j = 0; j < 4; j++) { - tgsi_util_set_src_register_swizzle(&fullsrc->SrcRegister, - swz[j], j); + add_swizzle[i] = IMM_ZERO; + src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i); + need_mul = TRUE; + if (negate_mask & bit) { + mul_swizzle[i] = IMM_NEG_ONE; + } + else { + mul_swizzle[i] = IMM_ONE; } } } + } - if( inst->SrcReg[i].Negate == NEGATE_XYZW ) { - fullsrc->SrcRegister.Negate = 1; - } - else if( inst->SrcReg[i].Negate != NEGATE_NONE ) { - if( inst->SrcReg[i].Negate & NEGATE_X ) { - fullsrc->SrcRegisterExtSwz.NegateX = 1; - } - if( inst->SrcReg[i].Negate & NEGATE_Y ) { - fullsrc->SrcRegisterExtSwz.NegateY = 1; - } - if( inst->SrcReg[i].Negate & NEGATE_Z ) { - fullsrc->SrcRegisterExtSwz.NegateZ = 1; - } - if( inst->SrcReg[i].Negate & NEGATE_W ) { - fullsrc->SrcRegisterExtSwz.NegateW = 1; - } - } + if (need_mul && need_add) { + ureg_MAD( ureg, + dst, + swizzle_4v( src, src_swizzle ), + swizzle_4v( imm, mul_swizzle ), + swizzle_4v( imm, add_swizzle ) ); + } + else if (need_mul) { + ureg_MUL( ureg, + dst, + swizzle_4v( src, src_swizzle ), + swizzle_4v( imm, mul_swizzle ) ); + } + else if (need_add) { + ureg_MOV( ureg, + dst, + swizzle_4v( imm, add_swizzle ) ); + } + else { + debug_assert(0); + } - if( inst->SrcReg[i].Abs ) { - fullsrc->SrcRegisterExtMod.Absolute = 1; - } +#undef IMM_ZERO +#undef IMM_ONE +#undef IMM_NEG_ONE +} - if( inst->SrcReg[i].RelAddr ) { - fullsrc->SrcRegister.Indirect = 1; - fullsrc->SrcRegisterInd.File = TGSI_FILE_ADDRESS; - fullsrc->SrcRegisterInd.Index = 0; - } - } - switch( inst->Opcode ) { +static unsigned +translate_opcode( unsigned op ) +{ + switch( op ) { case OPCODE_ARL: - fullinst->Instruction.Opcode = TGSI_OPCODE_ARL; - break; + return TGSI_OPCODE_ARL; case OPCODE_ABS: - fullinst->Instruction.Opcode = TGSI_OPCODE_ABS; - break; + return TGSI_OPCODE_ABS; case OPCODE_ADD: - fullinst->Instruction.Opcode = TGSI_OPCODE_ADD; - break; + return TGSI_OPCODE_ADD; case OPCODE_BGNLOOP: - fullinst->Instruction.Opcode = TGSI_OPCODE_BGNLOOP; - fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size; - break; + return TGSI_OPCODE_BGNLOOP; case OPCODE_BGNSUB: - fullinst->Instruction.Opcode = TGSI_OPCODE_BGNSUB; - *insideSubroutine = GL_TRUE; - break; + return TGSI_OPCODE_BGNSUB; case OPCODE_BRA: - fullinst->Instruction.Opcode = TGSI_OPCODE_BRA; - break; + return TGSI_OPCODE_BRA; case OPCODE_BRK: - fullinst->Instruction.Opcode = TGSI_OPCODE_BRK; - break; + return TGSI_OPCODE_BRK; case OPCODE_CAL: - fullinst->Instruction.Opcode = TGSI_OPCODE_CAL; - fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size; - break; + return TGSI_OPCODE_CAL; case OPCODE_CMP: - fullinst->Instruction.Opcode = TGSI_OPCODE_CMP; - break; + return TGSI_OPCODE_CMP; case OPCODE_CONT: - fullinst->Instruction.Opcode = TGSI_OPCODE_CONT; - break; + return TGSI_OPCODE_CONT; case OPCODE_COS: - fullinst->Instruction.Opcode = TGSI_OPCODE_COS; - break; + return TGSI_OPCODE_COS; case OPCODE_DDX: - fullinst->Instruction.Opcode = TGSI_OPCODE_DDX; - break; + return TGSI_OPCODE_DDX; case OPCODE_DDY: - fullinst->Instruction.Opcode = TGSI_OPCODE_DDY; - break; + return TGSI_OPCODE_DDY; case OPCODE_DP2: - fullinst->Instruction.Opcode = TGSI_OPCODE_DP2; - break; + return TGSI_OPCODE_DP2; case OPCODE_DP2A: - fullinst->Instruction.Opcode = TGSI_OPCODE_DP2A; - break; + return TGSI_OPCODE_DP2A; case OPCODE_DP3: - fullinst->Instruction.Opcode = TGSI_OPCODE_DP3; - break; + return TGSI_OPCODE_DP3; case OPCODE_DP4: - fullinst->Instruction.Opcode = TGSI_OPCODE_DP4; - break; + return TGSI_OPCODE_DP4; case OPCODE_DPH: - fullinst->Instruction.Opcode = TGSI_OPCODE_DPH; - break; + return TGSI_OPCODE_DPH; case OPCODE_DST: - fullinst->Instruction.Opcode = TGSI_OPCODE_DST; - break; + return TGSI_OPCODE_DST; case OPCODE_ELSE: - fullinst->Instruction.Opcode = TGSI_OPCODE_ELSE; - fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size; - break; + return TGSI_OPCODE_ELSE; case OPCODE_ENDIF: - fullinst->Instruction.Opcode = TGSI_OPCODE_ENDIF; - break; + return TGSI_OPCODE_ENDIF; case OPCODE_ENDLOOP: - fullinst->Instruction.Opcode = TGSI_OPCODE_ENDLOOP; - fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size; - break; + return TGSI_OPCODE_ENDLOOP; case OPCODE_ENDSUB: - fullinst->Instruction.Opcode = TGSI_OPCODE_ENDSUB; - *insideSubroutine = GL_FALSE; - break; + return TGSI_OPCODE_ENDSUB; case OPCODE_EX2: - fullinst->Instruction.Opcode = TGSI_OPCODE_EX2; - break; + return TGSI_OPCODE_EX2; case OPCODE_EXP: - fullinst->Instruction.Opcode = TGSI_OPCODE_EXP; - break; + return TGSI_OPCODE_EXP; case OPCODE_FLR: - fullinst->Instruction.Opcode = TGSI_OPCODE_FLR; - break; + return TGSI_OPCODE_FLR; case OPCODE_FRC: - fullinst->Instruction.Opcode = TGSI_OPCODE_FRC; - break; + return TGSI_OPCODE_FRC; case OPCODE_IF: - fullinst->Instruction.Opcode = TGSI_OPCODE_IF; - fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size; - break; + return TGSI_OPCODE_IF; case OPCODE_TRUNC: - fullinst->Instruction.Opcode = TGSI_OPCODE_TRUNC; - break; + return TGSI_OPCODE_TRUNC; case OPCODE_KIL: - /* conditional */ - fullinst->Instruction.Opcode = TGSI_OPCODE_KIL; - break; + return TGSI_OPCODE_KIL; case OPCODE_KIL_NV: - /* predicated */ - assert(inst->DstReg.CondMask == COND_TR); - fullinst->Instruction.Opcode = TGSI_OPCODE_KILP; - break; + return TGSI_OPCODE_KILP; case OPCODE_LG2: - fullinst->Instruction.Opcode = TGSI_OPCODE_LG2; - break; + return TGSI_OPCODE_LG2; case OPCODE_LOG: - fullinst->Instruction.Opcode = TGSI_OPCODE_LOG; - break; + return TGSI_OPCODE_LOG; case OPCODE_LIT: - fullinst->Instruction.Opcode = TGSI_OPCODE_LIT; - break; + return TGSI_OPCODE_LIT; case OPCODE_LRP: - fullinst->Instruction.Opcode = TGSI_OPCODE_LRP; - break; + return TGSI_OPCODE_LRP; case OPCODE_MAD: - fullinst->Instruction.Opcode = TGSI_OPCODE_MAD; - break; + return TGSI_OPCODE_MAD; case OPCODE_MAX: - fullinst->Instruction.Opcode = TGSI_OPCODE_MAX; - break; + return TGSI_OPCODE_MAX; case OPCODE_MIN: - fullinst->Instruction.Opcode = TGSI_OPCODE_MIN; - break; + return TGSI_OPCODE_MIN; case OPCODE_MOV: - fullinst->Instruction.Opcode = TGSI_OPCODE_MOV; - break; + return TGSI_OPCODE_MOV; case OPCODE_MUL: - fullinst->Instruction.Opcode = TGSI_OPCODE_MUL; - break; - case OPCODE_NOISE1: - fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE1; - break; - case OPCODE_NOISE2: - fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE2; - break; - case OPCODE_NOISE3: - fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE3; - break; - case OPCODE_NOISE4: - fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE4; - break; + return TGSI_OPCODE_MUL; case OPCODE_NOP: - fullinst->Instruction.Opcode = TGSI_OPCODE_NOP; - break; + return TGSI_OPCODE_NOP; case OPCODE_NRM3: - fullinst->Instruction.Opcode = TGSI_OPCODE_NRM; - break; + return TGSI_OPCODE_NRM; case OPCODE_NRM4: - fullinst->Instruction.Opcode = TGSI_OPCODE_NRM4; - break; + return TGSI_OPCODE_NRM4; case OPCODE_POW: - fullinst->Instruction.Opcode = TGSI_OPCODE_POW; - break; + return TGSI_OPCODE_POW; case OPCODE_RCP: - fullinst->Instruction.Opcode = TGSI_OPCODE_RCP; - break; + return TGSI_OPCODE_RCP; case OPCODE_RET: - /* If RET is used inside main (not a real subroutine) we may want - * to execute END instead of RET. TBD... - */ - if (1 /* *insideSubroutine */) { - fullinst->Instruction.Opcode = TGSI_OPCODE_RET; - } - else { - /* inside main() pseudo-function */ - fullinst->Instruction.Opcode = TGSI_OPCODE_END; - } - break; + return TGSI_OPCODE_RET; case OPCODE_RSQ: - fullinst->Instruction.Opcode = TGSI_OPCODE_RSQ; - break; + return TGSI_OPCODE_RSQ; case OPCODE_SCS: - fullinst->Instruction.Opcode = TGSI_OPCODE_SCS; - fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XY; - break; + return TGSI_OPCODE_SCS; case OPCODE_SEQ: - fullinst->Instruction.Opcode = TGSI_OPCODE_SEQ; - break; + return TGSI_OPCODE_SEQ; case OPCODE_SGE: - fullinst->Instruction.Opcode = TGSI_OPCODE_SGE; - break; + return TGSI_OPCODE_SGE; case OPCODE_SGT: - fullinst->Instruction.Opcode = TGSI_OPCODE_SGT; - break; + return TGSI_OPCODE_SGT; case OPCODE_SIN: - fullinst->Instruction.Opcode = TGSI_OPCODE_SIN; - break; + return TGSI_OPCODE_SIN; case OPCODE_SLE: - fullinst->Instruction.Opcode = TGSI_OPCODE_SLE; - break; + return TGSI_OPCODE_SLE; case OPCODE_SLT: - fullinst->Instruction.Opcode = TGSI_OPCODE_SLT; - break; + return TGSI_OPCODE_SLT; case OPCODE_SNE: - fullinst->Instruction.Opcode = TGSI_OPCODE_SNE; - break; + return TGSI_OPCODE_SNE; case OPCODE_SSG: - fullinst->Instruction.Opcode = TGSI_OPCODE_SSG; - break; + return TGSI_OPCODE_SSG; case OPCODE_SUB: - fullinst->Instruction.Opcode = TGSI_OPCODE_SUB; - break; - case OPCODE_SWZ: - fullinst->Instruction.Opcode = TGSI_OPCODE_SWZ; - break; + return TGSI_OPCODE_SUB; case OPCODE_TEX: - /* ordinary texture lookup */ - fullinst->Instruction.Opcode = TGSI_OPCODE_TEX; - fullinst->Instruction.NumSrcRegs = 2; - fullinst->InstructionExtTexture.Texture = - map_texture_target( inst->TexSrcTarget, inst->TexShadow ); - fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit; - break; + return TGSI_OPCODE_TEX; case OPCODE_TXB: - /* texture lookup with LOD bias */ - fullinst->Instruction.Opcode = TGSI_OPCODE_TXB; - fullinst->Instruction.NumSrcRegs = 2; - fullinst->InstructionExtTexture.Texture = - map_texture_target( inst->TexSrcTarget, inst->TexShadow ); - fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit; - break; + return TGSI_OPCODE_TXB; case OPCODE_TXD: - /* texture lookup with explicit partial derivatives */ - fullinst->Instruction.Opcode = TGSI_OPCODE_TXD; - fullinst->Instruction.NumSrcRegs = 4; - fullinst->InstructionExtTexture.Texture = - map_texture_target( inst->TexSrcTarget, inst->TexShadow ); - /* src[0] = coord, src[1] = d[strq]/dx, src[2] = d[strq]/dy */ - fullinst->FullSrcRegisters[3].SrcRegister.File = TGSI_FILE_SAMPLER; - fullinst->FullSrcRegisters[3].SrcRegister.Index = inst->TexSrcUnit; - break; + return TGSI_OPCODE_TXD; case OPCODE_TXL: - /* texture lookup with explicit LOD */ - fullinst->Instruction.Opcode = TGSI_OPCODE_TXL; - fullinst->Instruction.NumSrcRegs = 2; - fullinst->InstructionExtTexture.Texture = - map_texture_target( inst->TexSrcTarget, inst->TexShadow ); - fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit; - break; + return TGSI_OPCODE_TXL; case OPCODE_TXP: - /* texture lookup with divide by Q component */ - /* convert to TEX w/ special flag for division */ - fullinst->Instruction.Opcode = TGSI_OPCODE_TXP; - fullinst->Instruction.NumSrcRegs = 2; - fullinst->InstructionExtTexture.Texture = - map_texture_target( inst->TexSrcTarget, inst->TexShadow ); - fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit; - break; + return TGSI_OPCODE_TXP; case OPCODE_XPD: - fullinst->Instruction.Opcode = TGSI_OPCODE_XPD; - fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XYZ; - break; + return TGSI_OPCODE_XPD; case OPCODE_END: - fullinst->Instruction.Opcode = TGSI_OPCODE_END; - break; + return TGSI_OPCODE_END; default: - assert( 0 ); + debug_assert( 0 ); + return TGSI_OPCODE_NOP; } } -/** - * \param usage_mask bitfield of TGSI_WRITEMASK_{XYZW} tokens - */ -static struct tgsi_full_declaration -make_input_decl( - GLuint index, - GLboolean interpolate_info, - GLuint interpolate, - GLuint usage_mask, - GLboolean semantic_info, - GLuint semantic_name, - GLbitfield semantic_index, - GLbitfield input_flags) -{ - struct tgsi_full_declaration decl; - - assert(semantic_name < TGSI_SEMANTIC_COUNT); - - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.UsageMask = usage_mask; - decl.Declaration.Semantic = semantic_info; - decl.DeclarationRange.First = index; - decl.DeclarationRange.Last = index; - if (semantic_info) { - decl.Semantic.SemanticName = semantic_name; - decl.Semantic.SemanticIndex = semantic_index; - } - if (interpolate_info) { - decl.Declaration.Interpolate = interpolate; - } - if (input_flags & PROG_PARAM_BIT_CENTROID) - decl.Declaration.Centroid = 1; - if (input_flags & PROG_PARAM_BIT_INVARIANT) - decl.Declaration.Invariant = 1; - return decl; -} - -/** - * \param usage_mask bitfield of TGSI_WRITEMASK_{XYZW} tokens - */ -static struct tgsi_full_declaration -make_output_decl( - GLuint index, - GLuint semantic_name, - GLuint semantic_index, - GLuint usage_mask, - GLbitfield output_flags) +static void +compile_instruction( + struct st_translate *t, + const struct prog_instruction *inst ) { - struct tgsi_full_declaration decl; - - assert(semantic_name < TGSI_SEMANTIC_COUNT); - - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.UsageMask = usage_mask; - decl.Declaration.Semantic = 1; - decl.DeclarationRange.First = index; - decl.DeclarationRange.Last = index; - decl.Semantic.SemanticName = semantic_name; - decl.Semantic.SemanticIndex = semantic_index; - if (output_flags & PROG_PARAM_BIT_CENTROID) - decl.Declaration.Centroid = 1; - if (output_flags & PROG_PARAM_BIT_INVARIANT) - decl.Declaration.Invariant = 1; - - return decl; -} + struct ureg_program *ureg = t->ureg; + GLuint i; + struct ureg_dst dst[1]; + struct ureg_src src[4]; + unsigned num_dst; + unsigned num_src; + num_dst = _mesa_num_inst_dst_regs( inst->Opcode ); + num_src = _mesa_num_inst_src_regs( inst->Opcode ); -static struct tgsi_full_declaration -make_temp_decl( - GLuint start_index, - GLuint end_index ) -{ - struct tgsi_full_declaration decl; - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.DeclarationRange.First = start_index; - decl.DeclarationRange.Last = end_index; - return decl; -} + if (num_dst) + dst[0] = translate_dst( t, + &inst->DstReg, + inst->SaturateMode ); -static struct tgsi_full_declaration -make_addr_decl( - GLuint start_index, - GLuint end_index ) -{ - struct tgsi_full_declaration decl; + for (i = 0; i < num_src; i++) + src[i] = translate_src( t, &inst->SrcReg[i] ); - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_ADDRESS; - decl.DeclarationRange.First = start_index; - decl.DeclarationRange.Last = end_index; - return decl; -} + switch( inst->Opcode ) { + case OPCODE_SWZ: + emit_swz( t, dst[0], &inst->SrcReg[0] ); + return; -static struct tgsi_full_declaration -make_sampler_decl(GLuint index) -{ - struct tgsi_full_declaration decl; - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.DeclarationRange.First = index; - decl.DeclarationRange.Last = index; - return decl; -} + case OPCODE_BGNLOOP: + case OPCODE_CAL: + case OPCODE_ELSE: + case OPCODE_ENDLOOP: + case OPCODE_IF: + debug_assert(num_dst == 0); + ureg_label_insn( ureg, + translate_opcode( inst->Opcode ), + src, num_src, + get_label( t, inst->BranchTarget )); + return; -/** Reference into a constant buffer */ -static struct tgsi_full_declaration -make_constant_decl(GLuint first, GLuint last) -{ - struct tgsi_full_declaration decl; - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; - return decl; -} + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXD: + case OPCODE_TXL: + case OPCODE_TXP: + src[num_src++] = t->samplers[inst->TexSrcUnit]; + ureg_tex_insn( ureg, + translate_opcode( inst->Opcode ), + dst, num_dst, + translate_texture_target( inst->TexSrcTarget, + inst->TexShadow ), + src, num_src ); + return; + case OPCODE_SCS: + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); + ureg_insn( ureg, + translate_opcode( inst->Opcode ), + dst, num_dst, + src, num_src ); + break; + case OPCODE_XPD: + dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); + ureg_insn( ureg, + translate_opcode( inst->Opcode ), + dst, num_dst, + src, num_src ); + break; -/** - * Find the temporaries which are used in the given program. - */ -static void -find_temporaries(const struct gl_program *program, - GLboolean tempsUsed[MAX_PROGRAM_TEMPS]) -{ - GLuint i, j; + case OPCODE_NOISE1: + case OPCODE_NOISE2: + case OPCODE_NOISE3: + case OPCODE_NOISE4: + /* At some point, a motivated person could add a better + * implementation of noise. Currently not even the nvidia + * binary drivers do anything more than this. In any case, the + * place to do this is in the GL state tracker, not the poor + * driver. + */ + ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); + break; + - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) - tempsUsed[i] = GL_FALSE; - for (i = 0; i < program->NumInstructions; i++) { - const struct prog_instruction *inst = program->Instructions + i; - const GLuint n = _mesa_num_inst_src_regs( inst->Opcode ); - for (j = 0; j < n; j++) { - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) - tempsUsed[inst->SrcReg[j].Index] = GL_TRUE; - if (inst->DstReg.File == PROGRAM_TEMPORARY) - tempsUsed[inst->DstReg.Index] = GL_TRUE; - } + default: + ureg_insn( ureg, + translate_opcode( inst->Opcode ), + dst, num_dst, + src, num_src ); + break; } } /** - * Find an unused temporary in the tempsUsed array. + * Emit the TGSI instructions for inverting the WPOS y coordinate. */ -static int -find_free_temporary(GLboolean tempsUsed[MAX_PROGRAM_TEMPS]) -{ - int i; - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { - if (!tempsUsed[i]) { - tempsUsed[i] = GL_TRUE; - return i; - } - } - return -1; -} - - -/** helper for building simple TGSI instruction, one src register */ -static void -build_tgsi_instruction1(struct tgsi_full_instruction *inst, - int opcode, - int dstFile, int dstIndex, int writemask, - int srcFile1, int srcIndex1) -{ - *inst = tgsi_default_full_instruction(); - - inst->Instruction.Opcode = opcode; - - inst->Instruction.NumDstRegs = 1; - inst->FullDstRegisters[0].DstRegister.File = dstFile; - inst->FullDstRegisters[0].DstRegister.Index = dstIndex; - inst->FullDstRegisters[0].DstRegister.WriteMask = writemask; - - inst->Instruction.NumSrcRegs = 1; - inst->FullSrcRegisters[0].SrcRegister.File = srcFile1; - inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1; -} - - -/** helper for building simple TGSI instruction, two src registers */ static void -build_tgsi_instruction2(struct tgsi_full_instruction *inst, - int opcode, - int dstFile, int dstIndex, int writemask, - int srcFile1, int srcIndex1, - int srcFile2, int srcIndex2) +emit_inverted_wpos( struct st_translate *t, + const struct gl_program *program ) { - *inst = tgsi_default_full_instruction(); + struct ureg_program *ureg = t->ureg; - inst->Instruction.Opcode = opcode; + /* Fragment program uses fragment position input. + * Need to replace instances of INPUT[WPOS] with temp T + * where T = INPUT[WPOS] by y is inverted. + */ + static const gl_state_index winSizeState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 }; + + /* XXX: note we are modifying the incoming shader here! Need to + * do this before emitting the constant decls below, or this + * will be missed: + */ + unsigned winHeightConst = _mesa_add_state_reference(program->Parameters, + winSizeState); - inst->Instruction.NumDstRegs = 1; - inst->FullDstRegisters[0].DstRegister.File = dstFile; - inst->FullDstRegisters[0].DstRegister.Index = dstIndex; - inst->FullDstRegisters[0].DstRegister.WriteMask = writemask; - - inst->Instruction.NumSrcRegs = 2; - inst->FullSrcRegisters[0].SrcRegister.File = srcFile1; - inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1; - inst->FullSrcRegisters[1].SrcRegister.File = srcFile2; - inst->FullSrcRegisters[1].SrcRegister.Index = srcIndex2; -} + struct ureg_src winsize = ureg_DECL_constant( ureg, winHeightConst ); + struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg ); + struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; + /* MOV wpos_temp, input[wpos] + */ + ureg_MOV( ureg, wpos_temp, wpos_input ); + /* SUB wpos_temp.y, winsize_const, wpos_input + */ + ureg_SUB( ureg, + ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), + winsize, + wpos_input); -/** - * Emit the TGSI instructions for inverting the WPOS y coordinate. - */ -static int -emit_inverted_wpos(struct tgsi_token *tokens, - int wpos_temp, - int winsize_const, - int wpos_input, - struct tgsi_header *header, int maxTokens) -{ - struct tgsi_full_instruction fullinst; - int ti = 0; - - /* MOV wpos_temp.xzw, input[wpos]; */ - build_tgsi_instruction1(&fullinst, - TGSI_OPCODE_MOV, - TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_XZW, - TGSI_FILE_INPUT, 0); - - ti += tgsi_build_full_instruction(&fullinst, - &tokens[ti], - header, - maxTokens - ti); - - /* SUB wpos_temp.y, const[winsize_const] - input[wpos_input]; */ - build_tgsi_instruction2(&fullinst, - TGSI_OPCODE_SUB, - TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_Y, - TGSI_FILE_CONSTANT, winsize_const, - TGSI_FILE_INPUT, wpos_input); - - ti += tgsi_build_full_instruction(&fullinst, - &tokens[ti], - header, - maxTokens - ti); - - return ti; + /* Use wpos_temp as position input from here on: + */ + t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); } - - /** * Translate Mesa program to TGSI format. * \param program the program to translate @@ -864,20 +682,19 @@ emit_inverted_wpos(struct tgsi_token *tokens, * \param inputMapping maps Mesa fragment program inputs to TGSI generic * input indexes * \param inputSemanticName the TGSI_SEMANTIC flag for each input - * \param inputSemanticIndex the semantic index (ex: which texcoord) for each input + * \param inputSemanticIndex the semantic index (ex: which texcoord) for + * each input * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input - * \param numOutputs number of output registers used * \param outputMapping maps Mesa fragment program outputs to TGSI * generic outputs * \param outputSemanticName the TGSI_SEMANTIC flag for each output - * \param outputSemanticIndex the semantic index (ex: which texcoord) for each output - * \param tokens array to store translated tokens in - * \param maxTokens size of the tokens array + * \param outputSemanticIndex the semantic index (ex: which texcoord) for + * each output * - * \return number of tokens placed in 'tokens' buffer, or zero if error + * \return array of translated tokens, caller's responsibility to free */ -GLuint +const struct tgsi_token * st_translate_mesa_program( GLcontext *ctx, uint procType, @@ -892,252 +709,124 @@ st_translate_mesa_program( const GLuint outputMapping[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[], - const GLbitfield outputFlags[], - struct tgsi_token *tokens, - GLuint maxTokens ) + const GLbitfield outputFlags[] ) { - GLuint i; - GLuint ti; /* token index */ - struct tgsi_header *header; - struct tgsi_processor *processor; - GLuint preamble_size = 0; - GLuint immediates[1000]; - GLuint numImmediates = 0; - GLboolean insideSubroutine = GL_FALSE; - GLboolean indirectAccess = GL_FALSE; - GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1]; - GLint wposTemp = -1, winHeightConst = -1; - - assert(procType == TGSI_PROCESSOR_FRAGMENT || - procType == TGSI_PROCESSOR_VERTEX); - - find_temporaries(program, tempsUsed); - - if (procType == TGSI_PROCESSOR_FRAGMENT) { - if (program->InputsRead & FRAG_BIT_WPOS) { - /* Fragment program uses fragment position input. - * Need to replace instances of INPUT[WPOS] with temp T - * where T = INPUT[WPOS] by y is inverted. - */ - static const gl_state_index winSizeState[STATE_LENGTH] - = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 }; - winHeightConst = _mesa_add_state_reference(program->Parameters, - winSizeState); - wposTemp = find_free_temporary(tempsUsed); - } - } - + struct st_translate translate, *t; + struct ureg_program *ureg; + const struct tgsi_token *tokens = NULL; + unsigned i; - *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + t = &translate; + memset(t, 0, sizeof *t); - header = (struct tgsi_header *) &tokens[1]; - *header = tgsi_build_header(); + t->procType = procType; + t->inputMapping = inputMapping; + t->outputMapping = outputMapping; + t->ureg = ureg_create( procType ); + if (t->ureg == NULL) + return NULL; - processor = (struct tgsi_processor *) &tokens[2]; - *processor = tgsi_build_processor( procType, header ); + ureg = t->ureg; - ti = 3; + /*_mesa_print_program(program);*/ /* * Declare input attributes. */ if (procType == TGSI_PROCESSOR_FRAGMENT) { for (i = 0; i < numInputs; i++) { - struct tgsi_full_declaration fulldecl; - fulldecl = make_input_decl(i, - GL_TRUE, interpMode[i], - TGSI_WRITEMASK_XYZW, - GL_TRUE, inputSemanticName[i], - inputSemanticIndex[i], - inputFlags[i]); - ti += tgsi_build_full_declaration(&fulldecl, - &tokens[ti], - header, - maxTokens - ti ); + t->inputs[i] = ureg_DECL_fs_input(ureg, + inputSemanticName[i], + inputSemanticIndex[i], + interpMode[i]); } - } - else { - /* vertex prog */ - /* XXX: this could probaby be merged with the clause above. - * the only difference is the semantic tags. - */ - for (i = 0; i < numInputs; i++) { - struct tgsi_full_declaration fulldecl; - fulldecl = make_input_decl(i, - GL_FALSE, 0, - TGSI_WRITEMASK_XYZW, - GL_FALSE, 0, 0, - inputFlags[i]); - ti += tgsi_build_full_declaration(&fulldecl, - &tokens[ti], - header, - maxTokens - ti ); + + if (program->InputsRead & FRAG_BIT_WPOS) { + /* Must do this after setting up t->inputs, and before + * emitting constant references, below: + */ + emit_inverted_wpos( t, program ); } - } - /* - * Declare output attributes. - */ - if (procType == TGSI_PROCESSOR_FRAGMENT) { + /* + * Declare output attributes. + */ for (i = 0; i < numOutputs; i++) { - struct tgsi_full_declaration fulldecl; switch (outputSemanticName[i]) { case TGSI_SEMANTIC_POSITION: - fulldecl = make_output_decl(i, - TGSI_SEMANTIC_POSITION, /* Z / Depth */ - outputSemanticIndex[i], - TGSI_WRITEMASK_Z, - outputFlags[i]); + t->outputs[i] = ureg_DECL_output( ureg, + TGSI_SEMANTIC_POSITION, /* Z / Depth */ + outputSemanticIndex[i] ); + + t->outputs[i] = ureg_writemask( t->outputs[i], + TGSI_WRITEMASK_Z ); break; case TGSI_SEMANTIC_COLOR: - fulldecl = make_output_decl(i, - TGSI_SEMANTIC_COLOR, - outputSemanticIndex[i], - TGSI_WRITEMASK_XYZW, - outputFlags[i]); + t->outputs[i] = ureg_DECL_output( ureg, + TGSI_SEMANTIC_COLOR, + outputSemanticIndex[i] ); break; default: - assert(0); + debug_assert(0); return 0; } - ti += tgsi_build_full_declaration(&fulldecl, - &tokens[ti], - header, - maxTokens - ti ); } } else { - /* vertex prog */ - for (i = 0; i < numOutputs; i++) { - struct tgsi_full_declaration fulldecl; - fulldecl = make_output_decl(i, - outputSemanticName[i], - outputSemanticIndex[i], - TGSI_WRITEMASK_XYZW, - outputFlags[i]); - ti += tgsi_build_full_declaration(&fulldecl, - &tokens[ti], - header, - maxTokens - ti ); + for (i = 0; i < numInputs; i++) { + t->inputs[i] = ureg_DECL_vs_input(ureg, i); } - } - /* temporary decls */ - { - GLboolean inside_range = GL_FALSE; - GLuint start_range = 0; - - tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE; - for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) { - if (tempsUsed[i] && !inside_range) { - inside_range = GL_TRUE; - start_range = i; - } - else if (!tempsUsed[i] && inside_range) { - struct tgsi_full_declaration fulldecl; - - inside_range = GL_FALSE; - fulldecl = make_temp_decl( start_range, i - 1 ); - ti += tgsi_build_full_declaration( - &fulldecl, - &tokens[ti], - header, - maxTokens - ti ); - } + for (i = 0; i < numOutputs; i++) { + t->outputs[i] = ureg_DECL_output( ureg, + outputSemanticName[i], + outputSemanticIndex[i] ); } } /* Declare address register. - */ + */ if (program->NumAddressRegs > 0) { - struct tgsi_full_declaration fulldecl; - - assert( program->NumAddressRegs == 1 ); - - fulldecl = make_addr_decl( 0, 0 ); - ti += tgsi_build_full_declaration( - &fulldecl, - &tokens[ti], - header, - maxTokens - ti ); - - indirectAccess = GL_TRUE; + debug_assert( program->NumAddressRegs == 1 ); + t->address[0] = ureg_DECL_address( ureg ); } - /* immediates/literals */ - memset(immediates, ~0, sizeof(immediates)); - /* Emit immediates only when there is no address register in use. - * FIXME: Be smarter and recognize param arrays -- indirect addressing is - * only valid within the referenced array. + /* Emit constants and immediates. Mesa uses a single index space + * for these, so we put all the translated regs in t->constants. */ - if (program->Parameters && !indirectAccess) { - for (i = 0; i < program->Parameters->NumParameters; i++) { - if (program->Parameters->Parameters[i].Type == PROGRAM_CONSTANT) { - struct tgsi_full_immediate fullimm; - - fullimm = make_immediate( program->Parameters->ParameterValues[i], 4 ); - ti += tgsi_build_full_immediate( - &fullimm, - &tokens[ti], - header, - maxTokens - ti ); - immediates[i] = numImmediates; - numImmediates++; - } - } - } - - /* constant buffer refs */ if (program->Parameters) { - GLint start = -1, end = -1; - + + t->constants = CALLOC( program->Parameters->NumParameters, + sizeof t->constants[0] ); + if (t->constants == NULL) + goto out; + for (i = 0; i < program->Parameters->NumParameters; i++) { - GLboolean emit = (i == program->Parameters->NumParameters - 1); - GLboolean matches; - switch (program->Parameters->Parameters[i].Type) { case PROGRAM_ENV_PARAM: case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: case PROGRAM_UNIFORM: - matches = GL_TRUE; + t->constants[i] = ureg_DECL_constant( ureg, i ); break; + + /* Emit immediates only when there is no address register + * in use. FIXME: Be smarter and recognize param arrays: + * indirect addressing is only valid within the referenced + * array. + */ case PROGRAM_CONSTANT: - matches = indirectAccess; + if (program->NumAddressRegs > 0) + t->constants[i] = ureg_DECL_constant( ureg, i ); + else + t->constants[i] = + ureg_DECL_immediate( ureg, + program->Parameters->ParameterValues[i], + 4 ); break; default: - matches = GL_FALSE; - } - - if (matches) { - if (start == -1) { - /* begin a sequence */ - start = i; - end = i; - } - else { - /* continue sequence */ - end = i; - } - } - else { - if (start != -1) { - /* end of sequence */ - emit = GL_TRUE; - } - } - - if (emit && start >= 0) { - struct tgsi_full_declaration fulldecl; - - fulldecl = make_constant_decl( start, end ); - ti += tgsi_build_full_declaration( - &fulldecl, - &tokens[ti], - header, - maxTokens - ti ); - start = end = -1; + break; } } } @@ -1145,58 +834,55 @@ st_translate_mesa_program( /* texture samplers */ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { if (program->SamplersUsed & (1 << i)) { - struct tgsi_full_declaration fulldecl; - - fulldecl = make_sampler_decl( i ); - ti += tgsi_build_full_declaration( - &fulldecl, - &tokens[ti], - header, - maxTokens - ti ); + t->samplers[i] = ureg_DECL_sampler( ureg, i ); } } - /* invert WPOS fragment input */ - if (wposTemp >= 0) { - ti += emit_inverted_wpos(&tokens[ti], wposTemp, winHeightConst, - inputMapping[FRAG_ATTRIB_WPOS], - header, maxTokens - ti); - preamble_size = 2; /* two instructions added */ + /* Emit each instruction in turn: + */ + for (i = 0; i < program->NumInstructions; i++) { + set_insn_start( t, ureg_get_instruction_number( ureg )); + compile_instruction( t, &program->Instructions[i] ); } - for (i = 0; i < program->NumInstructions; i++) { - struct tgsi_full_instruction fullinst; - - compile_instruction( - &program->Instructions[i], - &fullinst, - inputMapping, - outputMapping, - immediates, - indirectAccess, - preamble_size, - procType, - &insideSubroutine, - wposTemp); - - ti += tgsi_build_full_instruction( - &fullinst, - &tokens[ti], - header, - maxTokens - ti ); + /* Fix up all emitted labels: + */ + for (i = 0; i < t->labels_count; i++) { + ureg_fixup_label( ureg, + t->labels[i].token, + t->insn[t->labels[i].branch_target] ); } -#if DEBUG - if(!tgsi_sanity_check(tokens)) { - debug_printf("Due to sanity check failure(s) above the following shader program is invalid:\n"); - debug_printf("\nOriginal program:\n%s", program->String); - debug_printf("\nMesa program:\n"); + tokens = ureg_get_tokens( ureg, NULL ); + ureg_destroy( ureg ); + +out: + FREE(t->insn); + FREE(t->labels); + FREE(t->constants); + + if (t->error) { + debug_printf("%s: translate error flag set\n", __FUNCTION__); + FREE((void *)tokens); + tokens = NULL; + } + + if (!tokens) { + debug_printf("%s: failed to translate Mesa program:\n", __FUNCTION__); _mesa_print_program(program); - debug_printf("\nTGSI program:\n"); - tgsi_dump(tokens, 0); - assert(0); + debug_assert(0); } -#endif - return ti; + return tokens; +} + + +/** + * Tokens cannot be free with _mesa_free otherwise the builtin gallium + * malloc debugging will get confused. + */ +void +st_free_tokens(const struct tgsi_token *tokens) +{ + FREE((void *)tokens); } diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h index b465b3bddc..c0d1ff59e1 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.h +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h @@ -39,7 +39,7 @@ extern "C" { struct tgsi_token; struct gl_program; -GLuint +const struct tgsi_token * st_translate_mesa_program( GLcontext *ctx, uint procType, @@ -54,9 +54,10 @@ st_translate_mesa_program( const GLuint outputMapping[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[], - const GLbitfield outputFlags[], - struct tgsi_token *tokens, - GLuint maxTokens ); + const GLbitfield outputFlags[] ); + +void +st_free_tokens(const struct tgsi_token *tokens); #if defined __cplusplus diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index d2da20ae42..a9be80ce8f 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -42,6 +42,7 @@ #include "draw/draw_context.h" #include "tgsi/tgsi_dump.h" +#include "st_debug.h" #include "st_context.h" #include "st_atom.h" #include "st_program.h" @@ -49,12 +50,6 @@ #include "cso_cache/cso_context.h" -#define ST_MAX_SHADER_TOKENS (8 * 1024) - - -#define TGSI_DEBUG 0 - - /** * Translate a Mesa vertex shader into a TGSI shader. * \param outputMapping to map vertex program output registers (VERT_RESULT_x) @@ -70,12 +65,9 @@ st_translate_vertex_program(struct st_context *st, const ubyte *outputSemanticIndex) { struct pipe_context *pipe = st->pipe; - struct tgsi_token *tokens; GLuint defaultOutputMapping[VERT_RESULT_MAX]; - struct pipe_shader_state vs; GLuint attr, i; GLuint num_generic = 0; - GLuint num_tokens; ubyte vs_input_semantic_name[PIPE_MAX_SHADER_INPUTS]; ubyte vs_input_semantic_index[PIPE_MAX_SHADER_INPUTS]; @@ -88,14 +80,7 @@ st_translate_vertex_program(struct st_context *st, GLbitfield input_flags[MAX_PROGRAM_INPUTS]; GLbitfield output_flags[MAX_PROGRAM_OUTPUTS]; - tokens = (struct tgsi_token *)MALLOC(ST_MAX_SHADER_TOKENS * sizeof *tokens); - if(!tokens) { - /* FIXME: propagate error to the caller */ - assert(0); - return; - } - - memset(&vs, 0, sizeof(vs)); +// memset(&vs, 0, sizeof(vs)); memset(input_flags, 0, sizeof(input_flags)); memset(output_flags, 0, sizeof(output_flags)); @@ -330,7 +315,7 @@ st_translate_vertex_program(struct st_context *st, /* free old shader state, if any */ if (stvp->state.tokens) { - _mesa_free((void *) stvp->state.tokens); + st_free_tokens(stvp->state.tokens); stvp->state.tokens = NULL; } if (stvp->driver_shader) { @@ -338,43 +323,36 @@ st_translate_vertex_program(struct st_context *st, stvp->driver_shader = NULL; } - /* XXX: fix static allocation of tokens: - */ - num_tokens = st_translate_mesa_program(st->ctx, - TGSI_PROCESSOR_VERTEX, - &stvp->Base.Base, - /* inputs */ - vs_num_inputs, - stvp->input_to_index, - vs_input_semantic_name, - vs_input_semantic_index, - NULL, - input_flags, - /* outputs */ - vs_num_outputs, - outputMapping, - vs_output_semantic_name, - vs_output_semantic_index, - output_flags, - /* tokenized result */ - tokens, ST_MAX_SHADER_TOKENS); - - assert(num_tokens < ST_MAX_SHADER_TOKENS); - - vs.tokens = (struct tgsi_token *) - _mesa_realloc(tokens, - ST_MAX_SHADER_TOKENS * sizeof *tokens, - num_tokens * sizeof *tokens); + stvp->state.tokens = + st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_VERTEX, + &stvp->Base.Base, + /* inputs */ + vs_num_inputs, + stvp->input_to_index, + vs_input_semantic_name, + vs_input_semantic_index, + NULL, + input_flags, + /* outputs */ + vs_num_outputs, + outputMapping, + vs_output_semantic_name, + vs_output_semantic_index, + output_flags ); stvp->num_inputs = vs_num_inputs; - stvp->state = vs; /* struct copy */ - stvp->driver_shader = pipe->create_vs_state(pipe, &vs); + stvp->driver_shader = pipe->create_vs_state(pipe, &stvp->state); - if (0) + if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) { _mesa_print_program(&stvp->Base.Base); + debug_printf("\n"); + } - if (TGSI_DEBUG) - tgsi_dump( vs.tokens, 0 ); + if (ST_DEBUG & DEBUG_TGSI) { + tgsi_dump( stvp->state.tokens, 0 ); + debug_printf("\n"); + } } @@ -383,7 +361,6 @@ st_translate_vertex_program(struct st_context *st, * Translate a Mesa fragment shader into a TGSI shader. * \param inputMapping to map fragment program input registers to TGSI * input slots - * \param tokensOut destination for TGSI tokens * \return pointer to cached pipe_shader object. */ void @@ -392,16 +369,13 @@ st_translate_fragment_program(struct st_context *st, const GLuint inputMapping[]) { struct pipe_context *pipe = st->pipe; - struct tgsi_token *tokens; GLuint outputMapping[FRAG_RESULT_MAX]; GLuint defaultInputMapping[FRAG_ATTRIB_MAX]; - struct pipe_shader_state fs; GLuint interpMode[16]; /* XXX size? */ GLuint attr; const GLbitfield inputsRead = stfp->Base.Base.InputsRead; GLuint vslot = 0; GLuint num_generic = 0; - GLuint num_tokens; uint fs_num_inputs = 0; @@ -412,14 +386,7 @@ st_translate_fragment_program(struct st_context *st, GLbitfield input_flags[MAX_PROGRAM_INPUTS]; GLbitfield output_flags[MAX_PROGRAM_OUTPUTS]; - tokens = (struct tgsi_token *)MALLOC(ST_MAX_SHADER_TOKENS * sizeof *tokens); - if(!tokens) { - /* FIXME: propagate error to the caller */ - assert(0); - return; - } - - memset(&fs, 0, sizeof(fs)); +// memset(&fs, 0, sizeof(fs)); memset(input_flags, 0, sizeof(input_flags)); memset(output_flags, 0, sizeof(output_flags)); @@ -541,42 +508,35 @@ st_translate_fragment_program(struct st_context *st, if (!inputMapping) inputMapping = defaultInputMapping; - /* XXX: fix static allocation of tokens: - */ - num_tokens = st_translate_mesa_program(st->ctx, - TGSI_PROCESSOR_FRAGMENT, - &stfp->Base.Base, - /* inputs */ - fs_num_inputs, - inputMapping, - stfp->input_semantic_name, - stfp->input_semantic_index, - interpMode, - input_flags, - /* outputs */ - fs_num_outputs, - outputMapping, - fs_output_semantic_name, - fs_output_semantic_index, - output_flags, - /* tokenized result */ - tokens, ST_MAX_SHADER_TOKENS); - - assert(num_tokens < ST_MAX_SHADER_TOKENS); - - fs.tokens = (struct tgsi_token *) - _mesa_realloc(tokens, - ST_MAX_SHADER_TOKENS * sizeof *tokens, - num_tokens * sizeof *tokens); - - stfp->state = fs; /* struct copy */ - stfp->driver_shader = pipe->create_fs_state(pipe, &fs); - - if (0) + stfp->state.tokens = + st_translate_mesa_program(st->ctx, + TGSI_PROCESSOR_FRAGMENT, + &stfp->Base.Base, + /* inputs */ + fs_num_inputs, + inputMapping, + stfp->input_semantic_name, + stfp->input_semantic_index, + interpMode, + input_flags, + /* outputs */ + fs_num_outputs, + outputMapping, + fs_output_semantic_name, + fs_output_semantic_index, + output_flags ); + + stfp->driver_shader = pipe->create_fs_state(pipe, &stfp->state); + + if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) { _mesa_print_program(&stfp->Base.Base); + debug_printf("\n"); + } - if (TGSI_DEBUG) - tgsi_dump( fs.tokens, 0/*TGSI_DUMP_VERBOSE*/ ); + if (ST_DEBUG & DEBUG_TGSI) { + tgsi_dump( stfp->state.tokens, 0/*TGSI_DUMP_VERBOSE*/ ); + debug_printf("\n"); + } } diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index bbc2830e69..3f30137747 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -342,12 +342,21 @@ st_texture_image_copy(struct pipe_context *pipe, src_surface = screen->get_tex_surface(screen, src, face, srcLevel, i, PIPE_BUFFER_USAGE_GPU_READ); - pipe->surface_copy(pipe, - dst_surface, - 0, 0, /* destX, Y */ - src_surface, - 0, 0, /* srcX, Y */ - width, height); + if (pipe->surface_copy) { + pipe->surface_copy(pipe, + dst_surface, + 0, 0, /* destX, Y */ + src_surface, + 0, 0, /* srcX, Y */ + width, height); + } else { + util_surface_copy(pipe, FALSE, + dst_surface, + 0, 0, /* destX, Y */ + src_surface, + 0, 0, /* srcX, Y */ + width, height); + } pipe_surface_reference(&src_surface, NULL); pipe_surface_reference(&dst_surface, NULL); @@ -577,7 +586,6 @@ st_teximage_flush_before_map(struct st_context *st, pipe->is_texture_referenced(pipe, pt, face, level); if (referenced && ((referenced & PIPE_REFERENCED_FOR_WRITE) || - usage == PIPE_TRANSFER_WRITE || - usage == PIPE_TRANSFER_READ_WRITE)) + (usage & PIPE_TRANSFER_WRITE))) st_flush(st, PIPE_FLUSH_RENDER_CACHE, NULL); } |