From be37e8350f9b95fb50fbce3a2bd84098ad27e462 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 8 Apr 2008 21:38:44 -0600 Subject: gallium: remove obsolete/unused PIPE_ATTRIB_MAX --- src/gallium/include/pipe/p_state.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 2dc9a92186..79efc4f53d 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -55,7 +55,6 @@ extern "C" { #define PIPE_MAX_CLIP_PLANES 6 #define PIPE_MAX_CONSTANT 32 #define PIPE_MAX_ATTRIBS 32 -#define PIPE_ATTRIB_MAX 32 /* XXX obsolete - remove */ #define PIPE_MAX_COLOR_BUFS 8 #define PIPE_MAX_TEXTURE_LEVELS 16 #define PIPE_MAX_FEEDBACK_ATTRIBS 16 -- cgit v1.2.3 From da8934034b33adef5dc41395cfde4bdd26ba207c Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 8 Apr 2008 21:43:36 -0600 Subject: gallium: re-order, clean-up PIPE_MAX_* definitions --- src/gallium/include/pipe/p_state.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 79efc4f53d..912d84e7b9 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -51,15 +51,14 @@ extern "C" { /** * Implementation limits */ -#define PIPE_MAX_SAMPLERS 16 -#define PIPE_MAX_CLIP_PLANES 6 -#define PIPE_MAX_CONSTANT 32 -#define PIPE_MAX_ATTRIBS 32 -#define PIPE_MAX_COLOR_BUFS 8 -#define PIPE_MAX_TEXTURE_LEVELS 16 -#define PIPE_MAX_FEEDBACK_ATTRIBS 16 -#define PIPE_MAX_SHADER_INPUTS 16 -#define PIPE_MAX_SHADER_OUTPUTS 16 +#define PIPE_MAX_ATTRIBS 32 +#define PIPE_MAX_CLIP_PLANES 6 +#define PIPE_MAX_COLOR_BUFS 8 +#define PIPE_MAX_CONSTANT 32 +#define PIPE_MAX_SAMPLERS 16 +#define PIPE_MAX_SHADER_INPUTS 16 +#define PIPE_MAX_SHADER_OUTPUTS 16 +#define PIPE_MAX_TEXTURE_LEVELS 16 /* fwd decls */ -- cgit v1.2.3 From f7e475280a0b98484a8c1a98f18b2733532486b4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Apr 2008 14:17:48 -0600 Subject: gallium: fix bug in PIPE_BLENDFACTOR_INV_DST_ALPHA case --- src/gallium/drivers/softpipe/sp_quad_blend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 8be8025f40..802472df45 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -561,7 +561,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) case PIPE_BLENDFACTOR_INV_DST_ALPHA: { float inv_comp[4]; - VEC4_SUB(inv_comp, one, quadColor[3]); /* A */ + VEC4_SUB(inv_comp, one, dest[3]); /* A */ VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ -- cgit v1.2.3 From a52faa9325db178601811f4bdad6d9747de5f238 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Apr 2008 16:09:46 -0600 Subject: gallium: remove unneeded st->haveFramebufferSurfaces field. --- src/gallium/winsys/xlib/xm_api.c | 3 --- src/mesa/state_tracker/st_cb_drawpixels.c | 6 +----- src/mesa/state_tracker/st_context.c | 2 -- src/mesa/state_tracker/st_context.h | 6 ------ 4 files changed, 1 insertion(+), 16 deletions(-) diff --git a/src/gallium/winsys/xlib/xm_api.c b/src/gallium/winsys/xlib/xm_api.c index a82d3c990e..0c248344b1 100644 --- a/src/gallium/winsys/xlib/xm_api.c +++ b/src/gallium/winsys/xlib/xm_api.c @@ -814,9 +814,6 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) mesaCtx->Const.CheckArrayBounds = GL_TRUE; #endif - /* finish up xmesa context initializations */ - c->st->haveFramebufferSurfaces = GL_TRUE; - return c; } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index c57e05312a..b1affc4a91 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -1017,11 +1017,7 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, srcy = ctx->DrawBuffer->Height - srcy - height; } - /* For some drivers (like Xlib) it's not possible to treat the - * front/back color buffers as surfaces (they're XImages and Pixmaps). - * So, this var tells us if we can use surface_copy here... - */ - if (st->haveFramebufferSurfaces && srcFormat == texFormat) { + if (srcFormat == texFormat) { /* copy source framebuffer surface into mipmap/texture */ pipe->surface_copy(pipe, FALSE, diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 7511c28074..154327239d 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -128,8 +128,6 @@ st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe ) st->ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; - st->haveFramebufferSurfaces = GL_TRUE; - st->pixel_xfer.cache = _mesa_new_program_cache(); /* GL limits and extensions */ diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index bcebbd4943..212687cf4a 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -111,12 +111,6 @@ struct st_context char vendor[100]; char renderer[100]; - /** Can we access the front/back color buffers as pipe_surfaces? - * We can't with the Xlib driver... - * This is a hack that should be fixed someday. - */ - GLboolean haveFramebufferSurfaces; - /* State to be validated: */ struct st_tracked_state **atoms; -- cgit v1.2.3 From 9c86c0e88b09370584f767747c52b7f352844ac5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Apr 2008 16:26:56 -0600 Subject: gallium: refactor code, new flush_front_buffer() function --- src/mesa/state_tracker/st_cb_flush.c | 42 +++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c index c7efa40e38..1fc3a20d05 100644 --- a/src/mesa/state_tracker/st_cb_flush.c +++ b/src/mesa/state_tracker/st_cb_flush.c @@ -44,26 +44,12 @@ #include "pipe/p_winsys.h" -void st_flush( struct st_context *st, uint pipeFlushFlags, - struct pipe_fence_handle **fence ) -{ - FLUSH_VERTICES(st->ctx, 0); - - st_flush_bitmap_cache(st); - - st->pipe->flush( st->pipe, pipeFlushFlags, fence ); -} - - -static void st_gl_flush( struct st_context *st, uint pipeFlushFlags, - struct pipe_fence_handle **fence ) +static void +flush_front_buffer(struct st_context *st, uint pipeFlushFlags, + struct pipe_fence_handle **fence) { GLframebuffer *fb = st->ctx->DrawBuffer; - st_flush_bitmap_cache(st); - - FLUSH_VERTICES(st->ctx, 0); - if (!fb) return; @@ -105,6 +91,28 @@ static void st_gl_flush( struct st_context *st, uint pipeFlushFlags, } +void st_flush( struct st_context *st, uint pipeFlushFlags, + struct pipe_fence_handle **fence ) +{ + FLUSH_VERTICES(st->ctx, 0); + + st_flush_bitmap_cache(st); + + st->pipe->flush( st->pipe, pipeFlushFlags, fence ); +} + + +static void st_gl_flush( struct st_context *st, uint pipeFlushFlags, + struct pipe_fence_handle **fence ) +{ + st_flush_bitmap_cache(st); + + FLUSH_VERTICES(st->ctx, 0); + + flush_front_buffer(st, pipeFlushFlags, fence); +} + + /** * Called via ctx->Driver.Flush() */ -- cgit v1.2.3 From 574f964667c5ec35f4832c839a9dcc24f92e2aab Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Apr 2008 16:30:28 -0600 Subject: gallium: fold st_gl_flush() into st_glFlush() --- src/mesa/state_tracker/st_cb_flush.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c index 1fc3a20d05..d60ad7b82a 100644 --- a/src/mesa/state_tracker/st_cb_flush.c +++ b/src/mesa/state_tracker/st_cb_flush.c @@ -102,23 +102,16 @@ void st_flush( struct st_context *st, uint pipeFlushFlags, } -static void st_gl_flush( struct st_context *st, uint pipeFlushFlags, - struct pipe_fence_handle **fence ) -{ - st_flush_bitmap_cache(st); - - FLUSH_VERTICES(st->ctx, 0); - - flush_front_buffer(st, pipeFlushFlags, fence); -} - - /** * Called via ctx->Driver.Flush() */ static void st_glFlush(GLcontext *ctx) { - st_gl_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); + st_flush_bitmap_cache(ctx->st); + + FLUSH_VERTICES(ctx, 0); + + flush_front_buffer(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); } -- cgit v1.2.3 From aade2f41b0d5cf0fb44e094c0b10cfaf1f621aec Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Apr 2008 16:32:22 -0600 Subject: gallium: call_flush_front_buffer() from st_glFinish() --- src/mesa/state_tracker/st_cb_flush.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c index d60ad7b82a..167c7ccbe2 100644 --- a/src/mesa/state_tracker/st_cb_flush.c +++ b/src/mesa/state_tracker/st_cb_flush.c @@ -132,6 +132,8 @@ void st_finish( struct st_context *st ) static void st_glFinish(GLcontext *ctx) { st_finish( ctx->st ); + + flush_front_buffer(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); } -- cgit v1.2.3 From 4ecbd5a70fba3e7d5f8b56ede34867ea5964afc6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Apr 2008 16:32:46 -0600 Subject: gallium: reorder funcs --- src/mesa/state_tracker/st_cb_flush.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c index 167c7ccbe2..6bda877bb0 100644 --- a/src/mesa/state_tracker/st_cb_flush.c +++ b/src/mesa/state_tracker/st_cb_flush.c @@ -102,6 +102,18 @@ void st_flush( struct st_context *st, uint pipeFlushFlags, } +void st_finish( struct st_context *st ) +{ + struct pipe_fence_handle *fence = NULL; + + st_flush(st, PIPE_FLUSH_RENDER_CACHE, &fence); + + st->pipe->winsys->fence_finish(st->pipe->winsys, fence, 0); + st->pipe->winsys->fence_reference(st->pipe->winsys, &fence, NULL); +} + + + /** * Called via ctx->Driver.Flush() */ @@ -115,17 +127,6 @@ static void st_glFlush(GLcontext *ctx) } -void st_finish( struct st_context *st ) -{ - struct pipe_fence_handle *fence = NULL; - - st_flush(st, PIPE_FLUSH_RENDER_CACHE, &fence); - - st->pipe->winsys->fence_finish(st->pipe->winsys, fence, 0); - st->pipe->winsys->fence_reference(st->pipe->winsys, &fence, NULL); -} - - /** * Called via ctx->Driver.Finish() */ -- cgit v1.2.3 From 54d7c399a888283711bdc00f93cb54a3ce0b30eb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Apr 2008 16:59:14 -0600 Subject: gallium: more flush/finish changes New, separate is_front_buffer_dirty() function. --- src/mesa/state_tracker/st_cb_flush.c | 70 ++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c index 6bda877bb0..1bcd1b4cd9 100644 --- a/src/mesa/state_tracker/st_cb_flush.c +++ b/src/mesa/state_tracker/st_cb_flush.c @@ -44,14 +44,13 @@ #include "pipe/p_winsys.h" -static void -flush_front_buffer(struct st_context *st, uint pipeFlushFlags, - struct pipe_fence_handle **fence) +static GLboolean +is_front_buffer_dirty(struct st_context *st) { GLframebuffer *fb = st->ctx->DrawBuffer; if (!fb) - return; + return GL_FALSE; /* XXX: temporary hack. This flag should only be set if we do any * rendering to the front buffer. @@ -68,26 +67,26 @@ flush_front_buffer(struct st_context *st, uint pipeFlushFlags, st->flags.frontbuffer_dirty = (fb->_ColorDrawBufferMask[0] & BUFFER_BIT_FRONT_LEFT); - if (st->flags.frontbuffer_dirty) { - struct st_renderbuffer *strb - = st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); - struct pipe_surface *front_surf = strb->surface; - - /* If we aren't rendering to the frontbuffer, this is a noop. - * This should be uncontroversial for glFlush, though people may - * feel more strongly about glFinish. - * - * Additionally, need to make sure that the frontbuffer_dirty - * flag really gets set on frontbuffer rendering. - */ - st->pipe->flush( st->pipe, pipeFlushFlags, fence ); - - /* Hook for copying "fake" frontbuffer if necessary: - */ - st->pipe->winsys->flush_frontbuffer( st->pipe->winsys, front_surf, - st->pipe->priv ); - st->flags.frontbuffer_dirty = 0; - } + return st->flags.frontbuffer_dirty; +} + + +/** + * Tell the winsys to display the front color buffer on-screen. + */ +static void +display_front_buffer(struct st_context *st) +{ + GLframebuffer *fb = st->ctx->DrawBuffer; + struct st_renderbuffer *strb + = st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); + struct pipe_surface *front_surf = strb->surface; + + /* Hook for copying "fake" frontbuffer if necessary: + */ + st->pipe->winsys->flush_frontbuffer( st->pipe->winsys, front_surf, + st->pipe->priv ); + st->flags.frontbuffer_dirty = 0; } @@ -102,6 +101,9 @@ void st_flush( struct st_context *st, uint pipeFlushFlags, } +/** + * Flush, and wait for completion. + */ void st_finish( struct st_context *st ) { struct pipe_fence_handle *fence = NULL; @@ -119,11 +121,15 @@ void st_finish( struct st_context *st ) */ static void st_glFlush(GLcontext *ctx) { - st_flush_bitmap_cache(ctx->st); - - FLUSH_VERTICES(ctx, 0); + struct st_context *st = ctx->st; - flush_front_buffer(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); + if (is_front_buffer_dirty(st)) { + st_finish(st); + display_front_buffer(st); + } + else { + st_flush(st, PIPE_FLUSH_RENDER_CACHE, NULL); + } } @@ -132,9 +138,13 @@ static void st_glFlush(GLcontext *ctx) */ static void st_glFinish(GLcontext *ctx) { - st_finish( ctx->st ); + struct st_context *st = ctx->st; - flush_front_buffer(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); + st_finish(st); + + if (is_front_buffer_dirty(st)) { + display_front_buffer(st); + } } -- cgit v1.2.3 From 311e40268414649f047ee177ba22a17a2d437843 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Apr 2008 18:39:51 -0600 Subject: gallium: more elaborate tracking of front color buffer state This fixes the case where the app calls SwapBuffers then calls glReadPixels to read the front color buffer. We now keep track of when the front buffer is a _logically_ copy of the back buffer (after SwapBuffers) and read from the back color buffer instead of the front. --- src/mesa/state_tracker/st_atom_framebuffer.c | 8 +++++++ src/mesa/state_tracker/st_cb_drawpixels.c | 2 +- src/mesa/state_tracker/st_cb_flush.c | 27 ++++------------------- src/mesa/state_tracker/st_cb_readpixels.c | 33 ++++++++++++++++++++++++++-- src/mesa/state_tracker/st_cb_readpixels.h | 3 +++ src/mesa/state_tracker/st_context.h | 9 +++++--- src/mesa/state_tracker/st_framebuffer.c | 1 + 7 files changed, 54 insertions(+), 29 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c index 02573af8f0..8a95096ec9 100644 --- a/src/mesa/state_tracker/st_atom_framebuffer.c +++ b/src/mesa/state_tracker/st_atom_framebuffer.c @@ -81,6 +81,14 @@ update_framebuffer_state( struct st_context *st ) } cso_set_framebuffer(st->cso_context, framebuffer); + + if (fb->_ColorDrawBufferMask[0] & BUFFER_BIT_FRONT_LEFT) { + if (st->frontbuffer_status == FRONT_STATUS_COPY_OF_BACK) { + /* XXX copy back buf to front? */ + } + /* we're assuming we'll really draw to the front buffer */ + st->frontbuffer_status = FRONT_STATUS_DIRTY; + } } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index b1affc4a91..5f8c90cbba 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -961,7 +961,7 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, } if (type == GL_COLOR) { - rbRead = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + rbRead = st_get_color_read_renderbuffer(ctx); color = NULL; stfp = combined_drawpix_fragment_program(ctx); stvp = st_make_passthrough_vertex_shader(ctx->st, GL_FALSE); diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c index 1bcd1b4cd9..1b3402cee2 100644 --- a/src/mesa/state_tracker/st_cb_flush.c +++ b/src/mesa/state_tracker/st_cb_flush.c @@ -44,30 +44,10 @@ #include "pipe/p_winsys.h" -static GLboolean +static INLINE GLboolean is_front_buffer_dirty(struct st_context *st) { - GLframebuffer *fb = st->ctx->DrawBuffer; - - if (!fb) - return GL_FALSE; - - /* XXX: temporary hack. This flag should only be set if we do any - * rendering to the front buffer. - * - * Further more, the scissor rectangle could be tracked to - * construct a dirty region of the front buffer, to avoid - * situations where it must be copied repeatedly. - * - * In the extreme case, some kind of timer could be set up to allow - * coalescing of multiple flushes to the frontbuffer, which can be - * quite a performance drain if there are a sufficient number of - * them. - */ - st->flags.frontbuffer_dirty - = (fb->_ColorDrawBufferMask[0] & BUFFER_BIT_FRONT_LEFT); - - return st->flags.frontbuffer_dirty; + return st->frontbuffer_status == FRONT_STATUS_DIRTY; } @@ -86,7 +66,8 @@ display_front_buffer(struct st_context *st) */ st->pipe->winsys->flush_frontbuffer( st->pipe->winsys, front_surf, st->pipe->priv ); - st->flags.frontbuffer_dirty = 0; + + st->frontbuffer_status = FRONT_STATUS_UNDEFINED; } diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 2bcc8c99fb..29a06ffaa9 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -128,6 +128,34 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, } +/** + * Return renderbuffer to use for reading color pixels for glRead/CopyPixel + * commands. + * Special care is needed for the front buffer. + */ +struct st_renderbuffer * +st_get_color_read_renderbuffer(GLcontext *ctx) +{ + struct gl_framebuffer *fb = ctx->ReadBuffer; + struct st_renderbuffer *strb = + st_renderbuffer(fb->_ColorReadBuffer); + struct st_renderbuffer *front = + st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); + + if (strb == front + && ctx->st->frontbuffer_status == FRONT_STATUS_COPY_OF_BACK) { + /* reading from front color buffer, which is a logical copy of the + * back color buffer. + */ + struct st_renderbuffer *back = + st_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer); + strb = back; + } + + return strb; +} + + /** * Do glReadPixels by getting rows from the framebuffer surface with * get_tile(). Convert to requested format/type with Mesa image routines. @@ -173,12 +201,13 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, strb = st_renderbuffer(ctx->ReadBuffer->_DepthBuffer); } else { - strb = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + /* Read color buffer */ + strb = st_get_color_read_renderbuffer(ctx); } + if (!strb) return; - if (format == GL_RGBA && type == GL_FLOAT) { /* write tile(row) directly into user's buffer */ df = (GLfloat *) _mesa_image_address2d(&clippedPacking, dest, width, diff --git a/src/mesa/state_tracker/st_cb_readpixels.h b/src/mesa/state_tracker/st_cb_readpixels.h index 79acdad88e..9e151be51f 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.h +++ b/src/mesa/state_tracker/st_cb_readpixels.h @@ -29,6 +29,9 @@ #ifndef ST_CB_READPIXELS_H #define ST_CB_READPIXELS_H +extern struct st_renderbuffer * +st_get_color_read_renderbuffer(GLcontext *ctx); + extern void st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum type, diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 212687cf4a..d89e54c43c 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -45,6 +45,11 @@ struct blit_state; struct bitmap_cache; +#define FRONT_STATUS_UNDEFINED 0 +#define FRONT_STATUS_DIRTY 1 +#define FRONT_STATUS_COPY_OF_BACK 2 + + #define ST_NEW_MESA 0x1 /* Mesa state has changed */ #define ST_NEW_FRAGMENT_PROGRAM 0x2 #define ST_NEW_VERTEX_PROGRAM 0x4 @@ -104,9 +109,7 @@ struct st_context struct gl_fragment_program *fragment_program; } cb; - struct { - GLuint frontbuffer_dirty:1; - } flags; + GLuint frontbuffer_status; /**< one of FRONT_STATUS_ */ char vendor[100]; char renderer[100]; diff --git a/src/mesa/state_tracker/st_framebuffer.c b/src/mesa/state_tracker/st_framebuffer.c index ea09d9234c..47466c97d8 100644 --- a/src/mesa/state_tracker/st_framebuffer.c +++ b/src/mesa/state_tracker/st_framebuffer.c @@ -188,6 +188,7 @@ st_notify_swapbuffers(struct st_framebuffer *stfb) st_flush( ctx->st, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_SWAPBUFFERS, NULL ); + ctx->st->frontbuffer_status = FRONT_STATUS_COPY_OF_BACK; } } -- cgit v1.2.3 From 7a8ad75c89b45520043693a37d9f0c7e0b24fc5d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 10 Apr 2008 10:30:46 -0600 Subject: gallium: fix readback of z16 values --- src/mesa/state_tracker/st_cb_readpixels.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 29a06ffaa9..1fef55b844 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -269,7 +269,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, } else if (strb->surface->format == PIPE_FORMAT_Z16_UNORM) { for (i = 0; i < height; i++) { - GLshort ztemp[MAX_WIDTH], j; + GLushort ztemp[MAX_WIDTH], j; GLfloat zfloat[MAX_WIDTH]; const double scale = 1.0 / 0xffff; pipe_get_tile_raw(pipe, strb->surface, x, y, width, 1, ztemp, 0); -- cgit v1.2.3 From c95dcc49629b72b95826e87e067d7a48753605fb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 8 Apr 2008 17:59:28 +0100 Subject: remove usage of vertex_header --- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 6 - src/gallium/auxiliary/draw/draw_vbuf.c | 20 - src/gallium/auxiliary/draw/draw_vertex.c | 5 - src/gallium/auxiliary/draw/draw_vertex.h | 2 - src/gallium/auxiliary/draw/draw_vf.c | 56 +- src/gallium/drivers/softpipe/Makefile | 1 + src/gallium/drivers/softpipe/SConscript | 1 + src/gallium/drivers/softpipe/sp_prim_setup.c | 1189 +-------------------- src/gallium/drivers/softpipe/sp_prim_setup.h | 6 + src/gallium/drivers/softpipe/sp_prim_vbuf.c | 223 ++-- src/gallium/drivers/softpipe/sp_setup.c | 1249 +++++++++++++++++++++++ src/gallium/drivers/softpipe/sp_setup.h | 53 + src/gallium/drivers/softpipe/sp_state_derived.c | 13 +- 13 files changed, 1480 insertions(+), 1344 deletions(-) create mode 100644 src/gallium/drivers/softpipe/sp_setup.c create mode 100644 src/gallium/drivers/softpipe/sp_setup.h diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 0806076956..e4e144ef19 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -212,12 +212,6 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, case EMIT_1F: feme->fetch[i].emit = emit_R32_FLOAT; break; - case EMIT_HEADER: - feme->fetch[i].ptr = (const ubyte *)&zero; - feme->fetch[i].pitch = 0; - feme->fetch[i].fetch = fetch_R32_FLOAT; - feme->fetch[i].emit = emit_R32_FLOAT; - break; case EMIT_1F_PSIZE: feme->fetch[i].ptr = (const ubyte *)&feme->draw->rasterizer->point_size; feme->fetch[i].pitch = 0; diff --git a/src/gallium/auxiliary/draw/draw_vbuf.c b/src/gallium/auxiliary/draw/draw_vbuf.c index f83b441e93..e3216ff711 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_vbuf.c @@ -126,13 +126,6 @@ dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) case EMIT_OMIT: debug_printf("EMIT_OMIT:"); break; - case EMIT_ALL: - assert(i == 0); - assert(j == 0); - debug_printf("EMIT_ALL:\t"); - for(k = 0; k < vinfo->size*4; ++k) - debug_printf("%02x ", *data++); - break; case EMIT_1F: debug_printf("EMIT_1F:\t"); debug_printf("%f ", *(float *)data); data += sizeof(float); @@ -217,19 +210,6 @@ emit_vertex( struct vbuf_stage *vbuf, case EMIT_OMIT: /* no-op */ break; - case EMIT_ALL: - /* just copy the whole vertex as-is to the vbuf */ - assert(i == 0); - assert(j == 0); - memcpy(vbuf->vertex_ptr, vertex, vinfo->size * 4); - vbuf->vertex_ptr += vinfo->size; - count += vinfo->size; - break; - case EMIT_HEADER: - memcpy(vbuf->vertex_ptr, vertex, sizeof(*vertex)); - *vbuf->vertex_ptr += sizeof(*vertex) / 4; - count += sizeof(*vertex) / 4; - break; case EMIT_1F: *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); count++; diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c index 970adc95e7..168036eee8 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.c +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -52,9 +52,6 @@ draw_compute_vertex_size(struct vertex_info *vinfo) switch (vinfo->emit[i]) { case EMIT_OMIT: break; - case EMIT_HEADER: - vinfo->size += sizeof(struct vertex_header) / 4; - break; case EMIT_4UB: /* fall-through */ case EMIT_1F_PSIZE: @@ -71,8 +68,6 @@ draw_compute_vertex_size(struct vertex_info *vinfo) case EMIT_4F: vinfo->size += 4; break; - case EMIT_ALL: - /* fall-through */ default: assert(0); } diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index abd2017ed3..65818463ca 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -47,8 +47,6 @@ */ enum attrib_emit { EMIT_OMIT, /**< don't emit the attribute */ - EMIT_ALL, /**< emit whole post-xform vertex, w/ header */ - EMIT_HEADER, /**< emit vertex_header struct (XXX temp?) */ EMIT_1F, EMIT_1F_PSIZE, /**< insert constant point size */ EMIT_2F, diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c index 7bb34ace7a..9d0154c50d 100644 --- a/src/gallium/auxiliary/draw/draw_vf.c +++ b/src/gallium/auxiliary/draw/draw_vf.c @@ -205,7 +205,7 @@ void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, const struct vertex_info *vinfo, float point_size ) { - unsigned i, j, k; + unsigned i, j; struct draw_vf_attr *a = vf->attr; struct draw_vf_attr_map attrs[PIPE_MAX_SHADER_INPUTS]; unsigned count = 0; /* for debug/sanity */ @@ -217,60 +217,6 @@ void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, case EMIT_OMIT: /* no-op */ break; - case EMIT_ALL: { - /* just copy the whole vertex as-is to the vbuf */ - unsigned s = vinfo->size; - assert(i == 0); - assert(j == 0); - /* copy the vertex header */ - /* XXX: we actually don't copy the header, just pad it */ - attrs[nr_attrs].attrib = 0; - attrs[nr_attrs].format = DRAW_EMIT_PAD; - attrs[nr_attrs].offset = offsetof(struct vertex_header, data); - s -= offsetof(struct vertex_header, data)/4; - count += offsetof(struct vertex_header, data)/4; - nr_attrs++; - /* copy the vertex data */ - for(k = 0; k < (s & ~0x3); k += 4) { - attrs[nr_attrs].attrib = k/4; - attrs[nr_attrs].format = DRAW_EMIT_4F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 4; - } - /* tail */ - /* XXX: actually, this shouldn't be needed */ - attrs[nr_attrs].attrib = k/4; - attrs[nr_attrs].offset = 0; - switch(s & 0x3) { - case 0: - break; - case 1: - attrs[nr_attrs].format = DRAW_EMIT_1F; - nr_attrs++; - count += 1; - break; - case 2: - attrs[nr_attrs].format = DRAW_EMIT_2F; - nr_attrs++; - count += 2; - break; - case 3: - attrs[nr_attrs].format = DRAW_EMIT_3F; - nr_attrs++; - count += 3; - break; - } - break; - } - case EMIT_HEADER: - /* XXX emit new DRAW_EMIT_HEADER attribute??? */ - attrs[nr_attrs].attrib = 0; - attrs[nr_attrs].format = DRAW_EMIT_PAD; - attrs[nr_attrs].offset = offsetof(struct vertex_header, data); - count += offsetof(struct vertex_header, data)/4; - nr_attrs++; - break; case EMIT_1F: attrs[nr_attrs].attrib = j; attrs[nr_attrs].format = DRAW_EMIT_1F; diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 19dfd8c1a7..120bdfd9dd 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -27,6 +27,7 @@ C_SOURCES = \ sp_quad_stencil.c \ sp_quad_stipple.c \ sp_screen.c \ + sp_setup.c \ sp_state_blend.c \ sp_state_clip.c \ sp_state_derived.c \ diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index 06931fa8d8..c1f7daa8ab 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -14,6 +14,7 @@ softpipe = env.ConvenienceLibrary( 'sp_flush.c', 'sp_prim_setup.c', 'sp_prim_vbuf.c', + 'sp_setup.c', 'sp_quad_alpha_test.c', 'sp_quad_blend.c', 'sp_quad.c', diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index c7eb12b3bb..6fe463b74c 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -26,7 +26,9 @@ **************************************************************************/ /** - * \brief Primitive rasterization/rendering (points, lines, triangles) + * \brief A draw stage that drives our triangle setup routines from + * within the draw pipeline. One of two ways to drive setup, the + * other being in sp_prim_vbuf.c. * * \author Keith Whitwell * \author Brian Paul @@ -34,29 +36,12 @@ #include "sp_context.h" -#include "sp_headers.h" -#include "sp_quad.h" +#include "sp_setup.h" #include "sp_state.h" #include "sp_prim_setup.h" #include "draw/draw_private.h" #include "draw/draw_vertex.h" #include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" - -#define DEBUG_VERTS 0 -#define DEBUG_FRAGS 0 - -/** - * Triangle edge info - */ -struct edge { - float dx; /**< X(v1) - X(v0), used only during setup */ - float dy; /**< Y(v1) - Y(v0), used only during setup */ - float dxdy; /**< dx/dy */ - float sx, sy; /**< first sample point coord */ - int lines; /**< number of lines on this edge */ -}; - /** * Triangle setup info (derived from draw_stage). @@ -65,39 +50,7 @@ struct edge { struct setup_stage { struct draw_stage stage; /**< This must be first (base class) */ - struct softpipe_context *softpipe; - - /* Vertices are just an array of floats making up each attribute in - * turn. Currently fixed at 4 floats, but should change in time. - * Codegen will help cope with this. - */ - const struct vertex_header *vmax; - const struct vertex_header *vmid; - const struct vertex_header *vmin; - const struct vertex_header *vprovoke; - - struct edge ebot; - struct edge etop; - struct edge emaj; - - float oneoverarea; - - struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; - struct tgsi_interp_coef posCoef; /* For Z, W */ - struct quad_header quad; - - struct { - int left[2]; /**< [0] = row0, [1] = row1 */ - int right[2]; - int y; - unsigned y_flags; - unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ - } span; - -#if DEBUG_FRAGS - uint numFragsEmitted; /**< per primitive */ - uint numFragsWritten; /**< per primitive */ -#endif + struct setup_context *setup; }; @@ -111,1112 +64,50 @@ static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) } -/** - * Clip setup->quad against the scissor/surface bounds. - */ -static INLINE void -quad_clip(struct setup_stage *setup) -{ - const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - - if (setup->quad.x0 >= maxx || - setup->quad.y0 >= maxy || - setup->quad.x0 + 1 < minx || - setup->quad.y0 + 1 < miny) { - /* totally clipped */ - setup->quad.mask = 0x0; - return; - } - if (setup->quad.x0 < minx) - setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (setup->quad.y0 < miny) - setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (setup->quad.x0 == maxx - 1) - setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (setup->quad.y0 == maxy - 1) - setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); -} - - -/** - * Emit a quad (pass to next stage) with clipping. - */ -static INLINE void -clip_emit_quad(struct setup_stage *setup) -{ - quad_clip(setup); - if (setup->quad.mask) { - struct softpipe_context *sp = setup->softpipe; - sp->quad.first->run(sp->quad.first, &setup->quad); - } -} - - -/** - * Emit a quad (pass to next stage). No clipping is done. - */ -static INLINE void -emit_quad( struct setup_stage *setup, int x, int y, unsigned mask ) -{ - struct softpipe_context *sp = setup->softpipe; - setup->quad.x0 = x; - setup->quad.y0 = y; - setup->quad.mask = mask; -#if DEBUG_FRAGS - if (mask & 1) setup->numFragsEmitted++; - if (mask & 2) setup->numFragsEmitted++; - if (mask & 4) setup->numFragsEmitted++; - if (mask & 8) setup->numFragsEmitted++; -#endif - sp->quad.first->run(sp->quad.first, &setup->quad); -#if DEBUG_FRAGS - mask = setup->quad.mask; - if (mask & 1) setup->numFragsWritten++; - if (mask & 2) setup->numFragsWritten++; - if (mask & 4) setup->numFragsWritten++; - if (mask & 8) setup->numFragsWritten++; -#endif -} - - -/** - * Given an X or Y coordinate, return the block/quad coordinate that it - * belongs to. - */ -static INLINE int block( int x ) -{ - return x & ~1; -} - - -/** - * Compute mask which indicates which pixels in the 2x2 quad are actually inside - * the triangle's bounds. - * - * this is pretty nasty... may need to rework flush_spans again to - * fix it, if possible. - */ -static unsigned calculate_mask( struct setup_stage *setup, int x ) -{ - unsigned mask = 0x0; - - if (x >= setup->span.left[0] && x < setup->span.right[0]) - mask |= MASK_TOP_LEFT; - - if (x >= setup->span.left[1] && x < setup->span.right[1]) - mask |= MASK_BOTTOM_LEFT; - - if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0]) - mask |= MASK_TOP_RIGHT; - - if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1]) - mask |= MASK_BOTTOM_RIGHT; - - return mask; -} - - -/** - * Render a horizontal span of quads - */ -static void flush_spans( struct setup_stage *setup ) -{ - int minleft, maxright; - int x; - - switch (setup->span.y_flags) { - case 0x3: - /* both odd and even lines written (both quad rows) */ - minleft = MIN2(setup->span.left[0], setup->span.left[1]); - maxright = MAX2(setup->span.right[0], setup->span.right[1]); - break; - - case 0x1: - /* only even line written (quad top row) */ - minleft = setup->span.left[0]; - maxright = setup->span.right[0]; - break; - - case 0x2: - /* only odd line written (quad bottom row) */ - minleft = setup->span.left[1]; - maxright = setup->span.right[1]; - break; - - default: - return; - } - - /* XXX this loop could be moved into the above switch cases and - * calculate_mask() could be simplified a bit... - */ - for (x = block(minleft); x <= block(maxright); x += 2) { - emit_quad( setup, x, setup->span.y, - calculate_mask( setup, x ) ); - } - - setup->span.y = 0; - setup->span.y_flags = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; -} - -#if DEBUG_VERTS -static void print_vertex(const struct setup_stage *setup, - const struct vertex_header *v) -{ - int i; - debug_printf("Vertex: (%p)\n", v); - for (i = 0; i < setup->quad.nr_attrs; i++) { - debug_printf(" %d: %f %f %f %f\n", i, - v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); - } -} -#endif - -static boolean setup_sort_vertices( struct setup_stage *setup, - const struct prim_header *prim ) -{ - const struct vertex_header *v0 = prim->v[0]; - const struct vertex_header *v1 = prim->v[1]; - const struct vertex_header *v2 = prim->v[2]; - -#if DEBUG_VERTS - debug_printf("Triangle:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); - print_vertex(setup, v2); -#endif - - setup->vprovoke = v2; - - /* determine bottom to top order of vertices */ - { - float y0 = v0->data[0][1]; - float y1 = v1->data[0][1]; - float y2 = v2->data[0][1]; - if (y0 <= y1) { - if (y1 <= y2) { - /* y0<=y1<=y2 */ - setup->vmin = v0; - setup->vmid = v1; - setup->vmax = v2; - } - else if (y2 <= y0) { - /* y2<=y0<=y1 */ - setup->vmin = v2; - setup->vmid = v0; - setup->vmax = v1; - } - else { - /* y0<=y2<=y1 */ - setup->vmin = v0; - setup->vmid = v2; - setup->vmax = v1; - } - } - else { - if (y0 <= y2) { - /* y1<=y0<=y2 */ - setup->vmin = v1; - setup->vmid = v0; - setup->vmax = v2; - } - else if (y2 <= y1) { - /* y2<=y1<=y0 */ - setup->vmin = v2; - setup->vmid = v1; - setup->vmax = v0; - } - else { - /* y1<=y2<=y0 */ - setup->vmin = v1; - setup->vmid = v2; - setup->vmax = v0; - } - } - } - - setup->ebot.dx = setup->vmid->data[0][0] - setup->vmin->data[0][0]; - setup->ebot.dy = setup->vmid->data[0][1] - setup->vmin->data[0][1]; - setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0]; - setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1]; - setup->etop.dx = setup->vmax->data[0][0] - setup->vmid->data[0][0]; - setup->etop.dy = setup->vmax->data[0][1] - setup->vmid->data[0][1]; - - /* - * Compute triangle's area. Use 1/area to compute partial - * derivatives of attributes later. - * - * The area will be the same as prim->det, but the sign may be - * different depending on how the vertices get sorted above. - * - * To determine whether the primitive is front or back facing we - * use the prim->det value because its sign is correct. - */ - { - const float area = (setup->emaj.dx * setup->ebot.dy - - setup->ebot.dx * setup->emaj.dy); - - setup->oneoverarea = 1.0f / area; - /* - debug_printf("%s one-over-area %f area %f det %f\n", - __FUNCTION__, setup->oneoverarea, area, prim->det ); - */ - } - - /* We need to know if this is a front or back-facing triangle for: - * - the GLSL gl_FrontFacing fragment attribute (bool) - * - two-sided stencil test - */ - setup->quad.facing = (prim->det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); - - return TRUE; -} - - -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - * The value value comes from vertex->data[slot][i]. - * The result will be put into setup->coef[slot].a0[i]. - * \param slot which attribute slot - * \param i which component of the slot (0..3) - */ -static void const_coeff( struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - assert(i <= 3); - - coef->dadx[i] = 0; - coef->dady[i] = 0; - /* need provoking vertex info! - */ - coef->a0[i] = setup->vprovoke->data[vertSlot][i]; -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. - */ -static void tri_linear_coeff( struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - float botda = setup->vmid->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; - float majda = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - assert(i <= 3); - - coef->dadx[i] = dadx; - coef->dady[i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - coef->a0[i] = (setup->vmin->data[vertSlot][i] - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); - - /* - debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup->coef[slot].a0[i], - setup->coef[slot].dadx[i], - setup->coef[slot].dady[i]); - */ -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void tri_persp_coeff( struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - /* premultiply by 1/w (v->data[0][3] is always W): - */ - float mina = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3]; - float mida = setup->vmid->data[vertSlot][i] * setup->vmid->data[0][3]; - float maxa = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3]; - float botda = mida - mina; - float majda = maxa - mina; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - /* - debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, - setup->vmin->data[vertSlot][i], - setup->vmid->data[vertSlot][i], - setup->vmax->data[vertSlot][i] - ); - */ - assert(i <= 3); - - coef->dadx[i] = dadx; - coef->dady[i] = dady; - coef->a0[i] = (mina - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); -} - - -/** - * Special coefficient setup for gl_FragCoord. - * X and Y are trivial, though Y has to be inverted for OpenGL. - * Z and W are copied from posCoef which should have already been computed. - * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. - */ static void -setup_fragcoord_coeff(struct setup_stage *setup, uint slot) -{ - /*X*/ - setup->coef[slot].a0[0] = 0; - setup->coef[slot].dadx[0] = 1.0; - setup->coef[slot].dady[0] = 0.0; - /*Y*/ - if (setup->softpipe->rasterizer->origin_lower_left) { - /* y=0=bottom */ - const int winHeight = setup->softpipe->framebuffer.height; - setup->coef[slot].a0[1] = (float) (winHeight - 1); - setup->coef[slot].dady[1] = -1.0; - } - else { - /* y=0=top */ - setup->coef[slot].a0[1] = 0.0; - setup->coef[slot].dady[1] = 1.0; - } - setup->coef[slot].dadx[1] = 0.0; - /*Z*/ - setup->coef[slot].a0[2] = setup->posCoef.a0[2]; - setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; - setup->coef[slot].dady[2] = setup->posCoef.dady[2]; - /*W*/ - setup->coef[slot].a0[3] = setup->posCoef.a0[3]; - setup->coef[slot].dadx[3] = setup->posCoef.dadx[3]; - setup->coef[slot].dady[3] = setup->posCoef.dady[3]; -} - - - -/** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. - */ -static void setup_tri_coefficients( struct setup_stage *setup ) -{ - struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; - const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); - uint fragSlot; - - /* z and w are done by linear interpolation: - */ - tri_linear_coeff(setup, &setup->posCoef, 0, 2); - tri_linear_coeff(setup, &setup->posCoef, 0, 3); - - /* setup interpolation for all the remaining attributes: - */ - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; - uint j; - - switch (vinfo->interp_mode[fragSlot]) { - case INTERP_CONSTANT: - for (j = 0; j < NUM_CHANNELS; j++) - const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { - /* FOG.y = front/back facing XXX fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; - setup->coef[fragSlot].dadx[1] = 0.0; - setup->coef[fragSlot].dady[1] = 0.0; - } - } -} - - - -static void setup_tri_edges( struct setup_stage *setup ) -{ - float vmin_x = setup->vmin->data[0][0] + 0.5f; - float vmid_x = setup->vmid->data[0][0] + 0.5f; - - float vmin_y = setup->vmin->data[0][1] - 0.5f; - float vmid_y = setup->vmid->data[0][1] - 0.5f; - float vmax_y = setup->vmax->data[0][1] - 0.5f; - - setup->emaj.sy = CEILF(vmin_y); - setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy); - setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; - setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; - - setup->etop.sy = CEILF(vmid_y); - setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy); - setup->etop.dxdy = setup->etop.dx / setup->etop.dy; - setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; - - setup->ebot.sy = CEILF(vmin_y); - setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy); - setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; - setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; -} - - -/** - * Render the upper or lower half of a triangle. - * Scissoring/cliprect is applied here too. - */ -static void subtriangle( struct setup_stage *setup, - struct edge *eleft, - struct edge *eright, - unsigned lines ) -{ - const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - int y, start_y, finish_y; - int sy = (int)eleft->sy; - - assert((int)eleft->sy == (int) eright->sy); - - /* clip top/bottom */ - start_y = sy; - finish_y = sy + lines; - - if (start_y < miny) - start_y = miny; - - if (finish_y > maxy) - finish_y = maxy; - - start_y -= sy; - finish_y -= sy; - - /* - debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); - */ - - for (y = start_y; y < finish_y; y++) { - - /* avoid accumulating adds as floats don't have the precision to - * accurately iterate large triangle edges that way. luckily we - * can just multiply these days. - * - * this is all drowned out by the attribute interpolation anyway. - */ - int left = (int)(eleft->sx + y * eleft->dxdy); - int right = (int)(eright->sx + y * eright->dxdy); - - /* clip left/right */ - if (left < minx) - left = minx; - if (right > maxx) - right = maxx; - - if (left < right) { - int _y = sy + y; - if (block(_y) != setup->span.y) { - flush_spans(setup); - setup->span.y = block(_y); - } - - setup->span.left[_y&1] = left; - setup->span.right[_y&1] = right; - setup->span.y_flags |= 1<<(_y&1); - } - } - - - /* save the values so that emaj can be restarted: - */ - eleft->sx += lines * eleft->dxdy; - eright->sx += lines * eright->dxdy; - eleft->sy += lines; - eright->sy += lines; -} - - -/** - * Do setup for triangle rasterization, then render the triangle. - */ -static void setup_tri( struct draw_stage *stage, - struct prim_header *prim ) +do_tri(struct draw_stage *stage, struct prim_header *prim) { struct setup_stage *setup = setup_stage( stage ); - /* - debug_printf("%s\n", __FUNCTION__ ); - */ - -#if DEBUG_FRAGS - setup->numFragsEmitted = 0; - setup->numFragsWritten = 0; -#endif - - setup_sort_vertices( setup, prim ); - setup_tri_coefficients( setup ); - setup_tri_edges( setup ); - - setup->quad.prim = PRIM_TRI; - - setup->span.y = 0; - setup->span.y_flags = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; - /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ - - /* init_constant_attribs( setup ); */ - - if (setup->oneoverarea < 0.0) { - /* emaj on left: - */ - subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); - subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); - } - else { - /* emaj on right: - */ - subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); - subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); - } - - flush_spans( setup ); - -#if DEBUG_FRAGS - printf("Tri: %u frags emitted, %u written\n", - setup->numFragsEmitted, - setup->numFragsWritten); -#endif -} - - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a line. - */ -static void -line_linear_coeff(struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - const float da = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - coef->dadx[i] = dadx; - coef->dady[i] = dady; - coef->a0[i] = (setup->vmin->data[vertSlot][i] - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a line. - */ -static void -line_persp_coeff(struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - /* XXX double-check/verify this arithmetic */ - const float a0 = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3]; - const float a1 = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3]; - const float da = a1 - a0; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - coef->dadx[i] = dadx; - coef->dady[i] = dady; - coef->a0[i] = (setup->vmin->data[vertSlot][i] - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); -} - - -/** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmax are initialized. - */ -static INLINE void -setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) -{ - struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; - const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); - uint fragSlot; - - /* use setup->vmin, vmax to point to vertices */ - setup->vprovoke = prim->v[1]; - setup->vmin = prim->v[0]; - setup->vmax = prim->v[1]; - - setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0]; - setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1]; - /* NOTE: this is not really 1/area */ - setup->oneoverarea = 1.0f / (setup->emaj.dx * setup->emaj.dx + - setup->emaj.dy * setup->emaj.dy); - - /* z and w are done by linear interpolation: - */ - line_linear_coeff(setup, &setup->posCoef, 0, 2); - line_linear_coeff(setup, &setup->posCoef, 0, 3); - - /* setup interpolation for all the remaining attributes: - */ - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; - uint j; - - switch (vinfo->interp_mode[fragSlot]) { - case INTERP_CONSTANT: - for (j = 0; j < NUM_CHANNELS; j++) - const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { - /* FOG.y = front/back facing XXX fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; - setup->coef[fragSlot].dadx[1] = 0.0; - setup->coef[fragSlot].dady[1] = 0.0; - } - } + setup_tri( setup->setup, + prim->det, + prim->v[0]->data, + prim->v[1]->data, + prim->v[2]->data ); } - -/** - * Plot a pixel in a line segment. - */ -static INLINE void -plot(struct setup_stage *setup, int x, int y) -{ - const int iy = y & 1; - const int ix = x & 1; - const int quadX = x - ix; - const int quadY = y - iy; - const int mask = (1 << ix) << (2 * iy); - - if (quadX != setup->quad.x0 || - quadY != setup->quad.y0) - { - /* flush prev quad, start new quad */ - - if (setup->quad.x0 != -1) - clip_emit_quad(setup); - - setup->quad.x0 = quadX; - setup->quad.y0 = quadY; - setup->quad.mask = 0x0; - } - - setup->quad.mask |= mask; -} - - -/** - * Do setup for line rasterization, then render the line. - * Single-pixel width, no stipple, etc. We rely on the 'draw' module - * to handle stippling and wide lines. - */ static void -setup_line(struct draw_stage *stage, struct prim_header *prim) +do_line(struct draw_stage *stage, struct prim_header *prim) { - const struct vertex_header *v0 = prim->v[0]; - const struct vertex_header *v1 = prim->v[1]; struct setup_stage *setup = setup_stage( stage ); - int x0 = (int) v0->data[0][0]; - int x1 = (int) v1->data[0][0]; - int y0 = (int) v0->data[0][1]; - int y1 = (int) v1->data[0][1]; - int dx = x1 - x0; - int dy = y1 - y0; - int xstep, ystep; - if (dx == 0 && dy == 0) - return; - - setup_line_coefficients(setup, prim); - - if (dx < 0) { - dx = -dx; /* make positive */ - xstep = -1; - } - else { - xstep = 1; - } - - if (dy < 0) { - dy = -dy; /* make positive */ - ystep = -1; - } - else { - ystep = 1; - } - - assert(dx >= 0); - assert(dy >= 0); - - setup->quad.x0 = setup->quad.y0 = -1; - setup->quad.mask = 0x0; - setup->quad.prim = PRIM_LINE; - /* XXX temporary: set coverage to 1.0 so the line appears - * if AA mode happens to be enabled. - */ - setup->quad.coverage[0] = - setup->quad.coverage[1] = - setup->quad.coverage[2] = - setup->quad.coverage[3] = 1.0; - - if (dx > dy) { - /*** X-major line ***/ - int i; - const int errorInc = dy + dy; - int error = errorInc - dx; - const int errorDec = error - dx; - - for (i = 0; i < dx; i++) { - plot(setup, x0, y0); - - x0 += xstep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - y0 += ystep; - } - } - } - else { - /*** Y-major line ***/ - int i; - const int errorInc = dx + dx; - int error = errorInc - dy; - const int errorDec = error - dy; - - for (i = 0; i < dy; i++) { - plot(setup, x0, y0); - - y0 += ystep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - x0 += xstep; - } - } - } - - /* draw final quad */ - if (setup->quad.mask) { - clip_emit_quad(setup); - } + setup_line( setup->setup, + prim->v[0]->data, + prim->v[1]->data ); } - static void -point_persp_coeff(struct setup_stage *setup, - const struct vertex_header *vert, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - assert(i <= 3); - coef->dadx[i] = 0.0F; - coef->dady[i] = 0.0F; - coef->a0[i] = vert->data[vertSlot][i] * vert->data[0][3]; -} - - -/** - * Do setup for point rasterization, then render the point. - * Round or square points... - * XXX could optimize a lot for 1-pixel points. - */ -static void -setup_point(struct draw_stage *stage, struct prim_header *prim) +do_point(struct draw_stage *stage, struct prim_header *prim) { struct setup_stage *setup = setup_stage( stage ); - struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; - const struct vertex_header *v0 = prim->v[0]; - const int sizeAttr = setup->softpipe->psize_slot; - const float size - = sizeAttr > 0 ? v0->data[sizeAttr][0] - : setup->softpipe->rasterizer->point_size; - const float halfSize = 0.5F * size; - const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth; - const float x = v0->data[0][0]; /* Note: data[0] is always position */ - const float y = v0->data[0][1]; - const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); - uint fragSlot; - - /* For points, all interpolants are constant-valued. - * However, for point sprites, we'll need to setup texcoords appropriately. - * XXX: which coefficients are the texcoords??? - * We may do point sprites as textured quads... - * - * KW: We don't know which coefficients are texcoords - ultimately - * the choice of what interpolation mode to use for each attribute - * should be determined by the fragment program, using - * per-attribute declaration statements that include interpolation - * mode as a parameter. So either the fragment program will have - * to be adjusted for pointsprite vs normal point behaviour, or - * otherwise a special interpolation mode will have to be defined - * which matches the required behaviour for point sprites. But - - * the latter is not a feature of normal hardware, and as such - * probably should be ruled out on that basis. - */ - setup->vprovoke = prim->v[0]; - - /* setup Z, W */ - const_coeff(setup, &setup->posCoef, 0, 2); - const_coeff(setup, &setup->posCoef, 0, 3); - - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; - uint j; - - switch (vinfo->interp_mode[fragSlot]) { - case INTERP_CONSTANT: - /* fall-through */ - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - point_persp_coeff(setup, setup->vprovoke, - &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { - /* FOG.y = front/back facing XXX fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; - setup->coef[fragSlot].dadx[1] = 0.0; - setup->coef[fragSlot].dady[1] = 0.0; - } - } - - setup->quad.prim = PRIM_POINT; - - if (halfSize <= 0.5 && !round) { - /* special case for 1-pixel points */ - const int ix = ((int) x) & 1; - const int iy = ((int) y) & 1; - setup->quad.x0 = (int) x - ix; - setup->quad.y0 = (int) y - iy; - setup->quad.mask = (1 << ix) << (2 * iy); - clip_emit_quad(setup); - } - else { - if (round) { - /* rounded points */ - const int ixmin = block((int) (x - halfSize)); - const int ixmax = block((int) (x + halfSize)); - const int iymin = block((int) (y - halfSize)); - const int iymax = block((int) (y + halfSize)); - const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ - const float rmax = halfSize + 0.7071F; - const float rmin2 = MAX2(0.0F, rmin * rmin); - const float rmax2 = rmax * rmax; - const float cscale = 1.0F / (rmax2 - rmin2); - int ix, iy; - - for (iy = iymin; iy <= iymax; iy += 2) { - for (ix = ixmin; ix <= ixmax; ix += 2) { - float dx, dy, dist2, cover; - - setup->quad.mask = 0x0; - - dx = (ix + 0.5f) - x; - dy = (iy + 0.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_TOP_LEFT; - } - - dx = (ix + 1.5f) - x; - dy = (iy + 0.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_TOP_RIGHT; - } - dx = (ix + 0.5f) - x; - dy = (iy + 1.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_BOTTOM_LEFT; - } - - dx = (ix + 1.5f) - x; - dy = (iy + 1.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_BOTTOM_RIGHT; - } - - if (setup->quad.mask) { - setup->quad.x0 = ix; - setup->quad.y0 = iy; - clip_emit_quad(setup); - } - } - } - } - else { - /* square points */ - const int xmin = (int) (x + 0.75 - halfSize); - const int ymin = (int) (y + 0.25 - halfSize); - const int xmax = xmin + (int) size; - const int ymax = ymin + (int) size; - /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ - const int ixmin = block(xmin); - const int ixmax = block(xmax - 1); - const int iymin = block(ymin); - const int iymax = block(ymax - 1); - int ix, iy; - - /* - debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); - */ - for (iy = iymin; iy <= iymax; iy += 2) { - uint rowMask = 0xf; - if (iy < ymin) { - /* above the top edge */ - rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - } - if (iy + 1 >= ymax) { - /* below the bottom edge */ - rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); - } - - for (ix = ixmin; ix <= ixmax; ix += 2) { - uint mask = rowMask; - - if (ix < xmin) { - /* fragment is past left edge of point, turn off left bits */ - mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - } - if (ix + 1 >= xmax) { - /* past the right edge */ - mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - } - - setup->quad.mask = mask; - setup->quad.x0 = ix; - setup->quad.y0 = iy; - clip_emit_quad(setup); - } - } - } - } + setup_point( setup->setup, + prim->v[0]->data ); } + static void setup_begin( struct draw_stage *stage ) { struct setup_stage *setup = setup_stage(stage); - struct softpipe_context *sp = setup->softpipe; - const struct sp_fragment_shader *fs = setup->softpipe->fs; - uint i; - - if (sp->dirty) { - softpipe_update_derived(sp); - } - /* Mark surfaces as defined now */ - for (i = 0; i < sp->framebuffer.num_cbufs; i++){ - if (sp->framebuffer.cbufs[i]) { - sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; - } - } - if (sp->framebuffer.zsbuf) { - sp->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; - } + setup_prepare( setup->setup ); - setup->quad.nr_attrs = fs->info.num_inputs; - - sp->quad.first->begin(sp->quad.first); - - stage->point = setup_point; - stage->line = setup_line; - stage->tri = setup_tri; + stage->point = do_point; + stage->line = do_line; + stage->tri = do_tri; } @@ -1269,19 +160,29 @@ static void render_destroy( struct draw_stage *stage ) */ struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe ) { - struct setup_stage *setup = CALLOC_STRUCT(setup_stage); + struct setup_stage *sstage = CALLOC_STRUCT(setup_stage); - setup->softpipe = softpipe; - setup->stage.draw = softpipe->draw; - setup->stage.point = setup_first_point; - setup->stage.line = setup_first_line; - setup->stage.tri = setup_first_tri; - setup->stage.flush = setup_flush; - setup->stage.reset_stipple_counter = reset_stipple_counter; - setup->stage.destroy = render_destroy; + sstage->setup = setup_create_context(softpipe); + sstage->stage.draw = softpipe->draw; + sstage->stage.point = setup_first_point; + sstage->stage.line = setup_first_line; + sstage->stage.tri = setup_first_tri; + sstage->stage.flush = setup_flush; + sstage->stage.reset_stipple_counter = reset_stipple_counter; + sstage->stage.destroy = render_destroy; - setup->quad.coef = setup->coef; - setup->quad.posCoef = &setup->posCoef; + return (struct draw_stage *)sstage; +} - return &setup->stage; +struct setup_context * +sp_draw_setup_context( struct draw_stage *stage ) +{ + struct setup_stage *ssetup = setup_stage(stage); + return ssetup->setup; +} + +void +sp_draw_flush( struct draw_stage *stage ) +{ + stage->flush( stage, 0 ); } diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.h b/src/gallium/drivers/softpipe/sp_prim_setup.h index f3e8a79dd9..49bdd98ed8 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.h +++ b/src/gallium/drivers/softpipe/sp_prim_setup.h @@ -69,6 +69,12 @@ typedef void (*vbuf_draw_func)( struct pipe_context *pipe, extern struct draw_stage * sp_draw_render_stage( struct softpipe_context *softpipe ); +extern struct setup_context * +sp_draw_setup_context( struct draw_stage * ); + +extern void +sp_draw_flush( struct draw_stage * ); + extern struct draw_stage * sp_draw_vbuf_stage( struct draw_context *draw_context, diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index d940718ed2..025a0113bf 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,7 +22,7 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ /** @@ -30,7 +30,7 @@ * softpipe rendering pipeline. * Probably not desired in general, but useful for testing/debuggin. * Enabled/Disabled with SP_VBUF env var. - * + * * Authors * Brian Paul */ @@ -39,14 +39,16 @@ #include "sp_context.h" #include "sp_state.h" #include "sp_prim_vbuf.h" +#include "sp_prim_setup.h" +#include "sp_setup.h" #include "draw/draw_context.h" -#include "draw/draw_private.h" #include "draw/draw_vbuf.h" #define SP_MAX_VBUF_INDEXES 1024 #define SP_MAX_VBUF_SIZE 4096 +typedef const float (*cptrf4)[4]; /** * Subclass of vbuf_render. @@ -69,7 +71,6 @@ softpipe_vbuf_render(struct vbuf_render *vbr) } - static const struct vertex_info * sp_vbuf_get_vertex_info(struct vbuf_render *vbr) { @@ -91,7 +92,7 @@ sp_vbuf_allocate_vertices(struct vbuf_render *vbr, static void -sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, +sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, unsigned vertex_size, unsigned vertices_used) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); @@ -114,7 +115,7 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) else { return FALSE; } - + } @@ -122,22 +123,19 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) * Recalculate prim's determinant. * XXX is this needed? */ -static void -calc_det(struct prim_header *header) +static float +calc_det( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) { - /* Window coords: */ - const float *v0 = header->v[0]->data[0]; - const float *v1 = header->v[1]->data[0]; - const float *v2 = header->v[2]->data[0]; - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0] - v2[0]; - const float ey = v0[1] - v2[1]; - const float fx = v1[0] - v2[0]; - const float fy = v1[1] - v2[1]; - + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + /* det = cross(e,f).z */ - header->det = ex * fy - ey * fx; + return ex * fy - ey * fx; } @@ -147,48 +145,51 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; struct draw_stage *setup = softpipe->setup; - struct prim_header prim; unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); unsigned i, j; void *vertex_buffer = cvbr->vertex_buffer; - - prim.det = 0; - prim.reset_line_stipple = 0; - prim.edgeflags = 0; - prim.pad = 0; + cptrf4 v[3]; + struct setup_context *setup_ctx = sp_draw_setup_context(setup); switch (cvbr->prim) { case PIPE_PRIM_TRIANGLES: for (i = 0; i < nr_indices; i += 3) { - for (j = 0; j < 3; j++) - prim.v[j] = (struct vertex_header *)((char *)vertex_buffer + - indices[i+j] * vertex_size); - - calc_det(&prim); - setup->tri( setup, &prim ); + for (j = 0; j < 3; j++) + v[j] = (cptrf4)((char *)vertex_buffer + + indices[i+j] * vertex_size); + + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2]); } break; case PIPE_PRIM_LINES: for (i = 0; i < nr_indices; i += 2) { - for (j = 0; j < 2; j++) - prim.v[j] = (struct vertex_header *)((char *)vertex_buffer + - indices[i+j] * vertex_size); - - setup->line( setup, &prim ); + for (j = 0; j < 2; j++) + v[j] = (cptrf4)((char *)vertex_buffer + + indices[i+j] * vertex_size); + + setup_line( setup_ctx, + v[0], + v[1] ); } break; case PIPE_PRIM_POINTS: for (i = 0; i < nr_indices; i++) { - prim.v[0] = (struct vertex_header *)((char *)vertex_buffer + - indices[i] * vertex_size); - setup->point( setup, &prim ); + v[0] = (cptrf4)((char *)vertex_buffer + + indices[i] * vertex_size); + + setup_point( setup_ctx, + v[0] ); } break; } - setup->flush( setup, 0 ); + sp_draw_flush( setup ); } @@ -202,111 +203,131 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; struct draw_stage *setup = softpipe->setup; - struct prim_header prim; const void *vertex_buffer = cvbr->vertex_buffer; const unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); unsigned i; - - prim.det = 0; - prim.reset_line_stipple = 0; - prim.edgeflags = 0; - prim.pad = 0; + struct setup_context *setup_ctx = sp_draw_setup_context(setup); + cptrf4 v[3]; #define VERTEX(I) \ - (struct vertex_header *) ((char *) vertex_buffer + (I) * vertex_size) + (cptrf4) ((char *) vertex_buffer + (I) * vertex_size) switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - prim.v[0] = VERTEX(i); - setup->point( setup, &prim ); + v[0] = VERTEX(i); + setup_point( setup_ctx, v[0] ); } break; case PIPE_PRIM_LINES: assert(nr % 2 == 0); for (i = 0; i < nr; i += 2) { - prim.v[0] = VERTEX(i); - prim.v[1] = VERTEX(i + 1); - setup->line( setup, &prim ); + v[0] = VERTEX(i); + v[1] = VERTEX(i + 1); + setup_line( setup_ctx, v[0], v[1] ); } break; case PIPE_PRIM_LINE_STRIP: for (i = 1; i < nr; i++) { - prim.v[0] = VERTEX(i - 1); - prim.v[1] = VERTEX(i); - setup->line( setup, &prim ); + v[0] = VERTEX(i - 1); + v[1] = VERTEX(i); + setup_line( setup_ctx, v[0], v[1] ); } break; case PIPE_PRIM_TRIANGLES: assert(nr % 3 == 0); for (i = 0; i < nr; i += 3) { - prim.v[0] = VERTEX(i + 0); - prim.v[1] = VERTEX(i + 1); - prim.v[2] = VERTEX(i + 2); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(i + 0); + v[1] = VERTEX(i + 1); + v[2] = VERTEX(i + 2); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_TRIANGLE_STRIP: assert(nr >= 3); for (i = 2; i < nr; i++) { - prim.v[0] = VERTEX(i - 2); - prim.v[1] = VERTEX(i - 1); - prim.v[2] = VERTEX(i); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(i - 2); + v[1] = VERTEX(i - 1); + v[2] = VERTEX(i); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_TRIANGLE_FAN: assert(nr >= 3); for (i = 2; i < nr; i++) { - prim.v[0] = VERTEX(0); - prim.v[1] = VERTEX(i - 1); - prim.v[2] = VERTEX(i); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(0); + v[1] = VERTEX(i - 1); + v[2] = VERTEX(i); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_QUADS: assert(nr % 4 == 0); for (i = 0; i < nr; i += 4) { - prim.v[0] = VERTEX(i + 0); - prim.v[1] = VERTEX(i + 1); - prim.v[2] = VERTEX(i + 2); - calc_det(&prim); - setup->tri( setup, &prim ); - - prim.v[0] = VERTEX(i + 0); - prim.v[1] = VERTEX(i + 2); - prim.v[2] = VERTEX(i + 3); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(i + 0); + v[1] = VERTEX(i + 1); + v[2] = VERTEX(i + 2); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); + + v[0] = VERTEX(i + 0); + v[1] = VERTEX(i + 2); + v[2] = VERTEX(i + 3); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_QUAD_STRIP: assert(nr >= 4); for (i = 2; i < nr; i += 2) { - prim.v[0] = VERTEX(i - 2); - prim.v[1] = VERTEX(i); - prim.v[2] = VERTEX(i + 1); - calc_det(&prim); - setup->tri( setup, &prim ); - - prim.v[0] = VERTEX(i - 2); - prim.v[1] = VERTEX(i + 1); - prim.v[2] = VERTEX(i - 1); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(i - 2); + v[1] = VERTEX(i); + v[2] = VERTEX(i + 1); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); + + v[0] = VERTEX(i - 2); + v[1] = VERTEX(i + 1); + v[2] = VERTEX(i - 1); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_POLYGON: /* draw as tri fan */ for (i = 2; i < nr; i++) { - prim.v[0] = VERTEX(0); - prim.v[1] = VERTEX(i - 1); - prim.v[2] = VERTEX(i); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(0); + v[1] = VERTEX(i - 1); + v[2] = VERTEX(i); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; default: diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c new file mode 100644 index 0000000000..48617a66ed --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -0,0 +1,1249 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Primitive rasterization/rendering (points, lines, triangles) + * + * \author Keith Whitwell + * \author Brian Paul + */ + +#include "sp_setup.h" + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_state.h" +#include "sp_prim_setup.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + +#define DEBUG_VERTS 0 +#define DEBUG_FRAGS 0 + +/** + * Triangle edge info + */ +struct edge { + float dx; /**< X(v1) - X(v0), used only during setup */ + float dy; /**< Y(v1) - Y(v0), used only during setup */ + float dxdy; /**< dx/dy */ + float sx, sy; /**< first sample point coord */ + int lines; /**< number of lines on this edge */ +}; + + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_context { + struct softpipe_context *softpipe; + + /* Vertices are just an array of floats making up each attribute in + * turn. Currently fixed at 4 floats, but should change in time. + * Codegen will help cope with this. + */ + const float (*vmax)[4]; + const float (*vmid)[4]; + const float (*vmin)[4]; + const float (*vprovoke)[4]; + + struct edge ebot; + struct edge etop; + struct edge emaj; + + float oneoverarea; + + struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; + struct tgsi_interp_coef posCoef; /* For Z, W */ + struct quad_header quad; + + struct { + int left[2]; /**< [0] = row0, [1] = row1 */ + int right[2]; + int y; + unsigned y_flags; + unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ + } span; + +#if DEBUG_FRAGS + uint numFragsEmitted; /**< per primitive */ + uint numFragsWritten; /**< per primitive */ +#endif +}; + + + + + +/** + * Recalculate prim's determinant. + * XXX is this needed? + */ +static INLINE float +calc_det( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + + /* det = cross(e,f).z */ + return ex * fy - ey * fx; +} + +/** + * Clip setup->quad against the scissor/surface bounds. + */ +static INLINE void +quad_clip(struct setup_context *setup) +{ + const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + + if (setup->quad.x0 >= maxx || + setup->quad.y0 >= maxy || + setup->quad.x0 + 1 < minx || + setup->quad.y0 + 1 < miny) { + /* totally clipped */ + setup->quad.mask = 0x0; + return; + } + if (setup->quad.x0 < minx) + setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (setup->quad.y0 < miny) + setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (setup->quad.x0 == maxx - 1) + setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (setup->quad.y0 == maxy - 1) + setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); +} + + +/** + * Emit a quad (pass to next stage) with clipping. + */ +static INLINE void +clip_emit_quad(struct setup_context *setup) +{ + quad_clip(setup); + if (setup->quad.mask) { + struct softpipe_context *sp = setup->softpipe; + sp->quad.first->run(sp->quad.first, &setup->quad); + } +} + + +/** + * Emit a quad (pass to next stage). No clipping is done. + */ +static INLINE void +emit_quad( struct setup_context *setup, int x, int y, unsigned mask ) +{ + struct softpipe_context *sp = setup->softpipe; + setup->quad.x0 = x; + setup->quad.y0 = y; + setup->quad.mask = mask; +#if DEBUG_FRAGS + if (mask & 1) setup->numFragsEmitted++; + if (mask & 2) setup->numFragsEmitted++; + if (mask & 4) setup->numFragsEmitted++; + if (mask & 8) setup->numFragsEmitted++; +#endif + sp->quad.first->run(sp->quad.first, &setup->quad); +#if DEBUG_FRAGS + mask = setup->quad.mask; + if (mask & 1) setup->numFragsWritten++; + if (mask & 2) setup->numFragsWritten++; + if (mask & 4) setup->numFragsWritten++; + if (mask & 8) setup->numFragsWritten++; +#endif +} + + +/** + * Given an X or Y coordinate, return the block/quad coordinate that it + * belongs to. + */ +static INLINE int block( int x ) +{ + return x & ~1; +} + + +/** + * Compute mask which indicates which pixels in the 2x2 quad are actually inside + * the triangle's bounds. + * + * this is pretty nasty... may need to rework flush_spans again to + * fix it, if possible. + */ +static unsigned calculate_mask( struct setup_context *setup, int x ) +{ + unsigned mask = 0x0; + + if (x >= setup->span.left[0] && x < setup->span.right[0]) + mask |= MASK_TOP_LEFT; + + if (x >= setup->span.left[1] && x < setup->span.right[1]) + mask |= MASK_BOTTOM_LEFT; + + if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0]) + mask |= MASK_TOP_RIGHT; + + if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1]) + mask |= MASK_BOTTOM_RIGHT; + + return mask; +} + + +/** + * Render a horizontal span of quads + */ +static void flush_spans( struct setup_context *setup ) +{ + int minleft, maxright; + int x; + + switch (setup->span.y_flags) { + case 0x3: + /* both odd and even lines written (both quad rows) */ + minleft = MIN2(setup->span.left[0], setup->span.left[1]); + maxright = MAX2(setup->span.right[0], setup->span.right[1]); + break; + + case 0x1: + /* only even line written (quad top row) */ + minleft = setup->span.left[0]; + maxright = setup->span.right[0]; + break; + + case 0x2: + /* only odd line written (quad bottom row) */ + minleft = setup->span.left[1]; + maxright = setup->span.right[1]; + break; + + default: + return; + } + + /* XXX this loop could be moved into the above switch cases and + * calculate_mask() could be simplified a bit... + */ + for (x = block(minleft); x <= block(maxright); x += 2) { + emit_quad( setup, x, setup->span.y, + calculate_mask( setup, x ) ); + } + + setup->span.y = 0; + setup->span.y_flags = 0; + setup->span.right[0] = 0; + setup->span.right[1] = 0; +} + +#if DEBUG_VERTS +static void print_vertex(const struct setup_context *setup, + const float (*v)[4]) +{ + int i; + debug_printf("Vertex: (%p)\n", v); + for (i = 0; i < setup->quad.nr_attrs; i++) { + debug_printf(" %d: %f %f %f %f\n", i, + v[i][0], v[i][1], v[i][2], v[i][3]); + } +} +#endif + +static boolean setup_sort_vertices( struct setup_context *setup, + float det, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ +#if DEBUG_VERTS + debug_printf("Triangle:\n"); + print_vertex(setup, v0); + print_vertex(setup, v1); + print_vertex(setup, v2); +#endif + + setup->vprovoke = v2; + + /* determine bottom to top order of vertices */ + { + float y0 = v0[0][1]; + float y1 = v1[0][1]; + float y2 = v2[0][1]; + if (y0 <= y1) { + if (y1 <= y2) { + /* y0<=y1<=y2 */ + setup->vmin = v0; + setup->vmid = v1; + setup->vmax = v2; + } + else if (y2 <= y0) { + /* y2<=y0<=y1 */ + setup->vmin = v2; + setup->vmid = v0; + setup->vmax = v1; + } + else { + /* y0<=y2<=y1 */ + setup->vmin = v0; + setup->vmid = v2; + setup->vmax = v1; + } + } + else { + if (y0 <= y2) { + /* y1<=y0<=y2 */ + setup->vmin = v1; + setup->vmid = v0; + setup->vmax = v2; + } + else if (y2 <= y1) { + /* y2<=y1<=y0 */ + setup->vmin = v2; + setup->vmid = v1; + setup->vmax = v0; + } + else { + /* y1<=y2<=y0 */ + setup->vmin = v1; + setup->vmid = v2; + setup->vmax = v0; + } + } + } + + setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0]; + setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1]; + setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; + setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; + setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0]; + setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1]; + + /* + * Compute triangle's area. Use 1/area to compute partial + * derivatives of attributes later. + * + * The area will be the same as prim->det, but the sign may be + * different depending on how the vertices get sorted above. + * + * To determine whether the primitive is front or back facing we + * use the prim->det value because its sign is correct. + */ + { + const float area = (setup->emaj.dx * setup->ebot.dy - + setup->ebot.dx * setup->emaj.dy); + + setup->oneoverarea = 1.0f / area; + /* + debug_printf("%s one-over-area %f area %f det %f\n", + __FUNCTION__, setup->oneoverarea, area, det ); + */ + } + + /* We need to know if this is a front or back-facing triangle for: + * - the GLSL gl_FrontFacing fragment attribute (bool) + * - two-sided stencil test + */ + setup->quad.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); + + return TRUE; +} + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + * The value value comes from vertex[slot][i]. + * The result will be put into setup->coef[slot].a0[i]. + * \param slot which attribute slot + * \param i which component of the slot (0..3) + */ +static void const_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + assert(i <= 3); + + coef->dadx[i] = 0; + coef->dady[i] = 0; + + /* need provoking vertex info! + */ + coef->a0[i] = setup->vprovoke[vertSlot][i]; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static void tri_linear_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; + float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; + float a = setup->ebot.dy * majda - botda * setup->emaj.dy; + float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + + assert(i <= 3); + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); + + /* + debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", + slot, "xyzw"[i], + setup->coef[slot].a0[i], + setup->coef[slot].dadx[i], + setup->coef[slot].dady[i]); + */ +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void tri_persp_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + /* premultiply by 1/w (v[0][3] is always W): + */ + float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3]; + float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3]; + float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3]; + float botda = mida - mina; + float majda = maxa - mina; + float a = setup->ebot.dy * majda - botda * setup->emaj.dy; + float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + + /* + debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, + setup->vmin[vertSlot][i], + setup->vmid[vertSlot][i], + setup->vmax[vertSlot][i] + ); + */ + assert(i <= 3); + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (mina - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial, though Y has to be inverted for OpenGL. + * Z and W are copied from posCoef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_fragcoord_coeff(struct setup_context *setup, uint slot) +{ + /*X*/ + setup->coef[slot].a0[0] = 0; + setup->coef[slot].dadx[0] = 1.0; + setup->coef[slot].dady[0] = 0.0; + /*Y*/ + if (setup->softpipe->rasterizer->origin_lower_left) { + /* y=0=bottom */ + const int winHeight = setup->softpipe->framebuffer.height; + setup->coef[slot].a0[1] = (float) (winHeight - 1); + setup->coef[slot].dady[1] = -1.0; + } + else { + /* y=0=top */ + setup->coef[slot].a0[1] = 0.0; + setup->coef[slot].dady[1] = 1.0; + } + setup->coef[slot].dadx[1] = 0.0; + /*Z*/ + setup->coef[slot].a0[2] = setup->posCoef.a0[2]; + setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; + setup->coef[slot].dady[2] = setup->posCoef.dady[2]; + /*W*/ + setup->coef[slot].a0[3] = setup->posCoef.a0[3]; + setup->coef[slot].dadx[3] = setup->posCoef.dadx[3]; + setup->coef[slot].dady[3] = setup->posCoef.dady[3]; +} + + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. + */ +static void setup_tri_coefficients( struct setup_context *setup ) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* z and w are done by linear interpolation: + */ + tri_linear_coeff(setup, &setup->posCoef, 0, 2); + tri_linear_coeff(setup, &setup->posCoef, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } +} + + + +static void setup_tri_edges( struct setup_context *setup ) +{ + float vmin_x = setup->vmin[0][0] + 0.5f; + float vmid_x = setup->vmid[0][0] + 0.5f; + + float vmin_y = setup->vmin[0][1] - 0.5f; + float vmid_y = setup->vmid[0][1] - 0.5f; + float vmax_y = setup->vmax[0][1] - 0.5f; + + setup->emaj.sy = CEILF(vmin_y); + setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy); + setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; + setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; + + setup->etop.sy = CEILF(vmid_y); + setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy); + setup->etop.dxdy = setup->etop.dx / setup->etop.dy; + setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; + + setup->ebot.sy = CEILF(vmin_y); + setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy); + setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; + setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; +} + + +/** + * Render the upper or lower half of a triangle. + * Scissoring/cliprect is applied here too. + */ +static void subtriangle( struct setup_context *setup, + struct edge *eleft, + struct edge *eright, + unsigned lines ) +{ + const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + int y, start_y, finish_y; + int sy = (int)eleft->sy; + + assert((int)eleft->sy == (int) eright->sy); + + /* clip top/bottom */ + start_y = sy; + finish_y = sy + lines; + + if (start_y < miny) + start_y = miny; + + if (finish_y > maxy) + finish_y = maxy; + + start_y -= sy; + finish_y -= sy; + + /* + debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); + */ + + for (y = start_y; y < finish_y; y++) { + + /* avoid accumulating adds as floats don't have the precision to + * accurately iterate large triangle edges that way. luckily we + * can just multiply these days. + * + * this is all drowned out by the attribute interpolation anyway. + */ + int left = (int)(eleft->sx + y * eleft->dxdy); + int right = (int)(eright->sx + y * eright->dxdy); + + /* clip left/right */ + if (left < minx) + left = minx; + if (right > maxx) + right = maxx; + + if (left < right) { + int _y = sy + y; + if (block(_y) != setup->span.y) { + flush_spans(setup); + setup->span.y = block(_y); + } + + setup->span.left[_y&1] = left; + setup->span.right[_y&1] = right; + setup->span.y_flags |= 1<<(_y&1); + } + } + + + /* save the values so that emaj can be restarted: + */ + eleft->sx += lines * eleft->dxdy; + eright->sx += lines * eright->dxdy; + eleft->sy += lines; + eright->sy += lines; +} + + +/** + * Do setup for triangle rasterization, then render the triangle. + */ +void setup_tri( struct setup_context *setup, + float det, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + /* + debug_printf("%s\n", __FUNCTION__ ); + */ + +#if DEBUG_FRAGS + setup->numFragsEmitted = 0; + setup->numFragsWritten = 0; +#endif + + setup_sort_vertices( setup, calc_det(v0, v1, v2), + v0, v1, v2 ); + setup_tri_coefficients( setup ); + setup_tri_edges( setup ); + + setup->quad.prim = PRIM_TRI; + + setup->span.y = 0; + setup->span.y_flags = 0; + setup->span.right[0] = 0; + setup->span.right[1] = 0; + /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ + + /* init_constant_attribs( setup ); */ + + if (setup->oneoverarea < 0.0) { + /* emaj on left: + */ + subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); + subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); + } + else { + /* emaj on right: + */ + subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); + subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); + } + + flush_spans( setup ); + +#if DEBUG_FRAGS + printf("Tri: %u frags emitted, %u written\n", + setup->numFragsEmitted, + setup->numFragsWritten); +#endif +} + + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a line. + */ +static void +line_linear_coeff(struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; + const float dadx = da * setup->emaj.dx * setup->oneoverarea; + const float dady = da * setup->emaj.dy * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a line. + */ +static void +line_persp_coeff(struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + /* XXX double-check/verify this arithmetic */ + const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; + const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3]; + const float da = a1 - a0; + const float dadx = da * setup->emaj.dx * setup->oneoverarea; + const float dady = da * setup->emaj.dy * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmax are initialized. + */ +static INLINE void +setup_line_coefficients(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* use setup->vmin, vmax to point to vertices */ + setup->vprovoke = v1; + setup->vmin = v0; + setup->vmax = v1; + + setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; + setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; + /* NOTE: this is not really 1/area */ + setup->oneoverarea = 1.0f / (setup->emaj.dx * setup->emaj.dx + + setup->emaj.dy * setup->emaj.dy); + + /* z and w are done by linear interpolation: + */ + line_linear_coeff(setup, &setup->posCoef, 0, 2); + line_linear_coeff(setup, &setup->posCoef, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } +} + + +/** + * Plot a pixel in a line segment. + */ +static INLINE void +plot(struct setup_context *setup, int x, int y) +{ + const int iy = y & 1; + const int ix = x & 1; + const int quadX = x - ix; + const int quadY = y - iy; + const int mask = (1 << ix) << (2 * iy); + + if (quadX != setup->quad.x0 || + quadY != setup->quad.y0) + { + /* flush prev quad, start new quad */ + + if (setup->quad.x0 != -1) + clip_emit_quad(setup); + + setup->quad.x0 = quadX; + setup->quad.y0 = quadY; + setup->quad.mask = 0x0; + } + + setup->quad.mask |= mask; +} + + +/** + * Do setup for line rasterization, then render the line. + * Single-pixel width, no stipple, etc. We rely on the 'draw' module + * to handle stippling and wide lines. + */ +void +setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + int x0 = (int) v0[0][0]; + int x1 = (int) v1[0][0]; + int y0 = (int) v0[0][1]; + int y1 = (int) v1[0][1]; + int dx = x1 - x0; + int dy = y1 - y0; + int xstep, ystep; + + if (dx == 0 && dy == 0) + return; + + setup_line_coefficients(setup, v0, v1); + + if (dx < 0) { + dx = -dx; /* make positive */ + xstep = -1; + } + else { + xstep = 1; + } + + if (dy < 0) { + dy = -dy; /* make positive */ + ystep = -1; + } + else { + ystep = 1; + } + + assert(dx >= 0); + assert(dy >= 0); + + setup->quad.x0 = setup->quad.y0 = -1; + setup->quad.mask = 0x0; + setup->quad.prim = PRIM_LINE; + /* XXX temporary: set coverage to 1.0 so the line appears + * if AA mode happens to be enabled. + */ + setup->quad.coverage[0] = + setup->quad.coverage[1] = + setup->quad.coverage[2] = + setup->quad.coverage[3] = 1.0; + + if (dx > dy) { + /*** X-major line ***/ + int i; + const int errorInc = dy + dy; + int error = errorInc - dx; + const int errorDec = error - dx; + + for (i = 0; i < dx; i++) { + plot(setup, x0, y0); + + x0 += xstep; + if (error < 0) { + error += errorInc; + } + else { + error += errorDec; + y0 += ystep; + } + } + } + else { + /*** Y-major line ***/ + int i; + const int errorInc = dx + dx; + int error = errorInc - dy; + const int errorDec = error - dy; + + for (i = 0; i < dy; i++) { + plot(setup, x0, y0); + + y0 += ystep; + if (error < 0) { + error += errorInc; + } + else { + error += errorDec; + x0 += xstep; + } + } + } + + /* draw final quad */ + if (setup->quad.mask) { + clip_emit_quad(setup); + } +} + + +static void +point_persp_coeff(struct setup_context *setup, + const float (*vert)[4], + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + assert(i <= 3); + coef->dadx[i] = 0.0F; + coef->dady[i] = 0.0F; + coef->a0[i] = vert[vertSlot][i] * vert[0][3]; +} + + +/** + * Do setup for point rasterization, then render the point. + * Round or square points... + * XXX could optimize a lot for 1-pixel points. + */ +void +setup_point( struct setup_context *setup, + const float (*v0)[4] ) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const int sizeAttr = setup->softpipe->psize_slot; + const float size + = sizeAttr > 0 ? v0[sizeAttr][0] + : setup->softpipe->rasterizer->point_size; + const float halfSize = 0.5F * size; + const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth; + const float x = v0[0][0]; /* Note: data[0] is always position */ + const float y = v0[0][1]; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* For points, all interpolants are constant-valued. + * However, for point sprites, we'll need to setup texcoords appropriately. + * XXX: which coefficients are the texcoords??? + * We may do point sprites as textured quads... + * + * KW: We don't know which coefficients are texcoords - ultimately + * the choice of what interpolation mode to use for each attribute + * should be determined by the fragment program, using + * per-attribute declaration statements that include interpolation + * mode as a parameter. So either the fragment program will have + * to be adjusted for pointsprite vs normal point behaviour, or + * otherwise a special interpolation mode will have to be defined + * which matches the required behaviour for point sprites. But - + * the latter is not a feature of normal hardware, and as such + * probably should be ruled out on that basis. + */ + setup->vprovoke = v0; + + /* setup Z, W */ + const_coeff(setup, &setup->posCoef, 0, 2); + const_coeff(setup, &setup->posCoef, 0, 3); + + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + /* fall-through */ + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + point_persp_coeff(setup, setup->vprovoke, + &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } + + setup->quad.prim = PRIM_POINT; + + if (halfSize <= 0.5 && !round) { + /* special case for 1-pixel points */ + const int ix = ((int) x) & 1; + const int iy = ((int) y) & 1; + setup->quad.x0 = (int) x - ix; + setup->quad.y0 = (int) y - iy; + setup->quad.mask = (1 << ix) << (2 * iy); + clip_emit_quad(setup); + } + else { + if (round) { + /* rounded points */ + const int ixmin = block((int) (x - halfSize)); + const int ixmax = block((int) (x + halfSize)); + const int iymin = block((int) (y - halfSize)); + const int iymax = block((int) (y + halfSize)); + const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ + const float rmax = halfSize + 0.7071F; + const float rmin2 = MAX2(0.0F, rmin * rmin); + const float rmax2 = rmax * rmax; + const float cscale = 1.0F / (rmax2 - rmin2); + int ix, iy; + + for (iy = iymin; iy <= iymax; iy += 2) { + for (ix = ixmin; ix <= ixmax; ix += 2) { + float dx, dy, dist2, cover; + + setup->quad.mask = 0x0; + + dx = (ix + 0.5f) - x; + dy = (iy + 0.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_TOP_LEFT; + } + + dx = (ix + 1.5f) - x; + dy = (iy + 0.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_TOP_RIGHT; + } + + dx = (ix + 0.5f) - x; + dy = (iy + 1.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_BOTTOM_LEFT; + } + + dx = (ix + 1.5f) - x; + dy = (iy + 1.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_BOTTOM_RIGHT; + } + + if (setup->quad.mask) { + setup->quad.x0 = ix; + setup->quad.y0 = iy; + clip_emit_quad(setup); + } + } + } + } + else { + /* square points */ + const int xmin = (int) (x + 0.75 - halfSize); + const int ymin = (int) (y + 0.25 - halfSize); + const int xmax = xmin + (int) size; + const int ymax = ymin + (int) size; + /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ + const int ixmin = block(xmin); + const int ixmax = block(xmax - 1); + const int iymin = block(ymin); + const int iymax = block(ymax - 1); + int ix, iy; + + /* + debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); + */ + for (iy = iymin; iy <= iymax; iy += 2) { + uint rowMask = 0xf; + if (iy < ymin) { + /* above the top edge */ + rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + } + if (iy + 1 >= ymax) { + /* below the bottom edge */ + rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); + } + + for (ix = ixmin; ix <= ixmax; ix += 2) { + uint mask = rowMask; + + if (ix < xmin) { + /* fragment is past left edge of point, turn off left bits */ + mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + } + if (ix + 1 >= xmax) { + /* past the right edge */ + mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + } + + setup->quad.mask = mask; + setup->quad.x0 = ix; + setup->quad.y0 = iy; + clip_emit_quad(setup); + } + } + } + } +} + +void setup_prepare( struct setup_context *setup ) +{ + struct softpipe_context *sp = setup->softpipe; + unsigned i; + + if (sp->dirty) { + softpipe_update_derived(sp); + } + + /* Mark surfaces as defined now */ + for (i = 0; i < sp->framebuffer.num_cbufs; i++){ + if (sp->framebuffer.cbufs[i]) { + sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + } + } + if (sp->framebuffer.zsbuf) { + sp->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + } + + { + const struct sp_fragment_shader *fs = setup->softpipe->fs; + setup->quad.nr_attrs = fs->info.num_inputs; + sp->quad.first->begin(sp->quad.first); + } +} + + + +void setup_destroy_context( struct setup_context *setup ) +{ + FREE( setup ); +} + + +/** + * Create a new primitive setup/render stage. + */ +struct setup_context *setup_create_context( struct softpipe_context *softpipe ) +{ + struct setup_context *setup = CALLOC_STRUCT(setup_context); + + setup->softpipe = softpipe; + + setup->quad.coef = setup->coef; + setup->quad.posCoef = &setup->posCoef; + + return setup; +} diff --git a/src/gallium/drivers/softpipe/sp_setup.h b/src/gallium/drivers/softpipe/sp_setup.h new file mode 100644 index 0000000000..3133fc2a3d --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_setup.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#ifndef SP_SETUP_H +#define SP_SETUP_H + +struct setup_context; +struct softpipe_context; + +void setup_tri( struct setup_context *setup, + float det, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ); + +void +setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]); + +void +setup_point( struct setup_context *setup, + const float (*v0)[4] ); + + +struct setup_context *setup_create_context( struct softpipe_context *softpipe ); +void setup_prepare( struct setup_context *setup ); +void setup_destroy_context( struct setup_context *setup ); + +#endif diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 14abb20eeb..f10a1fa471 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -71,25 +71,16 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) * simply emit the whole post-xform vertex as-is: */ struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; -#if 0 - vinfo_vbuf->num_attribs = 0; - /* special-case to allow memcpy of whole vertex */ - draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); - /* size in dwords or floats */ - vinfo_vbuf->size = 4 * draw_num_vs_outputs(softpipe->draw) - + sizeof(struct vertex_header) / 4; -#else - /* for pass-through mode, we need a more explicit list of attribs */ const uint num = draw_num_vs_outputs(softpipe->draw); uint i; + /* No longer any need to try and emit draw vertex_header info. + */ vinfo_vbuf->num_attribs = 0; - draw_emit_vertex_attr(vinfo_vbuf, EMIT_HEADER, INTERP_NONE, 0); for (i = 0; i < num; i++) { draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); } draw_compute_vertex_size(vinfo_vbuf); -#endif } /* -- cgit v1.2.3 From bc56e87ce180ddca63bcb9774366fc380214a2ef Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 10 Apr 2008 22:57:21 +0900 Subject: gallium: Attribute realloc leaks to the first malloc call. --- src/gallium/auxiliary/util/p_debug_mem.c | 71 +++++++++++++++++++++++++----- src/gallium/auxiliary/util/u_double_list.h | 8 ++++ 2 files changed, 68 insertions(+), 11 deletions(-) diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c index 97ec9c5b39..aba69c0294 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -73,6 +73,25 @@ static struct list_head list = { &list, &list }; static unsigned long last_no = 0; +static INLINE struct debug_memory_header * +header_from_data(void *data) +{ + if(data) + return (struct debug_memory_header *)((char *)data - sizeof(struct debug_memory_header)); + else + return NULL; +} + +static INLINE void * +data_from_header(struct debug_memory_header *hdr) +{ + if(hdr) + return (void *)((char *)hdr + sizeof(struct debug_memory_header)); + else + return NULL; +} + + void * debug_malloc(const char *file, unsigned line, const char *function, size_t size) @@ -92,7 +111,7 @@ debug_malloc(const char *file, unsigned line, const char *function, LIST_ADDTAIL(&hdr->head, &list); - return (void *)((char *)hdr + sizeof(*hdr)); + return data_from_header(hdr); } void @@ -104,7 +123,7 @@ debug_free(const char *file, unsigned line, const char *function, if(!ptr) return; - hdr = (struct debug_memory_header *)((char *)ptr - sizeof(*hdr)); + hdr = header_from_data(ptr); if(hdr->magic != DEBUG_MEMORY_MAGIC) { debug_printf("%s:%u:%s: freeing bad or corrupted memory %p\n", file, line, function, @@ -133,16 +152,46 @@ void * debug_realloc(const char *file, unsigned line, const char *function, void *old_ptr, size_t old_size, size_t new_size ) { - void *new_ptr = NULL; - - if (new_size != 0) { - new_ptr = debug_malloc( file, line, function, new_size ); - - if( new_ptr && old_ptr ) - memcpy( new_ptr, old_ptr, old_size ); + struct debug_memory_header *old_hdr, *new_hdr; + void *new_ptr; + + if(!old_ptr) + return debug_malloc( file, line, function, new_size ); + + if(!new_size) { + debug_free( file, line, function, old_ptr ); + return NULL; } + + old_hdr = header_from_data(old_ptr); + if(old_hdr->magic != DEBUG_MEMORY_MAGIC) { + debug_printf("%s:%u:%s: reallocating bad or corrupted memory %p\n", + file, line, function, + old_ptr); + debug_assert(0); + return NULL; + } + + /* alloc new */ + new_hdr = real_malloc(sizeof(*new_hdr) + new_size); + if(!new_hdr) + return NULL; + new_hdr->no = old_hdr->no; + new_hdr->file = old_hdr->file; + new_hdr->line = old_hdr->line; + new_hdr->function = old_hdr->function; + new_hdr->size = new_size; + new_hdr->magic = DEBUG_MEMORY_MAGIC; + LIST_REPLACE(&old_hdr->head, &new_hdr->head); + + /* copy data */ + new_ptr = data_from_header(new_hdr); + memcpy( new_ptr, old_ptr, old_size < new_size ? old_size : new_size ); + + /* free old */ + old_hdr->magic = 0; + real_free(old_hdr); - debug_free( file, line, function, old_ptr ); return new_ptr; } @@ -162,7 +211,7 @@ debug_memory_end(unsigned long start_no) struct debug_memory_header *hdr; void *ptr; hdr = LIST_ENTRY(struct debug_memory_header, entry, head); - ptr = (void *)((char *)hdr + sizeof(*hdr)); + ptr = data_from_header(hdr); if(start_no <= hdr->no && hdr->no < last_no || last_no < start_no && (hdr->no < last_no || start_no <= hdr->no)) debug_printf("%s:%u:%s: %u bytes at %p not freed\n", diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h index 8cb10c9820..d108d92e52 100644 --- a/src/gallium/auxiliary/util/u_double_list.h +++ b/src/gallium/auxiliary/util/u_double_list.h @@ -70,6 +70,14 @@ struct list_head (__list)->prev = (__item); \ } while(0) +#define LIST_REPLACE(__from, __to) \ + do { \ + (__to)->prev = (__from)->prev; \ + (__to)->next = (__from)->next; \ + (__from)->next->prev = (__to); \ + (__from)->prev->next = (__to); \ + } while (0) + #define LIST_DEL(__item) \ do { \ (__item)->prev->next = (__item)->next; \ -- cgit v1.2.3 From 87b0b8e7bc3b4ce4be90a271b9fced1c97df6c95 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 7 Apr 2008 16:53:41 -0600 Subject: gallium: flags param to surface_alloc_storage() is unused and deprecated --- src/gallium/include/pipe/p_winsys.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/p_winsys.h index 8569cdcf12..bf5aad7351 100644 --- a/src/gallium/include/pipe/p_winsys.h +++ b/src/gallium/include/pipe/p_winsys.h @@ -80,6 +80,7 @@ struct pipe_winsys /** * Allocate storage for a pipe_surface. * Returns 0 if succeeds. + * XXX: flags is unused and will be removed someday. */ int (*surface_alloc_storage)(struct pipe_winsys *ws, struct pipe_surface *surf, -- cgit v1.2.3 From 544c236db10a0d4889014cd82e81b1de3451b5db Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 10 Apr 2008 18:28:43 -0600 Subject: mesa: add a new test for glDrawPixels(GL_DEPTH_COMPONENT) --- progs/tests/Makefile | 1 + progs/tests/zdrawpix.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 193 insertions(+) create mode 100644 progs/tests/zdrawpix.c diff --git a/progs/tests/Makefile b/progs/tests/Makefile index 27e2eafdfc..e04b95ce08 100644 --- a/progs/tests/Makefile +++ b/progs/tests/Makefile @@ -84,6 +84,7 @@ SOURCES = \ vpwarpmesh.c \ yuvrect.c \ yuvsquare.c \ + zdrawpix.c \ zreaddraw.c PROGS = $(SOURCES:%.c=%) diff --git a/progs/tests/zdrawpix.c b/progs/tests/zdrawpix.c new file mode 100644 index 0000000000..dd222e7dd0 --- /dev/null +++ b/progs/tests/zdrawpix.c @@ -0,0 +1,192 @@ +/** + * Test glDrawPixels(GL_DEPTH_COMPONENT) + * + * We load a window-sized buffer of Z values so that Z=1 at the top and + * Z=0 at the bottom (and interpolate between). + * We draw that image into the Z buffer, then draw an ordinary cube. + * The bottom part of the cube should be "clipped" where the cube fails + * the Z test. + * + * Press 'd' to view the Z buffer as a grayscale image. + */ + +#define GL_GLEXT_PROTOTYPES +#include +#include +#include +#include +#include "../util/showbuffer.c" + + +static int Win; +static GLfloat Xrot = 50, Yrot = 40, Zpos = 6; +static GLboolean Anim = GL_FALSE; + +static int Width = 200, Height = 200; +static GLfloat *z; +static GLboolean showZ = 0; + + +static void +Idle(void) +{ + Xrot += 3.0; + Yrot += 4.0; + glutPostRedisplay(); +} + + +static void +Draw(void) +{ + glClearColor(0, 0, 0.5, 0); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + +#if 1 + glColor3f(1, 0, 0); + glWindowPos2i(0,0); + glColorMask(0,0,0,0); + glDrawPixels(Width, Height, GL_DEPTH_COMPONENT, GL_FLOAT, z); +#elif 0 + glPushMatrix(); + glTranslatef(-0.75, 0, Zpos); + glutSolidSphere(1.0, 20, 10); + glPopMatrix(); +#endif + glColorMask(1,1,1,1); + + /* draw cube */ + glPushMatrix(); + glTranslatef(0, 0, Zpos); + glRotatef(Xrot, 1, 0, 0); + glRotatef(Yrot, 0, 1, 0); + glutSolidCube(2.0); + glPopMatrix(); + +#if 0 + /* drawpixels after cube */ + glColor3f(1, 0, 0); + glWindowPos2i(0,0); + //glColorMask(0,0,0,0); + glDrawPixels(Width, Height, GL_DEPTH_COMPONENT, GL_FLOAT, z); +#endif + + if (showZ) { + ShowDepthBuffer(Width, Height, 0.0, 1.0); + } + + glutSwapBuffers(); +} + + +static void +Reshape(int width, int height) +{ + glViewport(0, 0, width, height); + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glFrustum(-1.0, 1.0, -1.0, 1.0, 5.0, 30.0); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + glTranslatef(0.0, 0.0, -15.0); + + Width = width; + Height = height; + + z = (float *) malloc(width * height * 4); + { + int i, j, k = 0; + for (i = 0; i < height; i++) { + float zval = (float) i / (height - 1); + for (j = 0; j < width; j++) { + z[k++] = zval; + } + } + } +} + + +static void +Key(unsigned char key, int x, int y) +{ + const GLfloat step = 1.0; + (void) x; + (void) y; + switch (key) { + case 'a': + Anim = !Anim; + if (Anim) + glutIdleFunc(Idle); + else + glutIdleFunc(NULL); + break; + case 'd': + showZ = !showZ; + break; + case 'z': + Zpos -= step; + break; + case 'Z': + Zpos += step; + break; + case 27: + glutDestroyWindow(Win); + exit(0); + break; + } + glutPostRedisplay(); +} + + +static void +SpecialKey(int key, int x, int y) +{ + const GLfloat step = 3.0; + (void) x; + (void) y; + switch (key) { + case GLUT_KEY_UP: + Xrot -= step; + break; + case GLUT_KEY_DOWN: + Xrot += step; + break; + case GLUT_KEY_LEFT: + Yrot -= step; + break; + case GLUT_KEY_RIGHT: + Yrot += step; + break; + } + glutPostRedisplay(); +} + + +static void +Init(void) +{ + /* setup lighting, etc */ + glEnable(GL_DEPTH_TEST); + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); +} + + +int +main(int argc, char *argv[]) +{ + glutInit(&argc, argv); + glutInitWindowPosition(0, 0); + glutInitWindowSize(400, 400); + glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH); + Win = glutCreateWindow(argv[0]); + glutReshapeFunc(Reshape); + glutKeyboardFunc(Key); + glutSpecialFunc(SpecialKey); + glutDisplayFunc(Draw); + if (Anim) + glutIdleFunc(Idle); + Init(); + glutMainLoop(); + return 0; +} -- cgit v1.2.3 From 03a3373bdf929b6b7c43600c9eebb0c4ee3ff38f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 11 Apr 2008 12:11:14 +0900 Subject: gallium: Thread condition variables. --- src/gallium/include/pipe/p_thread.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h index 4325abc951..af9150aafb 100644 --- a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/include/pipe/p_thread.h @@ -114,6 +114,26 @@ typedef pthread_mutex_t _glthread_Mutex; #define _glthread_UNLOCK_MUTEX(name) \ (void) pthread_mutex_unlock(&(name)) +typedef pthread_cond_t _glthread_Cond; + +#define _glthread_DECLARE_STATIC_COND(name) \ + static _glthread_Cond name = PTHREAD_COND_INITIALIZER + +#define _glthread_INIT_COND(cond) \ + pthread_cond_init(&(cond), NULL) + +#define _glthread_DESTROY_COND(name) \ + pthread_cond_destroy(&(name)) + +#define _glthread_COND_WAIT(cond, mutex) \ + pthread_cond_wait(&(cond), &(mutex)) + +#define _glthread_COND_SIGNAL(cond) \ + pthread_cond_signal(&(cond)) + +#define _glthread_COND_BROADCAST(cond) \ + pthread_cond_broadcast(&(cond)) + #endif /* PTHREADS */ @@ -273,6 +293,20 @@ typedef unsigned _glthread_Mutex; #define _glthread_UNLOCK_MUTEX(name) (void) name +typedef unsigned _glthread_Cond; + +#define _glthread_DECLARE_STATIC_COND(name) static _glthread_Cond name = 0 + +#define _glthread_INIT_COND(name) (void) name + +#define _glthread_DESTROY_COND(name) (void) name + +#define _glthread_COND_WAIT(name, mutex) (void) name + +#define _glthread_COND_SIGNAL(name) (void) name + +#define _glthread_COND_BROADCAST(name) (void) name + #endif /* THREADS */ -- cgit v1.2.3 From 2483062f10e93fbbc5e3f629627b9e8addcc3f84 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 7 Apr 2008 11:44:34 +0200 Subject: tgsi: Dump semantics before interpolator. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 7d292778ad..ad9770e474 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -746,17 +746,19 @@ dump_declaration_short( } } - if( decl->Declaration.Interpolate ) { + if (decl->Declaration.Semantic) { TXT( ", " ); - ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT ); + ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT ); + if (decl->Semantic.SemanticIndex != 0) { + CHR( '[' ); + UID( decl->Semantic.SemanticIndex ); + CHR( ']' ); + } } - if( decl->Declaration.Semantic ) { + if (decl->Declaration.Interpolate) { TXT( ", " ); - ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT ); - CHR( '[' ); - UID( decl->Semantic.SemanticIndex ); - CHR( ']' ); + ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT ); } } -- cgit v1.2.3 From f41cc50ef063e9a56dc2d0fc6564a0c0f4344b83 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 7 Apr 2008 11:46:57 +0200 Subject: tgsi: Dump processor type and version as a single token. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index ad9770e474..1d6e8b155d 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -1402,7 +1402,6 @@ dump_gen( CHR( '\n' ); ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT ); - CHR( ' ' ); UID( parse.FullVersion.Version.MajorVersion ); CHR( '.' ); UID( parse.FullVersion.Version.MinorVersion ); -- cgit v1.2.3 From e1c35601529d396dcaa75523fe16114c25221a40 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Apr 2008 09:01:04 -0600 Subject: add missing glViewport calls cherry-picked from master --- progs/util/showbuffer.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/progs/util/showbuffer.c b/progs/util/showbuffer.c index 17f84dc62b..b0cedea217 100644 --- a/progs/util/showbuffer.c +++ b/progs/util/showbuffer.c @@ -71,6 +71,8 @@ ShowDepthBuffer( GLsizei winWidth, GLsizei winHeight, glPushMatrix(); glLoadIdentity(); + glViewport(0, 0, winWidth, winHeight); + glDisable(GL_STENCIL_TEST); glDisable(GL_DEPTH_TEST); glRasterPos2f(0, 0); @@ -120,6 +122,8 @@ ShowAlphaBuffer( GLsizei winWidth, GLsizei winHeight ) glPushMatrix(); glLoadIdentity(); + glViewport(0, 0, winWidth, winHeight); + glDisable(GL_STENCIL_TEST); glDisable(GL_DEPTH_TEST); glRasterPos2f(0, 0); @@ -170,6 +174,8 @@ ShowStencilBuffer( GLsizei winWidth, GLsizei winHeight, glPushMatrix(); glLoadIdentity(); + glViewport(0, 0, winWidth, winHeight); + glDisable(GL_STENCIL_TEST); glDisable(GL_DEPTH_TEST); glRasterPos2f(0, 0); -- cgit v1.2.3 From 6d0f9d5a01b1104716d9562e0775e6a144dc9f89 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Apr 2008 09:21:37 -0600 Subject: mesa: new Z-compositing test (glDrawPixels(GL_DEPTH) + stencil) --- progs/tests/Makefile | 1 + progs/tests/zcomp.c | 223 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 224 insertions(+) create mode 100644 progs/tests/zcomp.c diff --git a/progs/tests/Makefile b/progs/tests/Makefile index e04b95ce08..75d1eca08e 100644 --- a/progs/tests/Makefile +++ b/progs/tests/Makefile @@ -84,6 +84,7 @@ SOURCES = \ vpwarpmesh.c \ yuvrect.c \ yuvsquare.c \ + zcomp.c \ zdrawpix.c \ zreaddraw.c diff --git a/progs/tests/zcomp.c b/progs/tests/zcomp.c new file mode 100644 index 0000000000..b53079d07f --- /dev/null +++ b/progs/tests/zcomp.c @@ -0,0 +1,223 @@ +/** + * Test Z compositing with glDrawPixels(GL_DEPTH_COMPONENT) and stencil test. + */ + +#define GL_GLEXT_PROTOTYPES +#include +#include +#include +#include +#include "../util/showbuffer.c" + + +static int Win; +static GLfloat Xrot = 0, Yrot = 0, Zpos = 6; +static GLboolean Anim = GL_FALSE; + +static int Width = 400, Height = 200; +static GLfloat *Zimg; +static GLubyte *Cimg; +static GLboolean showZ = 0; + + +static void +Idle(void) +{ + Xrot += 3.0; + Yrot += 4.0; + glutPostRedisplay(); +} + + +/** + * Draw first object, save color+Z images + */ +static void +DrawFirst(void) +{ + static const GLfloat red[4] = { 1.0, 0.0, 0.0, 0.0 }; + + glMaterialfv(GL_FRONT_AND_BACK, GL_AMBIENT_AND_DIFFUSE, red); + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + glPushMatrix(); + glTranslatef(-1, 0, 0); + glRotatef(45 + Xrot, 1, 0, 0); + + glutSolidTorus(0.75, 2.0, 10, 20); + + glPopMatrix(); + + glReadPixels(0, 0, Width, Height, GL_DEPTH_COMPONENT, GL_FLOAT, Zimg); + glReadPixels(0, 0, Width, Height, GL_RGBA, GL_UNSIGNED_BYTE, Cimg); +} + + +/** + * Draw second object. + */ +static void +DrawSecond(void) +{ + static const GLfloat blue[4] = { 0.0, 0.0, 1.0, 0.0 }; + + glMaterialfv(GL_FRONT_AND_BACK, GL_AMBIENT_AND_DIFFUSE, blue); + + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); + + glPushMatrix(); + glTranslatef(+1, 0, 0); + glRotatef(-45 + Xrot, 1, 0, 0); + + glutSolidTorus(0.75, 2.0, 10, 20); + + glPopMatrix(); +} + + +/** + * Composite first/saved image over second rendering. + */ +static void +Composite(void) +{ + glWindowPos2i(0, 0); + + /* Draw Z values, set stencil where Z test passes */ + glEnable(GL_STENCIL_TEST); + glStencilFunc(GL_ALWAYS, 1, ~0); + glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); + glColorMask(0,0,0,0); + glDrawPixels(Width, Height, GL_DEPTH_COMPONENT, GL_FLOAT, Zimg); + glColorMask(1,1,1,1); + + /* Draw color where stencil==1 */ + glStencilFunc(GL_EQUAL, 1, ~0); + glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP); + glDisable(GL_DEPTH_TEST); + glDrawPixels(Width, Height, GL_RGBA, GL_UNSIGNED_BYTE, Cimg); + glEnable(GL_DEPTH_TEST); + + glDisable(GL_STENCIL_TEST); +} + + +static void +Draw(void) +{ + DrawFirst(); + DrawSecond(); + Composite(); + glutSwapBuffers(); +} + + +static void +Reshape(int width, int height) +{ + GLfloat ar = (float) width / height; + glViewport(0, 0, width, height); + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glFrustum(-ar, ar, -1.0, 1.0, 5.0, 30.0); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + glTranslatef(0.0, 0.0, -15.0); + + Width = width; + Height = height; + + if (Zimg) + free(Zimg); + if (Cimg) + free(Cimg); + Zimg = (float *) malloc(width * height * 4); + Cimg = (GLubyte *) malloc(width * height * 4); +} + + +static void +Key(unsigned char key, int x, int y) +{ + const GLfloat step = 1.0; + (void) x; + (void) y; + switch (key) { + case 'a': + Anim = !Anim; + if (Anim) + glutIdleFunc(Idle); + else + glutIdleFunc(NULL); + break; + case 'd': + showZ = !showZ; + break; + case 'z': + Zpos -= step; + break; + case 'Z': + Zpos += step; + break; + case 27: + glutDestroyWindow(Win); + exit(0); + break; + } + glutPostRedisplay(); +} + + +static void +SpecialKey(int key, int x, int y) +{ + const GLfloat step = 3.0; + (void) x; + (void) y; + switch (key) { + case GLUT_KEY_UP: + Xrot -= step; + break; + case GLUT_KEY_DOWN: + Xrot += step; + break; + case GLUT_KEY_LEFT: + Yrot -= step; + break; + case GLUT_KEY_RIGHT: + Yrot += step; + break; + } + glutPostRedisplay(); +} + + +static void +Init(void) +{ + /* setup lighting, etc */ + glEnable(GL_DEPTH_TEST); + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); +} + + +int +main(int argc, char *argv[]) +{ + glutInit(&argc, argv); + glutInitWindowPosition(0, 0); + glutInitWindowSize(Width, Height); + glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH | GLUT_STENCIL); + Win = glutCreateWindow(argv[0]); + glutReshapeFunc(Reshape); + glutKeyboardFunc(Key); + glutSpecialFunc(SpecialKey); + glutDisplayFunc(Draw); + if (Anim) + glutIdleFunc(Idle); + Init(); + glutMainLoop(); + return 0; +} -- cgit v1.2.3 From d758479b9fbff803bdac15f3f39d32ef9064db71 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Apr 2008 10:14:17 -0600 Subject: mesa: Fix glBegin-time test for invalid programs/shaders. Cherry-picked from master. --- src/mesa/vbo/vbo_exec.h | 3 +++ src/mesa/vbo/vbo_exec_api.c | 31 +++++++++++++++++++++++-------- src/mesa/vbo/vbo_exec_array.c | 15 +++++++++++++++ 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h index b7e8c9fe79..ddbcbe1181 100644 --- a/src/mesa/vbo/vbo_exec.h +++ b/src/mesa/vbo/vbo_exec.h @@ -162,4 +162,7 @@ void vbo_exec_do_EvalCoord2f( struct vbo_exec_context *exec, void vbo_exec_do_EvalCoord1f( struct vbo_exec_context *exec, GLfloat u); +extern GLboolean +vbo_validate_shaders(GLcontext *ctx); + #endif diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index b7f4d8a307..98580170e3 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -477,6 +477,23 @@ static void GLAPIENTRY vbo_exec_EvalPoint2( GLint i, GLint j ) } +/** + * Check if programs/shaders are enabled and valid at glBegin time. + */ +GLboolean +vbo_validate_shaders(GLcontext *ctx) +{ + if ((ctx->VertexProgram.Enabled && !ctx->VertexProgram._Enabled) || + (ctx->FragmentProgram.Enabled && !ctx->FragmentProgram._Enabled)) { + return GL_FALSE; + } + if (ctx->Shader.CurrentProgram && !ctx->Shader.CurrentProgram->LinkStatus) { + return GL_FALSE; + } + return GL_TRUE; +} + + /* Build a list of primitives on the fly. Keep * ctx->Driver.CurrentExecPrimitive uptodate as well. */ @@ -491,18 +508,16 @@ static void GLAPIENTRY vbo_exec_Begin( GLenum mode ) if (ctx->NewState) { _mesa_update_state( ctx ); - /* XXX also need to check if shader enabled, but invalid */ - if ((ctx->VertexProgram.Enabled && !ctx->VertexProgram._Enabled) || - (ctx->FragmentProgram.Enabled && !ctx->FragmentProgram._Enabled)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glBegin (invalid vertex/fragment program)"); - return; - } - CALL_Begin(ctx->Exec, (mode)); return; } + if (!vbo_validate_shaders(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glBegin (invalid vertex/fragment program)"); + return; + } + /* Heuristic: attempt to isolate attributes occuring outside * begin/end pairs. */ diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 77f3cf1455..a52521db64 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -245,6 +245,11 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) if (ctx->NewState) _mesa_update_state( ctx ); + if (!vbo_validate_shaders(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawArrays(bad shader)"); + return; + } + bind_arrays( ctx ); prim[0].begin = 1; @@ -280,6 +285,11 @@ vbo_exec_DrawRangeElements(GLenum mode, if (ctx->NewState) _mesa_update_state( ctx ); + if (!vbo_validate_shaders(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawRangeElements(bad shader)"); + return; + } + bind_arrays( ctx ); ib.count = count; @@ -340,6 +350,11 @@ vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid *ind if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices )) return; + if (!vbo_validate_shaders(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawElements(bad shader)"); + return; + } + if (ctx->Array.ElementArrayBufferObj->Name) { const GLvoid *map = ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, -- cgit v1.2.3 From 02250c855fbec5299a2d6118fefa0523ec73654c Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Fri, 11 Apr 2008 17:45:41 +0100 Subject: gallium: Flush render cache at the beginning of fallback_copy_texsubimage(). It may get stale bits otherwise. --- src/mesa/state_tracker/st_cb_texture.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 43b43ec14c..3db2790267 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -41,6 +41,7 @@ #include "state_tracker/st_cb_fbo.h" #include "state_tracker/st_cb_texture.h" #include "state_tracker/st_format.h" +#include "state_tracker/st_public.h" #include "state_tracker/st_texture.h" #include "state_tracker/st_gen_mipmap.h" @@ -1033,6 +1034,8 @@ fallback_copy_texsubimage(GLcontext *ctx, GLfloat *data; GLint row, yStep; + st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); + /* determine bottom-to-top vs. top-to-bottom order */ if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) { destY = height - 1 - destY; -- cgit v1.2.3 From 4d184cc33131b440f9aafbcdd2d657050411db49 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 11 Apr 2008 13:20:52 -0600 Subject: gallium: fix broken x86_call() --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 4d33950e99..aea8b28e58 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -317,7 +317,7 @@ void x86_call( struct x86_function *p, void (*label)()) void x86_call( struct x86_function *p, struct x86_reg reg) { emit_1ub(p, 0xff); - emit_modrm(p, reg, reg); + emit_modrm_noreg(p, 2, reg); } #endif -- cgit v1.2.3 From 16900515214912557dfd35e3b333e0e312b8bc61 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 11 Apr 2008 13:21:22 -0600 Subject: mesa: fix broken x86_call() --- src/mesa/x86/rtasm/x86sse.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/mesa/x86/rtasm/x86sse.c b/src/mesa/x86/rtasm/x86sse.c index 39c0e9b851..7a91364ed8 100644 --- a/src/mesa/x86/rtasm/x86sse.c +++ b/src/mesa/x86/rtasm/x86sse.c @@ -1,3 +1,4 @@ +#ifdef USE_X86_ASM #if defined(__i386__) || defined(__386__) #include "imports.h" @@ -290,7 +291,7 @@ void x86_call( struct x86_function *p, void (*label)()) void x86_call( struct x86_function *p, struct x86_reg reg) { emit_1ub(p, 0xff); - emit_modrm(p, reg, reg); + emit_modrm_noreg(p, 2, reg); } #endif @@ -1191,3 +1192,9 @@ void x86sse_dummy( void ) } #endif + +#else /* USE_X86_ASM */ + +int x86sse_c_dummy_var; /* silence warning */ + +#endif /* USE_X86_ASM */ -- cgit v1.2.3 From 9e8a85ef670dfb7d356d6066bad4710683a07fd9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Apr 2008 13:21:39 -0600 Subject: gallium: fix SCS codegen (sin scalar src comes from X, not Y) --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 4e80597b3f..35c63feccc 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -2020,7 +2020,7 @@ emit_instruction( STORE( func, *inst, 0, 0, CHAN_X ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 0, 0, CHAN_X ); emit_sin( func, 0 ); STORE( func, *inst, 0, 0, CHAN_Y ); } -- cgit v1.2.3 From 097301395d33d57d19bc942f236b4a8c912cc0cc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Apr 2008 13:44:47 -0600 Subject: gallium: comments --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 35c63feccc..93813f36cb 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -548,6 +548,12 @@ emit_xorps( * Data fetch helpers. */ +/** + * Copy a shader constant to xmm register + * \param xmm the destination xmm register + * \param vec the src const buffer index + * \param chan src channel to fetch (X, Y, Z or W) + */ static void emit_const( struct x86_function *func, @@ -566,6 +572,12 @@ emit_const( SHUF( 0, 0, 0, 0 ) ); } +/** + * Copy a shader input to xmm register + * \param xmm the destination xmm register + * \param vec the src input attrib + * \param chan src channel to fetch (X, Y, Z or W) + */ static void emit_inputf( struct x86_function *func, @@ -579,6 +591,12 @@ emit_inputf( get_input( vec, chan ) ); } +/** + * Store an xmm register to a shader output + * \param xmm the source xmm register + * \param vec the dest output attrib + * \param chan src dest channel to store (X, Y, Z or W) + */ static void emit_output( struct x86_function *func, @@ -592,6 +610,12 @@ emit_output( make_xmm( xmm ) ); } +/** + * Copy a shader temporary to xmm register + * \param xmm the destination xmm register + * \param vec the src temp register + * \param chan src channel to fetch (X, Y, Z or W) + */ static void emit_tempf( struct x86_function *func, @@ -605,6 +629,13 @@ emit_tempf( get_temp( vec, chan ) ); } +/** + * Load an xmm register with an input attrib coefficient (a0, dadx or dady) + * \param xmm the destination xmm register + * \param vec the src input/attribute coefficient index + * \param chan src channel to fetch (X, Y, Z or W) + * \param member 0=a0, 1=dadx, 2=dady + */ static void emit_coef( struct x86_function *func, -- cgit v1.2.3 From e3cf0cd6a9f3f072594e5712763b98ce7e579bcf Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Apr 2008 14:18:07 -0600 Subject: gallium: implement immediates (aka literals) for SSE fragment shaders --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 82 +++++++++++++++++++++++++++-- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 4 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 10 ++-- 3 files changed, 88 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 93813f36cb..748c53dbfc 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -225,6 +225,15 @@ get_coef_base( void ) return get_output_base(); } +static struct x86_reg +get_immediate_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DI ); +} + + /** * Data access helpers. */ @@ -238,6 +247,16 @@ get_argument( (index + 1) * 4 ); } +static struct x86_reg +get_immediate( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_immediate_base(), + (vec * 4 + chan) * 4 ); +} + static struct x86_reg get_const( unsigned vec, @@ -572,6 +591,25 @@ emit_const( SHUF( 0, 0, 0, 0 ) ); } +static void +emit_immediate( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_movss( + func, + make_xmm( xmm ), + get_immediate( vec, chan ) ); + emit_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + + /** * Copy a shader input to xmm register * \param xmm the destination xmm register @@ -1191,6 +1229,14 @@ emit_fetch( swizzle ); break; + case TGSI_FILE_IMMEDIATE: + emit_immediate( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + case TGSI_FILE_INPUT: emit_inputf( func, @@ -2343,15 +2389,21 @@ tgsi_emit_sse2( * DECLARATION and INSTRUCTION phase. * GP register holding the output argument is aliased with the coeff argument, * as outputs are not needed in the DECLARATION phase. + * + * \param tokens the TGSI input shader + * \param func the output SSE code/function + * \param immediates buffer to place immediates, later passed to SSE func */ unsigned tgsi_emit_sse2_fs( struct tgsi_token *tokens, - struct x86_function *func ) + struct x86_function *func, + float (*immediates)[4]) { struct tgsi_parse_context parse; boolean instruction_phase = FALSE; unsigned ok = 1; + uint num_immediates = 0; DUMP_START(); @@ -2362,6 +2414,7 @@ tgsi_emit_sse2_fs( func, get_input_base(), get_argument( 0 ) ); + /* skipping outputs argument here */ emit_mov( func, get_const_base(), @@ -2374,6 +2427,10 @@ tgsi_emit_sse2_fs( func, get_coef_base(), get_argument( 4 ) ); + emit_mov( + func, + get_immediate_base(), + get_argument( 5 ) ); tgsi_parse_init( &parse, tokens ); @@ -2407,9 +2464,26 @@ tgsi_emit_sse2_fs( break; case TGSI_TOKEN_TYPE_IMMEDIATE: - /* XXX implement this */ - ok = 0; - debug_printf("failed to emit immediate value to SSE\n"); + /* simply copy the immediate values into the next immediates[] slot */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for( i = 0; i < size; i++ ) { + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } +#if 0 + debug_printf("SSE FS immediate[%d] = %f %f %f %f\n", + num_immediates, + immediates[num_immediates][0], + immediates[num_immediates][1], + immediates[num_immediates][2], + immediates[num_immediates][3]); +#endif + num_immediates++; + } break; default: diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index 63b8ef3911..fde06047fe 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -16,7 +16,9 @@ tgsi_emit_sse2( unsigned tgsi_emit_sse2_fs( struct tgsi_token *tokens, - struct x86_function *function ); + struct x86_function *function, + float (*immediates)[4] + ); #if defined __cplusplus } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 8095d662ee..9c7948264f 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -51,7 +51,8 @@ typedef void (XSTDCALL *codegen_function)( struct tgsi_exec_vector *output, float (*constant)[4], struct tgsi_exec_vector *temporary, - const struct tgsi_interp_coef *coef + const struct tgsi_interp_coef *coef, + float (*immediates)[4] //, const struct tgsi_exec_vector *quadPos ); @@ -60,6 +61,7 @@ struct sp_sse_fragment_shader { struct sp_fragment_shader base; struct x86_function sse2_program; codegen_function func; + float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; }; @@ -96,7 +98,8 @@ fs_sse_run( struct sp_fragment_shader *base, machine->Outputs, machine->Consts, machine->Temps, - machine->InterpCoefs + machine->InterpCoefs, + shader->immediates // , &machine->QuadPos ); @@ -129,7 +132,8 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, x86_init_func( &shader->sse2_program ); - if (!tgsi_emit_sse2_fs( templ->tokens, &shader->sse2_program )) { + if (!tgsi_emit_sse2_fs( templ->tokens, &shader->sse2_program, + shader->immediates)) { FREE(shader); return NULL; } -- cgit v1.2.3 From 7c2416f06e518bc1491fe13e145dcc9487d75449 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Apr 2008 15:02:21 -0600 Subject: gallium: handle TGSI immediates in SSE code for vertex shaders --- src/gallium/auxiliary/draw/draw_vs_sse.c | 9 ++++++--- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 23 +++++++++++++++++++---- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 4 +++- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index f40d65df08..13394129bc 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -50,13 +50,15 @@ typedef void (XSTDCALL *codegen_function) ( const struct tgsi_exec_vector *input, struct tgsi_exec_vector *output, float (*constant)[4], - struct tgsi_exec_vector *temporary ); + struct tgsi_exec_vector *temporary, + float (*immediates)[4] ); struct draw_sse_vertex_shader { struct draw_vertex_shader base; struct x86_function sse2_program; codegen_function func; + float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; }; @@ -149,7 +151,8 @@ vs_sse_run( struct draw_vertex_shader *base, shader->func(machine->Inputs, machine->Outputs, machine->Consts, - machine->Temps ); + machine->Temps, + shader->immediates); } @@ -243,7 +246,7 @@ draw_create_vs_sse(struct draw_context *draw, x86_init_func( &vs->sse2_program ); if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, - &vs->sse2_program )) + &vs->sse2_program, vs->immediates )) goto fail; vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 748c53dbfc..e55fae6047 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -2316,10 +2316,12 @@ emit_declaration( unsigned tgsi_emit_sse2( struct tgsi_token *tokens, - struct x86_function *func ) + struct x86_function *func, + float (*immediates)[4] ) { struct tgsi_parse_context parse; unsigned ok = 1; + uint num_immediates = 0; DUMP_START(); @@ -2341,6 +2343,10 @@ tgsi_emit_sse2( func, get_temp_base(), get_argument( 3 ) ); + emit_mov( + func, + get_immediate_base(), + get_argument( 4 ) ); tgsi_parse_init( &parse, tokens ); @@ -2363,9 +2369,18 @@ tgsi_emit_sse2( break; case TGSI_TOKEN_TYPE_IMMEDIATE: - /* XXX implement this */ - ok = 0; - debug_printf("failed to emit immediate value to SSE\n"); + /* simply copy the immediate values into the next immediates[] slot */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for( i = 0; i < size; i++ ) { + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } + num_immediates++; + } break; default: diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index fde06047fe..d1190727d0 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -11,7 +11,9 @@ struct x86_function; unsigned tgsi_emit_sse2( struct tgsi_token *tokens, - struct x86_function *function ); + struct x86_function *function, + float (*immediates)[4] + ); unsigned tgsi_emit_sse2_fs( -- cgit v1.2.3 From 593cf5a6b55eb9b490a2aee2c3850d2d493fc4df Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Apr 2008 15:27:00 -0600 Subject: gallium: merge the tgsi_emit_sse2() and tgsi_emit_sse2_fs() functions. The two functions were mostly the same. We can look at the shader header info to determine if it's a vertex or fragment shader. --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 201 +++++++++++----------------- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 7 - src/gallium/drivers/softpipe/sp_fs_sse.c | 4 +- 3 files changed, 80 insertions(+), 132 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index e55fae6047..c37e201b2b 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -2313,104 +2313,25 @@ emit_declaration( } } -unsigned -tgsi_emit_sse2( - struct tgsi_token *tokens, - struct x86_function *func, - float (*immediates)[4] ) -{ - struct tgsi_parse_context parse; - unsigned ok = 1; - uint num_immediates = 0; - - DUMP_START(); - - func->csr = func->store; - - emit_mov( - func, - get_input_base(), - get_argument( 0 ) ); - emit_mov( - func, - get_output_base(), - get_argument( 1 ) ); - emit_mov( - func, - get_const_base(), - get_argument( 2 ) ); - emit_mov( - func, - get_temp_base(), - get_argument( 3 ) ); - emit_mov( - func, - get_immediate_base(), - get_argument( 4 ) ); - - tgsi_parse_init( &parse, tokens ); - - while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - ok = emit_instruction( - func, - &parse.FullToken.FullInstruction ); - - if (!ok) { - debug_printf("failed to translate tgsi opcode %d to SSE\n", - parse.FullToken.FullInstruction.Instruction.Opcode ); - } - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - /* simply copy the immediate values into the next immediates[] slot */ - { - const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; - uint i; - assert(size <= 4); - assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); - for( i = 0; i < size; i++ ) { - immediates[num_immediates][i] = - parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; - } - num_immediates++; - } - break; - - default: - assert( 0 ); - ok = 0; - break; - } - } - - tgsi_parse_free( &parse ); - - DUMP_END(); - - return ok; -} /** - * Fragment shaders are responsible for interpolating shader inputs. Because on - * x86 we have only 4 GP registers, and here we have 5 shader arguments (input, - * output, const, temp and coef), the code is split into two phases -- - * DECLARATION and INSTRUCTION phase. - * GP register holding the output argument is aliased with the coeff argument, - * as outputs are not needed in the DECLARATION phase. + * Translate a TGSI vertex/fragment shader to SSE2 code. + * Slightly different things are done for vertex vs. fragment shaders. + * + * Note that fragment shaders are responsible for interpolating shader + * inputs. Because on x86 we have only 4 GP registers, and here we + * have 5 shader arguments (input, output, const, temp and coef), the + * code is split into two phases -- DECLARATION and INSTRUCTION phase. + * GP register holding the output argument is aliased with the coeff + * argument, as outputs are not needed in the DECLARATION phase. * * \param tokens the TGSI input shader * \param func the output SSE code/function * \param immediates buffer to place immediates, later passed to SSE func + * \param return 1 for success, 0 if translation failed */ unsigned -tgsi_emit_sse2_fs( +tgsi_emit_sse2( struct tgsi_token *tokens, struct x86_function *func, float (*immediates)[4]) @@ -2424,50 +2345,84 @@ tgsi_emit_sse2_fs( func->csr = func->store; - /* DECLARATION phase, do not load output argument. */ - emit_mov( - func, - get_input_base(), - get_argument( 0 ) ); - /* skipping outputs argument here */ - emit_mov( - func, - get_const_base(), - get_argument( 2 ) ); - emit_mov( - func, - get_temp_base(), - get_argument( 3 ) ); - emit_mov( - func, - get_coef_base(), - get_argument( 4 ) ); - emit_mov( - func, - get_immediate_base(), - get_argument( 5 ) ); - tgsi_parse_init( &parse, tokens ); + /* + * Different function args for vertex/fragment shaders: + */ + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + /* DECLARATION phase, do not load output argument. */ + emit_mov( + func, + get_input_base(), + get_argument( 0 ) ); + /* skipping outputs argument here */ + emit_mov( + func, + get_const_base(), + get_argument( 2 ) ); + emit_mov( + func, + get_temp_base(), + get_argument( 3 ) ); + emit_mov( + func, + get_coef_base(), + get_argument( 4 ) ); + emit_mov( + func, + get_immediate_base(), + get_argument( 5 ) ); + } + else { + assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); + + emit_mov( + func, + get_input_base(), + get_argument( 0 ) ); + emit_mov( + func, + get_output_base(), + get_argument( 1 ) ); + emit_mov( + func, + get_const_base(), + get_argument( 2 ) ); + emit_mov( + func, + get_temp_base(), + get_argument( 3 ) ); + emit_mov( + func, + get_immediate_base(), + get_argument( 4 ) ); + } + while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: - emit_declaration( - func, - &parse.FullToken.FullDeclaration ); + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + emit_declaration( + func, + &parse.FullToken.FullDeclaration ); + } break; case TGSI_TOKEN_TYPE_INSTRUCTION: - if( !instruction_phase ) { - /* INSTRUCTION phase, overwrite coeff with output. */ - instruction_phase = TRUE; - emit_mov( - func, - get_output_base(), - get_argument( 1 ) ); + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + if( !instruction_phase ) { + /* INSTRUCTION phase, overwrite coeff with output. */ + instruction_phase = TRUE; + emit_mov( + func, + get_output_base(), + get_argument( 1 ) ); + } } + ok = emit_instruction( func, &parse.FullToken.FullInstruction ); diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index d1190727d0..d56bf7f98a 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -15,13 +15,6 @@ tgsi_emit_sse2( float (*immediates)[4] ); -unsigned -tgsi_emit_sse2_fs( - struct tgsi_token *tokens, - struct x86_function *function, - float (*immediates)[4] - ); - #if defined __cplusplus } #endif diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 9c7948264f..5ef02a7142 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -132,8 +132,8 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, x86_init_func( &shader->sse2_program ); - if (!tgsi_emit_sse2_fs( templ->tokens, &shader->sse2_program, - shader->immediates)) { + if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program, + shader->immediates)) { FREE(shader); return NULL; } -- cgit v1.2.3 From 2ebc99fcbc0c8fc6f6ce50e2ee674312e214ea2f Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 12 Apr 2008 11:03:56 -0600 Subject: gallium: move duplicated compute_clipmask() code to draw_vs.h --- src/gallium/auxiliary/draw/draw_vs.h | 33 +++++++++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs_exec.c | 26 ------------------------ src/gallium/auxiliary/draw/draw_vs_llvm.c | 27 ------------------------- src/gallium/auxiliary/draw/draw_vs_sse.c | 28 -------------------------- 4 files changed, 33 insertions(+), 81 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 4ee7e705e9..33ce1e335e 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -31,6 +31,10 @@ #ifndef DRAW_VS_H #define DRAW_VS_H +#include "draw_context.h" +#include "draw_private.h" + + struct draw_vertex_shader; struct draw_context; struct pipe_shader_state; @@ -47,4 +51,33 @@ struct draw_vertex_shader * draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ); + +/* Should be part of the generated shader: + */ +static INLINE unsigned +compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0x0; + unsigned i; + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; + if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; + if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; + if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; + if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; + if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1< Date: Sat, 12 Apr 2008 22:59:17 +0200 Subject: tgsi: Fix source register short dump code. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 62 +++++++++++++++-------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 1d6e8b155d..cb3573ceb6 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -968,18 +968,18 @@ dump_instruction_short( } CHR( ' ' ); - if( src->SrcRegisterExtMod.Complement ) { - TXT( "(1 - " ); - } - if( src->SrcRegisterExtMod.Negate ) { - CHR( '-' ); - } - if( src->SrcRegisterExtMod.Absolute ) { + if (src->SrcRegisterExtMod.Negate) + TXT( "-(" ); + if (src->SrcRegisterExtMod.Absolute) CHR( '|' ); - } - if( src->SrcRegister.Negate ) { + if (src->SrcRegisterExtMod.Scale2X) + TXT( "2*(" ); + if (src->SrcRegisterExtMod.Bias) + CHR( '(' ); + if (src->SrcRegisterExtMod.Complement) + TXT( "1-(" ); + if (src->SrcRegister.Negate) CHR( '-' ); - } ENM( src->SrcRegister.File, TGSI_FILES_SHORT ); @@ -987,35 +987,37 @@ dump_instruction_short( SID( src->SrcRegister.Index ); CHR( ']' ); - if (src->SrcRegister.Extended) { - if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || - src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || - src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || - src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { - CHR( '.' ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT ); - } - } - else if( src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || - src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || - src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || - src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ) { + if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || + src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || + src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || + src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) { CHR( '.' ); ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES_SHORT ); ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES_SHORT ); ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES_SHORT ); ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES_SHORT ); } + if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || + src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || + src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || + src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { + CHR( '.' ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT ); + } - if( src->SrcRegisterExtMod.Absolute ) { + if (src->SrcRegisterExtMod.Complement) + CHR( ')' ); + if (src->SrcRegisterExtMod.Bias) + TXT( ")-.5" ); + if (src->SrcRegisterExtMod.Scale2X) + CHR( ')' ); + if (src->SrcRegisterExtMod.Absolute) CHR( '|' ); - } - if( src->SrcRegisterExtMod.Complement ) { + if (src->SrcRegisterExtMod.Negate) CHR( ')' ); - } first_reg = FALSE; } -- cgit v1.2.3 From cd5931240688cb8bd12834e3ba23f858f26dbf8c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Apr 2008 12:23:26 +0900 Subject: gallium: OS independent time-manipulation functions. --- src/gallium/auxiliary/util/Makefile | 3 +- src/gallium/auxiliary/util/SConscript | 1 + src/gallium/auxiliary/util/u_time.c | 152 ++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_time.h | 99 ++++++++++++++++++++++ 4 files changed, 254 insertions(+), 1 deletion(-) create mode 100644 src/gallium/auxiliary/util/u_time.c create mode 100644 src/gallium/auxiliary/util/u_time.h diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 9b6c2708b6..fe82fa6378 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -14,7 +14,8 @@ C_SOURCES = \ u_hash_table.c \ u_mm.c \ u_simple_shaders.c \ - u_snprintf.c + u_snprintf.c \ + u_mm.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 9b3929eb2d..d55d2c7081 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -15,6 +15,7 @@ util = env.ConvenienceLibrary( 'u_mm.c', 'u_simple_shaders.c', 'u_snprintf.c', + 'u_time.c', ]) auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c new file mode 100644 index 0000000000..e6c0b19ff6 --- /dev/null +++ b/src/gallium/auxiliary/util/u_time.c @@ -0,0 +1,152 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent time-manipulation functions. + * + * @author Jose Fonseca + */ + + +#ifndef WIN32 +#include +#else +#include +#include +#endif + +#include "util/u_time.h" + + +#ifdef WIN32 +static LONGLONG frequency = 0; +#endif + + +void +util_time_get(struct util_time *t) +{ +#ifndef WIN32 + gettimeofday(&t->tv, NULL); +#else + EngQueryPerformanceCounter(&t->counter); +#endif +} + + +void +util_time_add(const struct util_time *t1, + int64_t usecs, + struct util_time *t2) +{ +#ifndef WIN32 + t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000; + t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000; +#else + if(!frequency) + EngQueryPerformanceFrequency(&frequency); + t2->counter = t1->counter + (usecs * frequency + 999999LL)/1000000LL; +#endif +} + + +int64_t +util_time_diff(const struct util_time *t1, + const struct util_time *t2) +{ +#ifndef WIN32 + return (t2->tv.tv_usec - t1->tv.tv_usec) + + (t2->tv.tv_sec - t1->tv.tv_sec)*1000000; +#else + return (t2->counter - t1->counter)*1000000LL/frequency; +#endif +} + + +/** + * Compare two time values. + * + * Not publicly available because it does not take in account wrap-arounds. + * Use util_time_timeout instead. + */ +static INLINE int +util_time_compare(const struct util_time *t1, + const struct util_time *t2) +{ +#ifndef WIN32 + if (t1->tv.tv_sec < t2->tv.tv_sec) + return -1; + else if(t1->tv.tv_sec > t2->tv.tv_sec) + return 1; + else if (t1->tv.tv_usec < t2->tv.tv_usec) + return -1; + else if(t1->tv.tv_usec > t2->tv.tv_usec) + return 1; + else + return 0; +#else + if (t1->counter < t2->counter) + return -1; + else if(t1->counter > t2->counter) + return 1; + else + return 0; +#endif +} + + +int +util_time_timeout(const struct util_time *start, + const struct util_time *end, + const struct util_time *curr) +{ + if(util_time_compare(start, end) <= 0) + return util_time_compare(start, curr) <= 0 && util_time_compare(curr, end) < 0; + else + return util_time_compare(start, curr) <= 0 || util_time_compare(curr, end) < 0; +} + + +#ifdef WIN32 +void util_time_usleep(unsigned usecs) +{ + LONGLONG start, curr, end; + + EngQueryPerformanceCounter(&start); + + if(!frequency) + EngQueryPerformanceFrequency(&frequency); + + end = start + (usecs * frequency + 999999LL)/1000000LL; + + do { + EngQueryPerformanceCounter(&curr); + } while(start <= curr && curr < end || + end < start && (curr < end || start <= curr)); +} +#endif diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h new file mode 100644 index 0000000000..32035cceb5 --- /dev/null +++ b/src/gallium/auxiliary/util/u_time.h @@ -0,0 +1,99 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent time-manipulation functions. + * + * @author Jose Fonseca + */ + +#ifndef U_TIME_H_ +#define U_TIME_H_ + + +#ifndef WIN32 +#include /* timeval */ +#include /* usleep */ +#endif + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Time abstraction. + * + * Do not access this structure directly. Use the provided function instead. + */ +struct util_time +{ +#ifndef WIN32 + struct timeval tv; +#else + long long counter; +#endif +}; + + +void +util_time_get(struct util_time *t); + +void +util_time_add(const struct util_time *t1, + int64_t usecs, + struct util_time *t2); + +int64_t +util_time_diff(const struct util_time *t1, + const struct util_time *t2); + +/** + * Returns zero when the timeout expires, non zero otherwise. + */ +int +util_time_timeout(const struct util_time *start, + const struct util_time *end, + const struct util_time *curr); + +#ifndef WIN32 +#define util_time_sleep usleep +#else +int +util_time_sleep(unsigned usecs); +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* U_TIME_H_ */ -- cgit v1.2.3 From 21c302b0ec39480a7eaab7827cce5b609d196606 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Apr 2008 14:10:46 +0900 Subject: gallium: Initial port of Thomas slab suballocator to pipebuffer. Not tested yet -- just compiles. This includes only the slab algorithm. Fencing is already implemented in pb_bufmgr_fence and time-based caching will be commited in a separate module shortly. --- src/gallium/auxiliary/pipebuffer/Makefile | 1 + src/gallium/auxiliary/pipebuffer/SConscript | 1 + src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 9 + src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 429 ++++++++++++++++++++++ 4 files changed, 440 insertions(+) create mode 100644 src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index a9fa518c67..0770cad45a 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ pb_bufmgr_fenced.c \ pb_bufmgr_mm.c \ pb_bufmgr_pool.c \ + pb_bufmgr_slab.c \ pb_winsys.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript index 3d41fd84a6..14c3c77e37 100644 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -8,6 +8,7 @@ pipebuffer = env.ConvenienceLibrary( 'pb_bufmgr_fenced.c', 'pb_bufmgr_mm.c', 'pb_bufmgr_pool.c', + 'pb_bufmgr_slab.c', 'pb_winsys.c', ]) diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 0cf8e92e37..a2377f70e2 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -114,6 +114,15 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, size_t size, size_t align2); +struct pb_manager * +pb_slab_manager_create(struct pb_manager *provider, + const struct pb_desc *desc, + size_t smallestSize, + size_t numSizes, + size_t desiredNumBuffers, + size_t maxSlabSize, + size_t pageAlignment); + /** * Fenced buffer manager. * diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c new file mode 100644 index 0000000000..676e8e29b9 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -0,0 +1,429 @@ +/************************************************************************** + * + * Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA + * All Rights Reserved. + * + * Permission is hereby granted, FREE of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/** + * @file + * S-lab pool implementation. + * + * @author Thomas Hellstrom + * @author Jose Fonseca + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_error.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "util/u_double_list.h" +#include "util/u_time.h" + +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +#define DRI_SLABPOOL_ALLOC_RETRIES 100 + + +struct pb_slab; + +struct pb_slab_buffer +{ + struct pb_buffer base; + + struct pb_slab *slab; + struct list_head head; + unsigned mapCount; + size_t start; + _glthread_Cond event; +}; + +struct pb_slab +{ + struct list_head head; + struct list_head freeBuffers; + size_t numBuffers; + size_t numFree; + struct pb_slab_buffer *buffers; + struct pb_slab_size_header *header; + + struct pb_buffer *bo; + size_t pageAlignment; + void *virtual; +}; + +struct pb_slab_size_header +{ + struct list_head slabs; + struct list_head freeSlabs; + struct pb_slab_manager *pool; + size_t bufSize; + _glthread_Mutex mutex; +}; + +/** + * The data of this structure remains constant after + * initialization and thus needs no mutex protection. + */ +struct pb_slab_manager +{ + struct pb_manager base; + + struct pb_desc desc; + size_t *bucketSizes; + size_t numBuckets; + size_t pageSize; + struct pb_manager *provider; + unsigned pageAlignment; + unsigned maxSlabSize; + unsigned desiredNumBuffers; + struct pb_slab_size_header *headers; +}; + + +static INLINE struct pb_slab_buffer * +pb_slab_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct pb_slab_buffer *)buf; +} + + +static INLINE struct pb_slab_manager * +pb_slab_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_slab_manager *)mgr; +} + + +/** + * Delete a buffer from the slab header delayed list and put + * it on the slab FREE list. + */ +static void +pb_slab_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + struct pb_slab *slab = buf->slab; + struct pb_slab_size_header *header = slab->header; + struct list_head *list = &buf->head; + + _glthread_LOCK_MUTEX(header->mutex); + + assert(buf->base.base.refcount == 0); + + buf->mapCount = 0; + + LIST_DEL(list); + LIST_ADDTAIL(list, &slab->freeBuffers); + slab->numFree++; + + if (slab->head.next == &slab->head) + LIST_ADDTAIL(&slab->head, &header->slabs); + + if (slab->numFree == slab->numBuffers) { + list = &slab->head; + LIST_DEL(list); + LIST_ADDTAIL(list, &header->freeSlabs); + } + + if (header->slabs.next == &header->slabs || slab->numFree + != slab->numBuffers) { + + struct list_head *next; + + for (list = header->freeSlabs.next, next = list->next; list + != &header->freeSlabs; list = next, next = list->next) { + + slab = LIST_ENTRY(struct pb_slab, list, head); + + LIST_DELINIT(list); + pb_reference(&slab->bo, NULL); + FREE(slab->buffers); + FREE(slab); + } + } + + _glthread_UNLOCK_MUTEX(header->mutex); +} + + +static void * +pb_slab_buffer_map(struct pb_buffer *_buf, + unsigned flags) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + + ++buf->mapCount; + return (void *) ((uint8_t *) buf->slab->virtual + buf->start); +} + + +static void +pb_slab_buffer_unmap(struct pb_buffer *_buf) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + + --buf->mapCount; + if (buf->mapCount == 0) + _glthread_COND_BROADCAST(buf->event); +} + + +static void +pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + pb_get_base_buffer(buf->slab->bo, base_buf, offset); + *offset += buf->start; +} + + +static const struct pb_vtbl +pb_slab_buffer_vtbl = { + pb_slab_buffer_destroy, + pb_slab_buffer_map, + pb_slab_buffer_unmap, + pb_slab_buffer_get_base_buffer +}; + + +static enum pipe_error +pb_slab_create(struct pb_slab_size_header *header) +{ + struct pb_slab_manager *pool = header->pool; + size_t size = header->bufSize * pool->desiredNumBuffers; + struct pb_slab *slab; + struct pb_slab_buffer *buf; + size_t numBuffers; + int ret; + unsigned i; + + slab = CALLOC_STRUCT(pb_slab); + if (!slab) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* + * FIXME: We should perhaps allow some variation in slabsize in order + * to efficiently reuse slabs. + */ + + size = (size <= pool->maxSlabSize) ? size : pool->maxSlabSize; + size = (size + pool->pageSize - 1) & ~(pool->pageSize - 1); + + slab->bo = pool->provider->create_buffer(pool->provider, size, &pool->desc); + if(!slab->bo) + goto out_err0; + + slab->virtual = pb_map(slab->bo, + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE); + if(!slab->virtual) + goto out_err1; + + pb_unmap(slab->bo); + + numBuffers = slab->bo->base.size / header->bufSize; + + slab->buffers = CALLOC(numBuffers, sizeof(*slab->buffers)); + if (!slab->buffers) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out_err1; + } + + LIST_INITHEAD(&slab->head); + LIST_INITHEAD(&slab->freeBuffers); + slab->numBuffers = numBuffers; + slab->numFree = 0; + slab->header = header; + + buf = slab->buffers; + for (i=0; i < numBuffers; ++i) { + buf->base.base.refcount = 0; + buf->base.base.size = header->bufSize; + buf->base.base.alignment = 0; + buf->base.base.usage = 0; + buf->base.vtbl = &pb_slab_buffer_vtbl; + buf->slab = slab; + buf->start = i* header->bufSize; + buf->mapCount = 0; + _glthread_INIT_COND(buf->event); + LIST_ADDTAIL(&buf->head, &slab->freeBuffers); + slab->numFree++; + buf++; + } + + LIST_ADDTAIL(&slab->head, &header->slabs); + + return PIPE_OK; + +out_err1: + pb_reference(&slab->bo, NULL); +out_err0: + FREE(slab); + return ret; +} + + +static struct pb_buffer * +pb_slab_manager_create_buffer(struct pb_manager *_pool, + size_t size, + const struct pb_desc *desc) +{ + struct pb_slab_manager *pool = pb_slab_manager(_pool); + struct pb_slab_size_header *header; + unsigned i; + static struct pb_slab_buffer *buf; + struct pb_slab *slab; + struct list_head *list; + int count = DRI_SLABPOOL_ALLOC_RETRIES; + + /* + * FIXME: Check for compatibility. + */ + + header = pool->headers; + for (i=0; inumBuckets; ++i) { + if (header->bufSize >= size) + break; + header++; + } + + if (i >= pool->numBuckets) + /* Fall back to allocate a buffer object directly from the provider. */ + return pool->provider->create_buffer(pool->provider, size, desc); + + + _glthread_LOCK_MUTEX(header->mutex); + while (header->slabs.next == &header->slabs && count > 0) { + if (header->slabs.next != &header->slabs) + break; + + _glthread_UNLOCK_MUTEX(header->mutex); + if (count != DRI_SLABPOOL_ALLOC_RETRIES) + util_time_sleep(1); + _glthread_LOCK_MUTEX(header->mutex); + (void) pb_slab_create(header); + count--; + } + + list = header->slabs.next; + if (list == &header->slabs) { + _glthread_UNLOCK_MUTEX(header->mutex); + return NULL; + } + slab = LIST_ENTRY(struct pb_slab, list, head); + if (--slab->numFree == 0) + LIST_DELINIT(list); + + list = slab->freeBuffers.next; + LIST_DELINIT(list); + + _glthread_UNLOCK_MUTEX(header->mutex); + buf = LIST_ENTRY(struct pb_slab_buffer, list, head); + ++buf->base.base.refcount; + return &buf->base; +} + + +static void +pb_slab_manager_destroy(struct pb_manager *_pool) +{ + struct pb_slab_manager *pool = pb_slab_manager(_pool); + + FREE(pool->headers); + FREE(pool->bucketSizes); + FREE(pool); +} + + +struct pb_manager * +pb_slab_manager_create(struct pb_manager *provider, + const struct pb_desc *desc, + size_t smallestSize, + size_t numSizes, + size_t desiredNumBuffers, + size_t maxSlabSize, + size_t pageAlignment) +{ + struct pb_slab_manager *pool; + size_t i; + + pool = CALLOC_STRUCT(pb_slab_manager); + if (!pool) + goto out_err0; + + pool->bucketSizes = CALLOC(numSizes, sizeof(*pool->bucketSizes)); + if (!pool->bucketSizes) + goto out_err1; + + pool->headers = CALLOC(numSizes, sizeof(*pool->headers)); + if (!pool->headers) + goto out_err2; + + pool->desc = *desc; + pool->numBuckets = numSizes; +#ifdef WIN32 + pool->pageSize = 4096; +#else + pool->pageSize = getpagesize(); +#endif + pool->provider = provider; + pool->pageAlignment = pageAlignment; + pool->maxSlabSize = maxSlabSize; + pool->desiredNumBuffers = desiredNumBuffers; + + for (i=0; inumBuckets; ++i) { + struct pb_slab_size_header *header = &pool->headers[i]; + + pool->bucketSizes[i] = (smallestSize << i); + + _glthread_INIT_MUTEX(header->mutex); + + LIST_INITHEAD(&header->slabs); + LIST_INITHEAD(&header->freeSlabs); + + header->pool = pool; + header->bufSize = (smallestSize << i); + } + + pool->base.destroy = pb_slab_manager_destroy; + pool->base.create_buffer = pb_slab_manager_create_buffer; + + return &pool->base; + +out_err2: + FREE(pool->bucketSizes); +out_err1: + FREE(pool); +out_err0: + return NULL; +} -- cgit v1.2.3 From fb2b5f7a4ac411a5bb5cde12ba15265b30c032e8 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Apr 2008 15:10:01 +0900 Subject: gallium: Buffer cache. --- src/gallium/auxiliary/pipebuffer/Makefile | 1 + src/gallium/auxiliary/pipebuffer/SConscript | 1 + src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 20 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 299 +++++++++++++++++++++ 4 files changed, 318 insertions(+), 3 deletions(-) create mode 100644 src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index 0770cad45a..d654dbcc91 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -6,6 +6,7 @@ LIBNAME = pipebuffer C_SOURCES = \ pb_buffer_fenced.c \ pb_buffer_malloc.c \ + pb_bufmgr_cache.c \ pb_bufmgr_fenced.c \ pb_bufmgr_mm.c \ pb_bufmgr_pool.c \ diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript index 14c3c77e37..604a217982 100644 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -5,6 +5,7 @@ pipebuffer = env.ConvenienceLibrary( source = [ 'pb_buffer_fenced.c', 'pb_buffer_malloc.c', + 'pb_bufmgr_cache.c', 'pb_bufmgr_fenced.c', 'pb_bufmgr_mm.c', 'pb_bufmgr_pool.c', diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index a2377f70e2..b2d2520b67 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -80,7 +80,7 @@ struct pb_manager /** - * Static buffer pool manager. + * Static buffer pool sub-allocator. * * Manages the allocation of equally sized buffers. It does so by allocating * a single big buffer and divide it equally sized buffers. @@ -94,7 +94,7 @@ pool_bufmgr_create(struct pb_manager *provider, /** - * Wraper around the old memory manager. + * Static sub-allocator based the old memory manager. * * It managers buffers of different sizes. It does so by allocating a buffer * with the size of the heap, and then using the old mm memory manager to manage @@ -114,6 +114,9 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, size_t size, size_t align2); +/** + * Slab sub-allocator. + */ struct pb_manager * pb_slab_manager_create(struct pb_manager *provider, const struct pb_desc *desc, @@ -122,7 +125,18 @@ pb_slab_manager_create(struct pb_manager *provider, size_t desiredNumBuffers, size_t maxSlabSize, size_t pageAlignment); - + + +/** + * Time-based buffer cache. + * + * This manager keeps a cache of destroyed buffers during a time interval. + */ +struct pb_manager * +pb_cache_manager_create(struct pb_manager *provider, + unsigned usecs); + + /** * Fenced buffer manager. * diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c new file mode 100644 index 0000000000..06de0bb6c3 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -0,0 +1,299 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Buffer cache. + * + * \author José Fonseca + * \author Thomas Hellström + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_winsys.h" +#include "pipe/p_thread.h" +#include "pipe/p_util.h" +#include "util/u_double_list.h" +#include "util/u_time.h" + +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +/** + * Convenience macro (type safe). + */ +#define SUPER(__derived) (&(__derived)->base) + + +struct pb_cache_manager; + + +/** + * Wrapper around a pipe buffer which adds delayed destruction. + */ +struct pb_cache_buffer +{ + struct pb_buffer base; + + struct pb_buffer *buffer; + struct pb_cache_manager *mgr; + + /** Caching time interval */ + struct util_time start, end; + + struct list_head head; +}; + + +struct pb_cache_manager +{ + struct pb_manager base; + + struct pb_manager *provider; + unsigned usecs; + + _glthread_Mutex mutex; + + struct list_head delayed; + size_t numDelayed; +}; + + +static INLINE struct pb_cache_buffer * +pb_cache_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct pb_cache_buffer *)buf; +} + + +static INLINE struct pb_cache_manager * +pb_cache_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_cache_manager *)mgr; +} + + +/** + * Actually destroy the buffer. + */ +static INLINE void +_pb_cache_buffer_destroy(struct pb_cache_buffer *buf) +{ + struct pb_cache_manager *mgr = buf->mgr; + + LIST_DEL(&buf->head); + assert(mgr->numDelayed); + --mgr->numDelayed; + assert(!buf->base.base.refcount); + pb_reference(&buf->buffer, NULL); + FREE(buf); +} + + +/** + * Free as many cache buffers from the list head as possible. + */ +static void +_pb_cache_buffer_list_check_free(struct pb_cache_manager *mgr) +{ + struct list_head *curr, *next; + struct pb_cache_buffer *buf; + struct util_time now; + + util_time_get(&now); + + curr = mgr->delayed.next; + next = curr->next; + while(curr != &mgr->delayed) { + buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + + if(util_time_timeout(&buf->start, &buf->end, &now) != 0) + break; + + _pb_cache_buffer_destroy(buf); + + curr = next; + next = curr->next; + } +} + + +static void +pb_cache_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + struct pb_cache_manager *mgr = buf->mgr; + + _glthread_LOCK_MUTEX(mgr->mutex); + assert(buf->base.base.refcount == 0); + + _pb_cache_buffer_list_check_free(mgr); + + util_time_get(&buf->start); + util_time_add(&buf->start, mgr->usecs, &buf->end); + LIST_ADDTAIL(&buf->head, &mgr->delayed); + ++mgr->numDelayed; + _glthread_UNLOCK_MUTEX(mgr->mutex); +} + + +static void * +pb_cache_buffer_map(struct pb_buffer *_buf, + unsigned flags) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + return pb_map(buf->buffer, flags); +} + + +static void +pb_cache_buffer_unmap(struct pb_buffer *_buf) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + pb_unmap(buf->buffer); +} + + +static void +pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + pb_get_base_buffer(buf->buffer, base_buf, offset); +} + + +const struct pb_vtbl +pb_cache_buffer_vtbl = { + pb_cache_buffer_destroy, + pb_cache_buffer_map, + pb_cache_buffer_unmap, + pb_cache_buffer_get_base_buffer +}; + + +static struct pb_buffer * +pb_cache_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pb_cache_manager *mgr = pb_cache_manager(_mgr); + struct pb_cache_buffer *buf; + struct list_head *curr, *next; + struct util_time now; + + util_time_get(&now); + curr = mgr->delayed.next; + next = curr->next; + while(curr != &mgr->delayed) { + buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + + if(buf->base.base.size == size && + buf->base.base.alignment >= desc->alignment && + (buf->base.base.alignment % desc->alignment) == 0 && + /* buf->base.base.usage == usage */ 1) { + ++buf->base.base.refcount; + return &buf->base; + } + + if(util_time_timeout(&buf->start, &buf->end, &now) != 0) + _pb_cache_buffer_destroy(buf); + + curr = next; + next = curr->next; + } + + buf = CALLOC_STRUCT(pb_cache_buffer); + if(!buf) + return NULL; + + buf->buffer = mgr->provider->create_buffer(mgr->provider, size, desc); + if(!buf->buffer) { + FREE(buf); + return NULL; + } + + buf->base.base.refcount = 1; + buf->base.base.alignment = buf->buffer->base.alignment; + buf->base.base.usage = buf->buffer->base.usage; + buf->base.base.size = buf->buffer->base.size; + + buf->base.vtbl = &pb_cache_buffer_vtbl; + buf->mgr = mgr; + + return &buf->base; +} + + +static void +pb_cache_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_cache_manager *mgr = pb_cache_manager(_mgr); + struct list_head *curr, *next; + struct pb_cache_buffer *buf; + + _glthread_LOCK_MUTEX(mgr->mutex); + curr = mgr->delayed.next; + next = curr->next; + while(curr != &mgr->delayed) { + buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + _pb_cache_buffer_destroy(buf); + curr = next; + next = curr->next; + } + _glthread_UNLOCK_MUTEX(mgr->mutex); + + FREE(mgr); +} + + +struct pb_manager * +pb_cache_manager_create(struct pb_manager *provider, + unsigned usecs) +{ + struct pb_cache_manager *mgr; + + mgr = (struct pb_cache_manager *)CALLOC(1, sizeof(*mgr)); + if (!mgr) + return NULL; + + mgr->base.destroy = pb_cache_manager_destroy; + mgr->base.create_buffer = pb_cache_manager_create_buffer; + mgr->provider = provider; + mgr->usecs = usecs; + LIST_INITHEAD(&mgr->delayed); + mgr->numDelayed = 0; + _glthread_INIT_MUTEX(mgr->mutex); + + return &mgr->base; +} -- cgit v1.2.3 From 84994693f53668d5ca72d57765a0729fe343593b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Apr 2008 18:52:54 +0900 Subject: gallium: Add u_time.c --- src/gallium/auxiliary/util/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index fe82fa6378..05bc43131a 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -15,6 +15,7 @@ C_SOURCES = \ u_mm.c \ u_simple_shaders.c \ u_snprintf.c \ + u_time.c \ u_mm.c include ../../Makefile.template -- cgit v1.2.3 From 4f550ab821f9aef9f19d9f1e10785f8c1f511ad4 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 7 Apr 2008 20:38:39 -0400 Subject: introduce a define to maxout the processed vertices --- src/gallium/auxiliary/draw/draw_vertex_shader.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index 133418baca..d5f37bca21 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -37,7 +37,7 @@ #include "draw_context.h" #include "draw_vs.h" - +#define MAX_SHADER_VERTICES 4 /** * Run the vertex shader on all vertices in the vertex queue. @@ -58,23 +58,23 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) // fprintf(stderr, "%s %d\n", __FUNCTION__, draw->vs.queue_nr ); /* run vertex shader on vertex cache entries, four per invokation */ - for (i = 0; i < draw->vs.queue_nr; i += 4) { - struct vertex_header *dests[4]; - unsigned elts[4]; - int j, n = MIN2(4, draw->vs.queue_nr - i); + for (i = 0; i < draw->vs.queue_nr; i += MAX_SHADER_VERTICES) { + struct vertex_header *dests[MAX_SHADER_VERTICES]; + unsigned elts[MAX_SHADER_VERTICES]; + int j, n = MIN2(MAX_SHADER_VERTICES, - i); for (j = 0; j < n; j++) { elts[j] = draw->vs.queue[i + j].elt; dests[j] = draw->vs.queue[i + j].vertex; } - for ( ; j < 4; j++) { + for ( ; j < MAX_SHADER_VERTICES; j++) { elts[j] = elts[0]; dests[j] = draw->vs.queue[i + j].vertex; } assert(n > 0); - assert(n <= 4); + assert(n <= MAX_SHADER_VERTICES); shader->run(shader, draw, elts, n, dests); } -- cgit v1.2.3 From aadbb1d7fbbaada6e378cb60194e5861cadf98d1 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sat, 12 Apr 2008 15:45:28 -0400 Subject: pass arbitrary number of vertices to the shader execution cycle --- src/gallium/auxiliary/draw/draw_private.h | 2 + src/gallium/auxiliary/draw/draw_vertex_shader.c | 4 +- src/gallium/auxiliary/draw/draw_vs_exec.c | 144 ++++++++++++------------ src/gallium/auxiliary/draw/draw_vs_sse.c | 139 +++++++++++------------ 4 files changed, 146 insertions(+), 143 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 4d056f6dba..f9aea9f355 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -56,6 +56,8 @@ struct gallivm_cpu_engine; struct draw_pt_middle_end; struct draw_pt_front_end; +#define MAX_SHADER_VERTICES 128 + /** * Basic vertex info. * Carry some useful information around with the vertices in the prim pipe. diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index d5f37bca21..726921d77b 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -37,8 +37,6 @@ #include "draw_context.h" #include "draw_vs.h" -#define MAX_SHADER_VERTICES 4 - /** * Run the vertex shader on all vertices in the vertex queue. * Called by the draw module when the vertx cache needs to be flushed. @@ -61,7 +59,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) for (i = 0; i < draw->vs.queue_nr; i += MAX_SHADER_VERTICES) { struct vertex_header *dests[MAX_SHADER_VERTICES]; unsigned elts[MAX_SHADER_VERTICES]; - int j, n = MIN2(MAX_SHADER_VERTICES, - i); + int j, n = MIN2(MAX_SHADER_VERTICES, draw->vs.queue_nr - i); for (j = 0; j < n; j++) { elts[j] = draw->vs.queue[i + j].elt; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 9629410abb..df0051d693 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -40,6 +40,7 @@ #include "tgsi/util/tgsi_parse.h" +#define MAX_TGSI_VERTICES 4 static void vs_exec_prepare( struct draw_vertex_shader *shader, @@ -71,14 +72,13 @@ vs_exec_run( struct draw_vertex_shader *shader, struct vertex_header *vOut[] ) { struct tgsi_exec_machine *machine = &draw->machine; - unsigned int j; + unsigned int i, j; ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; - assert(count <= 4); assert(draw->vertex_shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION); @@ -92,80 +92,82 @@ vs_exec_run( struct draw_vertex_shader *shader, machine->Outputs = ALIGN16_ASSIGN(outputs); } - draw->vertex_fetch.fetch_func( draw, machine, elts, count ); + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + draw->vertex_fetch.fetch_func( draw, machine, &elts[i], max_vertices ); - if (!draw->rasterizer->bypass_vs) { - /* run interpreter */ - tgsi_exec_machine_run( machine ); - } - - /* store machine results */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - - /* Handle attr[0] (position) specially: - * - * XXX: Computing the clipmask should be done in the vertex - * program as a set of DP4 instructions appended to the - * user-provided code. - */ - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - if (!draw->rasterizer->bypass_clipping) { - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - } - else { - vOut[j]->clipmask = 0; - } - vOut[j]->edgeflag = 1; - - if (!draw->identity_viewport) { - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; - } - else { - vOut[j]->data[0][0] = x; - vOut[j]->data[0][1] = y; - vOut[j]->data[0][2] = z; - vOut[j]->data[0][3] = w; + if (!draw->rasterizer->bypass_vs) { + /* run interpreter */ + tgsi_exec_machine_run( machine ); } - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. - */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } + /* store machine results */ + for (j = 0; j < max_vertices; j++) { + unsigned slot; + float x, y, z, w; + + /* Handle attr[0] (position) specially: + * + * XXX: Computing the clipmask should be done in the vertex + * program as a set of DP4 instructions appended to the + * user-provided code. + */ + x = vOut[i + j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = vOut[i + j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = vOut[i + j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = vOut[i + j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + if (!draw->rasterizer->bypass_clipping) { + vOut[i + j]->clipmask = compute_clipmask(vOut[i + j]->clip, draw->plane, + draw->nr_planes); + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + } + else { + vOut[i + j]->clipmask = 0; + } + vOut[i + j]->edgeflag = 1; + + if (!draw->identity_viewport) { + /* Viewport mapping */ + vOut[i + j]->data[0][0] = x * scale[0] + trans[0]; + vOut[i + j]->data[0][1] = y * scale[1] + trans[1]; + vOut[i + j]->data[0][2] = z * scale[2] + trans[2]; + vOut[i + j]->data[0][3] = w; + } + else { + vOut[i + j]->data[0][0] = x; + vOut[i + j]->data[0][1] = y; + vOut[i + j]->data[0][2] = z; + vOut[i + j]->data[0][3] = w; + } + + /* Remaining attributes are packed into sequential post-transform + * vertex attrib slots. + */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + vOut[i + j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + vOut[i + j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + vOut[i + j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + vOut[i + j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } #if 0 /*DEBUG*/ - printf("Post xform vert:\n"); - for (slot = 0; slot < draw->num_vs_outputs; slot++) { - printf("%d: %f %f %f %f\n", slot, - vOut[j]->data[slot][0], - vOut[j]->data[slot][1], - vOut[j]->data[slot][2], - vOut[j]->data[slot][3]); - } -#endif - - - } /* loop over vertices */ + printf("%d) Post xform vert:\n", i + j); + for (slot = 0; slot < draw->num_vs_outputs; slot++) { + printf("\t%d: %f %f %f %f\n", slot, + vOut[i + j]->data[slot][0], + vOut[i + j]->data[slot][1], + vOut[i + j]->data[slot][2], + vOut[i + j]->data[slot][3]); + } +#endif + } /* loop over vertices */ + } } diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 0ee991d764..bfec89254e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -45,6 +45,7 @@ #include "tgsi/exec/tgsi_sse2.h" #include "tgsi/util/tgsi_parse.h" +#define SSE_MAX_VERTICES 4 typedef void (XSTDCALL *codegen_function) ( const struct tgsi_exec_vector *input, @@ -86,14 +87,13 @@ vs_sse_run( struct draw_vertex_shader *base, { struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; struct tgsi_exec_machine *machine = &draw->machine; - unsigned int j; + unsigned int i, j; ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; - assert(count <= 4); assert(draw->vertex_shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION); @@ -108,77 +108,78 @@ vs_sse_run( struct draw_vertex_shader *base, machine->Outputs = ALIGN16_ASSIGN(outputs); } - - /* Fetch vertices. This may at some point be integrated into the - * compiled shader -- that would require a reorganization where - * multiple versions of the compiled shader might exist, - * specialized for each fetch state. - */ - draw->vertex_fetch.fetch_func( draw, machine, elts, count ); - - - if (!draw->rasterizer->bypass_vs) { - /* run compiled shader - */ - shader->func(machine->Inputs, - machine->Outputs, - machine->Consts, - machine->Temps, - shader->immediates); - } - - - /* XXX: Computing the clipmask and emitting results should be done - * in the vertex program as a set of instructions appended to - * the user-provided code. - */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - if (!draw->rasterizer->bypass_clipping) { - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - } - else { - vOut[j]->clipmask = 0; - } - vOut[j]->edgeflag = 1; - - if (!draw->identity_viewport) { - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; - } - else { - vOut[j]->data[0][0] = x; - vOut[j]->data[0][1] = y; - vOut[j]->data[0][2] = z; - vOut[j]->data[0][3] = w; + for (i = 0; i < count; i += SSE_MAX_VERTICES) { + unsigned int max_vertices = MIN2(SSE_MAX_VERTICES, count - i); + /* Fetch vertices. This may at some point be integrated into the + * compiled shader -- that would require a reorganization where + * multiple versions of the compiled shader might exist, + * specialized for each fetch state. + */ + draw->vertex_fetch.fetch_func(draw, machine, &elts[i], max_vertices); + + if (!draw->rasterizer->bypass_vs) { + /* run compiled shader + */ + shader->func(machine->Inputs, + machine->Outputs, + machine->Consts, + machine->Temps, + shader->immediates); } - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. + /* XXX: Computing the clipmask and emitting results should be done + * in the vertex program as a set of instructions appended to + * the user-provided code. */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + for (j = 0; j < max_vertices; j++) { + unsigned slot; + float x, y, z, w; + + x = vOut[i + j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = vOut[i + j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = vOut[i + j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = vOut[i + j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + if (!draw->rasterizer->bypass_clipping) { + vOut[i + j]->clipmask = compute_clipmask(vOut[i + j]->clip, draw->plane, + draw->nr_planes); + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + } + else { + vOut[i + j]->clipmask = 0; + } + vOut[j]->edgeflag = 1; + + if (!draw->identity_viewport) { + /* Viewport mapping */ + vOut[i + j]->data[0][0] = x * scale[0] + trans[0]; + vOut[i + j]->data[0][1] = y * scale[1] + trans[1]; + vOut[i + j]->data[0][2] = z * scale[2] + trans[2]; + vOut[i + j]->data[0][3] = w; + } + else { + vOut[i + j]->data[0][0] = x; + vOut[i + j]->data[0][1] = y; + vOut[i + j]->data[0][2] = z; + vOut[i + j]->data[0][3] = w; + } + + /* Remaining attributes are packed into sequential post-transform + * vertex attrib slots. + */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + vOut[i + j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + vOut[i + j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + vOut[i + j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + vOut[i + j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } } - } + } } -- cgit v1.2.3 From 808f968f3ad0cb32e86f517753d5715d00e9ec2c Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 11 Apr 2008 19:58:22 -0400 Subject: return true if one of the vertices has been clipped --- src/gallium/auxiliary/draw/draw_private.h | 12 ++++++------ src/gallium/auxiliary/draw/draw_vs_exec.c | 5 ++++- src/gallium/auxiliary/draw/draw_vs_llvm.c | 18 +++++++++++------- src/gallium/auxiliary/draw/draw_vs_sse.c | 5 ++++- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index f9aea9f355..5c710667fc 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -148,12 +148,12 @@ struct draw_vertex_shader { /* Run the shader - this interface will get cleaned up in the * future: */ - void (*run)( struct draw_vertex_shader *shader, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - struct vertex_header *vOut[] ); - + boolean (*run)( struct draw_vertex_shader *shader, + struct draw_context *draw, + const unsigned *elts, + unsigned count, + struct vertex_header *vOut[] ); + void (*delete)( struct draw_vertex_shader * ); }; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index df0051d693..09e0d0eaab 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -64,7 +64,7 @@ vs_exec_prepare( struct draw_vertex_shader *shader, * \param count number of vertices to shade [1..4] * \param vOut array of pointers to four output vertices */ -static void +static boolean vs_exec_run( struct draw_vertex_shader *shader, struct draw_context *draw, const unsigned *elts, @@ -73,6 +73,7 @@ vs_exec_run( struct draw_vertex_shader *shader, { struct tgsi_exec_machine *machine = &draw->machine; unsigned int i, j; + unsigned int clipped = 0; ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); @@ -120,6 +121,7 @@ vs_exec_run( struct draw_vertex_shader *shader, if (!draw->rasterizer->bypass_clipping) { vOut[i + j]->clipmask = compute_clipmask(vOut[i + j]->clip, draw->plane, draw->nr_planes); + clipped += vOut[i + j]->clipmask; /* divide by w */ w = 1.0f / w; @@ -168,6 +170,7 @@ vs_exec_run( struct draw_vertex_shader *shader, #endif } /* loop over vertices */ } + return clipped != 0; } diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 7e48428cdd..6db1e65a2d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -67,18 +67,19 @@ vs_llvm_prepare( struct draw_vertex_shader *base, * \param count number of vertices to shade [1..4] * \param vOut array of pointers to four output vertices */ -static void +static boolean vs_llvm_run( struct draw_vertex_shader *base, - struct draw_context *draw, - const unsigned *elts, + struct draw_context *draw, + const unsigned *elts, unsigned count, struct vertex_header *vOut[] ) { - struct draw_llvm_vertex_shader *shader = + struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; struct tgsi_exec_machine *machine = &draw->machine; unsigned int j; + unsigned int clipped = 0; ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); @@ -125,19 +126,21 @@ vs_llvm_run( struct draw_vertex_shader *base, w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; if (!draw->rasterizer->bypass_clipping) { - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, + draw->nr_planes); + clipped += vOut[j]->clipmask; /* divide by w */ w = 1.0f / w; x *= w; y *= w; - z *= w; + z *= w; } else { vOut[j]->clipmask = 0; } vOut[j]->edgeflag = 1; - + if (!draw->identity_viewport) { /* Viewport mapping */ vOut[j]->data[0][0] = x * scale[0] + trans[0]; @@ -162,6 +165,7 @@ vs_llvm_run( struct draw_vertex_shader *base, vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; } } /* loop over vertices */ + return clipped != 0; } static void diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index bfec89254e..6e8d2021f5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -78,7 +78,7 @@ vs_sse_prepare( struct draw_vertex_shader *base, * \param count number of vertices to shade [1..4] * \param vOut array of pointers to four output vertices */ -static void +static boolean vs_sse_run( struct draw_vertex_shader *base, struct draw_context *draw, const unsigned *elts, @@ -88,6 +88,7 @@ vs_sse_run( struct draw_vertex_shader *base, struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; struct tgsi_exec_machine *machine = &draw->machine; unsigned int i, j; + unsigned int clipped = 0; ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); @@ -143,6 +144,7 @@ vs_sse_run( struct draw_vertex_shader *base, if (!draw->rasterizer->bypass_clipping) { vOut[i + j]->clipmask = compute_clipmask(vOut[i + j]->clip, draw->plane, draw->nr_planes); + clipped += vOut[i + j]->clipmask; /* divide by w */ w = 1.0f / w; @@ -180,6 +182,7 @@ vs_sse_run( struct draw_vertex_shader *base, } } } + return clipped != 0; } -- cgit v1.2.3 From 3f7a3dd58c0ce2719af83ff1d89a26185d08c04c Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sat, 12 Apr 2008 21:52:46 -0400 Subject: Make shaders operate on a block of memory instead of arrays of vertex_header's --- src/gallium/auxiliary/draw/draw_context.c | 10 ++--- src/gallium/auxiliary/draw/draw_private.h | 15 ++++--- src/gallium/auxiliary/draw/draw_vertex_cache.c | 33 +++++++++------ src/gallium/auxiliary/draw/draw_vertex_shader.c | 7 ++-- src/gallium/auxiliary/draw/draw_vs_exec.c | 54 ++++++++++++------------ src/gallium/auxiliary/draw/draw_vs_llvm.c | 2 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 56 +++++++++++++++---------- 7 files changed, 100 insertions(+), 77 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index b3c65c90d6..cdca556fbd 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -86,14 +86,12 @@ struct draw_context *draw_create( void ) /* Statically allocate maximum sized vertices for the cache - could be cleverer... */ { - uint i; const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; - char *tmp = align_malloc(Elements(draw->vs.queue) * size, 16); + char *tmp = align_malloc(VS_QUEUE_LENGTH * size, 16); if (!tmp) goto fail; - for (i = 0; i < Elements(draw->vs.queue); i++) - draw->vs.queue[i].vertex = (struct vertex_header *)(tmp + i * size); + draw->vs.vertex_cache = tmp; } draw->shader_queue_flush = draw_vertex_shader_queue_flush; @@ -156,8 +154,8 @@ void draw_destroy( struct draw_context *draw ) tgsi_exec_machine_free_data(&draw->machine); - if (draw->vs.queue[0].vertex) - align_free( draw->vs.queue[0].vertex ); /* Frees all the vertices. */ + if (draw->vs.vertex_cache) + align_free( draw->vs.vertex_cache ); /* Frees all the vertices. */ /* Not so fast -- we're just borrowing this at the moment. * diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 5c710667fc..1115166192 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -152,7 +152,7 @@ struct draw_vertex_shader { struct draw_context *draw, const unsigned *elts, unsigned count, - struct vertex_header *vOut[] ); + void *out ); void (*delete)( struct draw_vertex_shader * ); @@ -321,10 +321,8 @@ struct draw_context /* Vertex shader queue: */ struct { - struct { - unsigned elt; /**< index into the user's vertex arrays */ - struct vertex_header *vertex; - } queue[VS_QUEUE_LENGTH]; + unsigned elts[VS_QUEUE_LENGTH]; /**< index into the user's vertex arrays */ + char *vertex_cache; unsigned queue_nr; unsigned post_nr; } vs; @@ -450,4 +448,11 @@ dot4(const float *a, const float *b) return result; } +static INLINE struct vertex_header * +draw_header_from_block(char *block, int num) +{ + static const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; + return (struct vertex_header*)(block + num * size); +} + #endif /* DRAW_PRIVATE_H */ diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c index c0248979e2..9256dec4ea 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_cache.c +++ b/src/gallium/auxiliary/draw/draw_vertex_cache.c @@ -71,25 +71,27 @@ static struct vertex_header *get_vertex( struct draw_context *draw, /* Cache hit? */ if (draw->vcache.idx[slot].in == i) { -// _mesa_printf("HIT %d %d\n", slot, i); + /*debug_printf("HIT %d %d\n", slot, i);*/ assert(draw->vcache.idx[slot].out < draw->vs.queue_nr); - return draw->vs.queue[draw->vcache.idx[slot].out].vertex; + return draw_header_from_block(draw->vs.vertex_cache, + draw->vcache.idx[slot].out); } /* Otherwise a collision */ slot = VCACHE_SIZE + draw->vcache.overflow++; -// _mesa_printf("XXX %d --> %d\n", i, slot); + /*debug_printf("XXX %d --> %d\n", i, slot);*/ } /* Deal with the cache miss: */ { unsigned out; - + struct vertex_header *header; + assert(slot < Elements(draw->vcache.idx)); -// _mesa_printf("NEW %d %d\n", slot, i); + /*debug_printf("NEW %d %d\n", slot, i);*/ draw->vcache.idx[slot].in = i; draw->vcache.idx[slot].out = out = draw->vs.queue_nr++; draw->vcache.referenced |= (1 << slot); @@ -99,17 +101,19 @@ static struct vertex_header *get_vertex( struct draw_context *draw, */ assert(draw->vs.queue_nr < VS_QUEUE_LENGTH); - draw->vs.queue[out].elt = i; - draw->vs.queue[out].vertex->clipmask = 0; - draw->vs.queue[out].vertex->edgeflag = draw_get_edgeflag(draw, i); - draw->vs.queue[out].vertex->pad = 0; - draw->vs.queue[out].vertex->vertex_id = UNDEFINED_VERTEX_ID; + header = draw_header_from_block(draw->vs.vertex_cache, out); + draw->vs.elts[out] = i; + header->clipmask = 0; + header->edgeflag = draw_get_edgeflag(draw, i); + header->pad = 0; + header->vertex_id = UNDEFINED_VERTEX_ID; /* Need to set the vertex's edge flag here. If we're being called * by do_ef_triangle(), that function needs edge flag info! */ - return draw->vs.queue[draw->vcache.idx[slot].out].vertex; + return draw_header_from_block(draw->vs.vertex_cache, + draw->vcache.idx[slot].out); } } @@ -142,8 +146,11 @@ void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ) { unsigned i; - for (i = 0; i < draw->vs.post_nr; i++) - draw->vs.queue[i].vertex->vertex_id = UNDEFINED_VERTEX_ID; + for (i = 0; i < draw->vs.post_nr; i++) { + struct vertex_header * header = + draw_header_from_block(draw->vs.vertex_cache, i); + header->vertex_id = UNDEFINED_VERTEX_ID; + } } diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index 726921d77b..452d0175c3 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -57,18 +57,17 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) /* run vertex shader on vertex cache entries, four per invokation */ for (i = 0; i < draw->vs.queue_nr; i += MAX_SHADER_VERTICES) { - struct vertex_header *dests[MAX_SHADER_VERTICES]; unsigned elts[MAX_SHADER_VERTICES]; int j, n = MIN2(MAX_SHADER_VERTICES, draw->vs.queue_nr - i); + struct vertex_header *dests = + draw_header_from_block(draw->vs.vertex_cache, i); for (j = 0; j < n; j++) { - elts[j] = draw->vs.queue[i + j].elt; - dests[j] = draw->vs.queue[i + j].vertex; + elts[j] = draw->vs.elts[i + j]; } for ( ; j < MAX_SHADER_VERTICES; j++) { elts[j] = elts[0]; - dests[j] = draw->vs.queue[i + j].vertex; } assert(n > 0); diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 09e0d0eaab..6fe4e554d5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -69,7 +69,7 @@ vs_exec_run( struct draw_vertex_shader *shader, struct draw_context *draw, const unsigned *elts, unsigned count, - struct vertex_header *vOut[] ) + void *vOut ) { struct tgsi_exec_machine *machine = &draw->machine; unsigned int i, j; @@ -106,6 +106,8 @@ vs_exec_run( struct draw_vertex_shader *shader, for (j = 0; j < max_vertices; j++) { unsigned slot; float x, y, z, w; + struct vertex_header *out = + draw_header_from_block(vOut, i + j); /* Handle attr[0] (position) specially: * @@ -113,15 +115,15 @@ vs_exec_run( struct draw_vertex_shader *shader, * program as a set of DP4 instructions appended to the * user-provided code. */ - x = vOut[i + j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[i + j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[i + j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[i + j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j]; if (!draw->rasterizer->bypass_clipping) { - vOut[i + j]->clipmask = compute_clipmask(vOut[i + j]->clip, draw->plane, - draw->nr_planes); - clipped += vOut[i + j]->clipmask; + out->clipmask = compute_clipmask(out->clip, draw->plane, + draw->nr_planes); + clipped += out->clipmask; /* divide by w */ w = 1.0f / w; @@ -130,42 +132,42 @@ vs_exec_run( struct draw_vertex_shader *shader, z *= w; } else { - vOut[i + j]->clipmask = 0; + out->clipmask = 0; } - vOut[i + j]->edgeflag = 1; + out->edgeflag = 1; if (!draw->identity_viewport) { /* Viewport mapping */ - vOut[i + j]->data[0][0] = x * scale[0] + trans[0]; - vOut[i + j]->data[0][1] = y * scale[1] + trans[1]; - vOut[i + j]->data[0][2] = z * scale[2] + trans[2]; - vOut[i + j]->data[0][3] = w; + out->data[0][0] = x * scale[0] + trans[0]; + out->data[0][1] = y * scale[1] + trans[1]; + out->data[0][2] = z * scale[2] + trans[2]; + out->data[0][3] = w; } else { - vOut[i + j]->data[0][0] = x; - vOut[i + j]->data[0][1] = y; - vOut[i + j]->data[0][2] = z; - vOut[i + j]->data[0][3] = w; + out->data[0][0] = x; + out->data[0][1] = y; + out->data[0][2] = z; + out->data[0][3] = w; } /* Remaining attributes are packed into sequential post-transform * vertex attrib slots. */ for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[i + j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[i + j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[i + j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[i + j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; } #if 0 /*DEBUG*/ printf("%d) Post xform vert:\n", i + j); for (slot = 0; slot < draw->num_vs_outputs; slot++) { printf("\t%d: %f %f %f %f\n", slot, - vOut[i + j]->data[slot][0], - vOut[i + j]->data[slot][1], - vOut[i + j]->data[slot][2], - vOut[i + j]->data[slot][3]); + out->data[slot][0], + out->data[slot][1], + out->data[slot][2], + out->data[slot][3]); } #endif } /* loop over vertices */ diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 6db1e65a2d..72317c67ae 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -72,7 +72,7 @@ vs_llvm_run( struct draw_vertex_shader *base, struct draw_context *draw, const unsigned *elts, unsigned count, - struct vertex_header *vOut[] ) + void *vOut ) { struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 6e8d2021f5..c877f5ee3a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -83,7 +83,7 @@ vs_sse_run( struct draw_vertex_shader *base, struct draw_context *draw, const unsigned *elts, unsigned count, - struct vertex_header *vOut[] ) + void *vOut ) { struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; struct tgsi_exec_machine *machine = &draw->machine; @@ -135,16 +135,18 @@ vs_sse_run( struct draw_vertex_shader *base, for (j = 0; j < max_vertices; j++) { unsigned slot; float x, y, z, w; + struct vertex_header *out = + draw_header_from_block(vOut, i + j); - x = vOut[i + j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[i + j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[i + j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[i + j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j]; if (!draw->rasterizer->bypass_clipping) { - vOut[i + j]->clipmask = compute_clipmask(vOut[i + j]->clip, draw->plane, - draw->nr_planes); - clipped += vOut[i + j]->clipmask; + out->clipmask = compute_clipmask(out->clip, draw->plane, + draw->nr_planes); + clipped += out->clipmask; /* divide by w */ w = 1.0f / w; @@ -153,33 +155,43 @@ vs_sse_run( struct draw_vertex_shader *base, z *= w; } else { - vOut[i + j]->clipmask = 0; + out->clipmask = 0; } - vOut[j]->edgeflag = 1; + out->edgeflag = 1; if (!draw->identity_viewport) { /* Viewport mapping */ - vOut[i + j]->data[0][0] = x * scale[0] + trans[0]; - vOut[i + j]->data[0][1] = y * scale[1] + trans[1]; - vOut[i + j]->data[0][2] = z * scale[2] + trans[2]; - vOut[i + j]->data[0][3] = w; + out->data[0][0] = x * scale[0] + trans[0]; + out->data[0][1] = y * scale[1] + trans[1]; + out->data[0][2] = z * scale[2] + trans[2]; + out->data[0][3] = w; } else { - vOut[i + j]->data[0][0] = x; - vOut[i + j]->data[0][1] = y; - vOut[i + j]->data[0][2] = z; - vOut[i + j]->data[0][3] = w; + out->data[0][0] = x; + out->data[0][1] = y; + out->data[0][2] = z; + out->data[0][3] = w; } /* Remaining attributes are packed into sequential post-transform * vertex attrib slots. */ for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[i + j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[i + j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[i + j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[i + j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; } +#if 0 /*DEBUG*/ + printf("%d) Post xform vert:\n", i + j); + for (slot = 0; slot < draw->num_vs_outputs; slot++) { + printf("\t%d: %f %f %f %f\n", slot, + out->data[slot][0], + out->data[slot][1], + out->data[slot][2], + out->data[slot][3]); + } +#endif } } return clipped != 0; -- cgit v1.2.3 From 0c1cb54923f3ab31caa2821e095685277174dd2f Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sun, 13 Apr 2008 01:47:07 -0400 Subject: Implement fetch/shade/pipeline or emit vertex passthrough. --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_pt.c | 54 +++-- src/gallium/auxiliary/draw/draw_pt.h | 2 +- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 253 +++++++++++++++++++++ 4 files changed, 287 insertions(+), 23 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 28262a92c6..5ab3cfe5ce 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -19,6 +19,7 @@ C_SOURCES = \ draw_pt_vcache.c \ draw_pt_fetch_emit.c \ draw_pt_fetch_pipeline.c \ + draw_pt_fetch_shade_pipeline.c \ draw_pt_pipeline.c \ draw_pt_elts.c \ draw_prim.c \ diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index f59fb86f78..c8663c0e84 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -38,19 +38,23 @@ /* XXX: Shouldn't those two functions below use the '>' operator??? */ - -static boolean too_many_verts( struct draw_context *draw, - unsigned verts ) +static boolean too_many_elts( struct draw_context *draw, + unsigned elts ) { - return verts < 1024; + return elts > (8 * 1024); } -static boolean too_many_elts( struct draw_context *draw, - unsigned elts ) +static INLINE unsigned reduced_prim(unsigned prim) { - return elts < (16 * 1024); + /*FIXME*/ + return prim; } +static INLINE boolean good_prim(unsigned prim) +{ + /*FIXME*/ + return FALSE; +} boolean draw_pt_arrays(struct draw_context *draw, @@ -64,6 +68,9 @@ draw_pt_arrays(struct draw_context *draw, struct draw_pt_front_end *frontend = NULL; struct draw_pt_middle_end *middle = NULL; + if (!draw->render) + return FALSE; + /*debug_printf("XXXXXXXXXX needs_pipeline = %d\n", pipeline);*/ /* Overall we do: * - frontend -- prepare fetch_elts, draw_elts - eg vcache @@ -87,7 +94,6 @@ draw_pt_arrays(struct draw_context *draw, */ middle = draw->pt.middle.fetch_pipeline; } -#if 0 else if (!cliptest && !pipeline) { /* Fetch user verts, run vertex shader, emit hw verts: */ @@ -111,23 +117,15 @@ draw_pt_arrays(struct draw_context *draw, */ middle = draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit; } - else if (!cliptest) { - /* Fetch user verts, run vertex shader, run pipeline: - */ - middle = draw->pt.middle.fetch_shade_pipeline; - } else { /* This is what we're currently always doing: */ - /* Fetch user verts, run vertex shader, cliptest, run pipeline: + /* Fetch user verts, run vertex shader, cliptest, run pipeline + * or + * Fetch user verts, run vertex shader, run pipeline */ - middle = draw->pt.middle.fetch_shade_cliptest_pipeline; - } -#else - else { - return FALSE; + middle = draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit; } -#endif /* If !pipeline, need to make sure we respect the driver's limited @@ -143,7 +141,7 @@ draw_pt_arrays(struct draw_context *draw, frontend = draw->pt.front.vcache; hw_prim = reduced_prim(prim); } - +#if 0 if (too_many_verts(nr_verts)) { /* if (is_verts(draw) && can_split(prim)) { draw = draw_arrays_split; @@ -153,6 +151,7 @@ draw_pt_arrays(struct draw_context *draw, hw_prim = reduced_prim(prim); } } +#endif if (too_many_elts(count)) { @@ -166,7 +165,7 @@ draw_pt_arrays(struct draw_context *draw, } if (!good_prim(hw_prim)) { - draw = draw->pt.front.vcache; + frontend = draw->pt.front.vcache; } } #else @@ -200,6 +199,11 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_pipeline) return FALSE; + draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit = + draw_pt_fetch_pipeline_or_emit( draw ); + if (!draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit) + return FALSE; + draw->pt.front.vcache = draw_pt_vcache( draw ); if (!draw->pt.front.vcache) return FALSE; @@ -220,6 +224,12 @@ void draw_pt_destroy( struct draw_context *draw ) draw->pt.middle.fetch_pipeline = NULL; } + if (draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit) { + draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit->destroy( + draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit ); + draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit = NULL; + } + if (draw->pt.front.vcache) { draw->pt.front.vcache->destroy( draw->pt.front.vcache ); draw->pt.front.vcache = NULL; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 590823fd33..48413b648a 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -123,7 +123,7 @@ const void *draw_pt_elt_ptr( struct draw_context *draw, struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw ); - +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); #endif diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c new file mode 100644 index 0000000000..e7550fe328 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -0,0 +1,253 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" + +struct fetch_pipeline_middle_end { + struct draw_pt_middle_end base; + struct draw_context *draw; + + struct { + const ubyte *ptr; + unsigned pitch; + void (*fetch)( const void *from, float *attrib); + void (*emit)( const float *attrib, float **out ); + } fetch[PIPE_MAX_ATTRIBS]; + + unsigned nr_fetch; + unsigned pipeline_vertex_size; + unsigned hw_vertex_size; + unsigned prim; +}; + +static void emit_R32_FLOAT( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out) += 1; +} + +static void emit_R32G32_FLOAT( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out)[1] = attrib[1]; + (*out) += 2; +} + +static void emit_R32G32B32_FLOAT( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out)[1] = attrib[1]; + (*out)[2] = attrib[2]; + (*out) += 3; +} + +static void emit_R32G32B32A32_FLOAT( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out)[1] = attrib[1]; + (*out)[2] = attrib[2]; + (*out)[3] = attrib[3]; + (*out) += 4; +} + +static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, + unsigned prim ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + unsigned i, nr = 0; + boolean ok; + const struct vertex_info *vinfo; + + fpme->prim = prim; + + ok = draw->render->set_primitive(draw->render, prim); + if (!ok) { + assert(0); + return; + } + /* Must do this after set_primitive() above: + */ + vinfo = draw->render->get_vertex_info(draw->render); + + /* Need to look at vertex shader inputs (we know it is a + * passthrough shader, so these define the outputs too). If we + * were running a shader, we'd still be looking at the inputs at + * this point. + */ + for (i = 0; i < draw->vertex_shader->info.num_inputs; i++) { + unsigned buf = draw->vertex_element[i].vertex_buffer_index; + enum pipe_format format = draw->vertex_element[i].src_format; + + fpme->fetch[nr].ptr = ((const ubyte *) draw->user.vbuffer[buf] + + draw->vertex_buffer[buf].buffer_offset + + draw->vertex_element[i].src_offset); + + fpme->fetch[nr].pitch = draw->vertex_buffer[buf].pitch; + fpme->fetch[nr].fetch = draw_get_fetch_func( format ); + + /* Always do this -- somewhat redundant... + */ + fpme->fetch[nr].emit = emit_R32G32B32A32_FLOAT; + nr++; + } + + fpme->nr_fetch = nr; + //fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); + fpme->pipeline_vertex_size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; + fpme->hw_vertex_size = vinfo->size * 4; +} + + + + +static void fetch_pipeline_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *shader = draw->vertex_shader; + char *pipeline_verts; + unsigned i; + + //debug_printf("fc = %d, VS = %d\n", fetch_count, VS_QUEUE_LENGTH); + if (fetch_count < VS_QUEUE_LENGTH) { + pipeline_verts = draw->vs.vertex_cache; + } else { + pipeline_verts = MALLOC(fpme->pipeline_vertex_size * + fetch_count); + } + + if (!pipeline_verts) { + assert(0); + return; + } + + /*FIXME: this init phase should go away */ + for (i = 0; i < fetch_count; ++i) { + struct vertex_header *header = + (struct vertex_header*)(pipeline_verts + (fpme->pipeline_vertex_size * i)); + header->clipmask = 0; + header->edgeflag = draw_get_edgeflag(draw, i); + header->pad = 0; + header->vertex_id = UNDEFINED_VERTEX_ID; + } + + /* Shade + */ + shader->prepare(shader, draw); + if (shader->run(shader, draw, fetch_elts, fetch_count, pipeline_verts)) { + /* Run the pipeline */ + draw_pt_run_pipeline( fpme->draw, + fpme->prim, + pipeline_verts, + fpme->pipeline_vertex_size, + fetch_count, + draw_elts, + draw_count ); + } else { + unsigned i, j; + void *hw_verts; + float *out; + + hw_verts = draw->render->allocate_vertices(draw->render, + (ushort)fpme->hw_vertex_size, + (ushort)fetch_count); + if (!hw_verts) { + assert(0); + return; + } + + out = (float *)hw_verts; + for (i = 0; i < fetch_count; i++) { + struct vertex_header *header = + (struct vertex_header*)(pipeline_verts + (fpme->pipeline_vertex_size * i)); + + for (j = 0; j < fpme->nr_fetch; j++) { + float *attrib = header->data[j]; + /*debug_printf("emiting [%f, %f, %f, %f]\n", + attrib[0], attrib[1], + attrib[2], attrib[3]);*/ + fpme->fetch[j].emit(attrib, &out); + } + } + /* XXX: Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw(draw->render, + draw_elts, + draw_count); + + draw->render->release_vertices(draw->render, + hw_verts, + fpme->hw_vertex_size, + fetch_count); + } + + + if (pipeline_verts != draw->vs.vertex_cache) + FREE(pipeline_verts); +} + + + +static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) +{ + /* nothing to do */ +} + +static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle ) +{ + FREE(middle); +} + + +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *draw ) +{ + struct fetch_pipeline_middle_end *fetch_pipeline = CALLOC_STRUCT( fetch_pipeline_middle_end ); + + fetch_pipeline->base.prepare = fetch_pipeline_prepare; + fetch_pipeline->base.run = fetch_pipeline_run; + fetch_pipeline->base.finish = fetch_pipeline_finish; + fetch_pipeline->base.destroy = fetch_pipeline_destroy; + + fetch_pipeline->draw = draw; + + return &fetch_pipeline->base; +} -- cgit v1.2.3 From 8e7326832a7420154fc0d526ac682494db1be160 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 14 Apr 2008 11:32:50 +0100 Subject: softpipe: do our own culling, don't rely on the draw module. May not always happen due to passthrough modes, etc. --- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 21 +++++++++++--- src/gallium/drivers/softpipe/sp_setup.c | 44 +++++++++++++++++------------ 2 files changed, 43 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 025a0113bf..74cd675908 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -120,8 +120,9 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) /** - * Recalculate prim's determinant. - * XXX is this needed? + * Recalculate prim's determinant. This is needed as we don't have + * get this information through the vbuf_render interface & we must + * calculate it here. */ static float calc_det( const float (*v0)[4], @@ -144,12 +145,21 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; - struct draw_stage *setup = softpipe->setup; unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); unsigned i, j; void *vertex_buffer = cvbr->vertex_buffer; cptrf4 v[3]; - struct setup_context *setup_ctx = sp_draw_setup_context(setup); + + /* XXX: break this dependency - make setup_context live under + * softpipe, rename the old "setup" draw stage to something else. + */ + struct draw_stage *setup = softpipe->setup; + struct setup_context *setup_ctx = sp_draw_setup_context(softpipe->setup); + + /* XXX: call this from allocate_vertices: + */ + setup_prepare( setup_ctx ); + switch (cvbr->prim) { case PIPE_PRIM_TRIANGLES: @@ -189,6 +199,9 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) break; } + /* XXX: why are we calling this??? If we had to call something, it + * would be a function in sp_setup.c: + */ sp_draw_flush( setup ); } diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 48617a66ed..5a30788850 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -97,31 +97,35 @@ struct setup_context { uint numFragsEmitted; /**< per primitive */ uint numFragsWritten; /**< per primitive */ #endif + + unsigned winding; /* which winding to cull */ }; -/** - * Recalculate prim's determinant. - * XXX is this needed? - */ -static INLINE float -calc_det( const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) +static boolean cull_tri( struct setup_context *setup, + float det ) { - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0][0] - v2[0][0]; - const float ey = v0[0][1] - v2[0][1]; - const float fx = v1[0][0] - v2[0][0]; - const float fy = v1[0][1] - v2[0][1]; - - /* det = cross(e,f).z */ - return ex * fy - ey * fx; + if (det != 0) + { + /* if (det < 0 then Z points toward camera and triangle is + * counter-clockwise winding. + */ + unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; + + if ((winding & setup->winding) == 0) + return FALSE; + } + + /* Culled: + */ + return TRUE; } + + /** * Clip setup->quad against the scissor/surface bounds. */ @@ -709,8 +713,10 @@ void setup_tri( struct setup_context *setup, setup->numFragsWritten = 0; #endif - setup_sort_vertices( setup, calc_det(v0, v1, v2), - v0, v1, v2 ); + if (cull_tri( setup, det )) + return; + + setup_sort_vertices( setup, det, v0, v1, v2 ); setup_tri_coefficients( setup ); setup_tri_edges( setup ); @@ -1223,6 +1229,8 @@ void setup_prepare( struct setup_context *setup ) setup->quad.nr_attrs = fs->info.num_inputs; sp->quad.first->begin(sp->quad.first); } + + setup->winding = sp->rasterizer->cull_mode; } -- cgit v1.2.3 From caf293343fd236e97ce399533ac0ada3c7afee7a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 14 Apr 2008 12:08:46 +0100 Subject: draw: hide passthrough shading paths behind an environment variable --- src/gallium/auxiliary/draw/draw_context.c | 2 ++ src/gallium/auxiliary/draw/draw_private.h | 1 + src/gallium/auxiliary/draw/draw_pt.c | 3 +++ 3 files changed, 6 insertions(+) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index cdca556fbd..1e70a77523 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -49,6 +49,8 @@ struct draw_context *draw_create( void ) draw->use_sse = FALSE; #endif + draw->use_pt_shaders = GETENV( "GALLIUM_PT_SHADERS" ) != NULL; + /* create pipeline stages */ draw->pipeline.wide_line = draw_wide_line_stage( draw ); draw->pipeline.wide_point = draw_wide_point_stage( draw ); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 1115166192..3042d26847 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -276,6 +276,7 @@ struct draw_context boolean line_stipple; /**< do line stipple? */ boolean point_sprite; /**< convert points to quads for sprites? */ boolean use_sse; + boolean use_pt_shaders; /* temporary flag to switch on pt shader paths */ /* If a prim stage introduces new vertex attributes, they'll be stored here */ diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index c8663c0e84..c67a217982 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -79,6 +79,9 @@ draw_pt_arrays(struct draw_context *draw, * - backend -- the vbuf_render provided by the driver. */ + if (shading && !draw->use_pt_shaders) + return FALSE; + if (!cliptest && !pipeline && !shading) { /* This is the 'passthrough' path: -- cgit v1.2.3 From e106b2d3d65585b0aaa0e60afd541da020d9e220 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 14 Apr 2008 12:27:25 +0100 Subject: draw: move vertex header init out of fetch_shade_pipeline.c --- src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c | 2 -- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 9 --------- src/gallium/auxiliary/draw/draw_vs_exec.c | 1 + src/gallium/auxiliary/draw/draw_vs_llvm.c | 1 + src/gallium/auxiliary/draw/draw_vs_sse.c | 1 + 5 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c index 4c2a281b29..0914a90440 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c @@ -211,7 +211,6 @@ fetch_store_general( struct fetch_pipeline_middle_end *fpme, static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned prim ) { - static const float zero = 0; struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; unsigned i, nr = 0; @@ -264,7 +263,6 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, unsigned draw_count ) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - struct draw_context *draw = fpme->draw; char *pipeline_verts; pipeline_verts = MALLOC( fpme->pipeline_vertex_size * diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index e7550fe328..fb9b2da5c0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -159,15 +159,6 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, return; } - /*FIXME: this init phase should go away */ - for (i = 0; i < fetch_count; ++i) { - struct vertex_header *header = - (struct vertex_header*)(pipeline_verts + (fpme->pipeline_vertex_size * i)); - header->clipmask = 0; - header->edgeflag = draw_get_edgeflag(draw, i); - header->pad = 0; - header->vertex_id = UNDEFINED_VERTEX_ID; - } /* Shade */ diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 6fe4e554d5..27cf060cc9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -135,6 +135,7 @@ vs_exec_run( struct draw_vertex_shader *shader, out->clipmask = 0; } out->edgeflag = 1; + out->vertex_id = UNDEFINED_VERTEX_ID; if (!draw->identity_viewport) { /* Viewport mapping */ diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 72317c67ae..73076d2467 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -140,6 +140,7 @@ vs_llvm_run( struct draw_vertex_shader *base, vOut[j]->clipmask = 0; } vOut[j]->edgeflag = 1; + vOut[j]->vertex_id = UNDEFINED_VERTEX_ID; if (!draw->identity_viewport) { /* Viewport mapping */ diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index c877f5ee3a..92b9947e9f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -158,6 +158,7 @@ vs_sse_run( struct draw_vertex_shader *base, out->clipmask = 0; } out->edgeflag = 1; + out->vertex_id = UNDEFINED_VERTEX_ID; if (!draw->identity_viewport) { /* Viewport mapping */ -- cgit v1.2.3 From a82e4996a13ef3cae1497fef95c2fca7631cd889 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 14 Apr 2008 12:32:53 +0100 Subject: draw: flush pipeline before trying to allocate more hw vertices --- src/gallium/auxiliary/draw/draw_pt.c | 4 ---- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 4 ++++ src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 4 ++++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index c67a217982..5c16165c15 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -175,10 +175,6 @@ draw_pt_arrays(struct draw_context *draw, frontend = draw->pt.front.vcache; #endif - /* XXX: need to flush to get prim_vbuf.c to release its allocation?? - */ - draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - frontend->prepare( frontend, prim, middle ); frontend->run( frontend, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index e4e144ef19..2f8b08db79 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -243,6 +243,10 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, struct draw_context *draw = feme->draw; void *hw_verts; + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + hw_verts = draw->render->allocate_vertices( draw->render, (ushort)feme->hw_vertex_size, (ushort)fetch_count ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index fb9b2da5c0..b5d2f81a14 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -177,6 +177,10 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, void *hw_verts; float *out; + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + hw_verts = draw->render->allocate_vertices(draw->render, (ushort)fpme->hw_vertex_size, (ushort)fetch_count); -- cgit v1.2.3 From 36bacf97a6b10f7274f0d3fcf37bf7ebf9388161 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 14 Apr 2008 12:46:47 +0100 Subject: draw: always malloc verts for fetch_shade_pipeline --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index b5d2f81a14..427128f335 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -144,15 +144,9 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vertex_shader; char *pipeline_verts; - unsigned i; - //debug_printf("fc = %d, VS = %d\n", fetch_count, VS_QUEUE_LENGTH); - if (fetch_count < VS_QUEUE_LENGTH) { - pipeline_verts = draw->vs.vertex_cache; - } else { - pipeline_verts = MALLOC(fpme->pipeline_vertex_size * - fetch_count); - } + pipeline_verts = MALLOC(fpme->pipeline_vertex_size * + fetch_count); if (!pipeline_verts) { assert(0); @@ -216,8 +210,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, } - if (pipeline_verts != draw->vs.vertex_cache) - FREE(pipeline_verts); + FREE(pipeline_verts); } -- cgit v1.2.3 From 8cbda9f1088718e5dbb97b9a6ddcc43737f94351 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 14 Apr 2008 16:15:39 +0100 Subject: draw: remove dead code --- src/gallium/auxiliary/draw/draw_vf.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_vf.h b/src/gallium/auxiliary/draw/draw_vf.h index 7555d1bd58..0ef98d6257 100644 --- a/src/gallium/auxiliary/draw/draw_vf.h +++ b/src/gallium/auxiliary/draw/draw_vf.h @@ -128,9 +128,6 @@ draw_vf_destroy( struct draw_vertex_fetch *vf ); struct draw_vf_attr; -typedef void (*draw_vf_extract_func)( const struct draw_vf_attr *a, - float *out, - const uint8_t *v ); typedef void (*draw_vf_insert_func)( const struct draw_vf_attr *a, uint8_t *v, @@ -164,7 +161,6 @@ struct draw_vf_attr uint8_t *inputptr; const draw_vf_insert_func *insert; draw_vf_insert_func do_insert; - draw_vf_extract_func extract; }; struct draw_vertex_fetch -- cgit v1.2.3 From 871d39ec8c168fa58d8758013e99da63fa58111d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 14 Apr 2008 16:18:00 +0100 Subject: softpipe: calculate determinant for all triangles, don't rely on draw module to do it --- src/gallium/drivers/softpipe/sp_prim_setup.c | 1 - src/gallium/drivers/softpipe/sp_prim_vbuf.c | 28 ---------------------------- src/gallium/drivers/softpipe/sp_setup.c | 26 +++++++++++++++++++++++++- src/gallium/drivers/softpipe/sp_setup.h | 10 +++++----- 4 files changed, 30 insertions(+), 35 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 6fe463b74c..0ddb06764a 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -71,7 +71,6 @@ do_tri(struct draw_stage *stage, struct prim_header *prim) struct setup_stage *setup = setup_stage( stage ); setup_tri( setup->setup, - prim->det, prim->v[0]->data, prim->v[1]->data, prim->v[2]->data ); diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 74cd675908..4fed19ecb6 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -119,25 +119,6 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) } -/** - * Recalculate prim's determinant. This is needed as we don't have - * get this information through the vbuf_render interface & we must - * calculate it here. - */ -static float -calc_det( const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) -{ - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0][0] - v2[0][0]; - const float ey = v0[0][1] - v2[0][1]; - const float fx = v1[0][0] - v2[0][0]; - const float fy = v1[0][1] - v2[0][1]; - - /* det = cross(e,f).z */ - return ex * fy - ey * fx; -} static void @@ -169,7 +150,6 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) indices[i+j] * vertex_size); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2]); @@ -254,7 +234,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i + 1); v[2] = VERTEX(i + 2); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); @@ -267,7 +246,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i - 1); v[2] = VERTEX(i); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); @@ -280,7 +258,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i - 1); v[2] = VERTEX(i); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); @@ -293,7 +270,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i + 1); v[2] = VERTEX(i + 2); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); @@ -302,7 +278,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i + 2); v[2] = VERTEX(i + 3); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); @@ -315,7 +290,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i); v[2] = VERTEX(i + 1); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); @@ -324,7 +298,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i + 1); v[2] = VERTEX(i - 1); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); @@ -337,7 +310,6 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) v[1] = VERTEX(i - 1); v[2] = VERTEX(i); setup_tri( setup_ctx, - calc_det(v[0], v[1], v[2]), v[0], v[1], v[2] ); diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 5a30788850..0164d0588d 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -695,15 +695,37 @@ static void subtriangle( struct setup_context *setup, } +/** + * Recalculate prim's determinant. This is needed as we don't have + * get this information through the vbuf_render interface & we must + * calculate it here. + */ +static float +calc_det( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + + /* det = cross(e,f).z */ + return ex * fy - ey * fx; +} + + /** * Do setup for triangle rasterization, then render the triangle. */ void setup_tri( struct setup_context *setup, - float det, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) { + float det = calc_det(v0, v1, v2); + /* debug_printf("%s\n", __FUNCTION__ ); */ @@ -713,6 +735,8 @@ void setup_tri( struct setup_context *setup, setup->numFragsWritten = 0; #endif + + if (cull_tri( setup, det )) return; diff --git a/src/gallium/drivers/softpipe/sp_setup.h b/src/gallium/drivers/softpipe/sp_setup.h index 3133fc2a3d..d54f334428 100644 --- a/src/gallium/drivers/softpipe/sp_setup.h +++ b/src/gallium/drivers/softpipe/sp_setup.h @@ -30,11 +30,11 @@ struct setup_context; struct softpipe_context; -void setup_tri( struct setup_context *setup, - float det, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ); +void +setup_tri( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ); void setup_line(struct setup_context *setup, -- cgit v1.2.3 From e3309197855b5caf7c4c167d1e7beedf33ed2fdd Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 14 Apr 2008 12:27:24 -0400 Subject: pass vertex size to shaders so that callee can decide on the size of the vertices and not always have to use the maximum vertex allocation size for them --- src/gallium/auxiliary/draw/draw_private.h | 7 ++++--- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 3 ++- src/gallium/auxiliary/draw/draw_vertex_cache.c | 8 ++++++-- src/gallium/auxiliary/draw/draw_vertex_shader.c | 5 +++-- src/gallium/auxiliary/draw/draw_vs_exec.c | 5 +++-- src/gallium/auxiliary/draw/draw_vs_sse.c | 5 +++-- 6 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 3042d26847..c8cb96c8ba 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -78,6 +78,7 @@ struct vertex_header { /* XXX This is too large */ #define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) +#define MAX_VERTEX_ALLOCATION ((MAX_VERTEX_SIZE + 0x0f) & ~0x0f) @@ -152,7 +153,8 @@ struct draw_vertex_shader { struct draw_context *draw, const unsigned *elts, unsigned count, - void *out ); + void *out, + unsigned vertex_size); void (*delete)( struct draw_vertex_shader * ); @@ -450,9 +452,8 @@ dot4(const float *a, const float *b) } static INLINE struct vertex_header * -draw_header_from_block(char *block, int num) +draw_header_from_block(char *block, int size, int num) { - static const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; return (struct vertex_header*)(block + num * size); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 427128f335..3fa6dcc5e7 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -157,7 +157,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, /* Shade */ shader->prepare(shader, draw); - if (shader->run(shader, draw, fetch_elts, fetch_count, pipeline_verts)) { + if (shader->run(shader, draw, fetch_elts, fetch_count, pipeline_verts, + fpme->pipeline_vertex_size)) { /* Run the pipeline */ draw_pt_run_pipeline( fpme->draw, fpme->prim, diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c index 9256dec4ea..730c18bcb3 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_cache.c +++ b/src/gallium/auxiliary/draw/draw_vertex_cache.c @@ -74,6 +74,7 @@ static struct vertex_header *get_vertex( struct draw_context *draw, /*debug_printf("HIT %d %d\n", slot, i);*/ assert(draw->vcache.idx[slot].out < draw->vs.queue_nr); return draw_header_from_block(draw->vs.vertex_cache, + MAX_VERTEX_ALLOCATION, draw->vcache.idx[slot].out); } @@ -101,7 +102,8 @@ static struct vertex_header *get_vertex( struct draw_context *draw, */ assert(draw->vs.queue_nr < VS_QUEUE_LENGTH); - header = draw_header_from_block(draw->vs.vertex_cache, out); + header = draw_header_from_block(draw->vs.vertex_cache, MAX_VERTEX_ALLOCATION, + out); draw->vs.elts[out] = i; header->clipmask = 0; header->edgeflag = draw_get_edgeflag(draw, i); @@ -113,6 +115,7 @@ static struct vertex_header *get_vertex( struct draw_context *draw, */ return draw_header_from_block(draw->vs.vertex_cache, + MAX_VERTEX_ALLOCATION, draw->vcache.idx[slot].out); } } @@ -148,7 +151,8 @@ void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ) for (i = 0; i < draw->vs.post_nr; i++) { struct vertex_header * header = - draw_header_from_block(draw->vs.vertex_cache, i); + draw_header_from_block(draw->vs.vertex_cache, + MAX_VERTEX_ALLOCATION, i); header->vertex_id = UNDEFINED_VERTEX_ID; } } diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index 452d0175c3..8572a6d40c 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -60,7 +60,8 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) unsigned elts[MAX_SHADER_VERTICES]; int j, n = MIN2(MAX_SHADER_VERTICES, draw->vs.queue_nr - i); struct vertex_header *dests = - draw_header_from_block(draw->vs.vertex_cache, i); + draw_header_from_block(draw->vs.vertex_cache, + MAX_VERTEX_ALLOCATION, i); for (j = 0; j < n; j++) { elts[j] = draw->vs.elts[i + j]; @@ -73,7 +74,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) assert(n > 0); assert(n <= MAX_SHADER_VERTICES); - shader->run(shader, draw, elts, n, dests); + shader->run(shader, draw, elts, n, dests, MAX_VERTEX_ALLOCATION); } draw->vs.post_nr = draw->vs.queue_nr; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 27cf060cc9..5c88c2e24e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -69,7 +69,8 @@ vs_exec_run( struct draw_vertex_shader *shader, struct draw_context *draw, const unsigned *elts, unsigned count, - void *vOut ) + void *vOut, + unsigned vertex_size) { struct tgsi_exec_machine *machine = &draw->machine; unsigned int i, j; @@ -107,7 +108,7 @@ vs_exec_run( struct draw_vertex_shader *shader, unsigned slot; float x, y, z, w; struct vertex_header *out = - draw_header_from_block(vOut, i + j); + draw_header_from_block(vOut, vertex_size, i + j); /* Handle attr[0] (position) specially: * diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 92b9947e9f..ee0a3105b9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -83,7 +83,8 @@ vs_sse_run( struct draw_vertex_shader *base, struct draw_context *draw, const unsigned *elts, unsigned count, - void *vOut ) + void *vOut, + unsigned vertex_size ) { struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; struct tgsi_exec_machine *machine = &draw->machine; @@ -136,7 +137,7 @@ vs_sse_run( struct draw_vertex_shader *base, unsigned slot; float x, y, z, w; struct vertex_header *out = - draw_header_from_block(vOut, i + j); + draw_header_from_block(vOut, vertex_size, i + j); x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j]; y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j]; -- cgit v1.2.3 From 2ba6e1fa71be07a2d75abe2d085d485046c0932b Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 14 Apr 2008 12:29:23 -0400 Subject: silence some warnings --- src/gallium/auxiliary/draw/draw_pt.c | 4 ++-- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 2 -- src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c | 4 ++-- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 3 ++- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 5c16165c15..3d2e7bf7b8 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -36,13 +36,13 @@ #include "draw/draw_pt.h" -/* XXX: Shouldn't those two functions below use the '>' operator??? - */ +#if 0 static boolean too_many_elts( struct draw_context *draw, unsigned elts ) { return elts > (8 * 1024); } +#endif static INLINE unsigned reduced_prim(unsigned prim) { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 2f8b08db79..3a26a5d712 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -145,7 +145,6 @@ fetch_store_general( struct fetch_emit_middle_end *feme, unsigned count ) { float *out = (float *)out_ptr; - struct vbuf_render *render = feme->draw->render; uint i, j; for (i = 0; i < count; i++) { @@ -167,7 +166,6 @@ fetch_store_general( struct fetch_emit_middle_end *feme, static void fetch_emit_prepare( struct draw_pt_middle_end *middle, unsigned prim ) { - static const float zero = 0; struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; struct draw_context *draw = feme->draw; const struct vertex_info *vinfo; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c index 0914a90440..a70d129c93 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c @@ -87,7 +87,7 @@ struct fetch_pipeline_middle_end { }; - +#if 0 static void emit_R32_FLOAT( const float *attrib, float **out ) { @@ -111,7 +111,7 @@ static void emit_R32G32B32_FLOAT( const float *attrib, (*out)[2] = attrib[2]; (*out) += 3; } - +#endif static void emit_R32G32B32A32_FLOAT( const float *attrib, float **out ) { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 3fa6dcc5e7..b49c9efa65 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -49,6 +49,7 @@ struct fetch_pipeline_middle_end { unsigned prim; }; +#if 0 static void emit_R32_FLOAT( const float *attrib, float **out ) { @@ -72,7 +73,7 @@ static void emit_R32G32B32_FLOAT( const float *attrib, (*out)[2] = attrib[2]; (*out) += 3; } - +#endif static void emit_R32G32B32A32_FLOAT( const float *attrib, float **out ) { -- cgit v1.2.3 From 983b6a73e1842b436d158dc1d018bd483a1c9929 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 14 Apr 2008 12:32:25 -0400 Subject: use the new macro --- src/gallium/auxiliary/draw/draw_context.c | 3 +-- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 1e70a77523..0c314f6e1d 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -88,8 +88,7 @@ struct draw_context *draw_create( void ) /* Statically allocate maximum sized vertices for the cache - could be cleverer... */ { - const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; - char *tmp = align_malloc(VS_QUEUE_LENGTH * size, 16); + char *tmp = align_malloc(VS_QUEUE_LENGTH * MAX_VERTEX_ALLOCATION, 16); if (!tmp) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index b49c9efa65..04b3d2c4cf 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -128,7 +128,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, fpme->nr_fetch = nr; //fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); - fpme->pipeline_vertex_size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; + fpme->pipeline_vertex_size = MAX_VERTEX_ALLOCATION; fpme->hw_vertex_size = vinfo->size * 4; } -- cgit v1.2.3 From f58ab8e75c082a0aea36ed63fe7e21fb5fac14b6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 14 Apr 2008 10:56:56 -0600 Subject: gallium: take reduced prim, fill modes into account when culling --- src/gallium/drivers/softpipe/sp_context.h | 2 ++ src/gallium/drivers/softpipe/sp_draw_arrays.c | 15 +++++++++++++++ src/gallium/drivers/softpipe/sp_setup.c | 11 ++++++++++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 2442bd1eb0..0e1d5e561d 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -105,6 +105,8 @@ struct softpipe_context { int psize_slot; + unsigned reduced_api_prim; /**< PIPE_PRIM_POINTS, _LINES or _TRIANGLES */ + #if 0 /* Stipple derived state: */ diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 61bcf51899..421509495a 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -78,6 +78,20 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) } +static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; + + boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, unsigned count) @@ -115,6 +129,7 @@ softpipe_draw_elements(struct pipe_context *pipe, assert(0); #endif + sp->reduced_api_prim = reduced_prim[mode]; if (sp->dirty) softpipe_update_derived( sp ); diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 0164d0588d..813d703108 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -1254,7 +1254,16 @@ void setup_prepare( struct setup_context *setup ) sp->quad.first->begin(sp->quad.first); } - setup->winding = sp->rasterizer->cull_mode; + if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES && + sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && + sp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { + /* we'll do culling */ + setup->winding = sp->rasterizer->cull_mode; + } + else { + /* 'draw' will do culling */ + setup->winding = PIPE_WINDING_NONE; + } } -- cgit v1.2.3 From 78852986e6379a615a5742951f0fc6470e7c9d12 Mon Sep 17 00:00:00 2001 From: David Flynn Date: Mon, 14 Apr 2008 12:56:10 -0600 Subject: mesa: define #extension GL_ARB_texture_rectangle --- src/mesa/shader/slang/slang_preprocess.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mesa/shader/slang/slang_preprocess.c b/src/mesa/shader/slang/slang_preprocess.c index 076e982f8f..cf8cd5f87a 100644 --- a/src/mesa/shader/slang/slang_preprocess.c +++ b/src/mesa/shader/slang/slang_preprocess.c @@ -483,6 +483,7 @@ pp_cond_stack_reevaluate (pp_cond_stack *self) typedef struct { GLboolean MESA_shader_debug; /* GL_MESA_shader_debug enable */ + GLboolean GL_ARB_texture_rectangle; /* GL_ARB_texture_rectangle enable */ } pp_ext; /* @@ -498,6 +499,7 @@ static GLvoid pp_ext_init (pp_ext *self) { pp_ext_disable_all (self); + self->GL_ARB_texture_rectangle = GL_TRUE; /* Other initialization code goes here. */ } @@ -506,6 +508,8 @@ pp_ext_set (pp_ext *self, const char *name, GLboolean enable) { if (_mesa_strcmp (name, "MESA_shader_debug") == 0) self->MESA_shader_debug = enable; + else if (_mesa_strcmp (name, "GL_ARB_texture_rectangle") == 0) + self->GL_ARB_texture_rectangle = enable; /* Next extension name tests go here. */ else return GL_FALSE; -- cgit v1.2.3 From 5807c0242c38742d96c7ada3503f3198070aa208 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 14 Apr 2008 14:38:11 -0600 Subject: fix GL_ARB_texture_rectangle breakage --- src/mesa/shader/slang/slang_preprocess.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/shader/slang/slang_preprocess.c b/src/mesa/shader/slang/slang_preprocess.c index cf8cd5f87a..1645afcc18 100644 --- a/src/mesa/shader/slang/slang_preprocess.c +++ b/src/mesa/shader/slang/slang_preprocess.c @@ -483,7 +483,7 @@ pp_cond_stack_reevaluate (pp_cond_stack *self) typedef struct { GLboolean MESA_shader_debug; /* GL_MESA_shader_debug enable */ - GLboolean GL_ARB_texture_rectangle; /* GL_ARB_texture_rectangle enable */ + GLboolean ARB_texture_rectangle; /* GL_ARB_texture_rectangle enable */ } pp_ext; /* @@ -499,7 +499,7 @@ static GLvoid pp_ext_init (pp_ext *self) { pp_ext_disable_all (self); - self->GL_ARB_texture_rectangle = GL_TRUE; + self->ARB_texture_rectangle = GL_TRUE; /* Other initialization code goes here. */ } @@ -509,7 +509,7 @@ pp_ext_set (pp_ext *self, const char *name, GLboolean enable) if (_mesa_strcmp (name, "MESA_shader_debug") == 0) self->MESA_shader_debug = enable; else if (_mesa_strcmp (name, "GL_ARB_texture_rectangle") == 0) - self->GL_ARB_texture_rectangle = enable; + self->ARB_texture_rectangle = enable; /* Next extension name tests go here. */ else return GL_FALSE; -- cgit v1.2.3 From 21ae3d2721326d56c76370fd8bfcc1536203925d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 14 Apr 2008 22:39:33 +0900 Subject: gallium: Serialize buffers writes. Allow concurrent reads from buffers by the CPU/GPU, but serialize all writes. --- .../auxiliary/pipebuffer/pb_buffer_fenced.c | 81 ++++++++++++++++++++-- 1 file changed, 76 insertions(+), 5 deletions(-) diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index b1f7d93057..65b6584003 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -35,6 +35,7 @@ #include "pipe/p_compiler.h" +#include "pipe/p_error.h" #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" @@ -54,6 +55,13 @@ */ #define SUPER(__derived) (&(__derived)->base) +#define PIPE_BUFFER_USAGE_CPU_READ_WRITE \ + ( PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE ) +#define PIPE_BUFFER_USAGE_GPU_READ_WRITE \ + ( PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ) +#define PIPE_BUFFER_USAGE_WRITE \ + ( PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE ) + struct fenced_buffer_list { @@ -76,6 +84,15 @@ struct fenced_buffer struct pb_buffer *buffer; + /* FIXME: protect access with mutex */ + + /** + * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current + * buffer usage. + */ + unsigned flags; + + unsigned mapcount; struct pipe_fence_handle *fence; struct list_head head; @@ -98,7 +115,9 @@ _fenced_buffer_add(struct fenced_buffer *fenced_buf) struct fenced_buffer_list *fenced_list = fenced_buf->list; assert(fenced_buf->base.base.refcount); + assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); assert(fenced_buf->fence); + assert(!fenced_buf->head.prev); assert(!fenced_buf->head.next); LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed); @@ -130,6 +149,7 @@ _fenced_buffer_remove(struct fenced_buffer *fenced_buf) assert(fenced_buf->fence); winsys->fence_reference(winsys, &fenced_buf->fence, NULL); + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; assert(fenced_buf->head.prev); assert(fenced_buf->head.next); @@ -186,6 +206,34 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, } +/** + * Serialize writes, but allow concurrent reads. + */ +static INLINE enum pipe_error +fenced_buffer_serialize(struct fenced_buffer *fenced_buf, unsigned flags) +{ + struct fenced_buffer_list *fenced_list = fenced_buf->list; + struct pipe_winsys *winsys = fenced_list->winsys; + + if(((fenced_buf->flags | flags) & PIPE_BUFFER_USAGE_WRITE) == 0) + return PIPE_OK; + + if(fenced_buf->mapcount) { + /* FIXME */ + debug_warning("attemp to write concurrently to buffer"); + return PIPE_ERROR_RETRY; + } + + if(fenced_buf->fence) { + if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0) + return PIPE_ERROR_RETRY; + _fenced_buffer_remove(fenced_buf); + } + + return PIPE_OK; +} + + static void fenced_buffer_destroy(struct pb_buffer *buf) { @@ -223,16 +271,30 @@ static void * fenced_buffer_map(struct pb_buffer *buf, unsigned flags) { - struct fenced_buffer *fenced_buf = fenced_buffer(buf); - return pb_map(fenced_buf->buffer, flags); + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + void *map; + assert((flags & ~PIPE_BUFFER_USAGE_CPU_READ_WRITE) == 0); + + if(fenced_buffer_serialize(fenced_buf, flags) != PIPE_OK) + return NULL; + + map = pb_map(fenced_buf->buffer, flags); + if(map) + ++fenced_buf->mapcount; + fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE; + return map; } static void fenced_buffer_unmap(struct pb_buffer *buf) { - struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + assert(fenced_buf->mapcount); pb_unmap(fenced_buf->buffer); + --fenced_buf->mapcount; + if(!fenced_buf->mapcount) + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; } @@ -288,13 +350,22 @@ buffer_fence(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pipe_winsys *winsys = fenced_list->winsys; + /* FIXME: receive this as a parameter */ + unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0; + + if(fenced_buffer_serialize(fenced_buf, flags) != PIPE_OK) { + /* FIXME: propagate error */ + (void)0; + } _glthread_LOCK_MUTEX(fenced_list->mutex); if (fenced_buf->fence) _fenced_buffer_remove(fenced_buf); - winsys->fence_reference(winsys, &fenced_buf->fence, fence); - if (fenced_buf->fence) + if (fence) { + winsys->fence_reference(winsys, &fenced_buf->fence, fence); + fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; _fenced_buffer_add(fenced_buf); + } _glthread_UNLOCK_MUTEX(fenced_list->mutex); } -- cgit v1.2.3 From 5b8fa518476868530d748ce6d03674e9cca3d89f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 14 Apr 2008 23:55:36 +0900 Subject: gallium: Don't assume snprintf are always available. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 9 ++-- src/gallium/auxiliary/util/p_debug.c | 7 +-- src/gallium/auxiliary/util/u_snprintf.c | 16 +++--- src/gallium/auxiliary/util/u_string.h | 63 ++++++++++++++++++++++ .../drivers/i915simple/i915_fpc_translate.c | 3 +- src/gallium/drivers/i915simple/i915_screen.c | 3 +- src/gallium/drivers/i965simple/brw_screen.c | 3 +- src/gallium/include/pipe/p_format.h | 4 +- src/gallium/include/pipe/p_util.h | 8 --- 9 files changed, 88 insertions(+), 28 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_string.h diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index cb3573ceb6..ff6a2c4194 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -30,6 +30,7 @@ #include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "util/u_string.h" #include "tgsi_dump.h" #include "tgsi_parse.h" #include "tgsi_build.h" @@ -147,7 +148,7 @@ gen_dump_uix( { char str[36]; - sprintf( str, "0x%x", ui ); + util_snprintf( str, sizeof(str), "0x%x", ui ); gen_dump_str( dump, str ); } @@ -158,7 +159,7 @@ gen_dump_uid( { char str[16]; - sprintf( str, "%u", ui ); + util_snprintf( str, sizeof(str), "%u", ui ); gen_dump_str( dump, str ); } @@ -169,7 +170,7 @@ gen_dump_sid( { char str[16]; - sprintf( str, "%d", si ); + util_snprintf( str, sizeof(str), "%d", si ); gen_dump_str( dump, str ); } @@ -180,7 +181,7 @@ gen_dump_flt( { char str[48]; - sprintf( str, "%10.4f", flt ); + util_snprintf( str, sizeof(str), "%10.4f", flt ); gen_dump_str( dump, str ); } diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 090e3b7794..f9366467cd 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -39,6 +39,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_util.h" #include "pipe/p_debug.h" +#include "util/u_string.h" #ifdef WIN32 @@ -60,7 +61,7 @@ void _debug_vprintf(const char *format, va_list ap) /* EngDebugPrint does not handle float point arguments, so we need to use * our own vsnprintf implementation */ char buf[512 + 1]; - vsnprintf(buf, sizeof(buf), format, ap); + util_vsnprintf(buf, sizeof(buf), format, ap); _EngDebugPrint("%s", buf); #else /* TODO: Implement debug print for WINCE */ @@ -311,7 +312,7 @@ debug_dump_enum(const struct debug_named_value *names, ++names; } - snprintf(rest, sizeof(rest), "0x%08lx", value); + util_snprintf(rest, sizeof(rest), "0x%08lx", value); return rest; } @@ -344,7 +345,7 @@ debug_dump_flags(const struct debug_named_value *names, else first = 0; - snprintf(rest, sizeof(rest), "0x%08lx", value); + util_snprintf(rest, sizeof(rest), "0x%08lx", value); strncat(output, rest, sizeof(output)); } diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c index 48426abcb7..c4f4bbd30c 100644 --- a/src/gallium/auxiliary/util/u_snprintf.c +++ b/src/gallium/auxiliary/util/u_snprintf.c @@ -166,8 +166,8 @@ #include #else #ifdef WIN32 -#define vsnprintf rpl_vsnprintf -#define snprintf rpl_snprintf +#define vsnprintf util_vsnprintf +#define snprintf util_snprintf #define HAVE_VSNPRINTF 0 #define HAVE_SNPRINTF 0 #define HAVE_VASPRINTF 1 /* not needed */ @@ -445,7 +445,7 @@ static UINTMAX_T myround(LDOUBLE); static LDOUBLE mypow10(int); int -rpl_vsnprintf(char *str, size_t size, const char *format, va_list args) +util_vsnprintf(char *str, size_t size, const char *format, va_list args) { LDOUBLE fvalue; INTMAX_T value; @@ -1404,7 +1404,7 @@ mymemcpy(void *dst, void *src, size_t len) #endif /* NEED_MYMEMCPY */ int -rpl_vasprintf(char **ret, const char *format, va_list ap) +util_vasprintf(char **ret, const char *format, va_list ap) { size_t size; int len; @@ -1422,10 +1422,10 @@ rpl_vasprintf(char **ret, const char *format, va_list ap) #if !HAVE_SNPRINTF #if HAVE_STDARG_H int -rpl_snprintf(char *str, size_t size, const char *format, ...) +util_snprintf(char *str, size_t size, const char *format, ...) #else int -rpl_snprintf(va_alist) va_dcl +util_snprintf(va_alist) va_dcl #endif /* HAVE_STDARG_H */ { #if !HAVE_STDARG_H @@ -1449,10 +1449,10 @@ rpl_snprintf(va_alist) va_dcl #if !HAVE_ASPRINTF #if HAVE_STDARG_H int -rpl_asprintf(char **ret, const char *format, ...) +util_asprintf(char **ret, const char *format, ...) #else int -rpl_asprintf(va_alist) va_dcl +util_asprintf(va_alist) va_dcl #endif /* HAVE_STDARG_H */ { #if !HAVE_STDARG_H diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h new file mode 100644 index 0000000000..b99d4e8021 --- /dev/null +++ b/src/gallium/auxiliary/util/u_string.h @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Platform independent functions for string manipulation. + * + * @author Jose Fonseca + */ + +#ifndef U_STRING_H_ +#define U_STRING_H_ + +#ifndef WIN32 +#include +#endif +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifdef WIN32 +int util_vsnprintf(char *, size_t, const char *, va_list); +int util_snprintf(char *str, size_t size, const char *format, ...); +#else +#define util_vsnprintf vsnprintf +#define util_snprintf snprintf +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* U_STRING_H_ */ diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 7b4fca5db1..3ccf74c72c 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -33,6 +33,7 @@ #include "i915_fpc.h" #include "pipe/p_shader_tokens.h" +#include "util/u_string.h" #include "tgsi/util/tgsi_parse.h" #include "tgsi/util/tgsi_dump.h" @@ -122,7 +123,7 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...) debug_printf("i915_program_error: "); va_start( args, msg ); - vsprintf( buffer, msg, args ); + util_vsnprintf( buffer, sizeof(buffer), msg, args ); va_end( args ); debug_printf(buffer); debug_printf("\n"); diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 839b98c0ce..9ae594ce54 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -28,6 +28,7 @@ #include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_string.h" #include "i915_reg.h" #include "i915_context.h" @@ -78,7 +79,7 @@ i915_get_name( struct pipe_screen *pscreen ) break; } - sprintf(buffer, "i915 (chipset: %s)", chipset); + util_snprintf(buffer, sizeof(buffer), "i915 (chipset: %s)", chipset); return buffer; } diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index 5be369fe52..6845c7abde 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -28,6 +28,7 @@ #include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_string.h" #include "brw_context.h" #include "brw_screen.h" @@ -66,7 +67,7 @@ brw_get_name( struct pipe_screen *screen ) break; } - sprintf(buffer, "i965 (chipset: %s)", chipset); + util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset); return buffer; } diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 9e0f91f202..ef9e3a3d6c 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -28,7 +28,7 @@ #ifndef PIPE_FORMAT_H #define PIPE_FORMAT_H -#include /* for sprintf */ +#include "util/u_string.h" #include "p_compiler.h" #include "p_debug.h" @@ -367,7 +367,7 @@ static INLINE char *pf_sprint_name( char *str, enum pipe_format format ) strcat( str, "S" ); break; } - sprintf( &str[strlen( str )], "%u", size * scale ); + util_snprintf( &str[strlen( str )], 32, "%u", size * scale ); } if (i != 0) { strcat( str, "_" ); diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 8e3aaee496..dbca080a4b 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -138,14 +138,6 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) #define GETENV( X ) debug_get_option( X, NULL ) -#ifdef WIN32 -int rpl_vsnprintf(char *, size_t, const char *, va_list); -int rpl_snprintf(char *str, size_t size, const char *format, ...); -#define vsnprintf rpl_vsnprintf -#define snprintf rpl_snprintf -#endif - - /** * Return memory on given byte alignment */ -- cgit v1.2.3 From 01c7dd2629d161bf87af679a3045e1e2d54259fc Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 15 Apr 2008 10:38:05 +0900 Subject: gallium: Add draw_pt_fetch_shade_pipeline.c to scons build. --- src/gallium/auxiliary/draw/SConscript | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 52107912f5..a7fb5dbd61 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -18,6 +18,7 @@ draw = env.ConvenienceLibrary( 'draw_pt_vcache.c', 'draw_pt_fetch_emit.c', 'draw_pt_fetch_pipeline.c', + 'draw_pt_fetch_shade_pipeline.c', 'draw_pt_pipeline.c', 'draw_pt_elts.c', 'draw_prim.c', -- cgit v1.2.3 From fdd794dcfa33482bdabe7c04ec9df655e0c69bfc Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 Apr 2008 20:55:14 -0600 Subject: gallium: fix PIPE_CAP_MAX_RENDER_TARGETS query --- src/gallium/drivers/softpipe/sp_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 1850a1ced3..7dacb1c461 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -68,7 +68,7 @@ softpipe_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_POINT_SPRITE: return 1; case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; + return PIPE_MAX_COLOR_BUFS; case PIPE_CAP_OCCLUSION_QUERY: return 1; case PIPE_CAP_TEXTURE_SHADOW_MAP: -- cgit v1.2.3 From b54225ccd6d3bc1b678e27c2f00ebddf5bf1046d Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 Apr 2008 20:56:08 -0600 Subject: gallium: set ctx->Const.MaxDrawBuffers --- src/mesa/state_tracker/st_extensions.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 47a50d40ca..2f7ac074da 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -111,6 +111,10 @@ void st_init_limits(struct st_context *st) st->bitmap_texcoord_bias = screen->get_paramf(screen, PIPE_CAP_BITMAP_TEXCOORD_BIAS); + + c->MaxDrawBuffers + = CLAMP(screen->get_param(screen, PIPE_CAP_MAX_RENDER_TARGETS), + 1, MAX_DRAW_BUFFERS); } -- cgit v1.2.3 From e4b3c13d7a7dbd716bdf4b4d2dda8c6e579bd2d1 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 Apr 2008 20:57:15 -0600 Subject: gallium: fix multi drawbuffer fb state --- src/mesa/state_tracker/st_atom_framebuffer.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c index 8a95096ec9..14eeb58cc1 100644 --- a/src/mesa/state_tracker/st_atom_framebuffer.c +++ b/src/mesa/state_tracker/st_atom_framebuffer.c @@ -48,7 +48,7 @@ update_framebuffer_state( struct st_context *st ) struct pipe_framebuffer_state *framebuffer = &st->state.framebuffer; struct gl_framebuffer *fb = st->ctx->DrawBuffer; struct st_renderbuffer *strb; - GLuint i; + GLuint i, j; memset(framebuffer, 0, sizeof(*framebuffer)); @@ -58,11 +58,14 @@ update_framebuffer_state( struct st_context *st ) /* Examine Mesa's ctx->DrawBuffer->_ColorDrawBuffers state * to determine which surfaces to draw to */ - framebuffer->num_cbufs = fb->_NumColorDrawBuffers[0]; - for (i = 0; i < framebuffer->num_cbufs; i++) { - strb = st_renderbuffer(fb->_ColorDrawBuffers[0][i]); - assert(strb->surface); - framebuffer->cbufs[i] = strb->surface; + framebuffer->num_cbufs = 0; + for (j = 0; j < MAX_DRAW_BUFFERS; j++) { + for (i = 0; i < fb->_NumColorDrawBuffers[j]; i++) { + strb = st_renderbuffer(fb->_ColorDrawBuffers[j][i]); + assert(strb->surface); + framebuffer->cbufs[framebuffer->num_cbufs] = strb->surface; + framebuffer->num_cbufs++; + } } strb = st_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer); -- cgit v1.2.3 From 90b9a11a6d69f1cf6c837def0e8a9b598079ef1b Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 Apr 2008 20:58:05 -0600 Subject: gallium: fix semantic indexes for outputs --- src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 578fd2ecb0..524b5af50b 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -767,12 +767,14 @@ tgsi_translate_mesa_program( switch (outputSemanticName[i]) { case TGSI_SEMANTIC_POSITION: fulldecl = make_output_decl(i, - TGSI_SEMANTIC_POSITION, 0, /* Z / Depth */ + TGSI_SEMANTIC_POSITION, /* Z / Depth */ + outputSemanticIndex[i], TGSI_WRITEMASK_Z ); break; case TGSI_SEMANTIC_COLOR: fulldecl = make_output_decl(i, - TGSI_SEMANTIC_COLOR, 0, + TGSI_SEMANTIC_COLOR, + outputSemanticIndex[i], TGSI_WRITEMASK_XYZW ); break; default: -- cgit v1.2.3 From d3878b070b7b5084526b65499737cc686a6039b6 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 Apr 2008 21:01:40 -0600 Subject: gallium: enable new quad output code, remove old code --- src/gallium/drivers/softpipe/sp_quad_fs.c | 36 ------------------------------- 1 file changed, 36 deletions(-) diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 9a20c056a2..8dbdbe5764 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -88,7 +88,6 @@ shade_quad( &qss->machine, quad ); -#if 0 /* XXX multi color outputs - untested */ /* store outputs */ boolean z_written = FALSE; { @@ -133,41 +132,6 @@ shade_quad( quad->outputs.depth[2] = z0 + dzdy; quad->outputs.depth[3] = z0 + dzdx + dzdy; } -#endif - - /* store result color(s) */ - if (qss->colorOutSlot >= 0) { - /* XXX need to handle multiple color outputs someday */ - assert(softpipe->fs->info.output_semantic_name[qss->colorOutSlot] - == TGSI_SEMANTIC_COLOR); - memcpy( - quad->outputs.color[0], - &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0], - sizeof( quad->outputs.color[0] ) ); - } - - /* store result Z */ - if (qss->depthOutSlot >= 0) { - /* output[slot] is new Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i]; - } - } - else { - /* compute Z values now, as in the quad earlyz stage */ - /* XXX we should really only do this if the earlyz stage is not used */ - const float fx = (float) quad->x0; - const float fy = (float) quad->y0; - const float dzdx = quad->posCoef->dadx[2]; - const float dzdy = quad->posCoef->dady[2]; - const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; - - quad->outputs.depth[0] = z0; - quad->outputs.depth[1] = z0 + dzdx; - quad->outputs.depth[2] = z0 + dzdy; - quad->outputs.depth[3] = z0 + dzdx + dzdy; - } /* shader may cull fragments */ if( quad->mask ) { -- cgit v1.2.3