From 7a231da442838bbdd20ae3121dc4ec9ddcf8145a Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Sun, 2 Jan 2005 01:22:10 +0000 Subject: Improved the performance of software fallbacks by not waiting for idle in every single span function. Instead flush and wait in the SpanRenderStart hook and in wrappers around _swrast_Copy/Draw/ReadPixels. Misc. cleanups in savagespan.c while I'm there. --- src/mesa/drivers/dri/savage/savagespan.c | 125 +++++++++++++++++++++---------- src/mesa/drivers/dri/savage/savagetris.c | 1 - 2 files changed, 85 insertions(+), 41 deletions(-) diff --git a/src/mesa/drivers/dri/savage/savagespan.c b/src/mesa/drivers/dri/savage/savagespan.c index b6412d2983..0caee3a652 100644 --- a/src/mesa/drivers/dri/savage/savagespan.c +++ b/src/mesa/drivers/dri/savage/savagespan.c @@ -33,6 +33,7 @@ #define DBG 0 #define LOCAL_VARS \ + savageContextPtr imesa = SAVAGE_CONTEXT(ctx); \ __DRIdrawablePrivate *dPriv = imesa->mesa_drawable; \ savageScreenPrivate *savageScreen = imesa->savageScreen; \ GLuint cpp = savageScreen->cpp; \ @@ -48,6 +49,7 @@ (void) read_buf; (void) buf; (void) p #define LOCAL_DEPTH_VARS \ + savageContextPtr imesa = SAVAGE_CONTEXT(ctx); \ __DRIdrawablePrivate *dPriv = imesa->mesa_drawable; \ savageScreenPrivate *savageScreen = imesa->savageScreen; \ GLuint zpp = savageScreen->zpp; \ @@ -77,8 +79,7 @@ #define Y_FLIP(_y) (height - _y - 1) -#define HW_LOCK() savageContextPtr imesa = SAVAGE_CONTEXT(ctx); \ - WAIT_IDLE_EMPTY;\ +#define HW_LOCK() #define HW_CLIPLOOP() \ do { \ @@ -95,11 +96,7 @@ } \ } while (0) -#if 0 -#define HW_UNLOCK() \ - UNLOCK_HARDWARE(imesa); -#endif -#define HW_UNLOCK() { } +#define HW_UNLOCK() /* 16 bit, 565 rgb color spanline and pixel functions @@ -164,55 +161,47 @@ do { \ /* 16 bit depthbuffer functions. */ #define WRITE_DEPTH( _x, _y, d ) \ -do{ \ - *(GLushort *)(buf + (_x<<1) + _y*pitch) = d; \ -}while(0) - + *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch) = d + #define READ_DEPTH( d, _x, _y ) \ -do{ \ - d = *(GLushort *)(buf + (_x<<1) + _y*pitch); \ -}while(0) - -/* d = 0xffff; */ - + d = *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch) + #define TAG(x) savage##x##_16 #include "depthtmp.h" - + /* 8-bit stencil /24-bit depth depthbuffer functions. */ -#define WRITE_DEPTH( _x, _y, d ) { \ - GLuint tmp = *(GLuint *)(buf + (_x<<2) + _y*pitch); \ - tmp &= 0xFF000000; \ - tmp |= d; \ +#define WRITE_DEPTH( _x, _y, d ) do { \ + GLuint tmp = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch); \ + tmp &= 0xFF000000; \ + tmp |= d; \ *(GLuint *)(buf + (_x<<2) + _y*pitch) = tmp; \ -} +} while(0) #define READ_DEPTH( d, _x, _y ) \ - d = *(GLuint *)(buf + (_x<<2) + _y*pitch) & 0x00FFFFFF; - -/* d = 0x00ffffff; */ + d = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) #define TAG(x) savage##x##_8_24 #include "depthtmp.h" -#define WRITE_STENCIL( _x, _y, d ) { \ - GLuint tmp = *(GLuint *)(buf + (_x<<2) + _y*pitch); \ - tmp &= 0x00FFFFFF; \ - tmp |= (((GLuint)d)<<24) & 0xFF000000; \ - *(GLuint *)(buf + (_x<<2) + _y*pitch) = tmp; \ -} - -#define READ_STENCIL( d, _x, _y ) \ - d = (GLstencil)((*(GLuint *)(buf + (_x<<2) + _y*pitch) & 0xFF000000) >> 24); - +#define WRITE_STENCIL( _x, _y, d ) do { \ + GLuint tmp = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch); \ + tmp &= 0x00FFFFFF; \ + tmp |= (((GLuint)d)<<24) & 0xFF000000; \ + *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) = tmp; \ +} while(0) + +#define READ_STENCIL( d, _x, _y ) \ + d = (GLstencil)((*(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) & 0xFF000000) >> 24) + #define TAG(x) savage##x##_8_24 #include "stenciltmp.h" - + /* * This function is called to specify which buffer to read and write @@ -241,6 +230,58 @@ static void savageDDSetBuffer(GLcontext *ctx, GLframebuffer *buffer, ? imesa->driDrawable : imesa->driReadable; } +/* + * Wrappers around _swrast_Copy/Draw/ReadPixels that make sure all + * primitives are flushed and the hardware is idle before accessing + * the frame buffer. + */ +static void +savageCopyPixels( GLcontext *ctx, + GLint srcx, GLint srcy, GLsizei width, GLsizei height, + GLint destx, GLint desty, + GLenum type ) +{ + savageContextPtr imesa = SAVAGE_CONTEXT(ctx); + FLUSH_BATCH(imesa); + WAIT_IDLE_EMPTY; + _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type); +} +static void +savageDrawPixels( GLcontext *ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *packing, + const GLvoid *pixels ) +{ + savageContextPtr imesa = SAVAGE_CONTEXT(ctx); + FLUSH_BATCH(imesa); + WAIT_IDLE_EMPTY; + _swrast_DrawPixels(ctx, x, y, width, height, format, type, packing, pixels); +} +static void +savageReadPixels( GLcontext *ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *packing, + GLvoid *pixels ) +{ + savageContextPtr imesa = SAVAGE_CONTEXT(ctx); + FLUSH_BATCH(imesa); + WAIT_IDLE_EMPTY; + _swrast_ReadPixels(ctx, x, y, width, height, format, type, packing, pixels); +} + +/* + * Make sure the hardware is idle when span-rendering. + */ +static void savageSpanRenderStart( GLcontext *ctx ) +{ + savageContextPtr imesa = SAVAGE_CONTEXT(ctx); + FLUSH_BATCH(imesa); + WAIT_IDLE_EMPTY; +} + void savageDDInitSpanFuncs( GLcontext *ctx ) { @@ -277,6 +318,7 @@ void savageDDInitSpanFuncs( GLcontext *ctx ) case 2: swdd->ReadDepthSpan = savageReadDepthSpan_16; swdd->WriteDepthSpan = savageWriteDepthSpan_16; + swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_16; swdd->ReadDepthPixels = savageReadDepthPixels_16; swdd->WriteDepthPixels = savageWriteDepthPixels_16; @@ -284,6 +326,7 @@ void savageDDInitSpanFuncs( GLcontext *ctx ) case 4: swdd->ReadDepthSpan = savageReadDepthSpan_8_24; swdd->WriteDepthSpan = savageWriteDepthSpan_8_24; + swdd->WriteMonoDepthSpan = savageWriteMonoDepthSpan_8_24; swdd->ReadDepthPixels = savageReadDepthPixels_8_24; swdd->WriteDepthPixels = savageWriteDepthPixels_8_24; swdd->ReadStencilSpan = savageReadStencilSpan_8_24; @@ -301,11 +344,13 @@ void savageDDInitSpanFuncs( GLcontext *ctx ) swdd->ReadCI32Span =NULL; swdd->ReadCI32Pixels =NULL; + swdd->SpanRenderStart = savageSpanRenderStart; + /* Pixel path fallbacks. */ ctx->Driver.Accum = _swrast_Accum; ctx->Driver.Bitmap = _swrast_Bitmap; - ctx->Driver.CopyPixels = _swrast_CopyPixels; - ctx->Driver.DrawPixels = _swrast_DrawPixels; - ctx->Driver.ReadPixels = _swrast_ReadPixels; + ctx->Driver.CopyPixels = savageCopyPixels; + ctx->Driver.DrawPixels = savageDrawPixels; + ctx->Driver.ReadPixels = savageReadPixels; } diff --git a/src/mesa/drivers/dri/savage/savagetris.c b/src/mesa/drivers/dri/savage/savagetris.c index 1732dc5074..2721a63edb 100644 --- a/src/mesa/drivers/dri/savage/savagetris.c +++ b/src/mesa/drivers/dri/savage/savagetris.c @@ -1120,7 +1120,6 @@ void savageFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) imesa->Fallback |= bit; if (oldfallback == 0) { /* the first fallback */ - FLUSH_BATCH( imesa ); _swsetup_Wakeup( ctx ); imesa->RenderIndex = ~0; } -- cgit v1.2.3