summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r128/r128_span.c
diff options
context:
space:
mode:
authorEric Anholt <anholt@FreeBSD.org>2005-10-27 20:26:24 +0000
committerEric Anholt <anholt@FreeBSD.org>2005-10-27 20:26:24 +0000
commit215c4c3a9c989b3d0e7090177ab2fc3eeab0ddaa (patch)
tree4d358e322e350f176e5b0dfbc6c838403f7199fd /src/mesa/drivers/dri/r128/r128_span.c
parent512c994b92126a7575bb3cc327da40710b43f52c (diff)
Bug #1028: Add hardware-accelerated stencil support to r128. Testing with
stencilwrap reported many issues with various modes. Some of these were complicated by the fact that spans are broken (Bug #1615), but some appear to be real bugs. However, while spans remain broken, I found that visual results were better by avoiding fallbacks rather than avoiding just a broken stencil implementation. Note that this required changing the depth spans at 24+8bpp into read-modify-write cycles. It would be nicer as a single write with a mask, but the kernel span blits turn off masking. Reviewed by: ajax
Diffstat (limited to 'src/mesa/drivers/dri/r128/r128_span.c')
-rw-r--r--src/mesa/drivers/dri/r128/r128_span.c144
1 files changed, 140 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/r128/r128_span.c b/src/mesa/drivers/dri/r128/r128_span.c
index 76f80efb77..a24e63c9b6 100644
--- a/src/mesa/drivers/dri/r128/r128_span.c
+++ b/src/mesa/drivers/dri/r128/r128_span.c
@@ -46,6 +46,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define HAVE_HW_DEPTH_SPANS 1
#define HAVE_HW_DEPTH_PIXELS 1
+#define HAVE_HW_STENCIL_SPANS 1
+#define HAVE_HW_STENCIL_PIXELS 1
#define LOCAL_VARS \
r128ContextPtr rmesa = R128_CONTEXT(ctx); \
@@ -101,6 +103,21 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
+ ((dPriv->y + (Y)) * drb->flippedPitch + (dPriv->x + (X))) * drb->cpp)
#include "spantmp2.h"
+/* Idling in the depth/stencil span functions:
+ * For writes, the kernel reads from the given user-space buffer at dispatch
+ * time, and then writes to the depth buffer asynchronously.
+ * For reads, the kernel reads from the depth buffer and writes to the span
+ * temporary asynchronously.
+ * So, if we're going to read from the span temporary, we need to idle before
+ * doing so. But we don't need to idle after write, because the CPU won't
+ * be accessing the destination, only the accelerator (through 3d rendering or
+ * depth span reads)
+ * However, due to interactions from pixel cache between 2d (what we do with
+ * depth) and 3d (all other parts of the system), we idle at the begin and end
+ * of a set of span operations, which should cover the pix cache issue.
+ * Except, we still have major issues, as shown by no_rast=true glxgears, or
+ * stencilwrap.
+ */
/* ================================================================
* Depth buffer
@@ -110,10 +127,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#define WRITE_DEPTH_SPAN() \
+do { \
r128WriteDepthSpanLocked( rmesa, n, \
x + dPriv->x, \
y + dPriv->y, \
- depth, mask );
+ depth, mask ); \
+} while (0)
#define WRITE_DEPTH_PIXELS() \
do { \
@@ -183,20 +202,41 @@ do { \
/* 24-bit depth, 8-bit stencil buffer functions
*/
#define WRITE_DEPTH_SPAN() \
+do { \
+ GLint buf[n]; \
+ GLint i; \
+ GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
+ r128scrn->spanOffset); \
+ r128ReadDepthSpanLocked( rmesa, n, \
+ x + dPriv->x, \
+ y + dPriv->y ); \
+ r128WaitForIdleLocked( rmesa ); \
+ for ( i = 0 ; i < n ; i++ ) { \
+ buf[i] = (readbuf[i] & 0xff000000) | (depth[i] & 0x00ffffff); \
+ } \
r128WriteDepthSpanLocked( rmesa, n, \
x + dPriv->x, \
y + dPriv->y, \
- depth, mask );
+ buf, mask ); \
+} while (0)
#define WRITE_DEPTH_PIXELS() \
do { \
+ GLint buf[n]; \
GLint ox[MAX_WIDTH]; \
GLint oy[MAX_WIDTH]; \
+ GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
+ r128scrn->spanOffset); \
for ( i = 0 ; i < n ; i++ ) { \
ox[i] = x[i] + dPriv->x; \
oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
} \
- r128WriteDepthPixelsLocked( rmesa, n, ox, oy, depth, mask ); \
+ r128ReadDepthPixelsLocked( rmesa, n, ox, oy ); \
+ r128WaitForIdleLocked( rmesa ); \
+ for ( i = 0 ; i < n ; i++ ) { \
+ buf[i] = (readbuf[i] & 0xff000000) | (depth[i] & 0x00ffffff); \
+ } \
+ r128WriteDepthPixelsLocked( rmesa, n, ox, oy, buf, mask ); \
} while (0)
#define READ_DEPTH_SPAN() \
@@ -205,6 +245,7 @@ do { \
r128scrn->spanOffset); \
GLint i; \
\
+ /*if (n >= 128) fprintf(stderr, "Large number of pixels: %d\n", n);*/ \
r128ReadDepthSpanLocked( rmesa, n, \
x + dPriv->x, \
y + dPriv->y ); \
@@ -258,8 +299,99 @@ do { \
* Stencil buffer
*/
-/* FIXME: Add support for hardware stencil buffers.
+/* 24 bit depth, 8 bit stencil depthbuffer functions
*/
+#define WRITE_STENCIL_SPAN() \
+do { \
+ GLint buf[n]; \
+ GLint i; \
+ GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
+ r128scrn->spanOffset); \
+ r128ReadDepthSpanLocked( rmesa, n, \
+ x + dPriv->x, \
+ y + dPriv->y ); \
+ r128WaitForIdleLocked( rmesa ); \
+ for ( i = 0 ; i < n ; i++ ) { \
+ buf[i] = (readbuf[i] & 0x00ffffff) | (stencil[i] << 24); \
+ } \
+ r128WriteDepthSpanLocked( rmesa, n, \
+ x + dPriv->x, \
+ y + dPriv->y, \
+ buf, mask ); \
+} while (0)
+
+#define WRITE_STENCIL_PIXELS() \
+do { \
+ GLint buf[n]; \
+ GLint ox[MAX_WIDTH]; \
+ GLint oy[MAX_WIDTH]; \
+ GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
+ r128scrn->spanOffset); \
+ for ( i = 0 ; i < n ; i++ ) { \
+ ox[i] = x[i] + dPriv->x; \
+ oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
+ } \
+ r128ReadDepthPixelsLocked( rmesa, n, ox, oy ); \
+ r128WaitForIdleLocked( rmesa ); \
+ for ( i = 0 ; i < n ; i++ ) { \
+ buf[i] = (readbuf[i] & 0x00ffffff) | (stencil[i] << 24); \
+ } \
+ r128WriteDepthPixelsLocked( rmesa, n, ox, oy, buf, mask ); \
+} while (0)
+
+#define READ_STENCIL_SPAN() \
+do { \
+ GLuint *buf = (GLuint *)((GLubyte *)sPriv->pFB + \
+ r128scrn->spanOffset); \
+ GLint i; \
+ \
+ /*if (n >= 128) fprintf(stderr, "Large number of pixels: %d\n", n);*/ \
+ r128ReadDepthSpanLocked( rmesa, n, \
+ x + dPriv->x, \
+ y + dPriv->y ); \
+ r128WaitForIdleLocked( rmesa ); \
+ \
+ for ( i = 0 ; i < n ; i++ ) { \
+ stencil[i] = (buf[i] & 0xff000000) >> 24; \
+ } \
+} while (0)
+
+#define READ_STENCIL_PIXELS() \
+do { \
+ GLuint *buf = (GLuint *)((GLubyte *)sPriv->pFB + \
+ r128scrn->spanOffset); \
+ GLint i, remaining = n; \
+ \
+ while ( remaining > 0 ) { \
+ GLint ox[128]; \
+ GLint oy[128]; \
+ GLint count; \
+ \
+ if ( remaining <= 128 ) { \
+ count = remaining; \
+ } else { \
+ count = 128; \
+ } \
+ for ( i = 0 ; i < count ; i++ ) { \
+ ox[i] = x[i] + dPriv->x; \
+ oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
+ } \
+ \
+ r128ReadDepthPixelsLocked( rmesa, count, ox, oy ); \
+ r128WaitForIdleLocked( rmesa ); \
+ \
+ for ( i = 0 ; i < count ; i++ ) { \
+ stencil[i] = (buf[i] & 0xff000000) >> 24; \
+ } \
+ stencil += count; \
+ x += count; \
+ y += count; \
+ remaining -= count; \
+ } \
+} while (0)
+
+#define TAG(x) radeon##x##_z24_s8
+#include "stenciltmp.h"
static void
r128SpanRenderStart( GLcontext *ctx )
@@ -275,6 +407,7 @@ r128SpanRenderFinish( GLcontext *ctx )
{
r128ContextPtr rmesa = R128_CONTEXT(ctx);
_swrast_flush( ctx );
+ r128WaitForIdleLocked( rmesa );
UNLOCK_HARDWARE( rmesa );
}
@@ -306,4 +439,7 @@ r128SetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
r128InitDepthPointers_z24_s8(&drb->Base);
}
+ else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
+ radeonInitStencilPointers_z24_s8(&drb->Base);
+ }
}