summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/radeon/radeon_span.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/radeon/radeon_span.c')
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_span.c589
1 files changed, 464 insertions, 125 deletions
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
index 12051ff1c8..4e100d854e 100644
--- a/src/mesa/drivers/dri/radeon/radeon_span.c
+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
@@ -43,46 +43,222 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/glheader.h"
#include "swrast/swrast.h"
-#include "radeon_context.h"
-#include "radeon_ioctl.h"
-#include "radeon_state.h"
+#include "radeon_common.h"
+#include "radeon_lock.h"
#include "radeon_span.h"
-#include "radeon_tex.h"
-
-#include "drirenderbuffer.h"
#define DBG 0
+static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
+
+
+/* r200 depth buffer is always tiled - this is the formula
+ according to the docs, unless I made a typo while transcribing it
+*/
+#if defined(RADEON_COMMON_FOR_R200)
+static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y)
+{ /* Returns the address, inside the buffer object memory at rrb->bo->ptr,
+ * of the 2-byte depth value for pixel (x, y).
+ *
+ * With rrb->has_surface set, the offset is plain linear addressing
+ * (x * cpp + y * pitch).  Otherwise the bits of x and y are scattered
+ * into the byte offset by the tiling formula below:
+ *
+ *   offset[1]      = x[0]
+ *   offset[2]      = y[0]
+ *   offset[3]      = x[1]
+ *   offset[4]      = y[1]
+ *   offset[5]      = x[2]
+ *   offset[6..7]   = x[4..5]
+ *   offset[8]      = x[3]
+ *   offset[9..10]  = y[2..3]
+ *   offset[11]     = block select within a pair of blocks
+ *   offset[12..]   = block index / 2
+ *
+ * No bounds checking is done on x, y or the resulting offset.
+ */
+ GLubyte *ptr = rrb->bo->ptr;
+ GLint offset;
+ if (rrb->has_surface) {
+ offset = x * rrb->cpp + y * rrb->pitch;
+ } else {
+ GLuint b; /* index of the 64x16-pixel block containing (x, y) */
+ offset = 0;
+ b = (((y >> 4) * (rrb->pitch >> 8) + (x >> 6))); /* (pitch >> 8) is used as the number of blocks per block row */
+ offset += (b >> 1) << 12; /* every pair of blocks occupies 4096 bytes */
+ offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11; /* 2048-byte half of the pair; swapped on odd block rows when (pitch >> 8) is even */
+ offset += ((y >> 2) & 0x3) << 9;
+ offset += ((x >> 3) & 0x1) << 8;
+ offset += ((x >> 4) & 0x3) << 6;
+ offset += ((x >> 2) & 0x1) << 5;
+ offset += ((y >> 1) & 0x1) << 4;
+ offset += ((x >> 1) & 0x1) << 3;
+ offset += (y & 0x1) << 2;
+ offset += (x & 0x1) << 1;
+ }
+ return &ptr[offset];
+}
+
+static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y)
+{ /* Returns the address, inside the buffer object memory at rrb->bo->ptr,
+ * of the 4-byte depth/stencil value for pixel (x, y).
+ *
+ * With rrb->has_surface set, the offset is plain linear addressing
+ * (x * cpp + y * pitch).  Otherwise the bits of x and y are scattered
+ * into the byte offset by the tiling formula below:
+ *
+ *   offset[2]      = x[0]
+ *   offset[3]      = y[0]
+ *   offset[4]      = x[1]
+ *   offset[5]      = y[1]
+ *   offset[6..7]   = x[3..4]
+ *   offset[8]      = x[2]
+ *   offset[9..10]  = y[2..3]
+ *   offset[11]     = block select within a pair of blocks
+ *   offset[12..]   = block index / 2
+ *
+ * No bounds checking is done on x, y or the resulting offset.
+ */
+ GLubyte *ptr = rrb->bo->ptr;
+ GLint offset;
+ if (rrb->has_surface) {
+ offset = x * rrb->cpp + y * rrb->pitch;
+ } else {
+ GLuint b; /* index of the 32x16-pixel block containing (x, y) */
+ offset = 0;
+ b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5)); /* NOTE(review): y is masked with 0x7ff here but not in r200_depth_2byte nor in the bit-11 term below - confirm against the tiling docs */
+ offset += (b >> 1) << 12; /* every pair of blocks occupies 4096 bytes */
+ offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11; /* 2048-byte half of the pair; swapped on odd block rows when (pitch >> 7) is even */
+ offset += ((y >> 2) & 0x3) << 9;
+ offset += ((x >> 2) & 0x1) << 8;
+ offset += ((x >> 3) & 0x3) << 6;
+ offset += ((y >> 1) & 0x1) << 5;
+ offset += ((x >> 1) & 0x1) << 4;
+ offset += (y & 0x1) << 3;
+ offset += (x & 0x1) << 2;
+ }
+ return &ptr[offset];
+}
+#endif
+
+/* radeon tiling on r300-r500 has 4 states,
+ macro-linear/micro-linear
+ macro-linear/micro-tiled
+ macro-tiled /micro-linear
+ macro-tiled /micro-tiled
+ 1 byte surface
+ 2 byte surface - two types - we only provide 8x2 microtiling
+ 4 byte surface
+ 8/16 byte (unused)
+*/
+static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y)
+{ /* Returns the address, inside the buffer object memory at rrb->bo->ptr,
+ * of the 4-byte pixel at (x, y).
+ *
+ * Linear addressing (x * cpp + y * pitch) is used when rrb->has_surface
+ * is set or when neither tiling flag is set on the buffer object.
+ * Otherwise the offset is built from the tiling flags in rrb->bo->flags;
+ * the three tiled layouts are handled by the three branches below.
+ * No bounds checking is done on x, y or the resulting offset.
+ */
+ GLubyte *ptr = rrb->bo->ptr;
+ uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
+ GLint offset;
+
+ if (rrb->has_surface || !(rrb->bo->flags & mask)) {
+ offset = x * rrb->cpp + y * rrb->pitch;
+ } else {
+ offset = 0;
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { /* macro-tiled, micro-tiled: 32x16-pixel tiles of 2048 bytes */
+ offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
+ offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
+ offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
+ offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
+ offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
+ offset += ((y >> 1) & 0x1) << 6;
+ offset += ((x >> 2) & 0x1) << 5;
+ offset += (y & 1) << 4;
+ offset += (x & 3) << 2; /* x[0..1] -> offset[2..3]: four 4-byte pixels per 16-byte run */
+ } else { /* macro-tiled, micro-linear: 64x8-pixel tiles of 2048 bytes */
+ offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
+ offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
+ offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
+ offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
+ offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
+ offset += (y & 1) << 6;
+ offset += (x & 15) << 2; /* x[0..3] -> offset[2..5]: sixteen 4-byte pixels per 64-byte run */
+ }
+ } else { /* micro-tiled only: 4x2-pixel tiles of 32 bytes */
+ offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
+ offset += (y & 1) << 4;
+ offset += (x & 3) << 2;
+ }
+ }
+ return &ptr[offset];
+}
+
+/* Returns the address, inside the buffer object memory at rrb->bo->ptr,
+ * of the 2-byte pixel at (x, y), using 8x2 microtiles.
+ *
+ * Linear addressing (x * cpp + y * pitch) is used when rrb->has_surface
+ * is set or when neither tiling flag is set on the buffer object.
+ * Otherwise the offset is built from the tiling flags in rrb->bo->flags.
+ * No bounds checking is done on x, y or the resulting offset.
+ *
+ * The low-order x bits of both macro-tiled branches were previously copied
+ * unchanged from the 4-byte variant ("<< 2"), which made distinct pixels
+ * alias (e.g. x = 0 and x = 4, or x = 8 and x = 16).  With 2-byte pixels
+ * the shift must be 1, exactly as in the micro-tiled-only branch.
+ */
+static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y)
+{
+ GLubyte *ptr = rrb->bo->ptr;
+ uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
+ GLint offset;
+
+ if (rrb->has_surface || !(rrb->bo->flags & mask)) {
+ offset = x * rrb->cpp + y * rrb->pitch;
+ } else {
+ offset = 0;
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { /* macro-tiled, micro-tiled: 64x16-pixel tiles of 2048 bytes */
+ offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
+ offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
+ offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
+ offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
+ offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
+ offset += ((y >> 1) & 0x1) << 6;
+ offset += ((x >> 3) & 0x1) << 5;
+ offset += (y & 1) << 4;
+ offset += (x & 7) << 1; /* x[0..2] -> offset[1..3]: eight 2-byte pixels per 16-byte run */
+ } else { /* macro-tiled, micro-linear: 128x8-pixel tiles of 2048 bytes */
+ offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
+ offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
+ offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
+ offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
+ offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
+ offset += (y & 1) << 6;
+ offset += ((x >> 4) & 0x1) << 5;
+ offset += (x & 15) << 1; /* x[0..3] -> offset[1..4]; must not overlap the x[4] bit placed at offset[5] above */
+ }
+ } else { /* micro-tiled only: 8x2-pixel tiles of 32 bytes */
+ offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
+ offset += (y & 0x1) << 4;
+ offset += (x & 0x7) << 1;
+ }
+ }
+ return &ptr[offset];
+}
+
+#ifndef COMPILE_R300
+/* Rotate a 32-bit value right by 8 bits: the low byte (stencil in a
+ * Z24S8 word) moves to the top byte, the upper 24 bits (depth) move down.
+ */
+static uint32_t
+z24s8_to_s8z24(uint32_t val)
+{
+    const uint32_t low_byte_to_top = val << 24;
+    const uint32_t upper_bits_down = val >> 8;
+
+    return upper_bits_down | low_byte_to_top;
+}
+
+/* Rotate a 32-bit value left by 8 bits: the top byte (stencil in an
+ * S8Z24 word) moves to the low byte, the lower 24 bits (depth) move up.
+ */
+static uint32_t
+s8z24_to_z24s8(uint32_t val)
+{
+    const uint32_t lower_bits_up = val << 8;
+    const uint32_t top_byte_to_low = val >> 24;
+
+    return top_byte_to_low | lower_bits_up;
+}
+#endif
+
/*
* Note that all information needed to access pixels in a renderbuffer
* should be obtained through the gl_renderbuffer parameter, not per-context
* information.
*/
#define LOCAL_VARS \
- driRenderbuffer *drb = (driRenderbuffer *) rb; \
- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
- const GLuint bottom = dPriv->h - 1; \
- GLubyte *buf = (GLubyte *) drb->flippedData \
- + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \
- GLuint p; \
- (void) p;
+ struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
+ struct radeon_renderbuffer *rrb = (void *) rb; \
+ const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
+ const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
+ unsigned int num_cliprects; \
+ struct drm_clip_rect *cliprects; \
+ int x_off, y_off; \
+ GLuint p; \
+ (void)p; \
+ radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
#define LOCAL_DEPTH_VARS \
- driRenderbuffer *drb = (driRenderbuffer *) rb; \
- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
- const GLuint bottom = dPriv->h - 1; \
- GLuint xo = dPriv->x; \
- GLuint yo = dPriv->y; \
- GLubyte *buf = (GLubyte *) drb->Base.Data;
+ struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
+ struct radeon_renderbuffer *rrb = (void *) rb; \
+ const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
+ const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
+ unsigned int num_cliprects; \
+ struct drm_clip_rect *cliprects; \
+ int x_off, y_off; \
+ radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
-#define Y_FLIP(Y) (bottom - (Y))
+#define Y_FLIP(_y) ((_y) * yScale + yBias)
#define HW_LOCK()
#define HW_UNLOCK()
+/* XXX FBO: this is identical to the macro in spantmp2.h except we get
+ * the cliprect info from the context, not the driDrawable.
+ * Move this into spantmp2.h someday.
+ */
+#define HW_CLIPLOOP() \
+ do { \
+ int _nc = num_cliprects; \
+ while ( _nc-- ) { \
+ int minx = cliprects[_nc].x1 - x_off; \
+ int miny = cliprects[_nc].y1 - y_off; \
+ int maxx = cliprects[_nc].x2 - x_off; \
+ int maxy = cliprects[_nc].y2 - y_off;
+
/* ================================================================
* Color buffer
*/
@@ -94,7 +270,41 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define TAG(x) radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#include "spantmp2.h"
+
+/* 16 bit, ARGB1555 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
+
+#define TAG(x) radeon##x##_ARGB1555
+#define TAG2(x,y) radeon##x##_ARGB1555##y
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#include "spantmp2.h"
+
+/* 16 bit, RGBA4 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
+
+#define TAG(x) radeon##x##_ARGB4444
+#define TAG2(x,y) radeon##x##_ARGB4444##y
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#include "spantmp2.h"
+
+/* 32 bit, xRGB8888 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x) radeon##x##_xRGB8888
+#define TAG2(x,y) radeon##x##_xRGB8888##y
+#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
+#define PUT_VALUE(_x, _y, d) do { /* store one 32-bit pixel; do/while(0) makes the expansion a single statement */ \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, (_x) + x_off, (_y) + y_off ); \
+ *_ptr = (d); \
+} while (0)
#include "spantmp2.h"
/* 32 bit, ARGB8888 color spanline and pixel functions
@@ -104,7 +314,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define TAG(x) radeon##x##_ARGB8888
#define TAG2(x,y) radeon##x##_ARGB8888##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
+#define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
+#define PUT_VALUE(_x, _y, d) do { /* store one 32-bit pixel; do/while(0) makes the expansion a single statement */ \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, (_x) + x_off, (_y) + y_off ); \
+ *_ptr = (d); \
+} while (0)
#include "spantmp2.h"
/* ================================================================
@@ -121,106 +335,127 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* too...
*/
-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
-{
- GLuint pitch = drb->pitch;
- if (drb->depthHasSurface) {
- return 4 * (x + y * pitch);
- } else {
- GLuint ba, address = 0; /* a[0..1] = 0 */
-
-#ifdef COMPILE_R300
- ba = (y / 8) * (pitch / 8) + (x / 8);
-#else
- ba = (y / 16) * (pitch / 16) + (x / 16);
-#endif
-
- address |= (x & 0x7) << 2; /* a[2..4] = x[0..2] */
- address |= (y & 0x3) << 5; /* a[5..6] = y[0..1] */
- address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7] = x[4] ^ y[2] */
- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
-
- address |= (y & 0x8) << 7; /* a[10] = y[3] */
- address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11] = x[3] ^ y[4] */
- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
-
- return address;
- }
-}
-
-static INLINE GLuint
-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
-{
- GLuint pitch = drb->pitch;
- if (drb->depthHasSurface) {
- return 2 * (x + y * pitch);
- } else {
- GLuint ba, address = 0; /* a[0] = 0 */
-
- ba = (y / 16) * (pitch / 32) + (x / 32);
-
- address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */
- address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */
- address |= (x & 0x8) << 4; /* a[7] = x[3] */
- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
- address |= (y & 0x8) << 7; /* a[10] = y[3] */
- address |= ((x & 0x10) ^ (y & 0x10)) << 7; /* a[11] = x[4] ^ y[4] */
- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
-
- return address;
- }
-}
-
/* 16-bit depth buffer functions
*/
#define VALUE_TYPE GLushort
+#if defined(RADEON_COMMON_FOR_R200)
+#define WRITE_DEPTH( _x, _y, d ) \
+ *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
+#else
#define WRITE_DEPTH( _x, _y, d ) \
- *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
+ *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
+#endif
+#if defined(RADEON_COMMON_FOR_R200)
#define READ_DEPTH( d, _x, _y ) \
- d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
+ d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
+#else
+#define READ_DEPTH( d, _x, _y ) \
+ d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
+#endif
#define TAG(x) radeon##x##_z16
#include "depthtmp.h"
-/* 24 bit depth, 8 bit stencil depthbuffer functions
+/* 24 bit depth
*
* Careful: It looks like the R300 uses ZZZS byte order while the R200
* uses SZZZ for 24 bit depth, 8 bit stencil mode.
*/
#define VALUE_TYPE GLuint
-#ifdef COMPILE_R300
+#if defined(COMPILE_R300)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
tmp &= 0x000000ff; \
tmp |= ((d << 8) & 0xffffff00); \
- *(GLuint *)(buf + offset) = tmp; \
+ *_ptr = tmp; \
+} while (0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define WRITE_DEPTH( _x, _y, d ) \
+do { \
+ GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
+ tmp &= 0xff000000; \
+ tmp |= ((d) & 0x00ffffff); \
+ *_ptr = tmp; \
} while (0)
#else
#define WRITE_DEPTH( _x, _y, d ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
tmp &= 0xff000000; \
tmp |= ((d) & 0x00ffffff); \
- *(GLuint *)(buf + offset) = tmp; \
+ *_ptr = tmp; \
} while (0)
#endif
-#ifdef COMPILE_R300
+#if defined(COMPILE_R300)
#define READ_DEPTH( d, _x, _y ) \
- do { \
- d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \
- _y + yo )) & 0xffffff00) >> 8; \
+ do { \
+ d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
+ }while(0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define READ_DEPTH( d, _x, _y ) \
+ do { \
+ d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
}while(0)
#else
+#define READ_DEPTH( d, _x, _y ) do { /* keep only the low 24 depth bits; wrapped like the sibling branches so the caller supplies the ';' */ \
+ d = *(GLuint*)(radeon_ptr_4byte(rrb, (_x) + x_off, (_y) + y_off)) & 0x00ffffff; \
+ } while (0)
+#endif
+
+#define TAG(x) radeon##x##_z24
+#include "depthtmp.h"
+
+/* 24 bit depth, 8 bit stencil depthbuffer functions
+ * EXT_depth_stencil
+ *
+ * Careful: It looks like the R300 uses ZZZS byte order while the R200
+ * uses SZZZ for 24 bit depth, 8 bit stencil mode.
+ */
+#define VALUE_TYPE GLuint
+
+#if defined(COMPILE_R300)
+#define WRITE_DEPTH( _x, _y, d ) \
+do { \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ *_ptr = d; \
+} while (0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define WRITE_DEPTH( _x, _y, d ) \
+do { \
+ GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = z24s8_to_s8z24(d); \
+ *_ptr = tmp; \
+} while (0)
+#else
+#define WRITE_DEPTH( _x, _y, d ) \
+do { \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = z24s8_to_s8z24(d); \
+ *_ptr = tmp; \
+} while (0)
+#endif
+
+#if defined(COMPILE_R300)
+#define READ_DEPTH( d, _x, _y ) \
+ do { \
+ d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
+ }while(0)
+#elif defined(RADEON_COMMON_FOR_R200)
#define READ_DEPTH( d, _x, _y ) \
- d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \
- _y + yo )) & 0x00ffffff;
+ do { \
+ d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \
+ }while(0)
+#else
+#define READ_DEPTH( d, _x, _y ) do { \
+ d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off ))); \
+ } while (0)
#endif
#define TAG(x) radeon##x##_z24_s8
@@ -235,35 +470,51 @@ do { \
#ifdef COMPILE_R300
#define WRITE_STENCIL( _x, _y, d ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
+ GLuint tmp = *_ptr; \
tmp &= 0xffffff00; \
tmp |= (d) & 0xff; \
- *(GLuint *)(buf + offset) = tmp; \
+ *_ptr = tmp; \
+} while (0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define WRITE_STENCIL( _x, _y, d ) \
+do { \
+ GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \
+ GLuint tmp = *_ptr; \
+ tmp &= 0x00ffffff; \
+ tmp |= (((d) & 0xff) << 24); \
+ *_ptr = tmp; \
} while (0)
#else
#define WRITE_STENCIL( _x, _y, d ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
+ GLuint tmp = *_ptr; \
tmp &= 0x00ffffff; \
tmp |= (((d) & 0xff) << 24); \
- *(GLuint *)(buf + offset) = tmp; \
+ *_ptr = tmp; \
} while (0)
#endif
#ifdef COMPILE_R300
#define READ_STENCIL( d, _x, _y ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
d = tmp & 0x000000ff; \
} while (0)
+#elif defined(RADEON_COMMON_FOR_R200)
+#define READ_STENCIL( d, _x, _y ) \
+do { \
+ GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
+ d = (tmp & 0xff000000) >> 24; \
+} while (0)
#else
#define READ_STENCIL( d, _x, _y ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
+ GLuint tmp = *_ptr; \
d = (tmp & 0xff000000) >> 24; \
} while (0)
#endif
@@ -271,29 +522,110 @@ do { \
#define TAG(x) radeon##x##_z24_s8
#include "stenciltmp.h"
-/* Move locking out to get reasonable span performance (10x better
- * than doing this in HW_LOCK above). WaitForIdle() is the main
- * culprit.
- */
+
+/*
+ * Map (flag != 0) or unmap (flag == 0) the buffer object behind a
+ * renderbuffer.
+ *
+ * Mapping: waits on the buffer object when its manager provides a bo_wait
+ * hook, maps it (second argument 1), and installs the span functions.  A
+ * mapping failure is only reported on stderr; the span functions are
+ * installed regardless.
+ *
+ * Unmapping: unmaps the buffer object and clears the renderbuffer's
+ * GetRow/PutRow pointers.
+ *
+ * Renderbuffers that do not resolve to a radeon_renderbuffer, or that have
+ * no buffer object, are ignored.
+ */
+static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
+{
+    struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
+    int status;
+
+    if (!rrb || !rrb->bo)
+        return;
+
+    if (!flag) {
+        radeon_bo_unmap(rrb->bo);
+        rb->GetRow = NULL;
+        rb->PutRow = NULL;
+        return;
+    }
+
+    if (rrb->bo->bom->funcs->bo_wait)
+        radeon_bo_wait(rrb->bo);
+
+    status = radeon_bo_map(rrb->bo, 1);
+    if (status)
+        fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
+                __FUNCTION__, status);
+
+    radeonSetSpanFunctions(rrb);
+}
+
+/*
+ * Map (map != 0) or unmap (map == 0) everything the software rasterizer may
+ * touch for the current draw/read framebuffers: the color draw buffers,
+ * any texture images attached for render-to-texture, the color read
+ * buffer, and the renderbuffers wrapped by the depth and stencil buffers.
+ */
+static void
+radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
+{
+    GLuint idx;
+
+    /* Color buffers currently selected for drawing. */
+    for (idx = 0; idx < ctx->DrawBuffer->_NumColorDrawBuffers; idx++) {
+        map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[idx], map);
+    }
+
+    /* Texture attachments (render to texture).  A mipmapped texture need
+     * not be complete for render to texture, so only the attached image
+     * is mapped, never the whole texture.
+     */
+    for (idx = 0; idx < BUFFER_COUNT; idx++) {
+        struct gl_renderbuffer_attachment *att =
+            &ctx->DrawBuffer->Attachment[idx];
+        struct gl_texture_object *tex = att->Texture;
+        radeon_texture_image *image;
+
+        if (!tex)
+            continue;
+
+        image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
+        ASSERT(att->Renderbuffer);
+
+        if (map) {
+            radeon_teximage_map(image, GL_TRUE);
+        } else {
+            radeon_teximage_unmap(image);
+        }
+    }
+
+    /* Color buffer selected for reading. */
+    map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);
+
+    /* Depth and stencil are wrappers; operate on the wrapped renderbuffer. */
+    if (ctx->DrawBuffer->_DepthBuffer) {
+        map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
+    }
+
+    if (ctx->DrawBuffer->_StencilBuffer) {
+        map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
+    }
+}
static void radeonSpanRenderStart(GLcontext * ctx)
{
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-#ifdef COMPILE_R300
- r300ContextPtr r300 = (r300ContextPtr) rmesa;
- R300_FIREVERTICES(r300);
-#else
- RADEON_FIREVERTICES(rmesa);
-#endif
- LOCK_HARDWARE(rmesa);
- radeonWaitForIdleLocked(rmesa);
+ int i; /* Texture image unit index.
+ *
+ * Overview of the new body: fire pending vertices; when DRI2 is not
+ * enabled, take the hardware lock and wait for idle; map the current
+ * texture of every enabled texture image unit; finally map the
+ * framebuffer buffers via radeon_map_unmap_buffers(ctx, 1).
+ */
+
+ radeon_firevertices(rmesa);
+
+ /* The locking and wait for idle should really only be needed in classic mode.
+ * In a future memory manager based implementation, this should become
+ * unnecessary due to the fact that mapping our buffers, textures, etc.
+ * should implicitly wait for any previous rendering commands that must
+ * be waited on. */
+ if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+ LOCK_HARDWARE(rmesa); /* released again in radeonSpanRenderFinish under the same condition */
+ radeonWaitForIdleLocked(rmesa);
+ }
+
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled)
+ ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
+ }
+
+ radeon_map_unmap_buffers(ctx, 1); /* maps color/read/depth/stencil buffers and render-to-texture images */
}
static void radeonSpanRenderFinish(GLcontext * ctx)
{
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+ int i; /* Texture image unit index.
+ *
+ * Overview of the new body: flush swrast; when DRI2 is not enabled,
+ * release the hardware lock taken in radeonSpanRenderStart; unmap the
+ * current texture of every enabled texture image unit; finally unmap
+ * the framebuffer buffers via radeon_map_unmap_buffers(ctx, 0).
+ */
_swrast_flush(ctx);
- UNLOCK_HARDWARE(rmesa);
+ if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+ UNLOCK_HARDWARE(rmesa); /* note: the lock is dropped before the textures and buffers below are unmapped */
+ }
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled)
+ ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
+ }
+
+ radeon_map_unmap_buffers(ctx, 0); /* unmaps the buffers and clears their GetRow/PutRow pointers */
}
void radeonInitSpanFuncs(GLcontext * ctx)
@@ -307,20 +639,27 @@ void radeonInitSpanFuncs(GLcontext * ctx)
/**
* Plug in the Get/Put routines for the given driRenderbuffer.
*/
-void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
+static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
{
- if (drb->Base.InternalFormat == GL_RGBA) {
- if (vis->redBits == 5 && vis->greenBits == 6
- && vis->blueBits == 5) {
- radeonInitPointers_RGB565(&drb->Base);
- } else {
- radeonInitPointers_ARGB8888(&drb->Base);
- }
- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
- radeonInitDepthPointers_z16(&drb->Base);
- } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
- radeonInitDepthPointers_z24_s8(&drb->Base);
- } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
- radeonInitStencilPointers_z24_s8(&drb->Base);
+ if (rrb->base._ActualFormat == GL_RGB5) { /* The new version dispatches on
+ * rrb->base._ActualFormat alone (no GLvisual) and calls the matching
+ * generated radeonInit*Pointers_* routine on &rrb->base.  It is now
+ * file-local and is called from map_unmap_rb each time a renderbuffer
+ * is mapped.
+ */
+ radeonInitPointers_RGB565(&rrb->base);
+ } else if (rrb->base._ActualFormat == GL_RGB8) {
+ radeonInitPointers_xRGB8888(&rrb->base);
+ } else if (rrb->base._ActualFormat == GL_RGBA8) {
+ radeonInitPointers_ARGB8888(&rrb->base);
+ } else if (rrb->base._ActualFormat == GL_RGBA4) {
+ radeonInitPointers_ARGB4444(&rrb->base);
+ } else if (rrb->base._ActualFormat == GL_RGB5_A1) {
+ radeonInitPointers_ARGB1555(&rrb->base);
+ } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
+ radeonInitDepthPointers_z16(&rrb->base);
+ } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
+ radeonInitDepthPointers_z24(&rrb->base); /* depth-only accessors: the other byte of each 32-bit word is preserved on write */
+ } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
+ radeonInitDepthPointers_z24_s8(&rrb->base); /* packed depth+stencil accessors: whole 32-bit words are read and written */
+ } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
+ radeonInitStencilPointers_z24_s8(&rrb->base); /* stencil accessors operate on the stencil byte of the 32-bit depth/stencil word */
+ } else { /* unknown format: only reported on stderr; no accessors are installed here */
+ fprintf(stderr, "radeonSetSpanFunctions: bad actual format: 0x%04X\n", rrb->base._ActualFormat);
}
}