From 4837ea30208d002bc36a836d2117f826d40c8bfa Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Thu, 10 Feb 2005 22:36:06 +0000 Subject: add texture micro and macro tiling to radeon/r200 driver. This can improve performance up to 15% in texture-intensive applications. Convert the driver to use the correct blit format and blit width instead of fixed blit format and blit width when uploading textures to make it work. --- src/mesa/drivers/dri/r200/r200_context.c | 3 ++ src/mesa/drivers/dri/r200/r200_context.h | 3 ++ src/mesa/drivers/dri/r200/r200_reg.h | 2 + src/mesa/drivers/dri/r200/r200_texmem.c | 71 ++++++++++++++++---------- src/mesa/drivers/dri/r200/r200_texstate.c | 73 +++++++++++++++++++-------- src/mesa/drivers/dri/radeon/radeon_context.c | 4 ++ src/mesa/drivers/dri/radeon/radeon_context.h | 5 +- src/mesa/drivers/dri/radeon/radeon_texmem.c | 52 ++++++++++++++----- src/mesa/drivers/dri/radeon/radeon_texstate.c | 70 ++++++++++++++++++++----- 9 files changed, 209 insertions(+), 74 deletions(-) diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index 4eca4ad7e5..baaca08755 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -272,6 +272,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, else rmesa->using_hyperz = GL_TRUE; } + + if ( sPriv->drmMinor >= 15 ) + rmesa->texmicrotile = GL_TRUE; /* Init default driver functions then plug in our R200-specific functions * (the texture functions are especially important) diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h index cedf1b974f..7e0a46ae51 100644 --- a/src/mesa/drivers/dri/r200/r200_context.h +++ b/src/mesa/drivers/dri/r200/r200_context.h @@ -167,6 +167,8 @@ struct r200_tex_obj { GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ GLboolean border_fallback; + + GLuint tile_bits; /* hw texture tile bits used on this texture */ }; @@ -931,6 +933,7 @@ struct r200_context { driOptionCache optionCache; GLboolean using_hyperz; + GLboolean texmicrotile; }; #define R200_CONTEXT(ctx) ((r200ContextPtr)(ctx->DriverCtx)) diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h index c1132e54ab..2468c6cebf 100644 --- a/src/mesa/drivers/dri/r200/r200_reg.h +++ b/src/mesa/drivers/dri/r200/r200_reg.h @@ -968,6 +968,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define R200_TXO_ENDIAN_BYTE_SWAP (1 << 0) #define R200_TXO_ENDIAN_WORD_SWAP (2 << 0) #define R200_TXO_ENDIAN_HALFDW_SWAP (3 << 0) +#define R200_TXO_MACRO_TILE (1 << 2) +#define R200_TXO_MICRO_TILE (1 << 3) #define R200_TXO_OFFSET_MASK 0xffffffe0 #define R200_TXO_OFFSET_SHIFT 5 #define R200_PP_CUBIC_OFFSET_F1_0 0x2d04 diff --git a/src/mesa/drivers/dri/r200/r200_texmem.c b/src/mesa/drivers/dri/r200/r200_texmem.c index 3f8e5d6e7f..7472afeedd 100644 --- a/src/mesa/drivers/dri/r200/r200_texmem.c +++ b/src/mesa/drivers/dri/r200/r200_texmem.c @@ -43,12 +43,10 @@ SOFTWARE. #include "context.h" #include "colormac.h" #include "macros.h" -#include "radeon_reg.h" /* gets definition for usleep */ #include "r200_context.h" -#include "r200_state.h" #include "r200_ioctl.h" -#include "r200_swtcl.h" #include "r200_tex.h" +#include "radeon_reg.h" #include /* for usleep() */ @@ -253,12 +251,13 @@ static void r200UploadRectSubImage( r200ContextPtr rmesa, /* Blit to framebuffer */ - r200EmitBlit( rmesa, - blit_format, - dstPitch, GET_START( ®ion ), - dstPitch, t->bufAddr, - 0, 0, - 0, done, + r200EmitBlit( rmesa, + blit_format, + dstPitch, GET_START( ®ion ), + dstPitch | (t->tile_bits >> 16), + t->bufAddr, + 0, 0, + 0, done, width, lines ); r200EmitWait( rmesa, RADEON_WAIT_2D ); @@ -339,7 +338,7 @@ static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t, imageWidth = texImage->Width; imageHeight = texImage->Height; - offset = t->bufAddr; + offset = t->bufAddr + t->base.totalSize / 6 * face; if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { GLint imageX = 0; @@ -363,19 +362,47 @@ static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t, * We used to use 1, 2 and 4-byte texels and used to use the texture * width to dictate the blit width - but that won't work for compressed * textures. (Brian) + * NOTE: can't do that with texture tiling. (sroland) */ tex.offset = offset; - tex.pitch = BLIT_WIDTH_BYTES / 64; - tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */ + tex.image = &tmp; + /* copy (x,y,width,height,data) */ + memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) ); + if (texImage->TexFormat->TexelBytes) { - tex.width = imageWidth * texImage->TexFormat->TexelBytes; /* in bytes */ + /* use multi-byte upload scheme */ tex.height = imageHeight; + tex.width = imageWidth; + tex.format = t->pp_txformat & R200_TXFORMAT_FORMAT_MASK; + tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1); + tex.offset += tmp.x & ~1023; + tmp.x = tmp.x % 1024; + if (t->tile_bits & R200_TXO_MICRO_TILE) { + /* need something like "tiled coordinates" ? */ + tmp.y = tmp.x / (tex.pitch * 128) * 2; + tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes; + tex.pitch |= RADEON_DST_TILE_MICRO >> 22; + } + else { + tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); + } + if ((t->tile_bits & R200_TXO_MACRO_TILE) && + (texImage->Width * texImage->TexFormat->TexelBytes >= 256) && + ((!(t->tile_bits & R200_TXO_MICRO_TILE) && (texImage->Height >= 8)) || + (texImage->Height >= 16))) { + /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes, + OR if height is smaller than 8 automatically, but if micro tiling is active + the limit is height 16 instead ? */ + tex.pitch |= RADEON_DST_TILE_MACRO >> 22; + } } else { /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed so the kernel module reads the right amount of data. */ + tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */ + tex.pitch = (BLIT_WIDTH_BYTES / 64); tex.height = (imageHeight + 3) / 4; tex.width = (imageWidth + 3) / 4; switch (t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) { @@ -390,19 +417,7 @@ static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t, fprintf(stderr, "unknown compressed tex format in uploadSubImage\n"); } } - tex.image = &tmp; - /* copy (x,y,width,height,data) */ - memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) ); - - /* Adjust the base offset to account for the Y-offset. This is done, - * instead of just letting the Y-offset automatically take care of it, - * because it is possible, for very large textures, for the Y-offset - * to exceede the [-8192,+8191] range. - */ - tex.offset += tmp.y * 1024; - tmp.y = 0; - LOCK_HARDWARE( rmesa ); do { ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE, @@ -473,7 +488,11 @@ int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face ) t->bufAddr = rmesa->r200Screen->texOffset[heap] + t->base.memBlock->ofs; t->pp_txoffset = t->bufAddr; - + + if (!(t->base.tObj->Image[0][0]->IsClientData)) { + /* hope it's safe to add that here... */ + t->pp_txoffset |= t->tile_bits; + } /* Mark this texobj as dirty on all units: */ diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 1e56c78f9b..3fba25d0b5 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -125,8 +125,8 @@ static void r200SetTexImages( r200ContextPtr rmesa, { r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData; const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - GLint curOffset; - GLint i; + GLint curOffset, blitWidth; + GLint i, texelBytes; GLint numLevels; GLint log2Width, log2Height, log2Depth; @@ -146,6 +146,7 @@ static void r200SetTexImages( r200ContextPtr rmesa, return; } + texelBytes = baseImage->TexFormat->TexelBytes; /* Compute which mipmap levels we really want to send to the hardware. */ @@ -164,6 +165,28 @@ static void r200SetTexImages( r200ContextPtr rmesa, * memory organized as a rectangle of width BLIT_WIDTH_BYTES. */ curOffset = 0; + blitWidth = BLIT_WIDTH_BYTES; + t->tile_bits = 0; + + /* figure out if this texture is suitable for tiling. */ + if (texelBytes) { + if (rmesa->texmicrotile && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) && + /* texrect might be able to use micro tiling too in theory? */ + (baseImage->Height > 1)) { + /* allow 32 (bytes) x 1 mip (which will use two times the space + the non-tiled version would use) max if base texture is large enough */ + if ((numLevels == 1) || + (((baseImage->Width * texelBytes / baseImage->Height) <= 32) && + (baseImage->Width * texelBytes > 64)) || + ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) { + t->tile_bits |= R200_TXO_MICRO_TILE; + } + } + if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) { + /* we can set macro tiling even for small textures, they will be untiled anyway */ + t->tile_bits |= R200_TXO_MACRO_TILE; + } + } for (i = 0; i < numLevels; i++) { const struct gl_texture_image *texImage; @@ -195,28 +218,41 @@ static void r200SetTexImages( r200ContextPtr rmesa, else size = texImage->CompressedSize; } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63) - & ~63) * texImage->Height; + size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height; + } + else if (t->tile_bits & R200_TXO_MICRO_TILE) { + /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, + though the actual offset may be different (if texture is less than + 32 bytes width) to the untiled case */ + int w = (texImage->Width * texelBytes * 2 + 31) & ~31; + size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); } else { - int w = texImage->Width * texImage->TexFormat->TexelBytes; - if (w < 32) - w = 32; - size = w * texImage->Height * texImage->Depth; + int w = (texImage->Width * texelBytes + 31) & ~31; + size = w * texImage->Height * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); } assert(size > 0); - /* Align to 32-byte offset. It is faster to do this unconditionally * (no branch penalty). */ curOffset = (curOffset + 0x1f) & ~0x1f; - t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; - t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; - t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); - t->image[0][i].height = size / t->image[0][i].width; + if (texelBytes) { + t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */ + t->image[0][i].y = 0; + t->image[0][i].width = MIN2(size / texelBytes, blitWidth); + t->image[0][i].height = (size / texelBytes) / t->image[0][i].width; + } + else { + t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; + t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; + t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); + t->image[0][i].height = size / t->image[0][i].width; + } #if 0 /* for debugging only and only applicable to non-rectangle targets */ @@ -242,16 +278,13 @@ static void r200SetTexImages( r200ContextPtr rmesa, /* Setup remaining cube face blits, if needed */ if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - /* Round totalSize up to multiple of BLIT_WIDTH_BYTES */ - const GLuint faceSize = (t->base.totalSize + BLIT_WIDTH_BYTES - 1) - & ~(BLIT_WIDTH_BYTES-1); - const GLuint lines = faceSize / BLIT_WIDTH_BYTES; + const GLuint faceSize = t->base.totalSize; GLuint face; - /* reuse face 0 x/y/width/height - just adjust y */ + /* reuse face 0 x/y/width/height - just update the offset when uploading */ for (face = 1; face < 6; face++) { for (i = 0; i < numLevels; i++) { t->image[face][i].x = t->image[0][i].x; - t->image[face][i].y = t->image[0][i].y + face * lines; + t->image[face][i].y = t->image[0][i].y; t->image[face][i].width = t->image[0][i].width; t->image[face][i].height = t->image[0][i].height; } @@ -310,7 +343,7 @@ static void r200SetTexImages( r200ContextPtr rmesa, if (baseImage->IsCompressed) t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); else - t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63); + t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); t->pp_txpitch -= 32; t->dirty_state = TEX_ALL; diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index 4229d5cb5e..5d7e28cf89 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -255,6 +255,9 @@ radeonCreateContext( const __GLcontextModes *glVisual, rmesa->using_hyperz = GL_TRUE; } + if ( sPriv->drmMinor >= 15 ) + rmesa->texmicrotile = GL_TRUE; + /* Init default driver functions then plug in our Radeon-specific functions * (the texture functions are especially important) */ @@ -445,6 +448,7 @@ radeonCreateContext( const __GLcontextModes *glVisual, } (*rmesa->get_ust)( & rmesa->swap_ust ); + if (rmesa->sarea->tiling_enabled != 0) fprintf(stderr, "color tiling enabled!\n"); #if DO_DEBUG RADEON_DEBUG = driParseDebugString( getenv( "RADEON_DEBUG" ), diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h index 53860c12b8..8d0637ca32 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_context.h @@ -162,6 +162,8 @@ struct radeon_tex_obj { GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ GLboolean border_fallback; + + GLuint tile_bits; /* hw texture tile bits used on this texture */ }; @@ -186,7 +188,7 @@ struct radeon_state_atom { GLboolean dirty; /* dirty-mark in emit_state_list */ GLboolean (*check)( GLcontext * ); /* is this state active? */ }; - + /* Trying to keep these relatively short as the variables are becoming @@ -781,6 +783,7 @@ struct radeon_context { driOptionCache optionCache; GLboolean using_hyperz; + GLboolean texmicrotile; /* Performance counters */ diff --git a/src/mesa/drivers/dri/radeon/radeon_texmem.c b/src/mesa/drivers/dri/radeon/radeon_texmem.c index d910a6c15a..d492e190c1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texmem.c +++ b/src/mesa/drivers/dri/radeon/radeon_texmem.c @@ -46,6 +46,8 @@ SOFTWARE. #include "radeon_ioctl.h" #include "radeon_tex.h" +#include /* for usleep() */ + /** * Destroy any device-dependent state associated with the texture. This may @@ -151,12 +153,12 @@ static void radeonUploadRectSubImage( radeonContextPtr rmesa, /* Blit to framebuffer */ - radeonEmitBlit( rmesa, - blit_format, - dstPitch, GET_START( ®ion ), - dstPitch, t->bufAddr, - 0, 0, - 0, done, + radeonEmitBlit( rmesa, + blit_format, + dstPitch, GET_START( ®ion ), + dstPitch, t->bufAddr, + 0, 0, + 0, done, width, lines ); radeonEmitWait( rmesa, RADEON_WAIT_2D ); @@ -248,19 +250,43 @@ static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t, * We used to use 1, 2 and 4-byte texels and used to use the texture * width to dictate the blit width - but that won't work for compressed * textures. (Brian) + * NOTE: can't do that with texture tiling. (sroland) */ tex.offset = offset; - tex.pitch = BLIT_WIDTH_BYTES / 64; - tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ + tex.image = &tmp; + /* copy (x,y,width,height,data) */ + memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) ); + if (texImage->TexFormat->TexelBytes) { - tex.width = imageWidth * texImage->TexFormat->TexelBytes; /* in bytes */ + /* use multi-byte upload scheme */ tex.height = imageHeight; + tex.width = imageWidth; + tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK; + tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1); + tex.offset += tmp.x & ~1023; + tmp.x = tmp.x % 1024; + if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) { + /* need something like "tiled coordinates" ? */ + tmp.y = tmp.x / (tex.pitch * 128) * 2; + tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes; + tex.pitch |= RADEON_DST_TILE_MICRO >> 22; + } + else { + tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); + } + if ((t->tile_bits & RADEON_TXO_MACRO_TILE) && + (texImage->Width * texImage->TexFormat->TexelBytes >= 256)) { + /* radeon switches off macro tiling for small textures/mipmaps it seems */ + tex.pitch |= RADEON_DST_TILE_MACRO >> 22; + } } else { /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed so the kernel module reads the right amount of data. */ + tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ + tex.pitch = (BLIT_WIDTH_BYTES / 64); tex.height = (imageHeight + 3) / 4; tex.width = (imageWidth + 3) / 4; switch (t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) { @@ -273,10 +299,6 @@ static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t, break; } } - tex.image = &tmp; - - /* copy (x,y,width,height,data) */ - memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) ); LOCK_HARDWARE( rmesa ); do { @@ -344,6 +366,10 @@ int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint fac + t->base.memBlock->ofs; t->pp_txoffset = t->bufAddr; + if (!(t->base.tObj->Image[0][0]->IsClientData)) { + /* hope it's safe to add that here... */ + t->pp_txoffset |= t->tile_bits; + } /* Mark this texobj as dirty on all units: */ diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 5e818da9fd..b96ad740d1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -127,8 +127,8 @@ static void radeonSetTexImages( radeonContextPtr rmesa, { radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData; const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - GLint curOffset; - GLint i; + GLint curOffset, blitWidth; + GLint i, texelBytes; GLint numLevels; GLint log2Width, log2Height, log2Depth; @@ -148,6 +148,7 @@ static void radeonSetTexImages( radeonContextPtr rmesa, return; } + texelBytes = baseImage->TexFormat->TexelBytes; /* Compute which mipmap levels we really want to send to the hardware. */ @@ -166,6 +167,34 @@ static void radeonSetTexImages( radeonContextPtr rmesa, * memory organized as a rectangle of width BLIT_WIDTH_BYTES. */ curOffset = 0; + blitWidth = BLIT_WIDTH_BYTES; + t->tile_bits = 0; + + /* figure out if this texture is suitable for tiling. */ + if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) { + if (rmesa->texmicrotile && (baseImage->Height > 1)) { + /* allow 32 (bytes) x 1 mip (which will use two times the space + the non-tiled version would use) max if base texture is large enough */ + if ((numLevels == 1) || + (((baseImage->Width * texelBytes / baseImage->Height) <= 32) && + (baseImage->Width * texelBytes > 64)) || + ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) { + /* R100 has two microtile bits (only the txoffset reg, not the blitter) + weird: X2 + OPT: 32bit correct, 16bit completely hosed + X2: 32bit correct, 16bit correct + OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */ + t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/; + } + } + if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) { + /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not + in the case if height is smaller than 16 (not 100% sure), as does the r200, + so need to disable macro tiling in that case */ + if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) { + t->tile_bits |= RADEON_TXO_MACRO_TILE; + } + } + } for (i = 0; i < numLevels; i++) { const struct gl_texture_image *texImage; @@ -197,28 +226,41 @@ static void radeonSetTexImages( radeonContextPtr rmesa, else size = texImage->CompressedSize; } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63) - & ~63) * texImage->Height; + size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height; + } + else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) { + /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, + though the actual offset may be different (if texture is less than + 32 bytes width) to the untiled case */ + int w = (texImage->Width * texelBytes * 2 + 31) & ~31; + size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); } else { - int w = texImage->Width * texImage->TexFormat->TexelBytes; - if (w < 32) - w = 32; - size = w * texImage->Height * texImage->Depth; + int w = (texImage->Width * texelBytes + 31) & ~31; + size = w * texImage->Height * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); } assert(size > 0); - /* Align to 32-byte offset. It is faster to do this unconditionally * (no branch penalty). */ curOffset = (curOffset + 0x1f) & ~0x1f; - t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; - t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; - t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); - t->image[0][i].height = size / t->image[0][i].width; + if (texelBytes) { + t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */ + t->image[0][i].y = 0; + t->image[0][i].width = MIN2(size / texelBytes, blitWidth); + t->image[0][i].height = (size / texelBytes) / t->image[0][i].width; + } + else { + t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; + t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; + t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); + t->image[0][i].height = size / t->image[0][i].width; + } #if 0 /* for debugging only and only applicable to non-rectangle targets */ @@ -263,7 +305,7 @@ static void radeonSetTexImages( radeonContextPtr rmesa, if (baseImage->IsCompressed) t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); else - t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63); + t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); t->pp_txpitch -= 32; t->dirty_state = TEX_ALL; -- cgit v1.2.3