summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/radeon
diff options
context:
space:
mode:
authorRoland Scheidegger <rscheidegger@gmx.ch>2005-02-10 22:36:06 +0000
committerRoland Scheidegger <rscheidegger@gmx.ch>2005-02-10 22:36:06 +0000
commit4837ea30208d002bc36a836d2117f826d40c8bfa (patch)
tree4db5a234a5af7d7f02a42ed824b85e938066828d /src/mesa/drivers/dri/radeon
parent26d31591257d575362776972439f614948366dd1 (diff)
add texture micro and macro tiling to radeon/r200 driver. This can improve performance up to 15% in texture-intensive applications. Convert the driver to use the correct blit format and blit width instead of fixed blit format and blit width when uploading textures to make it work.
Diffstat (limited to 'src/mesa/drivers/dri/radeon')
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_context.c4
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_context.h5
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texmem.c52
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texstate.c70
4 files changed, 103 insertions, 28 deletions
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
index 4229d5cb5e..5d7e28cf89 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -255,6 +255,9 @@ radeonCreateContext( const __GLcontextModes *glVisual,
rmesa->using_hyperz = GL_TRUE;
}
+ if ( sPriv->drmMinor >= 15 )
+ rmesa->texmicrotile = GL_TRUE;
+
/* Init default driver functions then plug in our Radeon-specific functions
* (the texture functions are especially important)
*/
@@ -445,6 +448,7 @@ radeonCreateContext( const __GLcontextModes *glVisual,
}
(*rmesa->get_ust)( & rmesa->swap_ust );
+ if (rmesa->sarea->tiling_enabled != 0) fprintf(stderr, "color tiling enabled!\n");
#if DO_DEBUG
RADEON_DEBUG = driParseDebugString( getenv( "RADEON_DEBUG" ),
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h
index 53860c12b8..8d0637ca32 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_context.h
@@ -162,6 +162,8 @@ struct radeon_tex_obj {
GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */
GLboolean border_fallback;
+
+ GLuint tile_bits; /* hw texture tile bits used on this texture */
};
@@ -186,7 +188,7 @@ struct radeon_state_atom {
GLboolean dirty; /* dirty-mark in emit_state_list */
GLboolean (*check)( GLcontext * ); /* is this state active? */
};
-
+
/* Trying to keep these relatively short as the variables are becoming
@@ -781,6 +783,7 @@ struct radeon_context {
driOptionCache optionCache;
GLboolean using_hyperz;
+ GLboolean texmicrotile;
/* Performance counters
*/
diff --git a/src/mesa/drivers/dri/radeon/radeon_texmem.c b/src/mesa/drivers/dri/radeon/radeon_texmem.c
index d910a6c15a..d492e190c1 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texmem.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texmem.c
@@ -46,6 +46,8 @@ SOFTWARE.
#include "radeon_ioctl.h"
#include "radeon_tex.h"
+#include <unistd.h> /* for usleep() */
+
/**
* Destroy any device-dependent state associated with the texture. This may
@@ -151,12 +153,12 @@ static void radeonUploadRectSubImage( radeonContextPtr rmesa,
/* Blit to framebuffer
*/
- radeonEmitBlit( rmesa,
- blit_format,
- dstPitch, GET_START( &region ),
- dstPitch, t->bufAddr,
- 0, 0,
- 0, done,
+ radeonEmitBlit( rmesa,
+ blit_format,
+ dstPitch, GET_START( &region ),
+ dstPitch, t->bufAddr,
+ 0, 0,
+ 0, done,
width, lines );
radeonEmitWait( rmesa, RADEON_WAIT_2D );
@@ -248,19 +250,43 @@ static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t,
* We used to use 1, 2 and 4-byte texels and used to use the texture
* width to dictate the blit width - but that won't work for compressed
* textures. (Brian)
+ * NOTE: can't do that with texture tiling. (sroland)
*/
tex.offset = offset;
- tex.pitch = BLIT_WIDTH_BYTES / 64;
- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+ tex.image = &tmp;
+ /* copy (x,y,width,height,data) */
+ memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) );
+
if (texImage->TexFormat->TexelBytes) {
- tex.width = imageWidth * texImage->TexFormat->TexelBytes; /* in bytes */
+ /* use multi-byte upload scheme */
tex.height = imageHeight;
+ tex.width = imageWidth;
+ tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK;
+ tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
+ tex.offset += tmp.x & ~1023;
+ tmp.x = tmp.x % 1024;
+ if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
+ /* need something like "tiled coordinates" ? */
+ tmp.y = tmp.x / (tex.pitch * 128) * 2;
+ tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
+ tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
+ }
+ else {
+ tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
+ }
+ if ((t->tile_bits & RADEON_TXO_MACRO_TILE) &&
+ (texImage->Width * texImage->TexFormat->TexelBytes >= 256)) {
+ /* radeon switches off macro tiling for small textures/mipmaps it seems */
+ tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
+ }
}
else {
/* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
/* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
so the kernel module reads the right amount of data. */
+ tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+ tex.pitch = (BLIT_WIDTH_BYTES / 64);
tex.height = (imageHeight + 3) / 4;
tex.width = (imageWidth + 3) / 4;
switch (t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) {
@@ -273,10 +299,6 @@ static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t,
break;
}
}
- tex.image = &tmp;
-
- /* copy (x,y,width,height,data) */
- memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) );
LOCK_HARDWARE( rmesa );
do {
@@ -344,6 +366,10 @@ int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint fac
+ t->base.memBlock->ofs;
t->pp_txoffset = t->bufAddr;
+ if (!(t->base.tObj->Image[0][0]->IsClientData)) {
+ /* hope it's safe to add that here... */
+ t->pp_txoffset |= t->tile_bits;
+ }
/* Mark this texobj as dirty on all units:
*/
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index 5e818da9fd..b96ad740d1 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -127,8 +127,8 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
{
radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
- GLint curOffset;
- GLint i;
+ GLint curOffset, blitWidth;
+ GLint i, texelBytes;
GLint numLevels;
GLint log2Width, log2Height, log2Depth;
@@ -148,6 +148,7 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
return;
}
+ texelBytes = baseImage->TexFormat->TexelBytes;
/* Compute which mipmap levels we really want to send to the hardware.
*/
@@ -166,6 +167,34 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
* memory organized as a rectangle of width BLIT_WIDTH_BYTES.
*/
curOffset = 0;
+ blitWidth = BLIT_WIDTH_BYTES;
+ t->tile_bits = 0;
+
+ /* figure out if this texture is suitable for tiling. */
+ if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) {
+ if (rmesa->texmicrotile && (baseImage->Height > 1)) {
+ /* allow 32 (bytes) x 1 mip (which will use two times the space
+ the non-tiled version would use) max if base texture is large enough */
+ if ((numLevels == 1) ||
+ (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
+ (baseImage->Width * texelBytes > 64)) ||
+ ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
+ /* R100 has two microtile bits (only the txoffset reg, not the blitter)
+ weird: X2 + OPT: 32bit correct, 16bit completely hosed
+ X2: 32bit correct, 16bit correct
+ OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */
+ t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/;
+ }
+ }
+ if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) {
+ /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not
+ in the case if height is smaller than 16 (not 100% sure), as does the r200,
+ so need to disable macro tiling in that case */
+ if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) {
+ t->tile_bits |= RADEON_TXO_MACRO_TILE;
+ }
+ }
+ }
for (i = 0; i < numLevels; i++) {
const struct gl_texture_image *texImage;
@@ -197,28 +226,41 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
else size = texImage->CompressedSize;
}
else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
- size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63)
- & ~63) * texImage->Height;
+ size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
+ }
+ else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
+ /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+ though the actual offset may be different (if texture is less than
+ 32 bytes width) to the untiled case */
+ int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+ size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
+ blitWidth = MAX2(texImage->Width, 64 / texelBytes);
}
else {
- int w = texImage->Width * texImage->TexFormat->TexelBytes;
- if (w < 32)
- w = 32;
- size = w * texImage->Height * texImage->Depth;
+ int w = (texImage->Width * texelBytes + 31) & ~31;
+ size = w * texImage->Height * texImage->Depth;
+ blitWidth = MAX2(texImage->Width, 64 / texelBytes);
}
assert(size > 0);
-
/* Align to 32-byte offset. It is faster to do this unconditionally
* (no branch penalty).
*/
curOffset = (curOffset + 0x1f) & ~0x1f;
- t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
- t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
- t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES);
- t->image[0][i].height = size / t->image[0][i].width;
+ if (texelBytes) {
+ t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
+ t->image[0][i].y = 0;
+ t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
+ t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
+ }
+ else {
+ t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
+ t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
+ t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES);
+ t->image[0][i].height = size / t->image[0][i].width;
+ }
#if 0
/* for debugging only and only applicable to non-rectangle targets */
@@ -263,7 +305,7 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
if (baseImage->IsCompressed)
t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
else
- t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63);
+ t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
t->pp_txpitch -= 32;
t->dirty_state = TEX_ALL;