Add texture micro and macro tiling to the radeon/r200 driver. This can improve performance by up to 15% in texture-intensive applications. To make tiling work, convert the driver to use the correct blit format and blit width, instead of a fixed blit format and blit width, when uploading textures.

author: Roland Scheidegger <rscheidegger@gmx.ch> 2005-02-10 22:36:06 +0000
committer: Roland Scheidegger <rscheidegger@gmx.ch> 2005-02-10 22:36:06 +0000
commit: 4837ea30208d002bc36a836d2117f826d40c8bfa (patch)
tree: 4db5a234a5af7d7f02a42ed824b85e938066828d /src/mesa/drivers/dri/r200/r200_texstate.c
parent: 26d31591257d575362776972439f614948366dd1 (diff)
1 files changed, 53 insertions, 20 deletions
diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
index 1e56c78f9b..3fba25d0b5 100644
--- a/src/mesa/drivers/dri/r200/r200_texstate.c
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -125,8 +125,8 @@ static void r200SetTexImages( r200ContextPtr rmesa,
 {
    r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData;
    const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
-   GLint curOffset;
-   GLint i;
+   GLint curOffset, blitWidth;
+   GLint i, texelBytes;
    GLint numLevels;
    GLint log2Width, log2Height, log2Depth;
 
@@ -146,6 +146,7 @@ static void r200SetTexImages( r200ContextPtr rmesa,
       return;
    }
 
+   texelBytes = baseImage->TexFormat->TexelBytes;
 
    /* Compute which mipmap levels we really want to send to the hardware.
     */
@@ -164,6 +165,28 @@ static void r200SetTexImages( r200ContextPtr rmesa,
     * memory organized as a rectangle of width BLIT_WIDTH_BYTES.
     */
    curOffset = 0;
+   blitWidth = BLIT_WIDTH_BYTES;
+   t->tile_bits = 0;
+
+   /* figure out if this texture is suitable for tiling. */
+   if (texelBytes) {
+      if (rmesa->texmicrotile  && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
+      /* texrect might be able to use micro tiling too in theory? */
+	 (baseImage->Height > 1)) {
+	 /* allow 32 (bytes) x 1 mip (which will use two times the space
+	 the non-tiled version would use) max if base texture is large enough */
+	 if ((numLevels == 1) ||
+	   (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
+	       (baseImage->Width * texelBytes > 64)) ||
+	    ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
+	    t->tile_bits |= R200_TXO_MICRO_TILE;
+	 }
+      }
+      if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
+	 /* we can set macro tiling even for small textures, they will be untiled anyway */
+	 t->tile_bits |= R200_TXO_MACRO_TILE;
+      }
+   }
 
    for (i = 0; i < numLevels; i++) {
       const struct gl_texture_image *texImage;
@@ -195,28 +218,41 @@ static void r200SetTexImages( r200ContextPtr rmesa,
             else size = texImage->CompressedSize;
       }
       else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
-         size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63)
-                 & ~63) * texImage->Height;
+	 size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
+      }
+      else if (t->tile_bits & R200_TXO_MICRO_TILE) {
+	 /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+	    though the actual offset may be different (if texture is less than
+	    32 bytes width) to the untiled case */
+	 int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+	 size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
+	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
       }
       else {
-         int w = texImage->Width * texImage->TexFormat->TexelBytes;
-         if (w < 32)
-            w = 32;
-         size = w * texImage->Height * texImage->Depth;
+	 int w = (texImage->Width * texelBytes + 31) & ~31;
+	 size = w * texImage->Height * texImage->Depth;
+	 blitWidth = MAX2(texImage->Width, 64 / texelBytes);
       }
       assert(size > 0);
 
-
       /* Align to 32-byte offset.  It is faster to do this unconditionally
        * (no branch penalty).
        */
 
       curOffset = (curOffset + 0x1f) & ~0x1f;
 
-      t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
-      t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
-      t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
-      t->image[0][i].height = size / t->image[0][i].width;
+      if (texelBytes) {
+	 t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
+	 t->image[0][i].y = 0;
+	 t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
+	 t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
+      }
+      else {
+         t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
+         t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
+         t->image[0][i].width  = MIN2(size, BLIT_WIDTH_BYTES);
+         t->image[0][i].height = size / t->image[0][i].width;     
+      }
 
 #if 0
       /* for debugging only and only  applicable to non-rectangle targets */
@@ -242,16 +278,13 @@ static void r200SetTexImages( r200ContextPtr rmesa,
 
    /* Setup remaining cube face blits, if needed */
    if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
-      /* Round totalSize up to multiple of BLIT_WIDTH_BYTES */
-      const GLuint faceSize = (t->base.totalSize + BLIT_WIDTH_BYTES - 1)
-                              & ~(BLIT_WIDTH_BYTES-1);
-      const GLuint lines = faceSize / BLIT_WIDTH_BYTES;
+      const GLuint faceSize = t->base.totalSize;
       GLuint face;
-      /* reuse face 0 x/y/width/height - just adjust y */
+      /* reuse face 0 x/y/width/height - just update the offset when uploading */
       for (face = 1; face < 6; face++) {
          for (i = 0; i < numLevels; i++) {
             t->image[face][i].x =  t->image[0][i].x;
-            t->image[face][i].y =  t->image[0][i].y + face * lines;
+            t->image[face][i].y =  t->image[0][i].y;
             t->image[face][i].width  = t->image[0][i].width;
             t->image[face][i].height = t->image[0][i].height;
          }
@@ -310,7 +343,7 @@ static void r200SetTexImages( r200ContextPtr rmesa,
    if (baseImage->IsCompressed)
       t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
    else
-      t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63);
+      t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
    t->pp_txpitch -= 32;
 
    t->dirty_state = TEX_ALL;
author	Roland Scheidegger <rscheidegger@gmx.ch>	2005-02-10 22:36:06 +0000
committer	Roland Scheidegger <rscheidegger@gmx.ch>	2005-02-10 22:36:06 +0000
commit	4837ea30208d002bc36a836d2117f826d40c8bfa (patch)
tree	4db5a234a5af7d7f02a42ed824b85e938066828d /src/mesa/drivers/dri/r200/r200_texstate.c
parent	26d31591257d575362776972439f614948366dd1 (diff)