1 files changed, 328 insertions, 0 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c
new file mode 100644
index 0000000000..2b63992dd7
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c
@@ -0,0 +1,328 @@
+/**************************************************************************
+ * 
+ * Copyright 2010 VMware, Inc.  All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+/**
+ * Code to convert images from tiled to linear and back.
+ * XXX there are quite a few assumptions about color and z/stencil being
+ * 32bpp.
+ */
+
+
+#include "util/u_format.h"
+#include "lp_tile_soa.h"
+#include "lp_tile_image.h"
+
+
+#define BYTES_PER_TILE (TILE_SIZE * TILE_SIZE * 4)   /* one TILE_SIZE x TILE_SIZE tile at 4 bytes per pixel */
+
+
+/**
+ * Scatter a 4x4 tile (16 contiguous 32-bit words) into linear rows that
+ * are dst_stride words apart.  Pixel (row, col) of the tile is read from
+ * word 8*(row/2) + 4*(col/2) + 2*(row%2) + col%2 of src.
+ */
+static void
+untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride)
+{
+   unsigned row, col;
+   for (row = 0; row < 4; row++) {
+      uint32_t *out = dst + (size_t) row * dst_stride;
+      for (col = 0; col < 4; col++) {
+         out[col] = src[((row & 2) << 2) | ((col & 2) << 1) |
+                        ((row & 1) << 1) | (col & 1)];
+      }
+   }
+}
+
+
+
+/**
+ * Scatter a 4x4 tile (16 contiguous 16-bit words) into linear rows that
+ * are dst_stride words apart.  Pixel (row, col) of the tile is read from
+ * word 8*(row/2) + 4*(col/2) + 2*(row%2) + col%2 of src.
+ */
+static void
+untile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned dst_stride)
+{
+   unsigned row, col;
+   for (row = 0; row < 4; row++) {
+      uint16_t *out = dst + (size_t) row * dst_stride;
+      for (col = 0; col < 4; col++) {
+         out[col] = src[((row & 2) << 2) | ((col & 2) << 1) |
+                        ((row & 1) << 1) | (col & 1)];
+      }
+   }
+}
+
+
+
+/**
+ * Gather a 4x4 rect of 32-bit words from linear rows that are src_stride
+ * words apart into one tile of 16 contiguous words at dst.
+ */
+static void
+tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride)
+{
+   static const unsigned char slot[4][4] = {
+      { 0, 1, 4, 5 }, { 2, 3, 6, 7 }, { 8, 9, 12, 13 }, { 10, 11, 14, 15 }
+   };
+   unsigned row, col;
+   for (row = 0; row < 4; row++) {
+      for (col = 0; col < 4; col++) {
+         dst[slot[row][col]] = src[(size_t) row * src_stride + col];
+      }
+   }
+}
+
+
+
+/**
+ * Gather a 4x4 rect of 16-bit words from linear rows that are src_stride
+ * words apart into one tile of 16 contiguous words at dst.
+ */
+static void
+tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride)
+{
+   static const unsigned char slot[4][4] = {
+      { 0, 1, 4, 5 }, { 2, 3, 6, 7 }, { 8, 9, 12, 13 }, { 10, 11, 14, 15 }
+   };
+   unsigned row, col;
+   for (row = 0; row < 4; row++) {
+      for (col = 0; col < 4; col++) {
+         dst[slot[row][col]] = src[(size_t) row * src_stride + col];
+      }
+   }
+}
+
+
+
+/**
+ * Convert a region of a tiled image into linear layout.
+ *
+ * Depth/stencil formats are tiled in TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT
+ * blocks and are untiled here directly; only block sizes of 4 and 2 bytes
+ * are handled.  Every other format goes through lp_tile_unswizzle_4ub(),
+ * one TILE_SIZE x TILE_SIZE tile at a time.
+ *
+ * The loops step by whole tiles and always convert a full tile, even when
+ * width or height is not a multiple of the tile size.
+ *
+ * \param src  tiled source image
+ * \param dst  linear destination image
+ * \param x,y  origin of the region in pixels; multiples of TILE_SIZE
+ * \param width,height  size of the region in pixels
+ * \param format  pixel format; selects the depth/stencil or color path
+ * \param dst_stride  dest row stride in bytes
+ * \param tiles_per_row  tiles per row of src; used by the color path only,
+ *                       the depth/stencil path derives it from dst_stride
+ */
+void
+lp_tiled_to_linear(const void *src, void *dst,
+                   unsigned x, unsigned y,
+                   unsigned width, unsigned height,
+                   enum pipe_format format,
+                   unsigned dst_stride,
+                   unsigned tiles_per_row)
+{
+   assert(x % TILE_SIZE == 0);
+   assert(y % TILE_SIZE == 0);
+
+   if (util_format_is_depth_or_stencil(format)) {
+      /* Z/stencil surfaces use a different tiling size than color
+       * surfaces: TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT blocks. */
+      const uint bpp = util_format_get_blocksize(format);
+      const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
+      const uint src_stride = dst_stride * TILE_VECTOR_WIDTH;
+      /* Named so that it does not shadow the tiles_per_row parameter. */
+      const uint zs_tiles_per_row = src_stride / (tile_w * tile_h * bpp);
+      const uint dst_words = dst_stride / bpp;   /* bytes -> words */
+      uint i, j;
+
+      /* With asserts compiled out, any other size takes the 16-bit path. */
+      assert(bpp == 4 || bpp == 2);
+
+      for (j = 0; j < height; j += tile_h) {
+         for (i = 0; i < width; i += tile_w) {
+            /* offsets are counted in words of bpp bytes */
+            const uint ii = i + x, jj = j + y;
+            const uint src_offset =
+               (jj / tile_h * zs_tiles_per_row + ii / tile_w)
+               * (tile_w * tile_h);
+            const uint dst_offset = jj * dst_words + ii;
+
+            if (bpp == 4) {
+               untile_4_4_uint32((const uint32_t *) src + src_offset,
+                                 (uint32_t *) dst + dst_offset,
+                                 dst_words);
+            }
+            else {
+               untile_4_4_uint16((const uint16_t *) src + src_offset,
+                                 (uint16_t *) dst + dst_offset,
+                                 dst_words);
+            }
+         }
+      }
+   }
+   else {
+      /* color image: the tiled layout is taken to be 4 bytes per pixel */
+      const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
+      const uint bytes_per_tile = tile_w * tile_h * 4;
+      const uint8_t *src_bytes = (const uint8_t *) src;
+      uint i, j;
+
+      for (j = 0; j < height; j += tile_h) {
+         for (i = 0; i < width; i += tile_w) {
+            const uint ii = i + x, jj = j + y;
+            const uint tile_index =
+               (jj / tile_h) * tiles_per_row + ii / tile_w;
+            const uint byte_offset = tile_index * bytes_per_tile;
+
+            lp_tile_unswizzle_4ub(format,
+                                  src_bytes + byte_offset,
+                                  dst, dst_stride,
+                                  ii, jj, tile_w, tile_h);
+         }
+      }
+   }
+}
+
+
+/**
+ * Convert a region of a linear image into tiled layout.
+ *
+ * Depth/stencil formats are tiled here directly in TILE_VECTOR_WIDTH x
+ * TILE_VECTOR_HEIGHT blocks; only block sizes of 4 and 2 bytes are
+ * handled.  Every other format goes through lp_tile_swizzle_4ub(), one
+ * TILE_SIZE x TILE_SIZE tile at a time.
+ *
+ * The loops step by whole tiles and always convert a full tile, even when
+ * width or height is not a multiple of the tile size.
+ *
+ * \param src  linear source image
+ * \param dst  tiled destination image
+ * \param x,y  origin of the region in pixels; multiples of TILE_SIZE
+ * \param width,height  size of the region in pixels
+ * \param format  pixel format; selects the depth/stencil or color path
+ * \param src_stride  source row stride in bytes
+ * \param tiles_per_row  tiles per row of dst; used by the color path only,
+ *                       the depth/stencil path derives it from src_stride
+ */
+void
+lp_linear_to_tiled(const void *src, void *dst,
+                   unsigned x, unsigned y,
+                   unsigned width, unsigned height,
+                   enum pipe_format format,
+                   unsigned src_stride,
+                   unsigned tiles_per_row)
+{
+   assert(x % TILE_SIZE == 0);
+   assert(y % TILE_SIZE == 0);
+
+   if (util_format_is_depth_or_stencil(format)) {
+      const uint bpp = util_format_get_blocksize(format);
+      const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
+      const uint dst_stride = src_stride * TILE_VECTOR_WIDTH;
+      /* Named so that it does not shadow the tiles_per_row parameter. */
+      const uint zs_tiles_per_row = dst_stride / (tile_w * tile_h * bpp);
+      const uint src_words = src_stride / bpp;   /* bytes -> words */
+      uint i, j;
+
+      /* With asserts compiled out, any other size takes the 16-bit path. */
+      assert(bpp == 4 || bpp == 2);
+
+      for (j = 0; j < height; j += tile_h) {
+         for (i = 0; i < width; i += tile_w) {
+            /* offsets are counted in words of bpp bytes */
+            const uint ii = i + x, jj = j + y;
+            const uint src_offset = jj * src_words + ii;
+            const uint dst_offset =
+               (jj / tile_h * zs_tiles_per_row + ii / tile_w)
+               * (tile_w * tile_h);
+
+            if (bpp == 4) {
+               tile_4_4_uint32((const uint32_t *) src + src_offset,
+                               (uint32_t *) dst + dst_offset,
+                               src_words);
+            }
+            else {
+               tile_4_4_uint16((const uint16_t *) src + src_offset,
+                               (uint16_t *) dst + dst_offset,
+                               src_words);
+            }
+         }
+      }
+   }
+   else {
+      /* color image: the tiled layout is taken to be 4 bytes per pixel */
+      const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
+      const uint bytes_per_tile = tile_w * tile_h * 4;
+      uint8_t *dst_bytes = (uint8_t *) dst;
+      uint i, j;
+
+      for (j = 0; j < height; j += tile_h) {
+         for (i = 0; i < width; i += tile_w) {
+            const uint ii = i + x, jj = j + y;
+            const uint tile_index =
+               (jj / tile_h) * tiles_per_row + ii / tile_w;
+            const uint byte_offset = tile_index * bytes_per_tile;
+
+            lp_tile_swizzle_4ub(format,
+                                dst_bytes + byte_offset,
+                                src, src_stride,
+                                ii, jj, tile_w, tile_h);
+         }
+      }
+   }
+}
+
+
+/**
+ * For testing only: convert data to a temporary tiled copy and back again.
+ * Does nothing if the temporary buffer cannot be allocated.
+ */
+void
+test_tiled_linear_conversion(void *data,
+                             enum pipe_format format,
+                             unsigned width, unsigned height,
+                             unsigned stride)
+{
+   /* size in tiles, rounded up; buffer is sized in size_t to avoid wrap */
+   const unsigned wt = (width + TILE_SIZE - 1) / TILE_SIZE;
+   const unsigned ht = (height + TILE_SIZE - 1) / TILE_SIZE;
+   uint8_t *tiled = malloc((size_t) wt * ht * TILE_SIZE * TILE_SIZE * 4);
+
+   if (!tiled) {
+      return;
+   }
+
+   lp_linear_to_tiled(data, tiled, 0, 0, width, height, format,
+                      stride, wt);
+   lp_tiled_to_linear(tiled, data, 0, 0, width, height, format,
+                      stride, wt);
+   free(tiled);
+}
+