summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/llvmpipe/lp_tile_image.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/llvmpipe/lp_tile_image.c')
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_tile_image.c 296
1 files changed, 250 insertions, 46 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c
index c1980b316d..0852150ba7 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_image.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c
@@ -25,6 +25,14 @@
**************************************************************************/
+/**
+ * Code to convert images from tiled to linear and back.
+ * XXX there are quite a few assumptions about color and z/stencil being
+ * 32bpp.
+ */
+
+
+#include "util/u_format.h"
#include "lp_tile_soa.h"
#include "lp_tile_image.h"
@@ -33,33 +41,172 @@
/**
+ * Untile a 4x4 block of 32-bit words (all contiguous) to linear layout
+ * at dst, with dst_stride words between rows.
+ *
+ * The 16 source words are read as four 2x2 quads: words 0-3 are the
+ * top-left quad, 4-7 the top-right, 8-11 the bottom-left and 12-15 the
+ * bottom-right.  Inside a quad the order is left-to-right, then
+ * top-to-bottom.
+ *
+ * \param src  the 16 contiguous tiled words
+ * \param dst  where the top-left word of the linear 4x4 block is written
+ * \param dst_stride  distance between linear rows, in 32-bit words
+ */
+static void
+untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride)
+{
+ uint32_t *d0 = dst; /* linear row 0 */
+ uint32_t *d1 = d0 + dst_stride; /* linear row 1 */
+ uint32_t *d2 = d1 + dst_stride; /* linear row 2 */
+ uint32_t *d3 = d2 + dst_stride; /* linear row 3 */
+
+ d0[0] = src[0]; d0[1] = src[1]; d0[2] = src[4]; d0[3] = src[5]; /* top rows of the two upper quads */
+ d1[0] = src[2]; d1[1] = src[3]; d1[2] = src[6]; d1[3] = src[7]; /* bottom rows of the two upper quads */
+ d2[0] = src[8]; d2[1] = src[9]; d2[2] = src[12]; d2[3] = src[13]; /* top rows of the two lower quads */
+ d3[0] = src[10]; d3[1] = src[11]; d3[2] = src[14]; d3[3] = src[15]; /* bottom rows of the two lower quads */
+}
+
+
+
+/**
+ * Untile a 4x4 block of 16-bit words (all contiguous) to linear layout
+ * at dst, with dst_stride words between rows.
+ *
+ * The 16 source words are read as four 2x2 quads: words 0-3 are the
+ * top-left quad, 4-7 the top-right, 8-11 the bottom-left and 12-15 the
+ * bottom-right.  Inside a quad the order is left-to-right, then
+ * top-to-bottom.
+ *
+ * \param src  the 16 contiguous tiled words
+ * \param dst  where the top-left word of the linear 4x4 block is written
+ * \param dst_stride  distance between linear rows, in 16-bit words
+ */
+static void
+untile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned dst_stride)
+{
+ uint16_t *d0 = dst; /* linear row 0 */
+ uint16_t *d1 = d0 + dst_stride; /* linear row 1 */
+ uint16_t *d2 = d1 + dst_stride; /* linear row 2 */
+ uint16_t *d3 = d2 + dst_stride; /* linear row 3 */
+
+ d0[0] = src[0]; d0[1] = src[1]; d0[2] = src[4]; d0[3] = src[5]; /* top rows of the two upper quads */
+ d1[0] = src[2]; d1[1] = src[3]; d1[2] = src[6]; d1[3] = src[7]; /* bottom rows of the two upper quads */
+ d2[0] = src[8]; d2[1] = src[9]; d2[2] = src[12]; d2[3] = src[13]; /* top rows of the two lower quads */
+ d3[0] = src[10]; d3[1] = src[11]; d3[2] = src[14]; d3[3] = src[15]; /* bottom rows of the two lower quads */
+}
+
+
+
+/**
+ * Convert a 4x4 rect of 32-bit words from a linear layout into tiled
+ * layout (in which all 16 words are contiguous).
+ *
+ * The 16 destination words are written as four 2x2 quads: words 0-3 hold
+ * the top-left quad, 4-7 the top-right, 8-11 the bottom-left and 12-15
+ * the bottom-right.  Inside a quad the order is left-to-right, then
+ * top-to-bottom.
+ *
+ * \param src  top-left word of the linear 4x4 rect
+ * \param dst  receives the 16 contiguous tiled words
+ * \param src_stride  distance between linear rows, in 32-bit words
+ */
+static void
+tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride)
+{
+ const uint32_t *s0 = src; /* linear row 0 */
+ const uint32_t *s1 = s0 + src_stride; /* linear row 1 */
+ const uint32_t *s2 = s1 + src_stride; /* linear row 2 */
+ const uint32_t *s3 = s2 + src_stride; /* linear row 3 */
+
+ dst[0] = s0[0]; dst[1] = s0[1]; dst[4] = s0[2]; dst[5] = s0[3]; /* top rows of the two upper quads */
+ dst[2] = s1[0]; dst[3] = s1[1]; dst[6] = s1[2]; dst[7] = s1[3]; /* bottom rows of the two upper quads */
+ dst[8] = s2[0]; dst[9] = s2[1]; dst[12] = s2[2]; dst[13] = s2[3]; /* top rows of the two lower quads */
+ dst[10] = s3[0]; dst[11] = s3[1]; dst[14] = s3[2]; dst[15] = s3[3]; /* bottom rows of the two lower quads */
+}
+
+
+
+/**
+ * Convert a 4x4 rect of 16-bit words from a linear layout into tiled
+ * layout (in which all 16 words are contiguous).
+ *
+ * The 16 destination words are written as four 2x2 quads: words 0-3 hold
+ * the top-left quad, 4-7 the top-right, 8-11 the bottom-left and 12-15
+ * the bottom-right.  Inside a quad the order is left-to-right, then
+ * top-to-bottom.
+ *
+ * \param src  top-left word of the linear 4x4 rect
+ * \param dst  receives the 16 contiguous tiled words
+ * \param src_stride  distance between linear rows, in 16-bit words
+ */
+static void
+tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride)
+{
+ const uint16_t *s0 = src; /* linear row 0 */
+ const uint16_t *s1 = s0 + src_stride; /* linear row 1 */
+ const uint16_t *s2 = s1 + src_stride; /* linear row 2 */
+ const uint16_t *s3 = s2 + src_stride; /* linear row 3 */
+
+ dst[0] = s0[0]; dst[1] = s0[1]; dst[4] = s0[2]; dst[5] = s0[3]; /* top rows of the two upper quads */
+ dst[2] = s1[0]; dst[3] = s1[1]; dst[6] = s1[2]; dst[7] = s1[3]; /* bottom rows of the two upper quads */
+ dst[8] = s2[0]; dst[9] = s2[1]; dst[12] = s2[2]; dst[13] = s2[3]; /* top rows of the two lower quads */
+ dst[10] = s3[0]; dst[11] = s3[1]; dst[14] = s3[2]; dst[15] = s3[3]; /* bottom rows of the two lower quads */
+}
+
+
+
+/**
* Convert a tiled image into a linear image.
+ * Depth/stencil formats are untiled in 4x4 blocks of 16- or 32-bit words;
+ * every other format is treated as a color image and goes through
+ * lp_tile_write_4ub() one tile at a time.
+ * \param src  start of the tiled image
+ * \param dst  start of the linear image
+ * \param x, y  origin of the region to convert; both are asserted to be
+ *              multiples of TILE_SIZE
+ * \param width, height  extent of the region; the loops step through it
+ *              one tile at a time
+ * \param format  selects the depth/stencil or the color path
* \param dst_stride dest row stride in bytes
+ * The source stride (bytes per row of tiles) is not a parameter of the new
+ * signature; it is derived from dst_stride.
*/
void
-lp_tiled_to_linear(const uint8_t *src,
- uint8_t *dst,
+lp_tiled_to_linear(const void *src, void *dst,
+ unsigned x, unsigned y,
unsigned width, unsigned height,
- enum pipe_format format,
- unsigned src_stride,
- unsigned dst_stride)
+ enum pipe_format format, unsigned dst_stride)
{
- const unsigned tiles_per_row = src_stride / BYTES_PER_TILE;
- unsigned i, j;
-
- for (j = 0; j < height; j += TILE_SIZE) {
- for (i = 0; i < width; i += TILE_SIZE) {
- unsigned tile_offset =
- ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE);
- unsigned byte_offset = tile_offset * BYTES_PER_TILE;
- const uint8_t *src_tile = src + byte_offset;
-
- lp_tile_write_4ub(format,
- src_tile,
- dst,
- dst_stride,
- i, j, TILE_SIZE, TILE_SIZE);
+ assert(x % TILE_SIZE == 0);
+ assert(y % TILE_SIZE == 0);
+ /*assert(width % TILE_SIZE == 0);
+ assert(height % TILE_SIZE == 0);*/
+
+ /* Note that Z/stencil surfaces use a different tiling size than
+ * color surfaces.
+ * NOTE(review): width/height are not asserted to be tile multiples, yet
+ * every iteration below converts a whole tile -- confirm that callers
+ * pad their buffers accordingly.
+ */
+ if (util_format_is_depth_or_stencil(format)) {
+ const uint bpp = util_format_get_blocksize(format);
+ const uint src_stride = dst_stride * TILE_VECTOR_WIDTH; /* derived bytes per row of tiles;
+ * NOTE(review): scales by the tile width, which presumably relies on
+ * TILE_VECTOR_WIDTH == TILE_VECTOR_HEIGHT -- confirm */
+ const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
+ const uint tiles_per_row = src_stride / (tile_w * tile_h * bpp);
+
+ dst_stride /= bpp; /* convert from bytes to words */
+
+ if (bpp == 4) {
+ const uint32_t *src32 = (const uint32_t *) src;
+ uint32_t *dst32 = (uint32_t *) dst;
+ uint i, j;
+
+ for (j = 0; j < height; j += tile_h) {
+ for (i = 0; i < width; i += tile_w) {
+ /* compute offsets in 32-bit words */
+ uint ii = i + x, jj = j + y; /* position within the whole image */
+ uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
+ * (tile_w * tile_h); /* tile index times words per tile */
+ uint dst_offset = jj * dst_stride + ii;
+ untile_4_4_uint32(src32 + src_offset,
+ dst32 + dst_offset,
+ dst_stride);
+ }
+ }
+ }
+ else {
+ const uint16_t *src16 = (const uint16_t *) src;
+ uint16_t *dst16 = (uint16_t *) dst;
+ uint i, j;
+
+ assert(bpp == 2); /* only 2- and 4-byte depth/stencil formats are handled */
+
+ for (j = 0; j < height; j += tile_h) {
+ for (i = 0; i < width; i += tile_w) {
+ /* compute offsets in 16-bit words */
+ uint ii = i + x, jj = j + y; /* position within the whole image */
+ uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
+ * (tile_w * tile_h); /* tile index times words per tile */
+ uint dst_offset = jj * dst_stride + ii;
+ untile_4_4_uint16(src16 + src_offset,
+ dst16 + dst_offset,
+ dst_stride);
+ }
+ }
+ }
+ }
+ else {
+ /* color image */
+ const uint bpp = 4; /* tiled color data is sized at 4 bytes per pixel here */
+ const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
+ const uint bytes_per_tile = tile_w * tile_h * bpp;
+ const uint src_stride = dst_stride * tile_w; /* derived bytes per row of tiles */
+ const uint tiles_per_row = src_stride / bytes_per_tile;
+ uint i, j;
+
+ for (j = 0; j < height; j += tile_h) {
+ for (i = 0; i < width; i += tile_w) {
+ uint ii = i + x, jj = j + y; /* position within the whole image */
+ uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w);
+ uint byte_offset = tile_offset * bytes_per_tile;
+ const uint8_t *src_tile = (uint8_t *) src + byte_offset;
+
+ lp_tile_write_4ub(format,
+ src_tile,
+ dst, dst_stride,
+ ii, jj, tile_w, tile_h);
+ }
}
}
}
@@ -71,28 +218,85 @@ lp_tiled_to_linear(const uint8_t *src,
* \note the dest stride (bytes per row of tiles) is not a parameter; it is derived from src_stride
*/
void
-lp_linear_to_tiled(const uint8_t *src,
- uint8_t *dst,
+lp_linear_to_tiled(const void *src, void *dst,
+ unsigned x, unsigned y,
unsigned width, unsigned height,
- enum pipe_format format,
- unsigned src_stride,
- unsigned dst_stride)
+ enum pipe_format format, unsigned src_stride)
{
- const unsigned tiles_per_row = dst_stride / BYTES_PER_TILE;
- unsigned i, j;
-
- for (j = 0; j < height; j += TILE_SIZE) {
- for (i = 0; i < width; i += TILE_SIZE) {
- unsigned tile_offset =
- ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE);
- unsigned byte_offset = tile_offset * BYTES_PER_TILE;
- uint8_t *dst_tile = dst + byte_offset;
-
- lp_tile_read_4ub(format,
- dst_tile,
- src,
- src_stride,
- i, j, TILE_SIZE, TILE_SIZE);
+ assert(x % TILE_SIZE == 0); /* the region origin must be tile-aligned */
+ assert(y % TILE_SIZE == 0);
+ /*
+ assert(width % TILE_SIZE == 0);
+ assert(height % TILE_SIZE == 0);
+ NOTE(review): with these disabled, a width/height that is not a tile
+ multiple still converts whole tiles below -- confirm that callers pad
+ their buffers accordingly.
+ */
+
+ if (util_format_is_depth_or_stencil(format)) { /* depth/stencil: 4x4 blocks of 16- or 32-bit words */
+ const uint bpp = util_format_get_blocksize(format);
+ const uint dst_stride = src_stride * TILE_VECTOR_WIDTH; /* derived bytes per row of tiles;
+ * NOTE(review): scales by the tile width, which presumably relies on
+ * TILE_VECTOR_WIDTH == TILE_VECTOR_HEIGHT -- confirm */
+ const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
+ const uint tiles_per_row = dst_stride / (tile_w * tile_h * bpp);
+
+ src_stride /= bpp; /* convert from bytes to words */
+
+ if (bpp == 4) {
+ const uint32_t *src32 = (const uint32_t *) src;
+ uint32_t *dst32 = (uint32_t *) dst;
+ uint i, j;
+
+ for (j = 0; j < height; j += tile_h) {
+ for (i = 0; i < width; i += tile_w) {
+ /* compute offsets in 32-bit words */
+ uint ii = i + x, jj = j + y; /* position within the whole image */
+ uint src_offset = jj * src_stride + ii;
+ uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
+ * (tile_w * tile_h); /* tile index times words per tile */
+ tile_4_4_uint32(src32 + src_offset,
+ dst32 + dst_offset,
+ src_stride);
+ }
+ }
+ }
+ else {
+ const uint16_t *src16 = (const uint16_t *) src;
+ uint16_t *dst16 = (uint16_t *) dst;
+ uint i, j;
+
+ assert(bpp == 2); /* only 2- and 4-byte depth/stencil formats are handled */
+
+ for (j = 0; j < height; j += tile_h) {
+ for (i = 0; i < width; i += tile_w) {
+ /* compute offsets in 16-bit words */
+ uint ii = i + x, jj = j + y; /* position within the whole image */
+ uint src_offset = jj * src_stride + ii;
+ uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
+ * (tile_w * tile_h); /* tile index times words per tile */
+ tile_4_4_uint16(src16 + src_offset,
+ dst16 + dst_offset,
+ src_stride);
+ }
+ }
+ }
+ }
+ else { /* every other format is handled as a color image */
+ const uint bpp = 4; /* tiled color data is sized at 4 bytes per pixel here */
+ const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
+ const uint bytes_per_tile = tile_w * tile_h * bpp;
+ const uint dst_stride = src_stride * tile_w; /* derived bytes per row of tiles */
+ const uint tiles_per_row = dst_stride / bytes_per_tile;
+ uint i, j;
+
+ for (j = 0; j < height; j += TILE_SIZE) { /* TILE_SIZE is the same value as tile_h */
+ for (i = 0; i < width; i += TILE_SIZE) { /* TILE_SIZE is the same value as tile_w */
+ uint ii = i + x, jj = j + y; /* position within the whole image */
+ uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w);
+ uint byte_offset = tile_offset * bytes_per_tile;
+ uint8_t *dst_tile = (uint8_t *) dst + byte_offset;
+
+ lp_tile_read_4ub(format,
+ dst_tile,
+ src, src_stride,
+ ii, jj, tile_w, tile_h);
+ }
}
}
}
@@ -102,7 +306,7 @@ lp_linear_to_tiled(const uint8_t *src,
* For testing only.
*/
void
-test_tiled_linear_conversion(uint8_t *data,
+test_tiled_linear_conversion(void *data,
enum pipe_format format,
unsigned width, unsigned height,
unsigned stride)
@@ -113,13 +317,13 @@ test_tiled_linear_conversion(uint8_t *data,
uint8_t *tiled = malloc(wt * ht * TILE_SIZE * TILE_SIZE * 4);
- unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;
+ /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/
- lp_linear_to_tiled(data, tiled, width, height, format,
- stride, tiled_stride);
+ lp_linear_to_tiled(data, tiled, 0, 0, width, height, format,
+ stride);
- lp_tiled_to_linear(tiled, data, width, height, format,
- tiled_stride, stride);
+ lp_tiled_to_linear(tiled, data, 0, 0, width, height, format,
+ stride);
free(tiled);
}