diff options
Diffstat (limited to 'src/gallium/drivers/llvmpipe/lp_tile_image.c')
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_tile_image.c | 292 |
1 files changed, 247 insertions, 45 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c index c1980b316d..2b63992dd7 100644 --- a/src/gallium/drivers/llvmpipe/lp_tile_image.c +++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c @@ -25,6 +25,14 @@ **************************************************************************/ +/** + * Code to convert images from tiled to linear and back. + * XXX there are quite a few assumptions about color and z/stencil being + * 32bpp. + */ + + +#include "util/u_format.h" #include "lp_tile_soa.h" #include "lp_tile_image.h" @@ -33,33 +41,171 @@ /** + * Untile a 4x4 block of 32-bit words (all contiguous) to linear layout + * at dst, with dst_stride words between rows. + */ +static void +untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride) +{ + uint32_t *d0 = dst; + uint32_t *d1 = d0 + dst_stride; + uint32_t *d2 = d1 + dst_stride; + uint32_t *d3 = d2 + dst_stride; + + d0[0] = src[0]; d0[1] = src[1]; d0[2] = src[4]; d0[3] = src[5]; + d1[0] = src[2]; d1[1] = src[3]; d1[2] = src[6]; d1[3] = src[7]; + d2[0] = src[8]; d2[1] = src[9]; d2[2] = src[12]; d2[3] = src[13]; + d3[0] = src[10]; d3[1] = src[11]; d3[2] = src[14]; d3[3] = src[15]; +} + + + +/** + * Untile a 4x4 block of 16-bit words (all contiguous) to linear layout + * at dst, with dst_stride words between rows. + */ +static void +untile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned dst_stride) +{ + uint16_t *d0 = dst; + uint16_t *d1 = d0 + dst_stride; + uint16_t *d2 = d1 + dst_stride; + uint16_t *d3 = d2 + dst_stride; + + d0[0] = src[0]; d0[1] = src[1]; d0[2] = src[4]; d0[3] = src[5]; + d1[0] = src[2]; d1[1] = src[3]; d1[2] = src[6]; d1[3] = src[7]; + d2[0] = src[8]; d2[1] = src[9]; d2[2] = src[12]; d2[3] = src[13]; + d3[0] = src[10]; d3[1] = src[11]; d3[2] = src[14]; d3[3] = src[15]; +} + + + +/** + * Convert a 4x4 rect of 32-bit words from a linear layout into tiled + * layout (in which all 16 words are contiguous). + */ +static void +tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride) +{ + const uint32_t *s0 = src; + const uint32_t *s1 = s0 + src_stride; + const uint32_t *s2 = s1 + src_stride; + const uint32_t *s3 = s2 + src_stride; + + dst[0] = s0[0]; dst[1] = s0[1]; dst[4] = s0[2]; dst[5] = s0[3]; + dst[2] = s1[0]; dst[3] = s1[1]; dst[6] = s1[2]; dst[7] = s1[3]; + dst[8] = s2[0]; dst[9] = s2[1]; dst[12] = s2[2]; dst[13] = s2[3]; + dst[10] = s3[0]; dst[11] = s3[1]; dst[14] = s3[2]; dst[15] = s3[3]; +} + + + +/** + * Convert a 4x4 rect of 16-bit words from a linear layout into tiled + * layout (in which all 16 words are contiguous). + */ +static void +tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride) +{ + const uint16_t *s0 = src; + const uint16_t *s1 = s0 + src_stride; + const uint16_t *s2 = s1 + src_stride; + const uint16_t *s3 = s2 + src_stride; + + dst[0] = s0[0]; dst[1] = s0[1]; dst[4] = s0[2]; dst[5] = s0[3]; + dst[2] = s1[0]; dst[3] = s1[1]; dst[6] = s1[2]; dst[7] = s1[3]; + dst[8] = s2[0]; dst[9] = s2[1]; dst[12] = s2[2]; dst[13] = s2[3]; + dst[10] = s3[0]; dst[11] = s3[1]; dst[14] = s3[2]; dst[15] = s3[3]; +} + + + +/** * Convert a tiled image into a linear image. - * \param src_stride source row stride in bytes (bytes per row of tiles) * \param dst_stride dest row stride in bytes */ void -lp_tiled_to_linear(const uint8_t *src, - uint8_t *dst, +lp_tiled_to_linear(const void *src, void *dst, + unsigned x, unsigned y, unsigned width, unsigned height, enum pipe_format format, - unsigned src_stride, - unsigned dst_stride) + unsigned dst_stride, + unsigned tiles_per_row) { - const unsigned tiles_per_row = src_stride / BYTES_PER_TILE; - unsigned i, j; - - for (j = 0; j < height; j += TILE_SIZE) { - for (i = 0; i < width; i += TILE_SIZE) { - unsigned tile_offset = - ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE); - unsigned byte_offset = tile_offset * BYTES_PER_TILE; - const uint8_t *src_tile = src + byte_offset; - - lp_tile_write_4ub(format, - src_tile, - dst, - dst_stride, - i, j, TILE_SIZE, TILE_SIZE); + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + /*assert(width % TILE_SIZE == 0); + assert(height % TILE_SIZE == 0);*/ + + /* Note that Z/stencil surfaces use a different tiling size than + * color surfaces. + */ + if (util_format_is_depth_or_stencil(format)) { + const uint bpp = util_format_get_blocksize(format); + const uint src_stride = dst_stride * TILE_VECTOR_WIDTH; + const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT; + const uint tiles_per_row = src_stride / (tile_w * tile_h * bpp); + + dst_stride /= bpp; /* convert from bytes to words */ + + if (bpp == 4) { + const uint32_t *src32 = (const uint32_t *) src; + uint32_t *dst32 = (uint32_t *) dst; + uint i, j; + + for (j = 0; j < height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + /* compute offsets in 32-bit words */ + uint ii = i + x, jj = j + y; + uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w) + * (tile_w * tile_h); + uint dst_offset = jj * dst_stride + ii; + untile_4_4_uint32(src32 + src_offset, + dst32 + dst_offset, + dst_stride); + } + } + } + else { + const uint16_t *src16 = (const uint16_t *) src; + uint16_t *dst16 = (uint16_t *) dst; + uint i, j; + + assert(bpp == 2); + + for (j = 0; j < height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + /* compute offsets in 16-bit words */ + uint ii = i + x, jj = j + y; + uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w) + * (tile_w * tile_h); + uint dst_offset = jj * dst_stride + ii; + untile_4_4_uint16(src16 + src_offset, + dst16 + dst_offset, + dst_stride); + } + } + } + } + else { + /* color image */ + const uint bpp = 4; + const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE; + const uint bytes_per_tile = tile_w * tile_h * bpp; + uint i, j; + + for (j = 0; j < height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + uint ii = i + x, jj = j + y; + uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w); + uint byte_offset = tile_offset * bytes_per_tile; + const uint8_t *src_tile = (uint8_t *) src + byte_offset; + + lp_tile_unswizzle_4ub(format, + src_tile, + dst, dst_stride, + ii, jj, tile_w, tile_h); + } } } } @@ -68,31 +214,87 @@ lp_tiled_to_linear(const uint8_t *src, /** * Convert a linear image into a tiled image. * \param src_stride source row stride in bytes - * \param dst_stride dest row stride in bytes (bytes per row of tiles) */ void -lp_linear_to_tiled(const uint8_t *src, - uint8_t *dst, +lp_linear_to_tiled(const void *src, void *dst, + unsigned x, unsigned y, unsigned width, unsigned height, enum pipe_format format, unsigned src_stride, - unsigned dst_stride) + unsigned tiles_per_row) { - const unsigned tiles_per_row = dst_stride / BYTES_PER_TILE; - unsigned i, j; - - for (j = 0; j < height; j += TILE_SIZE) { - for (i = 0; i < width; i += TILE_SIZE) { - unsigned tile_offset = - ((j / TILE_SIZE) * tiles_per_row + i / TILE_SIZE); - unsigned byte_offset = tile_offset * BYTES_PER_TILE; - uint8_t *dst_tile = dst + byte_offset; - - lp_tile_read_4ub(format, - dst_tile, - src, - src_stride, - i, j, TILE_SIZE, TILE_SIZE); + assert(x % TILE_SIZE == 0); + assert(y % TILE_SIZE == 0); + /* + assert(width % TILE_SIZE == 0); + assert(height % TILE_SIZE == 0); + */ + + if (util_format_is_depth_or_stencil(format)) { + const uint bpp = util_format_get_blocksize(format); + const uint dst_stride = src_stride * TILE_VECTOR_WIDTH; + const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT; + const uint tiles_per_row = dst_stride / (tile_w * tile_h * bpp); + + src_stride /= bpp; /* convert from bytes to words */ + + if (bpp == 4) { + const uint32_t *src32 = (const uint32_t *) src; + uint32_t *dst32 = (uint32_t *) dst; + uint i, j; + + for (j = 0; j < height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + /* compute offsets in 32-bit words */ + uint ii = i + x, jj = j + y; + uint src_offset = jj * src_stride + ii; + uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w) + * (tile_w * tile_h); + tile_4_4_uint32(src32 + src_offset, + dst32 + dst_offset, + src_stride); + } + } + } + else { + const uint16_t *src16 = (const uint16_t *) src; + uint16_t *dst16 = (uint16_t *) dst; + uint i, j; + + assert(bpp == 2); + + for (j = 0; j < height; j += tile_h) { + for (i = 0; i < width; i += tile_w) { + /* compute offsets in 16-bit words */ + uint ii = i + x, jj = j + y; + uint src_offset = jj * src_stride + ii; + uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w) + * (tile_w * tile_h); + tile_4_4_uint16(src16 + src_offset, + dst16 + dst_offset, + src_stride); + } + } + } + } + else { + const uint bpp = 4; + const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE; + const uint bytes_per_tile = tile_w * tile_h * bpp; + uint i, j; + + for (j = 0; j < height; j += TILE_SIZE) { + for (i = 0; i < width; i += TILE_SIZE) { + uint ii = i + x, jj = j + y; + uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w); + uint byte_offset = tile_offset * bytes_per_tile; + uint8_t *dst_tile = (uint8_t *) dst + byte_offset; + + lp_tile_swizzle_4ub(format, + dst_tile, + src, src_stride, + ii, jj, tile_w, tile_h); + } } } } @@ -102,7 +304,7 @@ lp_linear_to_tiled(const uint8_t *src, * For testing only. */ void -test_tiled_linear_conversion(uint8_t *data, +test_tiled_linear_conversion(void *data, enum pipe_format format, unsigned width, unsigned height, unsigned stride) @@ -113,13 +315,13 @@ test_tiled_linear_conversion(uint8_t *data, uint8_t *tiled = malloc(wt * ht * TILE_SIZE * TILE_SIZE * 4); - unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4; + /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/ - lp_linear_to_tiled(data, tiled, width, height, format, - stride, tiled_stride); + lp_linear_to_tiled(data, tiled, 0, 0, width, height, format, + stride, wt); - lp_tiled_to_linear(tiled, data, width, height, format, - tiled_stride, stride); + lp_tiled_to_linear(tiled, data, 0, 0, width, height, format, + stride, wt); free(tiled); } |