diff options
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample.c | 97 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample.h | 9 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 300 |
3 files changed, 280 insertions, 126 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 3c4992b25e..259b1142e3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -124,6 +124,52 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, /** + * Compute the partial offset of a pixel block along an arbitrary axis. + * + * @param coord coordinate in pixels + * @param stride number of bytes between rows of successive pixel blocks + * @param block_length number of pixels in a pixel block along the coordinate + * axis + * @param out_offset resulting relative offset of the pixel block in bytes + * @param out_subcoord resulting sub-block pixel coordinate + */ +void +lp_build_sample_partial_offset(struct lp_build_context *bld, + unsigned block_length, + LLVMValueRef coord, + LLVMValueRef stride, + LLVMValueRef *out_offset, + LLVMValueRef *out_subcoord) +{ + LLVMValueRef offset; + LLVMValueRef subcoord; + + if (block_length == 1) { + subcoord = bld->zero; + } + else { + /* + * Pixel blocks have power of two dimensions. LLVM should convert the + * rem/div to bit arithmetic. + * TODO: Verify this. + */ + + LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length); + subcoord = LLVMBuildURem(bld->builder, coord, block_width, ""); + coord = LLVMBuildUDiv(bld->builder, coord, block_width, ""); + } + + offset = lp_build_mul(bld, coord, stride); + + assert(out_offset); + assert(out_subcoord); + + *out_offset = offset; + *out_subcoord = subcoord; +} + + +/** * Compute the offset of a pixel block. * * x, y, z, y_stride, z_stride are vectors, and they refer to pixels. @@ -144,48 +190,35 @@ lp_build_sample_offset(struct lp_build_context *bld, { LLVMValueRef x_stride; LLVMValueRef offset; - LLVMValueRef i; - LLVMValueRef j; - - /* - * Describe the coordinates in terms of pixel blocks. - * - * TODO: pixel blocks are power of two. 
LLVM should convert rem/div to - * bit arithmetic. Verify this. - */ - - if (format_desc->block.width == 1) { - i = bld->zero; - } - else { - LLVMValueRef block_width = lp_build_const_int_vec(bld->type, format_desc->block.width); - i = LLVMBuildURem(bld->builder, x, block_width, ""); - x = LLVMBuildUDiv(bld->builder, x, block_width, ""); - } - - if (format_desc->block.height == 1) { - j = bld->zero; - } - else { - LLVMValueRef block_height = lp_build_const_int_vec(bld->type, format_desc->block.height); - j = LLVMBuildURem(bld->builder, y, block_height, ""); - y = LLVMBuildUDiv(bld->builder, y, block_height, ""); - } x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8); - offset = lp_build_mul(bld, x, x_stride); + + lp_build_sample_partial_offset(bld, + format_desc->block.width, + x, x_stride, + &offset, out_i); if (y && y_stride) { - LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride); + LLVMValueRef y_offset; + lp_build_sample_partial_offset(bld, + format_desc->block.height, + y, y_stride, + &y_offset, out_j); offset = lp_build_add(bld, offset, y_offset); } + else { + *out_j = bld->zero; + } if (z && z_stride) { - LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride); + LLVMValueRef z_offset; + LLVMValueRef k; + lp_build_sample_partial_offset(bld, + 1, /* pixel blocks are always 2D */ + z, z_stride, + &z_offset, &k); offset = lp_build_add(bld, offset, z_offset); } *out_offset = offset; - *out_i = i; - *out_j = j; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index aff7bb2a4d..caafc4eca0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -149,6 +149,15 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, void +lp_build_sample_partial_offset(struct lp_build_context *bld, + unsigned block_length, + LLVMValueRef coord, + LLVMValueRef stride, + LLVMValueRef *out_offset, + LLVMValueRef *out_i); + + +void 
lp_build_sample_offset(struct lp_build_context *bld, const struct util_format_description *format_desc, LLVMValueRef x, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index f6b6162f63..1f39d9c98b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -323,59 +323,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, /** - * Fetch the texels as <4n x i8> in AoS form. - */ -static LLVMValueRef -lp_build_sample_packed(struct lp_build_sample_context *bld, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef y_stride, - LLVMValueRef data_array) -{ - LLVMValueRef offset, i, j; - LLVMValueRef data_ptr; - LLVMValueRef res; - - /* convert x,y,z coords to linear offset from start of texture, in bytes */ - lp_build_sample_offset(&bld->uint_coord_bld, - bld->format_desc, - x, y, NULL, y_stride, NULL, - &offset, &i, &j); - - /* get pointer to mipmap level 0 data */ - data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0); - - if (util_format_is_rgba8_variant(bld->format_desc)) { - /* Just fetch the data directly without swizzling */ - assert(bld->format_desc->block.width == 1); - assert(bld->format_desc->block.height == 1); - assert(bld->format_desc->block.bits <= bld->texel_type.width); - - res = lp_build_gather(bld->builder, - bld->texel_type.length, - bld->format_desc->block.bits, - bld->texel_type.width, - data_ptr, offset); - } - else { - struct lp_type type; - - assert(bld->texel_type.width == 32); - - memset(&type, 0, sizeof type); - type.width = 8; - type.length = bld->texel_type.length*4; - type.norm = TRUE; - - res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type, - data_ptr, offset, i, j); - } - - return res; -} - - -/** * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes. 
*/ static LLVMValueRef @@ -409,7 +356,7 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld, /** - * We only support a few wrap modes in lp_build_sample_wrap_int() at this time. + * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at this time. * Return whether the given mode is supported by that function. */ static boolean @@ -431,13 +378,18 @@ is_simple_wrap_mode(unsigned mode) * \param length the texture size along one dimension * \param is_pot if TRUE, length is a power of two * \param wrap_mode one of PIPE_TEX_WRAP_x + * \param out_i resulting sub-block pixel coordinate for coord */ -static LLVMValueRef -lp_build_sample_wrap_int(struct lp_build_sample_context *bld, - LLVMValueRef coord, - LLVMValueRef length, - boolean is_pot, - unsigned wrap_mode) +static void +lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, + unsigned block_length, + LLVMValueRef coord, + LLVMValueRef length, + LLVMValueRef stride, + boolean is_pot, + unsigned wrap_mode, + LLVMValueRef *out_offset, + LLVMValueRef *out_i) { struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; @@ -470,7 +422,134 @@ lp_build_sample_wrap_int(struct lp_build_sample_context *bld, assert(0); } - return coord; + lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride, + out_offset, out_i); +} + + +/** + * Build LLVM code for texture wrap mode, for scaled integer texcoords. 
+ * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size + * \param length the texture size along one dimension + * \param stride pixel stride along the coordinate axis + * \param block_length is the length of the pixel block along the + * coordinate axis + * \param is_pot if TRUE, length is a power of two + * \param wrap_mode one of PIPE_TEX_WRAP_x + * \param offset0 resulting relative offset for coord0 + * \param offset1 resulting relative offset for coord0 + 1 + * \param i0 resulting sub-block pixel coordinate for coord0 + * \param i1 resulting sub-block pixel coordinate for coord0 + 1 + */ +static void +lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, + unsigned block_length, + LLVMValueRef coord0, + LLVMValueRef length, + LLVMValueRef stride, + boolean is_pot, + unsigned wrap_mode, + LLVMValueRef *offset0, + LLVMValueRef *offset1, + LLVMValueRef *i0, + LLVMValueRef *i1) +{ + struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef length_minus_one; + LLVMValueRef lmask, umask, mask; + + if (block_length != 1) { + /* + * If the pixel block covers more than one pixel then there is no easy + * way to calculate offset1 relative to offset0. Instead, compute them + * independently. + */ + + LLVMValueRef coord1; + + lp_build_sample_wrap_nearest_int(bld, + block_length, + coord0, + length, + stride, + is_pot, + wrap_mode, + offset0, i0); + + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); + + lp_build_sample_wrap_nearest_int(bld, + block_length, + coord1, + length, + stride, + is_pot, + wrap_mode, + offset1, i1); + + return; + } + + /* + * Scalar pixels -- try to compute offset0 and offset1 with a single stride + * multiplication. 
+ */ + + *i0 = uint_coord_bld->zero; + *i1 = uint_coord_bld->zero; + + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); + + switch(wrap_mode) { + case PIPE_TEX_WRAP_REPEAT: + if (is_pot) { + coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); + } + else { + /* Signed remainder won't give the right results for negative + * dividends but unsigned remainder does.*/ + coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); + } + + mask = lp_build_compare(bld->builder, int_coord_bld->type, + PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); + + *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); + *offset1 = LLVMBuildAnd(bld->builder, + lp_build_add(uint_coord_bld, *offset0, stride), + mask, ""); + break; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type, + PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero); + umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type, + PIPE_FUNC_LESS, coord0, length_minus_one); + + coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero); + coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one); + + mask = LLVMBuildAnd(bld->builder, lmask, umask, ""); + + *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); + *offset1 = lp_build_add(uint_coord_bld, + *offset0, + LLVMBuildAnd(bld->builder, stride, mask, "")); + break; + + case PIPE_TEX_WRAP_CLAMP: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_REPEAT: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + default: + assert(0); + *offset0 = uint_coord_bld->zero; + *offset1 = uint_coord_bld->zero; + break; + } } @@ -1741,14 +1820,18 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef i32_c8, i32_c128, i32_c255; LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; - 
LLVMValueRef x0, x1; - LLVMValueRef y0, y1; - LLVMValueRef neighbors[2][2]; + LLVMValueRef data_ptr; + LLVMValueRef x_stride, y_stride; + LLVMValueRef x_offset0, x_offset1; + LLVMValueRef y_offset0, y_offset1; + LLVMValueRef offset[2][2]; + LLVMValueRef x_subcoord[2], y_subcoord[2]; LLVMValueRef neighbors_lo[2][2]; LLVMValueRef neighbors_hi[2][2]; LLVMValueRef packed, packed_lo, packed_hi; LLVMValueRef unswizzled[4]; - LLVMValueRef stride; + const unsigned level = 0; + unsigned i, j; assert(bld->static_state->target == PIPE_TEXTURE_2D || bld->static_state->target == PIPE_TEXTURE_RECT); @@ -1795,21 +1878,30 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); - x0 = s_ipart; - y0 = t_ipart; - - x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); - y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); - - x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width, - bld->static_state->wrap_s); - y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height, - bld->static_state->wrap_t); - - x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width, - bld->static_state->wrap_s); - y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height, - bld->static_state->wrap_t); + x_stride = lp_build_const_vec(bld->uint_coord_bld.type, + bld->format_desc->block.bits/8); + + y_stride = lp_build_get_const_level_stride_vec(bld, stride_array, level); + + lp_build_sample_wrap_linear_int(bld, + bld->format_desc->block.width, + s_ipart, width, x_stride, + bld->static_state->pot_width, + bld->static_state->wrap_s, + &x_offset0, &x_offset1, + &x_subcoord[0], &x_subcoord[1]); + lp_build_sample_wrap_linear_int(bld, + bld->format_desc->block.height, + t_ipart, height, y_stride, + bld->static_state->pot_height, + bld->static_state->wrap_t, + &y_offset0, &y_offset1, + 
&y_subcoord[0], &y_subcoord[1]); + + offset[0][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset0); + offset[0][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset0); + offset[1][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset1); + offset[1][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset1); /* * Transform 4 x i32 in @@ -1838,7 +1930,6 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; LLVMValueRef shuffle_lo; LLVMValueRef shuffle_hi; - unsigned i, j; for(j = 0; j < h16.type.length; j += 4) { #ifdef PIPE_ARCH_LITTLE_ENDIAN @@ -1866,7 +1957,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); } - stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0); + /* + * get pointer to mipmap level 0 data + */ + data_ptr = lp_build_get_const_mipmap_level(bld, data_array, level); /* * Fetch the pixels as 4 x 32bit (rgba order might differ): @@ -1885,20 +1979,38 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, * The higher 8 bits of the resulting elements will be zero. 
*/ - neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array); - neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array); - neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array); - neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array); + for (j = 0; j < 2; ++j) { + for (i = 0; i < 2; ++i) { + LLVMValueRef rgba8; - neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, ""); - neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, ""); - neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, ""); - neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, ""); + if (util_format_is_rgba8_variant(bld->format_desc)) { + /* + * Given the format is a rgba8, just read the pixels as is, + * without any swizzling. Swizzling will be done later. + */ + rgba8 = lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset[j][i]); - lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]); - lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]); - lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]); - lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]); + rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); + + } + else { + rgba8 = lp_build_fetch_rgba_aos(bld->builder, + bld->format_desc, + u8n.type, + data_ptr, offset[j][i], + x_subcoord[i], + y_subcoord[j]); + } + + lp_build_unpack2(builder, u8n.type, h16.type, + rgba8, + &neighbors_lo[j][i], &neighbors_hi[j][i]); + } + } /* * Linear interpolate with 8.8 fixed point. |