diff options
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_arit.c | 48 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_debug.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format_aos.c | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 14 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_pack.h | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample.c | 103 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample.h | 11 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 307 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 8 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_type.h | 8 |
10 files changed, 342 insertions, 165 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 7b35dd4bb4..e0d30be98d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -59,14 +59,6 @@ #include "lp_bld_arit.h" -/* - * XXX: Increasing eliminates some artifacts, but adds others, most - * noticeably corruption in the Earth halo in Google Earth. - */ -#define RCP_NEWTON_STEPS 0 - -#define RSQRT_NEWTON_STEPS 0 - #define EXP_POLY_DEGREE 3 #define LOG_POLY_DEGREE 5 @@ -267,7 +259,7 @@ lp_build_add(struct lp_build_context *bld, } -/** Return the sum of the elements of a */ +/** Return the scalar sum of the elements of a */ LLVMValueRef lp_build_sum_vector(struct lp_build_context *bld, LLVMValueRef a) @@ -278,11 +270,9 @@ lp_build_sum_vector(struct lp_build_context *bld, assert(lp_check_value(type, a)); - if (a == bld->zero) - return bld->zero; - if (a == bld->undef) - return bld->undef; - assert(type.length > 1); + if (type.length == 1) { + return a; + } assert(!bld->type.norm); @@ -546,7 +536,7 @@ lp_build_mul_imm(struct lp_build_context *bld, if(b == 2 && bld->type.floating) return lp_build_add(bld, a, a); - if(util_is_pot(b)) { + if(util_is_power_of_two(b)) { unsigned shift = ffs(b) - 1; if(bld->type.floating) { @@ -1266,6 +1256,11 @@ lp_build_sqrt(struct lp_build_context *bld, * * x_{i+1} = x_i * (2 - a * x_i) * + * XXX: Unfortunately this won't give IEEE-754 conformant results for 0 or + * +/-Inf, giving NaN instead. Certain applications rely on this behavior, + * such as Google Earth, which does RCP(RSQRT(0.0) when drawing the Earth's + * halo. It would be necessary to clamp the argument to prevent this. + * * See also: * - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division * - http://softwarecommunity.intel.com/articles/eng/1818.htm @@ -1306,13 +1301,27 @@ lp_build_rcp(struct lp_build_context *bld, if(LLVMIsConstant(a)) return LLVMConstFDiv(bld->one, a); - if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { + /* + * We don't use RCPPS because: + * - it only has 10bits of precision + * - it doesn't even get the reciprocate of 1.0 exactly + * - doing Newton-Rapshon steps yields wrong (NaN) values for 0.0 or Inf + * - for recent processors the benefit over DIVPS is marginal, a case + * depedent + * + * We could still use it on certain processors if benchmarks show that the + * RCPPS plus necessary workarounds are still preferrable to DIVPS; or for + * particular uses that require less workarounds. + */ + + if (FALSE && util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { + const unsigned num_iterations = 0; LLVMValueRef res; unsigned i; res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a); - for (i = 0; i < RCP_NEWTON_STEPS; ++i) { + for (i = 0; i < num_iterations; ++i) { res = lp_build_rcp_refine(bld, a, res); } @@ -1363,13 +1372,14 @@ lp_build_rsqrt(struct lp_build_context *bld, assert(type.floating); - if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { + if (util_cpu_caps.has_sse && type.width == 32 && type.length == 4) { + const unsigned num_iterations = 0; LLVMValueRef res; unsigned i; res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a); - for (i = 0; i < RSQRT_NEWTON_STEPS; ++i) { + for (i = 0; i < num_iterations; ++i) { res = lp_build_rsqrt_refine(bld, a, res); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c index 39dfc51e50..d3a5afff8c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c @@ -46,7 +46,7 @@ boolean lp_check_alignment(const void *ptr, unsigned alignment) { - assert(util_is_pot(alignment)); + assert(util_is_power_of_two(alignment)); return ((uintptr_t)ptr & (alignment - 1)) == 0; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 247cb83ce6..92123e09d3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -388,7 +388,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, if (format_matches_type(format_desc, type) && format_desc->block.bits <= type.width * 4 && - util_is_pot(format_desc->block.bits)) { + util_is_power_of_two(format_desc->block.bits)) { LLVMValueRef packed; /* @@ -416,7 +416,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && format_desc->block.width == 1 && format_desc->block.height == 1 && - util_is_pot(format_desc->block.bits) && + util_is_power_of_two(format_desc->block.bits) && format_desc->block.bits <= 32 && format_desc->is_bitmask && !format_desc->is_mixed && diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 6d5410d970..48baf7c425 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -40,6 +40,7 @@ #include <llvm/ExecutionEngine/ExecutionEngine.h> #include <llvm/ExecutionEngine/JITEventListener.h> #include <llvm/Support/CommandLine.h> +#include <llvm/Support/PrettyStackTrace.h> #include "pipe/p_config.h" #include "util/u_debug.h" @@ -143,7 +144,6 @@ lp_set_target_options(void) llvm::UnsafeFPMath = true; #endif -#if 0 /* * LLVM will generate MMX instructions for vectors <= 64 bits, leading to * innefficient code, and in 32bit systems, to the corruption of the FPU @@ -152,10 +152,8 @@ lp_set_target_options(void) * See also: * - http://llvm.org/bugs/show_bug.cgi?id=3287 * - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/ - * - * XXX: Unfortunately this is not working. */ - static boolean first = FALSE; + static boolean first = TRUE; if (first) { static const char* options[] = { "prog", @@ -164,7 +162,13 @@ lp_set_target_options(void) llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options)); first = FALSE; } -#endif + + /* + * By default LLVM adds a signal handler to output a pretty stack trace. + * This signal handler is never removed, causing problems when unloading the + * shared object where the gallium driver resides. + */ + llvm::DisablePrettyStackTrace = true; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index e470082b97..e947b90d16 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h @@ -37,6 +37,8 @@ #define LP_BLD_PACK_H +#include "pipe/p_compiler.h" + #include "gallivm/lp_bld.h" diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 0fd014ab9b..259b1142e3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -82,9 +82,9 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, state->swizzle_a = view->swizzle_a; state->target = texture->target; - state->pot_width = util_is_pot(texture->width0); - state->pot_height = util_is_pot(texture->height0); - state->pot_depth = util_is_pot(texture->depth0); + state->pot_width = util_is_power_of_two(texture->width0); + state->pot_height = util_is_power_of_two(texture->height0); + state->pot_depth = util_is_power_of_two(texture->depth0); state->wrap_s = sampler->wrap_s; state->wrap_t = sampler->wrap_t; @@ -124,6 +124,52 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, /** + * Compute the partial offset of a pixel block along an arbitrary axis. + * + * @param coord coordinate in pixels + * @param stride number of bytes between rows of successive pixel blocks + * @param block_length number of pixels in a pixels block along the coordinate + * axis + * @param out_offset resulting relative offset of the pixel block in bytes + * @param out_subcoord resulting sub-block pixel coordinate + */ +void +lp_build_sample_partial_offset(struct lp_build_context *bld, + unsigned block_length, + LLVMValueRef coord, + LLVMValueRef stride, + LLVMValueRef *out_offset, + LLVMValueRef *out_subcoord) +{ + LLVMValueRef offset; + LLVMValueRef subcoord; + + if (block_length == 1) { + subcoord = bld->zero; + } + else { + /* + * Pixel blocks have power of two dimensions. LLVM should convert the + * rem/div to bit arithmetic. + * TODO: Verify this. + */ + + LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length); + subcoord = LLVMBuildURem(bld->builder, coord, block_width, ""); + coord = LLVMBuildUDiv(bld->builder, coord, block_width, ""); + } + + offset = lp_build_mul(bld, coord, stride); + + assert(out_offset); + assert(out_subcoord); + + *out_offset = offset; + *out_subcoord = subcoord; +} + + +/** * Compute the offset of a pixel block. * * x, y, z, y_stride, z_stride are vectors, and they refer to pixels. @@ -144,48 +190,35 @@ lp_build_sample_offset(struct lp_build_context *bld, { LLVMValueRef x_stride; LLVMValueRef offset; - LLVMValueRef i; - LLVMValueRef j; - - /* - * Describe the coordinates in terms of pixel blocks. - * - * TODO: pixel blocks are power of two. LLVM should convert rem/div to - * bit arithmetic. Verify this. - */ - - if (format_desc->block.width == 1) { - i = bld->zero; - } - else { - LLVMValueRef block_width = lp_build_const_int_vec(bld->type, format_desc->block.width); - i = LLVMBuildURem(bld->builder, x, block_width, ""); - x = LLVMBuildUDiv(bld->builder, x, block_width, ""); - } - - if (format_desc->block.height == 1) { - j = bld->zero; - } - else { - LLVMValueRef block_height = lp_build_const_int_vec(bld->type, format_desc->block.height); - j = LLVMBuildURem(bld->builder, y, block_height, ""); - y = LLVMBuildUDiv(bld->builder, y, block_height, ""); - } x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8); - offset = lp_build_mul(bld, x, x_stride); + + lp_build_sample_partial_offset(bld, + format_desc->block.width, + x, x_stride, + &offset, out_i); if (y && y_stride) { - LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride); + LLVMValueRef y_offset; + lp_build_sample_partial_offset(bld, + format_desc->block.height, + y, y_stride, + &y_offset, out_j); offset = lp_build_add(bld, offset, y_offset); } + else { + *out_j = bld->zero; + } if (z && z_stride) { - LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride); + LLVMValueRef z_offset; + LLVMValueRef k; + lp_build_sample_partial_offset(bld, + 1, /* pixel blocks are always 2D */ + z, z_stride, + &z_offset, &k); offset = lp_build_add(bld, offset, z_offset); } *out_offset = offset; - *out_i = i; - *out_j = j; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index 5b8f478094..caafc4eca0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -36,6 +36,8 @@ #define LP_BLD_SAMPLE_H +#include "pipe/p_format.h" + #include "gallivm/lp_bld.h" struct pipe_resource; @@ -147,6 +149,15 @@ lp_sampler_static_state(struct lp_sampler_static_state *state, void +lp_build_sample_partial_offset(struct lp_build_context *bld, + unsigned block_length, + LLVMValueRef coord, + LLVMValueRef stride, + LLVMValueRef *out_offset, + LLVMValueRef *out_i); + + +void lp_build_sample_offset(struct lp_build_context *bld, const struct util_format_description *format_desc, LLVMValueRef x, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 806c7d56a8..1f39d9c98b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -176,6 +176,7 @@ texture_dims(enum pipe_texture_target tex) case PIPE_TEXTURE_1D: return 1; case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_CUBE: return 2; case PIPE_TEXTURE_3D: @@ -322,59 +323,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld, /** - * Fetch the texels as <4n x i8> in AoS form. - */ -static LLVMValueRef -lp_build_sample_packed(struct lp_build_sample_context *bld, - LLVMValueRef x, - LLVMValueRef y, - LLVMValueRef y_stride, - LLVMValueRef data_array) -{ - LLVMValueRef offset, i, j; - LLVMValueRef data_ptr; - LLVMValueRef res; - - /* convert x,y,z coords to linear offset from start of texture, in bytes */ - lp_build_sample_offset(&bld->uint_coord_bld, - bld->format_desc, - x, y, NULL, y_stride, NULL, - &offset, &i, &j); - - /* get pointer to mipmap level 0 data */ - data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0); - - if (util_format_is_rgba8_variant(bld->format_desc)) { - /* Just fetch the data directly without swizzling */ - assert(bld->format_desc->block.width == 1); - assert(bld->format_desc->block.height == 1); - assert(bld->format_desc->block.bits <= bld->texel_type.width); - - res = lp_build_gather(bld->builder, - bld->texel_type.length, - bld->format_desc->block.bits, - bld->texel_type.width, - data_ptr, offset); - } - else { - struct lp_type type; - - assert(bld->texel_type.width == 32); - - memset(&type, 0, sizeof type); - type.width = 8; - type.length = bld->texel_type.length*4; - type.norm = TRUE; - - res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type, - data_ptr, offset, i, j); - } - - return res; -} - - -/** * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes. */ static LLVMValueRef @@ -408,7 +356,7 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld, /** - * We only support a few wrap modes in lp_build_sample_wrap_int() at this time. + * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at this time. * Return whether the given mode is supported by that function. */ static boolean @@ -430,13 +378,18 @@ is_simple_wrap_mode(unsigned mode) * \param length the texture size along one dimension * \param is_pot if TRUE, length is a power of two * \param wrap_mode one of PIPE_TEX_WRAP_x + * \param i0 resulting sub-block pixel coordinate for coord0 */ -static LLVMValueRef -lp_build_sample_wrap_int(struct lp_build_sample_context *bld, - LLVMValueRef coord, - LLVMValueRef length, - boolean is_pot, - unsigned wrap_mode) +static void +lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, + unsigned block_length, + LLVMValueRef coord, + LLVMValueRef length, + LLVMValueRef stride, + boolean is_pot, + unsigned wrap_mode, + LLVMValueRef *out_offset, + LLVMValueRef *out_i) { struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; @@ -469,7 +422,134 @@ lp_build_sample_wrap_int(struct lp_build_sample_context *bld, assert(0); } - return coord; + lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride, + out_offset, out_i); +} + + +/** + * Build LLVM code for texture wrap mode, for scaled integer texcoords. + * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size + * \param length the texture size along one dimension + * \param stride pixel stride along the coordinate axis + * \param block_length is the length of the pixel block along the + * coordinate axis + * \param is_pot if TRUE, length is a power of two + * \param wrap_mode one of PIPE_TEX_WRAP_x + * \param offset0 resulting relative offset for coord0 + * \param offset1 resulting relative offset for coord0 + 1 + * \param i0 resulting sub-block pixel coordinate for coord0 + * \param i1 resulting sub-block pixel coordinate for coord0 + 1 + */ +static void +lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, + unsigned block_length, + LLVMValueRef coord0, + LLVMValueRef length, + LLVMValueRef stride, + boolean is_pot, + unsigned wrap_mode, + LLVMValueRef *offset0, + LLVMValueRef *offset1, + LLVMValueRef *i0, + LLVMValueRef *i1) +{ + struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; + struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMValueRef length_minus_one; + LLVMValueRef lmask, umask, mask; + + if (block_length != 1) { + /* + * If the pixel block covers more than one pixel then there is no easy + * way to calculate offset1 relative to offset0. Instead, compute them + * independently. + */ + + LLVMValueRef coord1; + + lp_build_sample_wrap_nearest_int(bld, + block_length, + coord0, + length, + stride, + is_pot, + wrap_mode, + offset0, i0); + + coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); + + lp_build_sample_wrap_nearest_int(bld, + block_length, + coord1, + length, + stride, + is_pot, + wrap_mode, + offset1, i1); + + return; + } + + /* + * Scalar pixels -- try to compute offset0 and offset1 with a single stride + * multiplication. + */ + + *i0 = uint_coord_bld->zero; + *i1 = uint_coord_bld->zero; + + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); + + switch(wrap_mode) { + case PIPE_TEX_WRAP_REPEAT: + if (is_pot) { + coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); + } + else { + /* Signed remainder won't give the right results for negative + * dividends but unsigned remainder does.*/ + coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); + } + + mask = lp_build_compare(bld->builder, int_coord_bld->type, + PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); + + *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); + *offset1 = LLVMBuildAnd(bld->builder, + lp_build_add(uint_coord_bld, *offset0, stride), + mask, ""); + break; + + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type, + PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero); + umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type, + PIPE_FUNC_LESS, coord0, length_minus_one); + + coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero); + coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one); + + mask = LLVMBuildAnd(bld->builder, lmask, umask, ""); + + *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); + *offset1 = lp_build_add(uint_coord_bld, + *offset0, + LLVMBuildAnd(bld->builder, stride, mask, "")); + break; + + case PIPE_TEX_WRAP_CLAMP: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_REPEAT: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + default: + assert(0); + *offset0 = uint_coord_bld->zero; + *offset1 = uint_coord_bld->zero; + break; + } } @@ -1740,16 +1820,21 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef i32_c8, i32_c128, i32_c255; LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; - LLVMValueRef x0, x1; - LLVMValueRef y0, y1; - LLVMValueRef neighbors[2][2]; + LLVMValueRef data_ptr; + LLVMValueRef x_stride, y_stride; + LLVMValueRef x_offset0, x_offset1; + LLVMValueRef y_offset0, y_offset1; + LLVMValueRef offset[2][2]; + LLVMValueRef x_subcoord[2], y_subcoord[2]; LLVMValueRef neighbors_lo[2][2]; LLVMValueRef neighbors_hi[2][2]; LLVMValueRef packed, packed_lo, packed_hi; LLVMValueRef unswizzled[4]; - LLVMValueRef stride; + const unsigned level = 0; + unsigned i, j; - assert(bld->static_state->target == PIPE_TEXTURE_2D); + assert(bld->static_state->target == PIPE_TEXTURE_2D + || bld->static_state->target == PIPE_TEXTURE_RECT); assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR); assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR); assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE); @@ -1793,21 +1878,30 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); - x0 = s_ipart; - y0 = t_ipart; - - x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one); - y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one); - - x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width, - bld->static_state->wrap_s); - y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height, - bld->static_state->wrap_t); - - x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width, - bld->static_state->wrap_s); - y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height, - bld->static_state->wrap_t); + x_stride = lp_build_const_vec(bld->uint_coord_bld.type, + bld->format_desc->block.bits/8); + + y_stride = lp_build_get_const_level_stride_vec(bld, stride_array, level); + + lp_build_sample_wrap_linear_int(bld, + bld->format_desc->block.width, + s_ipart, width, x_stride, + bld->static_state->pot_width, + bld->static_state->wrap_s, + &x_offset0, &x_offset1, + &x_subcoord[0], &x_subcoord[1]); + lp_build_sample_wrap_linear_int(bld, + bld->format_desc->block.height, + t_ipart, height, y_stride, + bld->static_state->pot_height, + bld->static_state->wrap_t, + &y_offset0, &y_offset1, + &y_subcoord[0], &y_subcoord[1]); + + offset[0][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset0); + offset[0][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset0); + offset[1][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset1); + offset[1][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset1); /* * Transform 4 x i32 in @@ -1836,7 +1930,6 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; LLVMValueRef shuffle_lo; LLVMValueRef shuffle_hi; - unsigned i, j; for(j = 0; j < h16.type.length; j += 4) { #ifdef PIPE_ARCH_LITTLE_ENDIAN @@ -1864,7 +1957,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); } - stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0); + /* + * get pointer to mipmap level 0 data + */ + data_ptr = lp_build_get_const_mipmap_level(bld, data_array, level); /* * Fetch the pixels as 4 x 32bit (rgba order might differ): @@ -1883,20 +1979,38 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, * The higher 8 bits of the resulting elements will be zero. */ - neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array); - neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array); - neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array); - neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array); + for (j = 0; j < 2; ++j) { + for (i = 0; i < 2; ++i) { + LLVMValueRef rgba8; - neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, ""); - neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, ""); - neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, ""); - neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, ""); + if (util_format_is_rgba8_variant(bld->format_desc)) { + /* + * Given the format is a rgba8, just read the pixels as is, + * without any swizzling. Swizzling will be done later. + */ + rgba8 = lp_build_gather(bld->builder, + bld->texel_type.length, + bld->format_desc->block.bits, + bld->texel_type.width, + data_ptr, offset[j][i]); - lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]); - lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]); - lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]); - lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]); + rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); + + } + else { + rgba8 = lp_build_fetch_rgba_aos(bld->builder, + bld->format_desc, + u8n.type, + data_ptr, offset[j][i], + x_subcoord[i], + y_subcoord[j]); + } + + lp_build_unpack2(builder, u8n.type, h16.type, + rgba8, + &neighbors_lo[j][i], &neighbors_hi[j][i]); + } + } /* * Linear interpolate with 8.8 fixed point. @@ -2077,7 +2191,8 @@ lp_build_sample_soa(LLVMBuilderRef builder, } else if (util_format_fits_8unorm(bld.format_desc) && bld.format_desc->nr_channels > 1 && - static_state->target == PIPE_TEXTURE_2D && + (static_state->target == PIPE_TEXTURE_2D || + static_state->target == PIPE_TEXTURE_RECT) && static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 0aa64affac..0e07f7f3f3 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -200,8 +200,10 @@ static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, } mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; assert(LLVMTypeOf(val) == mask->int_vec_type); - mask->cond_mask = val; - + mask->cond_mask = LLVMBuildAnd(mask->bld->builder, + mask->cond_mask, + val, + ""); lp_exec_mask_update(mask); } @@ -802,7 +804,7 @@ emit_store( case TGSI_FILE_PREDICATE: lp_exec_mask_store(&bld->exec_mask, pred, value, - bld->preds[index][chan_index]); + bld->preds[reg->Register.Index][chan_index]); break; default: diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index 3ffe916f8e..fec1d3dfbc 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -128,16 +128,16 @@ struct lp_build_context */ struct lp_type type; - /** Same as lp_build_undef(type) */ + /** Same as lp_build_elem_type(type) */ LLVMTypeRef elem_type; - /** Same as lp_build_undef(type) */ + /** Same as lp_build_vec_type(type) */ LLVMTypeRef vec_type; - /** Same as lp_build_undef(type) */ + /** Same as lp_build_int_elem_type(type) */ LLVMTypeRef int_elem_type; - /** Same as lp_build_undef(type) */ + /** Same as lp_build_int_vec_type(type) */ LLVMTypeRef int_vec_type; /** Same as lp_build_undef(type) */ |