summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/gallivm
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c48
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.c2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_aos.c4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_misc.cpp14
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.h2
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c103
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.h11
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c307
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c8
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_type.h8
10 files changed, 342 insertions, 165 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 7b35dd4bb4..e0d30be98d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -59,14 +59,6 @@
#include "lp_bld_arit.h"
-/*
- * XXX: Increasing eliminates some artifacts, but adds others, most
- * noticeably corruption in the Earth halo in Google Earth.
- */
-#define RCP_NEWTON_STEPS 0
-
-#define RSQRT_NEWTON_STEPS 0
-
#define EXP_POLY_DEGREE 3
#define LOG_POLY_DEGREE 5
@@ -267,7 +259,7 @@ lp_build_add(struct lp_build_context *bld,
}
-/** Return the sum of the elements of a */
+/** Return the scalar sum of the elements of a */
LLVMValueRef
lp_build_sum_vector(struct lp_build_context *bld,
LLVMValueRef a)
@@ -278,11 +270,9 @@ lp_build_sum_vector(struct lp_build_context *bld,
assert(lp_check_value(type, a));
- if (a == bld->zero)
- return bld->zero;
- if (a == bld->undef)
- return bld->undef;
- assert(type.length > 1);
+ if (type.length == 1) {
+ return a;
+ }
assert(!bld->type.norm);
@@ -546,7 +536,7 @@ lp_build_mul_imm(struct lp_build_context *bld,
if(b == 2 && bld->type.floating)
return lp_build_add(bld, a, a);
- if(util_is_pot(b)) {
+ if(util_is_power_of_two(b)) {
unsigned shift = ffs(b) - 1;
if(bld->type.floating) {
@@ -1266,6 +1256,11 @@ lp_build_sqrt(struct lp_build_context *bld,
*
* x_{i+1} = x_i * (2 - a * x_i)
*
+ * XXX: Unfortunately this won't give IEEE-754 conformant results for 0 or
+ * +/-Inf, giving NaN instead. Certain applications rely on this behavior,
+ * such as Google Earth, which does RCP(RSQRT(0.0) when drawing the Earth's
+ * halo. It would be necessary to clamp the argument to prevent this.
+ *
* See also:
* - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division
* - http://softwarecommunity.intel.com/articles/eng/1818.htm
@@ -1306,13 +1301,27 @@ lp_build_rcp(struct lp_build_context *bld,
if(LLVMIsConstant(a))
return LLVMConstFDiv(bld->one, a);
- if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ /*
+ * We don't use RCPPS because:
+ * - it only has 10bits of precision
+ * - it doesn't even get the reciprocate of 1.0 exactly
+ * - doing Newton-Rapshon steps yields wrong (NaN) values for 0.0 or Inf
+ * - for recent processors the benefit over DIVPS is marginal, a case
+ * depedent
+ *
+ * We could still use it on certain processors if benchmarks show that the
+ * RCPPS plus necessary workarounds are still preferrable to DIVPS; or for
+ * particular uses that require less workarounds.
+ */
+
+ if (FALSE && util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ const unsigned num_iterations = 0;
LLVMValueRef res;
unsigned i;
res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a);
- for (i = 0; i < RCP_NEWTON_STEPS; ++i) {
+ for (i = 0; i < num_iterations; ++i) {
res = lp_build_rcp_refine(bld, a, res);
}
@@ -1363,13 +1372,14 @@ lp_build_rsqrt(struct lp_build_context *bld,
assert(type.floating);
- if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ if (util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ const unsigned num_iterations = 0;
LLVMValueRef res;
unsigned i;
res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a);
- for (i = 0; i < RSQRT_NEWTON_STEPS; ++i) {
+ for (i = 0; i < num_iterations; ++i) {
res = lp_build_rsqrt_refine(bld, a, res);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
index 39dfc51e50..d3a5afff8c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
@@ -46,7 +46,7 @@
boolean
lp_check_alignment(const void *ptr, unsigned alignment)
{
- assert(util_is_pot(alignment));
+ assert(util_is_power_of_two(alignment));
return ((uintptr_t)ptr & (alignment - 1)) == 0;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 247cb83ce6..92123e09d3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -388,7 +388,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
if (format_matches_type(format_desc, type) &&
format_desc->block.bits <= type.width * 4 &&
- util_is_pot(format_desc->block.bits)) {
+ util_is_power_of_two(format_desc->block.bits)) {
LLVMValueRef packed;
/*
@@ -416,7 +416,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
format_desc->block.width == 1 &&
format_desc->block.height == 1 &&
- util_is_pot(format_desc->block.bits) &&
+ util_is_power_of_two(format_desc->block.bits) &&
format_desc->block.bits <= 32 &&
format_desc->is_bitmask &&
!format_desc->is_mixed &&
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 6d5410d970..48baf7c425 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -40,6 +40,7 @@
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/JITEventListener.h>
#include <llvm/Support/CommandLine.h>
+#include <llvm/Support/PrettyStackTrace.h>
#include "pipe/p_config.h"
#include "util/u_debug.h"
@@ -143,7 +144,6 @@ lp_set_target_options(void)
llvm::UnsafeFPMath = true;
#endif
-#if 0
/*
* LLVM will generate MMX instructions for vectors <= 64 bits, leading to
* innefficient code, and in 32bit systems, to the corruption of the FPU
@@ -152,10 +152,8 @@ lp_set_target_options(void)
* See also:
* - http://llvm.org/bugs/show_bug.cgi?id=3287
* - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/
- *
- * XXX: Unfortunately this is not working.
*/
- static boolean first = FALSE;
+ static boolean first = TRUE;
if (first) {
static const char* options[] = {
"prog",
@@ -164,7 +162,13 @@ lp_set_target_options(void)
llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options));
first = FALSE;
}
-#endif
+
+ /*
+ * By default LLVM adds a signal handler to output a pretty stack trace.
+ * This signal handler is never removed, causing problems when unloading the
+ * shared object where the gallium driver resides.
+ */
+ llvm::DisablePrettyStackTrace = true;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
index e470082b97..e947b90d16 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -37,6 +37,8 @@
#define LP_BLD_PACK_H
+#include "pipe/p_compiler.h"
+
#include "gallivm/lp_bld.h"
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 0fd014ab9b..259b1142e3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -82,9 +82,9 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
state->swizzle_a = view->swizzle_a;
state->target = texture->target;
- state->pot_width = util_is_pot(texture->width0);
- state->pot_height = util_is_pot(texture->height0);
- state->pot_depth = util_is_pot(texture->depth0);
+ state->pot_width = util_is_power_of_two(texture->width0);
+ state->pot_height = util_is_power_of_two(texture->height0);
+ state->pot_depth = util_is_power_of_two(texture->depth0);
state->wrap_s = sampler->wrap_s;
state->wrap_t = sampler->wrap_t;
@@ -124,6 +124,52 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
/**
+ * Compute the partial offset of a pixel block along an arbitrary axis.
+ *
+ * @param coord coordinate in pixels
+ * @param stride number of bytes between rows of successive pixel blocks
+ * @param block_length number of pixels in a pixels block along the coordinate
+ * axis
+ * @param out_offset resulting relative offset of the pixel block in bytes
+ * @param out_subcoord resulting sub-block pixel coordinate
+ */
+void
+lp_build_sample_partial_offset(struct lp_build_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord,
+ LLVMValueRef stride,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_subcoord)
+{
+ LLVMValueRef offset;
+ LLVMValueRef subcoord;
+
+ if (block_length == 1) {
+ subcoord = bld->zero;
+ }
+ else {
+ /*
+ * Pixel blocks have power of two dimensions. LLVM should convert the
+ * rem/div to bit arithmetic.
+ * TODO: Verify this.
+ */
+
+ LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
+ subcoord = LLVMBuildURem(bld->builder, coord, block_width, "");
+ coord = LLVMBuildUDiv(bld->builder, coord, block_width, "");
+ }
+
+ offset = lp_build_mul(bld, coord, stride);
+
+ assert(out_offset);
+ assert(out_subcoord);
+
+ *out_offset = offset;
+ *out_subcoord = subcoord;
+}
+
+
+/**
* Compute the offset of a pixel block.
*
* x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
@@ -144,48 +190,35 @@ lp_build_sample_offset(struct lp_build_context *bld,
{
LLVMValueRef x_stride;
LLVMValueRef offset;
- LLVMValueRef i;
- LLVMValueRef j;
-
- /*
- * Describe the coordinates in terms of pixel blocks.
- *
- * TODO: pixel blocks are power of two. LLVM should convert rem/div to
- * bit arithmetic. Verify this.
- */
-
- if (format_desc->block.width == 1) {
- i = bld->zero;
- }
- else {
- LLVMValueRef block_width = lp_build_const_int_vec(bld->type, format_desc->block.width);
- i = LLVMBuildURem(bld->builder, x, block_width, "");
- x = LLVMBuildUDiv(bld->builder, x, block_width, "");
- }
-
- if (format_desc->block.height == 1) {
- j = bld->zero;
- }
- else {
- LLVMValueRef block_height = lp_build_const_int_vec(bld->type, format_desc->block.height);
- j = LLVMBuildURem(bld->builder, y, block_height, "");
- y = LLVMBuildUDiv(bld->builder, y, block_height, "");
- }
x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8);
- offset = lp_build_mul(bld, x, x_stride);
+
+ lp_build_sample_partial_offset(bld,
+ format_desc->block.width,
+ x, x_stride,
+ &offset, out_i);
if (y && y_stride) {
- LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride);
+ LLVMValueRef y_offset;
+ lp_build_sample_partial_offset(bld,
+ format_desc->block.height,
+ y, y_stride,
+ &y_offset, out_j);
offset = lp_build_add(bld, offset, y_offset);
}
+ else {
+ *out_j = bld->zero;
+ }
if (z && z_stride) {
- LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride);
+ LLVMValueRef z_offset;
+ LLVMValueRef k;
+ lp_build_sample_partial_offset(bld,
+ 1, /* pixel blocks are always 2D */
+ z, z_stride,
+ &z_offset, &k);
offset = lp_build_add(bld, offset, z_offset);
}
*out_offset = offset;
- *out_i = i;
- *out_j = j;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 5b8f478094..caafc4eca0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -36,6 +36,8 @@
#define LP_BLD_SAMPLE_H
+#include "pipe/p_format.h"
+
#include "gallivm/lp_bld.h"
struct pipe_resource;
@@ -147,6 +149,15 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
void
+lp_build_sample_partial_offset(struct lp_build_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord,
+ LLVMValueRef stride,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i);
+
+
+void
lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,
LLVMValueRef x,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 806c7d56a8..1f39d9c98b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -176,6 +176,7 @@ texture_dims(enum pipe_texture_target tex)
case PIPE_TEXTURE_1D:
return 1;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_CUBE:
return 2;
case PIPE_TEXTURE_3D:
@@ -322,59 +323,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
/**
- * Fetch the texels as <4n x i8> in AoS form.
- */
-static LLVMValueRef
-lp_build_sample_packed(struct lp_build_sample_context *bld,
- LLVMValueRef x,
- LLVMValueRef y,
- LLVMValueRef y_stride,
- LLVMValueRef data_array)
-{
- LLVMValueRef offset, i, j;
- LLVMValueRef data_ptr;
- LLVMValueRef res;
-
- /* convert x,y,z coords to linear offset from start of texture, in bytes */
- lp_build_sample_offset(&bld->uint_coord_bld,
- bld->format_desc,
- x, y, NULL, y_stride, NULL,
- &offset, &i, &j);
-
- /* get pointer to mipmap level 0 data */
- data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
-
- if (util_format_is_rgba8_variant(bld->format_desc)) {
- /* Just fetch the data directly without swizzling */
- assert(bld->format_desc->block.width == 1);
- assert(bld->format_desc->block.height == 1);
- assert(bld->format_desc->block.bits <= bld->texel_type.width);
-
- res = lp_build_gather(bld->builder,
- bld->texel_type.length,
- bld->format_desc->block.bits,
- bld->texel_type.width,
- data_ptr, offset);
- }
- else {
- struct lp_type type;
-
- assert(bld->texel_type.width == 32);
-
- memset(&type, 0, sizeof type);
- type.width = 8;
- type.length = bld->texel_type.length*4;
- type.norm = TRUE;
-
- res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type,
- data_ptr, offset, i, j);
- }
-
- return res;
-}
-
-
-/**
* Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
*/
static LLVMValueRef
@@ -408,7 +356,7 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld,
/**
- * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
+ * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at this time.
* Return whether the given mode is supported by that function.
*/
static boolean
@@ -430,13 +378,18 @@ is_simple_wrap_mode(unsigned mode)
* \param length the texture size along one dimension
* \param is_pot if TRUE, length is a power of two
* \param wrap_mode one of PIPE_TEX_WRAP_x
+ * \param i0 resulting sub-block pixel coordinate for coord0
*/
-static LLVMValueRef
-lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
- LLVMValueRef coord,
- LLVMValueRef length,
- boolean is_pot,
- unsigned wrap_mode)
+static void
+lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord,
+ LLVMValueRef length,
+ LLVMValueRef stride,
+ boolean is_pot,
+ unsigned wrap_mode,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i)
{
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
@@ -469,7 +422,134 @@ lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
assert(0);
}
- return coord;
+ lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
+ out_offset, out_i);
+}
+
+
+/**
+ * Build LLVM code for texture wrap mode, for scaled integer texcoords.
+ * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
+ * \param length the texture size along one dimension
+ * \param stride pixel stride along the coordinate axis
+ * \param block_length is the length of the pixel block along the
+ * coordinate axis
+ * \param is_pot if TRUE, length is a power of two
+ * \param wrap_mode one of PIPE_TEX_WRAP_x
+ * \param offset0 resulting relative offset for coord0
+ * \param offset1 resulting relative offset for coord0 + 1
+ * \param i0 resulting sub-block pixel coordinate for coord0
+ * \param i1 resulting sub-block pixel coordinate for coord0 + 1
+ */
+static void
+lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord0,
+ LLVMValueRef length,
+ LLVMValueRef stride,
+ boolean is_pot,
+ unsigned wrap_mode,
+ LLVMValueRef *offset0,
+ LLVMValueRef *offset1,
+ LLVMValueRef *i0,
+ LLVMValueRef *i1)
+{
+ struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef length_minus_one;
+ LLVMValueRef lmask, umask, mask;
+
+ if (block_length != 1) {
+ /*
+ * If the pixel block covers more than one pixel then there is no easy
+ * way to calculate offset1 relative to offset0. Instead, compute them
+ * independently.
+ */
+
+ LLVMValueRef coord1;
+
+ lp_build_sample_wrap_nearest_int(bld,
+ block_length,
+ coord0,
+ length,
+ stride,
+ is_pot,
+ wrap_mode,
+ offset0, i0);
+
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+
+ lp_build_sample_wrap_nearest_int(bld,
+ block_length,
+ coord1,
+ length,
+ stride,
+ is_pot,
+ wrap_mode,
+ offset1, i1);
+
+ return;
+ }
+
+ /*
+ * Scalar pixels -- try to compute offset0 and offset1 with a single stride
+ * multiplication.
+ */
+
+ *i0 = uint_coord_bld->zero;
+ *i1 = uint_coord_bld->zero;
+
+ length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
+
+ switch(wrap_mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ if (is_pot) {
+ coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
+ }
+ else {
+ /* Signed remainder won't give the right results for negative
+ * dividends but unsigned remainder does.*/
+ coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
+ }
+
+ mask = lp_build_compare(bld->builder, int_coord_bld->type,
+ PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
+
+ *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+ *offset1 = LLVMBuildAnd(bld->builder,
+ lp_build_add(uint_coord_bld, *offset0, stride),
+ mask, "");
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
+ PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
+ umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
+ PIPE_FUNC_LESS, coord0, length_minus_one);
+
+ coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
+ coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
+
+ mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
+
+ *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+ *offset1 = lp_build_add(uint_coord_bld,
+ *offset0,
+ LLVMBuildAnd(bld->builder, stride, mask, ""));
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ default:
+ assert(0);
+ *offset0 = uint_coord_bld->zero;
+ *offset1 = uint_coord_bld->zero;
+ break;
+ }
}
@@ -1740,16 +1820,21 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef i32_c8, i32_c128, i32_c255;
LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
- LLVMValueRef x0, x1;
- LLVMValueRef y0, y1;
- LLVMValueRef neighbors[2][2];
+ LLVMValueRef data_ptr;
+ LLVMValueRef x_stride, y_stride;
+ LLVMValueRef x_offset0, x_offset1;
+ LLVMValueRef y_offset0, y_offset1;
+ LLVMValueRef offset[2][2];
+ LLVMValueRef x_subcoord[2], y_subcoord[2];
LLVMValueRef neighbors_lo[2][2];
LLVMValueRef neighbors_hi[2][2];
LLVMValueRef packed, packed_lo, packed_hi;
LLVMValueRef unswizzled[4];
- LLVMValueRef stride;
+ const unsigned level = 0;
+ unsigned i, j;
- assert(bld->static_state->target == PIPE_TEXTURE_2D);
+ assert(bld->static_state->target == PIPE_TEXTURE_2D
+ || bld->static_state->target == PIPE_TEXTURE_RECT);
assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
@@ -1793,21 +1878,30 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
- x0 = s_ipart;
- y0 = t_ipart;
-
- x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
- y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
-
- x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width,
- bld->static_state->wrap_s);
- y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
- bld->static_state->wrap_t);
-
- x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width,
- bld->static_state->wrap_s);
- y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
- bld->static_state->wrap_t);
+ x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+ bld->format_desc->block.bits/8);
+
+ y_stride = lp_build_get_const_level_stride_vec(bld, stride_array, level);
+
+ lp_build_sample_wrap_linear_int(bld,
+ bld->format_desc->block.width,
+ s_ipart, width, x_stride,
+ bld->static_state->pot_width,
+ bld->static_state->wrap_s,
+ &x_offset0, &x_offset1,
+ &x_subcoord[0], &x_subcoord[1]);
+ lp_build_sample_wrap_linear_int(bld,
+ bld->format_desc->block.height,
+ t_ipart, height, y_stride,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_t,
+ &y_offset0, &y_offset1,
+ &y_subcoord[0], &y_subcoord[1]);
+
+ offset[0][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset0);
+ offset[0][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset0);
+ offset[1][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset1);
+ offset[1][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset1);
/*
* Transform 4 x i32 in
@@ -1836,7 +1930,6 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
LLVMValueRef shuffle_lo;
LLVMValueRef shuffle_hi;
- unsigned i, j;
for(j = 0; j < h16.type.length; j += 4) {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
@@ -1864,7 +1957,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
}
- stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
+ /*
+ * get pointer to mipmap level 0 data
+ */
+ data_ptr = lp_build_get_const_mipmap_level(bld, data_array, level);
/*
* Fetch the pixels as 4 x 32bit (rgba order might differ):
@@ -1883,20 +1979,38 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
* The higher 8 bits of the resulting elements will be zero.
*/
- neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
- neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
- neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
- neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
+ for (j = 0; j < 2; ++j) {
+ for (i = 0; i < 2; ++i) {
+ LLVMValueRef rgba8;
- neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
- neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
- neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
- neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ /*
+ * Given the format is a rgba8, just read the pixels as is,
+ * without any swizzling. Swizzling will be done later.
+ */
+ rgba8 = lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset[j][i]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
+ rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
+
+ }
+ else {
+ rgba8 = lp_build_fetch_rgba_aos(bld->builder,
+ bld->format_desc,
+ u8n.type,
+ data_ptr, offset[j][i],
+ x_subcoord[i],
+ y_subcoord[j]);
+ }
+
+ lp_build_unpack2(builder, u8n.type, h16.type,
+ rgba8,
+ &neighbors_lo[j][i], &neighbors_hi[j][i]);
+ }
+ }
/*
* Linear interpolate with 8.8 fixed point.
@@ -2077,7 +2191,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
}
else if (util_format_fits_8unorm(bld.format_desc) &&
bld.format_desc->nr_channels > 1 &&
- static_state->target == PIPE_TEXTURE_2D &&
+ (static_state->target == PIPE_TEXTURE_2D ||
+ static_state->target == PIPE_TEXTURE_RECT) &&
static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 0aa64affac..0e07f7f3f3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -200,8 +200,10 @@ static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
}
mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
assert(LLVMTypeOf(val) == mask->int_vec_type);
- mask->cond_mask = val;
-
+ mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->cond_mask,
+ val,
+ "");
lp_exec_mask_update(mask);
}
@@ -802,7 +804,7 @@ emit_store(
case TGSI_FILE_PREDICATE:
lp_exec_mask_store(&bld->exec_mask, pred, value,
- bld->preds[index][chan_index]);
+ bld->preds[reg->Register.Index][chan_index]);
break;
default:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index 3ffe916f8e..fec1d3dfbc 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -128,16 +128,16 @@ struct lp_build_context
*/
struct lp_type type;
- /** Same as lp_build_undef(type) */
+ /** Same as lp_build_elem_type(type) */
LLVMTypeRef elem_type;
- /** Same as lp_build_undef(type) */
+ /** Same as lp_build_vec_type(type) */
LLVMTypeRef vec_type;
- /** Same as lp_build_undef(type) */
+ /** Same as lp_build_int_elem_type(type) */
LLVMTypeRef int_elem_type;
- /** Same as lp_build_undef(type) */
+ /** Same as lp_build_int_vec_type(type) */
LLVMTypeRef int_vec_type;
/** Same as lp_build_undef(type) */