Diffstat (limited to 'src/gallium/auxiliary/gallivm')
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_arit.c | 87
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_const.h | 8
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_conv.c | 47
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_format.h | 30
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_format_aos.c | 432
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 91
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c | 399
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_gather.c | 148
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_gather.h | 61
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_init.c | 30
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_init.h | 1
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_logic.c | 49
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_pack.c | 120
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_pack.h | 8
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_quad.c | 8
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_sample.c | 95
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_sample.h | 16
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 219
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_swizzle.c | 211
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_swizzle.h | 20
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 209
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_type.h | 48
22 files changed, 1876 insertions, 461 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index d926b2de18..f5f2623e46 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -56,7 +56,6 @@
#include "lp_bld_intr.h"
#include "lp_bld_logic.h"
#include "lp_bld_pack.h"
-#include "lp_bld_debug.h"
#include "lp_bld_arit.h"
@@ -847,6 +846,11 @@ lp_build_round_sse41(struct lp_build_context *bld,
}
+/**
+ * Return the integer part of a float (vector) value. The returned value is
+ * a float (vector).
+ * Ex: trunc(-1.5) = -1.0
+ */
LLVMValueRef
lp_build_trunc(struct lp_build_context *bld,
LLVMValueRef a)
@@ -869,6 +873,12 @@ lp_build_trunc(struct lp_build_context *bld,
}
+/**
+ * Return float (vector) rounded to nearest integer (vector). The returned
+ * value is a float (vector).
+ * Ex: round(0.9) = 1.0
+ * Ex: round(-1.5) = -2.0
+ */
LLVMValueRef
lp_build_round(struct lp_build_context *bld,
LLVMValueRef a)
@@ -890,6 +900,11 @@ lp_build_round(struct lp_build_context *bld,
}
+/**
+ * Return floor of float (vector), result is a float (vector)
+ * Ex: floor(1.1) = 1.0
+ * Ex: floor(-1.1) = -2.0
+ */
LLVMValueRef
lp_build_floor(struct lp_build_context *bld,
LLVMValueRef a)
@@ -911,6 +926,11 @@ lp_build_floor(struct lp_build_context *bld,
}
+/**
+ * Return ceiling of float (vector), returning float (vector).
+ * Ex: ceil( 1.1) = 2.0
+ * Ex: ceil(-1.1) = -1.0
+ */
LLVMValueRef
lp_build_ceil(struct lp_build_context *bld,
LLVMValueRef a)
@@ -933,7 +953,7 @@ lp_build_ceil(struct lp_build_context *bld,
/**
- * Return fractional part of 'a' computed as a - floor(f)
+ * Return fractional part of 'a' computed as a - floor(a)
* Typically used in texture coord arithmetic.
*/
LLVMValueRef
@@ -946,8 +966,9 @@ lp_build_fract(struct lp_build_context *bld,
/**
- * Convert to integer, through whichever rounding method that's fastest,
- * typically truncating toward zero.
+ * Return the integer part of a float (vector) value. The returned value is
+ * an integer (vector).
+ * Ex: itrunc(-1.5) = -1
*/
LLVMValueRef
lp_build_itrunc(struct lp_build_context *bld,
@@ -964,7 +985,10 @@ lp_build_itrunc(struct lp_build_context *bld,
/**
- * Convert float[] to int[] with round().
+ * Return float (vector) rounded to nearest integer (vector). The returned
+ * value is an integer (vector).
+ * Ex: iround(0.9) = 1
+ * Ex: iround(-1.5) = -2
*/
LLVMValueRef
lp_build_iround(struct lp_build_context *bld,
@@ -1007,7 +1031,9 @@ lp_build_iround(struct lp_build_context *bld,
/**
- * Convert float[] to int[] with floor().
+ * Return floor of float (vector), result is an int (vector)
+ * Ex: ifloor(1.1) = 1
+ * Ex: ifloor(-1.1) = -2
*/
LLVMValueRef
lp_build_ifloor(struct lp_build_context *bld,
@@ -1034,29 +1060,31 @@ lp_build_ifloor(struct lp_build_context *bld,
/* sign = a < 0 ? ~0 : 0 */
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
sign = LLVMBuildAnd(bld->builder, sign, mask, "");
- sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "");
- lp_build_name(sign, "floor.sign");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign");
/* offset = -0.99999(9)f */
- offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa));
+ offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
offset = LLVMConstBitCast(offset, int_vec_type);
- /* offset = a < 0 ? -0.99999(9)f : 0.0f */
+ /* offset = a < 0 ? offset : 0.0f */
offset = LLVMBuildAnd(bld->builder, offset, sign, "");
- offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "");
- lp_build_name(offset, "floor.offset");
+ offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");
- res = LLVMBuildAdd(bld->builder, a, offset, "");
- lp_build_name(res, "floor.res");
+ res = LLVMBuildAdd(bld->builder, a, offset, "ifloor.res");
}
- res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
- lp_build_name(res, "floor");
+ /* round to nearest (toward zero) */
+ res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "ifloor.res");
return res;
}
+/**
+ * Return ceiling of float (vector), returning int (vector).
+ * Ex: iceil( 1.1) = 2
+ * Ex: iceil(-1.1) = -1
+ */
LLVMValueRef
lp_build_iceil(struct lp_build_context *bld,
LLVMValueRef a)
@@ -1072,12 +1100,31 @@ lp_build_iceil(struct lp_build_context *bld,
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
}
else {
- /* TODO: mimic lp_build_ifloor() here */
- assert(0);
- res = bld->undef;
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+ LLVMValueRef offset;
+
+ /* sign = a < 0 ? 0 : ~0 */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign");
+ sign = LLVMBuildNot(bld->builder, sign, "iceil.not");
+
+ /* offset = 0.99999(9)f */
+ offset = lp_build_const_vec(type, (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
+ offset = LLVMConstBitCast(offset, int_vec_type);
+
+ /* offset = a < 0 ? 0.0 : offset */
+ offset = LLVMBuildAnd(bld->builder, offset, sign, "");
+ offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
+
+ res = LLVMBuildAdd(bld->builder, a, offset, "iceil.res");
}
- res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
+ /* round to nearest (toward zero) */
+ res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "iceil.res");
return res;
}
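
Reviewer note, not part of the patch: a scalar C model of the offset-and-truncate trick the new ifloor()/iceil() paths implement. The offset constant is illustrative; the generated code derives it from the mantissa width of the type.

#include <stdio.h>

/* Scalar model of the branchless ifloor/iceil trick: bias the value by a
 * little less than 1.0, but only for inputs whose sign would otherwise make
 * plain truncation round the wrong way, then truncate toward zero
 * (which is what LLVMBuildFPToSI does). */
static int model_ifloor(float a)
{
   const float offset = -0.999999f;   /* ~ -(2^mantissa - 1) / 2^mantissa */
   if (a < 0.0f)                      /* the sign mask selects the offset */
      a += offset;
   return (int)a;
}

static int model_iceil(float a)
{
   const float offset = 0.999999f;
   if (!(a < 0.0f))                   /* non-negative inputs get the offset */
      a += offset;
   return (int)a;
}

int main(void)
{
   printf("%d %d\n", model_ifloor(-1.1f), model_iceil(-1.1f));   /* -2 -1 */
   printf("%d %d\n", model_ifloor( 1.1f), model_iceil( 1.1f));   /*  1  2 */
   return 0;
}
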
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h
index d46b9f882b..7ee8fff140 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h
@@ -107,4 +107,12 @@ lp_build_const_mask_aos(struct lp_type type,
const boolean cond[4]);
+static INLINE LLVMValueRef
+lp_build_const_int32(int i)
+{
+ return LLVMConstInt(LLVMInt32Type(), i, 0);
+}
+
+
+
#endif /* !LP_BLD_CONST_H */
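
Reviewer note: the new helper just wraps the ubiquitous LLVMConstInt(LLVMInt32Type(), i, 0) pattern. A hypothetical call site (extract_lane2 and its arguments are made-up names, assuming the usual LLVM-C and lp_bld_const.h includes):

/* Extract a fixed lane from a vector value without spelling out the
 * LLVMConstInt() boilerplate at every call site. */
static LLVMValueRef
extract_lane2(LLVMBuilderRef builder, LLVMValueRef vec)
{
   return LLVMBuildExtractElement(builder, vec, lp_build_const_int32(2), "");
}
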
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 3f7f2ebde9..77012f1fac 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -83,6 +83,9 @@
*
* Although the result values can be scaled to an arbitrary bit width specified
* by dst_width, the actual result type will have the same width.
+ *
+ * Ex: src = { float, float, float, float }
+ * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
*/
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
@@ -152,6 +155,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
/**
* Inverse of lp_build_clamped_float_to_unsigned_norm above.
+ * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
+ * return {float, float, float, float} with values in range [0, 1].
*/
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
@@ -219,18 +224,19 @@ lp_build_conv(LLVMBuilderRef builder,
unsigned num_tmps;
unsigned i;
- /* Register width must remain constant */
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
/* We must not loose or gain channels. Only precision */
assert(src_type.length * num_srcs == dst_type.length * num_dsts);
assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
+ assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
tmp_type = src_type;
- for(i = 0; i < num_srcs; ++i)
+ for(i = 0; i < num_srcs; ++i) {
+ assert(lp_check_value(src_type, src[i]));
tmp[i] = src[i];
+ }
num_tmps = num_srcs;
/*
@@ -326,30 +332,25 @@ lp_build_conv(LLVMBuilderRef builder,
/*
* Truncate or expand bit width
+ *
+ * No data conversion should happen here, although the sign bits are
+ * crucial to avoid bad clamping.
*/
- assert(!tmp_type.floating || tmp_type.width == dst_type.width);
+ {
+ struct lp_type new_type;
- if(tmp_type.width > dst_type.width) {
- assert(num_dsts == 1);
- tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
- tmp_type.width = dst_type.width;
- tmp_type.length = dst_type.length;
- num_tmps = 1;
- }
+ new_type = tmp_type;
+ new_type.sign = dst_type.sign;
+ new_type.width = dst_type.width;
+ new_type.length = dst_type.length;
+
+ lp_build_resize(builder, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts);
- if(tmp_type.width < dst_type.width) {
- assert(num_tmps == 1);
- lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
- tmp_type.width = dst_type.width;
- tmp_type.length = dst_type.length;
+ tmp_type = new_type;
num_tmps = num_dsts;
}
- assert(tmp_type.width == dst_type.width);
- assert(tmp_type.length == dst_type.length);
- assert(num_tmps == num_dsts);
-
/*
* Scale to the widest range
*/
@@ -406,8 +407,10 @@ lp_build_conv(LLVMBuilderRef builder,
}
}
- for(i = 0; i < num_dsts; ++i)
+ for(i = 0; i < num_dsts; ++i) {
dst[i] = tmp[i];
+ assert(lp_check_value(dst_type, dst[i]));
+ }
}
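
Reviewer note: a plain-C model of what the two normalized-conversion helpers documented above compute per element (illustrative only; the generated code vectorizes this and handles rounding and precision differently):

#include <stdint.h>
#include <stdio.h>

/* Model of lp_build_clamped_float_to_unsigned_norm(): map a float already
 * clamped to [0,1] onto the full range of a dst_width-bit unsigned integer. */
static uint32_t float_to_unorm(float x, unsigned dst_width)
{
   uint32_t max = (dst_width >= 32) ? 0xffffffffu : ((1u << dst_width) - 1);
   return (uint32_t)(x * (float)max + 0.5f);
}

/* Model of lp_build_unsigned_norm_to_float(): the inverse mapping. */
static float unorm_to_float(uint32_t x, unsigned src_width)
{
   uint32_t max = (src_width >= 32) ? 0xffffffffu : ((1u << src_width) - 1);
   return (float)x / (float)max;
}

int main(void)
{
   printf("%u\n", float_to_unorm(1.0f, 8));   /* 255 */
   printf("%f\n", unorm_to_float(128, 8));    /* ~0.502 */
   return 0;
}
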
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
index 5f5036e7bd..60e22d727a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -48,9 +48,9 @@ struct lp_build_context;
*/
LLVMValueRef
-lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
- const struct util_format_description *desc,
- LLVMValueRef packed);
+lp_build_format_swizzle_aos(const struct util_format_description *desc,
+ struct lp_build_context *bld,
+ LLVMValueRef unswizzled);
LLVMValueRef
lp_build_pack_rgba_aos(LLVMBuilderRef builder,
@@ -60,7 +60,9 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
LLVMValueRef
lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
const struct util_format_description *format_desc,
- LLVMValueRef ptr,
+ struct lp_type type,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
LLVMValueRef i,
LLVMValueRef j);
@@ -72,7 +74,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
void
lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
struct lp_build_context *bld,
- const LLVMValueRef *unswizzled,
+ const LLVMValueRef unswizzled[4],
LLVMValueRef swizzled_out[4]);
void
@@ -82,6 +84,11 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
LLVMValueRef packed,
LLVMValueRef rgba_out[4]);
+void
+lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
+ struct lp_type dst_type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba);
void
lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
@@ -93,5 +100,18 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
LLVMValueRef j,
LLVMValueRef rgba_out[4]);
+/*
+ * YUV
+ */
+
+
+LLVMValueRef
+lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ unsigned n,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
+ LLVMValueRef i,
+ LLVMValueRef j);
#endif /* !LP_BLD_FORMAT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 87e3e72a6e..0f01fc1d75 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -38,33 +38,122 @@
#include "util/u_math.h"
#include "util/u_string.h"
+#include "lp_bld_arit.h"
#include "lp_bld_init.h"
#include "lp_bld_type.h"
#include "lp_bld_flow.h"
+#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_gather.h"
#include "lp_bld_format.h"
/**
+ * Basic swizzling. Rearrange the order of the unswizzled array elements
+ * according to the format description. PIPE_SWIZZLE_ZERO/ONE are supported
+ * too.
+ * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
+ */
+LLVMValueRef
+lp_build_format_swizzle_aos(const struct util_format_description *desc,
+ struct lp_build_context *bld,
+ LLVMValueRef unswizzled)
+{
+ unsigned char swizzles[4];
+ unsigned chan;
+
+ assert(bld->type.length % 4 == 0);
+
+ for (chan = 0; chan < 4; ++chan) {
+ enum util_format_swizzle swizzle;
+
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ /*
+ * For ZS formats do RGBA = ZZZ1
+ */
+ if (chan == 3) {
+ swizzle = UTIL_FORMAT_SWIZZLE_1;
+ } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
+ swizzle = UTIL_FORMAT_SWIZZLE_0;
+ } else {
+ swizzle = desc->swizzle[0];
+ }
+ } else {
+ swizzle = desc->swizzle[chan];
+ }
+ swizzles[chan] = swizzle;
+ }
+
+ return lp_build_swizzle_aos(bld, unswizzled, swizzles);
+}
+
+
+/**
+ * Whether the format matches the vector type, apart of swizzles.
+ */
+static INLINE boolean
+format_matches_type(const struct util_format_description *desc,
+ struct lp_type type)
+{
+ enum util_format_type chan_type;
+ unsigned chan;
+
+ assert(type.length % 4 == 0);
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
+ desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
+ desc->block.width != 1 ||
+ desc->block.height != 1) {
+ return FALSE;
+ }
+
+ if (type.floating) {
+ chan_type = UTIL_FORMAT_TYPE_FLOAT;
+ } else if (type.fixed) {
+ chan_type = UTIL_FORMAT_TYPE_FIXED;
+ } else if (type.sign) {
+ chan_type = UTIL_FORMAT_TYPE_SIGNED;
+ } else {
+ chan_type = UTIL_FORMAT_TYPE_UNSIGNED;
+ }
+
+ for (chan = 0; chan < desc->nr_channels; ++chan) {
+ if (desc->channel[chan].size != type.width) {
+ return FALSE;
+ }
+
+ if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
+ if (desc->channel[chan].type != chan_type ||
+ desc->channel[chan].normalized != type.norm) {
+ return FALSE;
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+
+/**
* Unpack a single pixel into its RGBA components.
*
* @param desc the pixel format for the packed pixel value
* @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
*
- * @return RGBA in a 4 floats vector.
+ * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
*/
-LLVMValueRef
-lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
- const struct util_format_description *desc,
- LLVMValueRef packed)
+static INLINE LLVMValueRef
+lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ LLVMValueRef packed)
{
LLVMValueRef shifted, casted, scaled, masked;
LLVMValueRef shifts[4];
LLVMValueRef masks[4];
LLVMValueRef scales[4];
- LLVMValueRef swizzles[4];
- LLVMValueRef aux[4];
+
boolean normalized;
- int empty_channel;
boolean needs_uitofp;
unsigned shift;
unsigned i;
@@ -77,8 +166,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
/* Do the intermediate integer computations with 32bit integers since it
* matches floating point size */
- if (desc->block.bits < 32)
- packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
+ assert (LLVMTypeOf(packed) == LLVMInt32Type());
/* Broadcast the packed value to all four channels
* before: packed = BGRA
@@ -98,7 +186,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
/* Initialize vector constants */
normalized = FALSE;
needs_uitofp = FALSE;
- empty_channel = -1;
shift = 0;
/* Loop over 4 color components */
@@ -109,7 +196,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
shifts[i] = LLVMGetUndef(LLVMInt32Type());
masks[i] = LLVMConstNull(LLVMInt32Type());
scales[i] = LLVMConstNull(LLVMFloatType());
- empty_channel = i;
}
else {
unsigned long long mask = (1ULL << bits) - 1;
@@ -158,52 +244,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
else
scaled = casted;
- for (i = 0; i < 4; ++i)
- aux[i] = LLVMGetUndef(LLVMFloatType());
-
- /* Build swizzles vector to put components into R,G,B,A order */
- for (i = 0; i < 4; ++i) {
- enum util_format_swizzle swizzle;
-
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- /*
- * For ZS formats do RGBA = ZZZ1
- */
- if (i == 3) {
- swizzle = UTIL_FORMAT_SWIZZLE_1;
- } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
- swizzle = UTIL_FORMAT_SWIZZLE_0;
- } else {
- swizzle = desc->swizzle[0];
- }
- } else {
- swizzle = desc->swizzle[i];
- }
-
- switch (swizzle) {
- case UTIL_FORMAT_SWIZZLE_X:
- case UTIL_FORMAT_SWIZZLE_Y:
- case UTIL_FORMAT_SWIZZLE_Z:
- case UTIL_FORMAT_SWIZZLE_W:
- swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
- break;
- case UTIL_FORMAT_SWIZZLE_0:
- assert(empty_channel >= 0);
- swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
- break;
- case UTIL_FORMAT_SWIZZLE_1:
- swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
- aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
- break;
- case UTIL_FORMAT_SWIZZLE_NONE:
- swizzles[i] = LLVMGetUndef(LLVMFloatType());
- assert(0);
- break;
- }
- }
-
- return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4),
- LLVMConstVector(swizzles, 4), "");
+ return scaled;
}
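
Reviewer note: the arithmetic unpack path above, reduced to a scalar C model for one PIPE_FORMAT_B8G8R8A8_UNORM pixel. Swizzling into R,G,B,A order is now a separate step (lp_build_format_swizzle_aos); this only shows the shift/mask/scale part:

#include <stdint.h>

/* Shift, mask and normalize each 8-bit channel of a packed 32-bit pixel.
 * out[] comes back in the format's own channel order, not swizzled. */
static void unpack_unorm8888(uint32_t packed, float out[4])
{
   unsigned chan;
   for (chan = 0; chan < 4; ++chan) {
      unsigned shift = chan * 8;                  /* channel start bit */
      uint32_t bits  = (packed >> shift) & 0xff;  /* isolate the channel */
      out[chan] = (float)bits / 255.0f;           /* scale to [0,1] */
   }
}
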
@@ -310,22 +351,65 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
}
+
+
/**
* Fetch a pixel into a 4 float AoS.
*
* \param format_desc describes format of the image we're fetching from
* \param ptr address of the pixel block (or the texel if uncompressed)
* \param i, j the sub-block pixel coordinates. For non-compressed formats
- * these will always be (0,).
- * \return valueRef with the float[4] RGBA pixel
+ * these will always be (0, 0).
+ * \return a 4 element vector with the pixel's RGBA values.
*/
LLVMValueRef
lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
const struct util_format_description *format_desc,
- LLVMValueRef ptr,
+ struct lp_type type,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
LLVMValueRef i,
LLVMValueRef j)
{
+ unsigned num_pixels = type.length / 4;
+ struct lp_build_context bld;
+
+ assert(type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(type.length % 4 == 0);
+
+ lp_build_context_init(&bld, builder, type);
+
+ /*
+ * Trivial case
+ *
+ * The format matches the type (apart of a swizzle) so no need for
+ * scaling or converting.
+ */
+
+ if (format_matches_type(format_desc, type) &&
+ format_desc->block.bits <= type.width * 4 &&
+ util_is_pot(format_desc->block.bits)) {
+ LLVMValueRef packed;
+
+ /*
+ * The format matches the type (apart of a swizzle) so no need for
+ * scaling or converting.
+ */
+
+ packed = lp_build_gather(builder, type.length/4,
+ format_desc->block.bits, type.width*4,
+ base_ptr, offset);
+
+ assert(format_desc->block.bits <= type.width * type.length);
+
+ packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(type), "");
+
+ return lp_build_format_swizzle_aos(format_desc, &bld, packed);
+ }
+
+ /*
+ * Bit arithmetic
+ */
if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
@@ -337,21 +421,77 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
format_desc->is_bitmask &&
!format_desc->is_mixed &&
(format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
- format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
- {
- LLVMValueRef packed;
+ format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {
+
+ LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
+ LLVMValueRef res;
+ unsigned k;
+
+ /*
+ * Unpack a pixel at a time into a <4 x float> RGBA vector
+ */
+
+ for (k = 0; k < num_pixels; ++k) {
+ LLVMValueRef packed;
+
+ packed = lp_build_gather_elem(builder, num_pixels,
+ format_desc->block.bits, 32,
+ base_ptr, offset, k);
- ptr = LLVMBuildBitCast(builder, ptr,
- LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) ,
- "");
+ tmps[k] = lp_build_unpack_arith_rgba_aos(builder, format_desc,
+ packed);
+ }
+
+ /*
+ * Type conversion.
+ *
+ * TODO: We could avoid floating conversion for integer to
+ * integer conversions.
+ */
- packed = LLVMBuildLoad(builder, ptr, "packed");
+ lp_build_conv(builder,
+ lp_float32_vec4_type(),
+ type,
+ tmps, num_pixels, &res, 1);
- return lp_build_unpack_rgba_aos(builder, format_desc, packed);
+ return lp_build_format_swizzle_aos(format_desc, &bld, res);
}
- else if (format_desc->fetch_rgba_float) {
+
+ /*
+ * YUV / subsampled formats
+ */
+
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ struct lp_type tmp_type;
+ LLVMValueRef tmp;
+
+ memset(&tmp_type, 0, sizeof tmp_type);
+ tmp_type.width = 8;
+ tmp_type.length = num_pixels * 4;
+ tmp_type.norm = TRUE;
+
+ tmp = lp_build_fetch_subsampled_rgba_aos(builder,
+ format_desc,
+ num_pixels,
+ base_ptr,
+ offset,
+ i, j);
+
+ lp_build_conv(builder,
+ tmp_type, type,
+ &tmp, 1, &tmp, 1);
+
+ return tmp;
+ }
+
+ /*
+ * Fallback to util_format_description::fetch_rgba_8unorm().
+ */
+
+ if (format_desc->fetch_rgba_8unorm &&
+ !type.floating && type.width == 8 && !type.sign && type.norm) {
/*
- * Fallback to calling util_format_description::fetch_rgba_float.
+ * Fallback to calling util_format_description::fetch_rgba_8unorm.
*
* This is definitely not the most efficient way of fetching pixels, as
* we miss the opportunity to do vectorization, but this it is a
@@ -361,9 +501,113 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
char name[256];
+ LLVMTypeRef i8t = LLVMInt8Type();
+ LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
+ LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef function;
+ LLVMValueRef tmp_ptr;
LLVMValueRef tmp;
- LLVMValueRef args[4];
+ LLVMValueRef res;
+ unsigned k;
+
+ util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
+ format_desc->short_name);
+
+ /*
+ * Declare and bind format_desc->fetch_rgba_8unorm().
+ */
+
+ function = LLVMGetNamedFunction(module, name);
+ if (!function) {
+ LLVMTypeRef ret_type;
+ LLVMTypeRef arg_types[4];
+ LLVMTypeRef function_type;
+
+ ret_type = LLVMVoidType();
+ arg_types[0] = pi8t;
+ arg_types[1] = pi8t;
+ arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
+ function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
+ function = LLVMAddFunction(module, name, function_type);
+
+ LLVMSetFunctionCallConv(function, LLVMCCallConv);
+ LLVMSetLinkage(function, LLVMExternalLinkage);
+
+ assert(LLVMIsDeclaration(function));
+
+ LLVMAddGlobalMapping(lp_build_engine, function,
+ func_to_pointer((func_pointer)format_desc->fetch_rgba_8unorm));
+ }
+
+ tmp_ptr = lp_build_alloca(builder, i32t, "");
+
+ res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));
+
+ /*
+ * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
+ * in the SoA vectors.
+ */
+
+ for (k = 0; k < num_pixels; ++k) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
+ LLVMValueRef args[4];
+
+ args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
+ args[1] = lp_build_gather_elem_ptr(builder, num_pixels,
+ base_ptr, offset, k);
+
+ if (num_pixels == 1) {
+ args[2] = i;
+ args[3] = j;
+ }
+ else {
+ args[2] = LLVMBuildExtractElement(builder, i, index, "");
+ args[3] = LLVMBuildExtractElement(builder, j, index, "");
+ }
+
+ LLVMBuildCall(builder, function, args, Elements(args), "");
+
+ tmp = LLVMBuildLoad(builder, tmp_ptr, "");
+
+ if (num_pixels == 1) {
+ res = tmp;
+ }
+ else {
+ res = LLVMBuildInsertElement(builder, res, tmp, index, "");
+ }
+ }
+
+ /* Bitcast from <n x i32> to <4n x i8> */
+ res = LLVMBuildBitCast(builder, res, bld.vec_type, "");
+
+ return res;
+ }
+
+
+ /*
+ * Fallback to util_format_description::fetch_rgba_float().
+ */
+
+ if (format_desc->fetch_rgba_float) {
+ /*
+ * Fallback to calling util_format_description::fetch_rgba_float.
+ *
+ * This is definitely not the most efficient way of fetching pixels, as
+ * we miss the opportunity to do vectorization, but this it is a
+ * convenient for formats or scenarios for which there was no opportunity
+ * or incentive to optimize.
+ */
+
+ LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
+ char name[256];
+ LLVMTypeRef f32t = LLVMFloatType();
+ LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
+ LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
+ LLVMValueRef function;
+ LLVMValueRef tmp_ptr;
+ LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
+ LLVMValueRef res;
+ unsigned k;
util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
format_desc->short_name);
@@ -379,7 +623,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
LLVMTypeRef function_type;
ret_type = LLVMVoidType();
- arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
+ arg_types[0] = pf32t;
arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
@@ -394,25 +638,43 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
func_to_pointer((func_pointer)format_desc->fetch_rgba_float));
}
- tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
+ tmp_ptr = lp_build_alloca(builder, f32x4t, "");
/*
* Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
* in the SoA vectors.
*/
- args[0] = LLVMBuildBitCast(builder, tmp,
- LLVMPointerType(LLVMFloatType(), 0), "");
- args[1] = ptr;
- args[2] = i;
- args[3] = j;
+ for (k = 0; k < num_pixels; ++k) {
+ LLVMValueRef args[4];
- LLVMBuildCall(builder, function, args, Elements(args), "");
+ args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
+ args[1] = lp_build_gather_elem_ptr(builder, num_pixels,
+ base_ptr, offset, k);
- return LLVMBuildLoad(builder, tmp, "");
- }
- else {
- assert(0);
- return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
+ if (num_pixels == 1) {
+ args[2] = i;
+ args[3] = j;
+ }
+ else {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
+ args[2] = LLVMBuildExtractElement(builder, i, index, "");
+ args[3] = LLVMBuildExtractElement(builder, j, index, "");
+ }
+
+ LLVMBuildCall(builder, function, args, Elements(args), "");
+
+ tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
+ }
+
+ lp_build_conv(builder,
+ lp_float32_vec4_type(),
+ type,
+ tmps, num_pixels, &res, 1);
+
+ return res;
}
+
+ assert(0);
+ return lp_build_undef(type);
}
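
Reviewer note: a scalar sketch of the per-pixel fallback loop added above. The fetch callback and names are stand-ins for util_format_description::fetch_rgba_8unorm, not the real gallivm code, which emits this loop as IR:

#include <stdint.h>
#include <string.h>

/* Illustrative stand-in for util_format_description::fetch_rgba_8unorm. */
typedef void (*fetch_8unorm_fn)(uint8_t *dst, const uint8_t *src,
                                unsigned i, unsigned j);

/* Call the per-pixel fetch for each gathered address and pack the returned
 * 4 x u8 colors back to back, the way the generated code packs them into a
 * single <4n x i8> vector. */
static void fetch_pixels_fallback(fetch_8unorm_fn fetch,
                                  const uint8_t *base_ptr,
                                  const uint32_t *offsets,
                                  const unsigned *i, const unsigned *j,
                                  unsigned num_pixels,
                                  uint8_t *rgba_out)
{
   unsigned k;
   for (k = 0; k < num_pixels; ++k) {
      uint8_t tmp[4];
      fetch(tmp, base_ptr + offsets[k], i[k], j[k]);
      memcpy(rgba_out + 4 * k, tmp, sizeof tmp);
   }
}
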
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index e1b94adc85..9f405921b0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -36,7 +36,7 @@
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_swizzle.h"
-#include "lp_bld_sample.h" /* for lp_build_gather */
+#include "lp_bld_gather.h"
#include "lp_bld_format.h"
@@ -251,6 +251,41 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
}
+void
+lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
+ struct lp_type dst_type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba)
+{
+ LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
+ unsigned chan;
+
+ packed = LLVMBuildBitCast(builder, packed,
+ lp_build_int_vec_type(dst_type), "");
+
+ /* Decode the input vector components */
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned start = chan*8;
+ unsigned stop = start + 8;
+ LLVMValueRef input;
+
+ input = packed;
+
+ if (start)
+ input = LLVMBuildLShr(builder, input,
+ lp_build_const_int_vec(dst_type, start), "");
+
+ if (stop < 32)
+ input = LLVMBuildAnd(builder, input, mask, "");
+
+ input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
+
+ rgba[chan] = input;
+ }
+}
+
+
+
/**
* Fetch a texels from a texture, returning them in SoA layout.
*
@@ -311,20 +346,49 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
format_desc,
type,
packed, rgba_out);
+ return;
}
- else {
- /*
- * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
- *
- * This is not the most efficient way of fetching pixels, as we
- * miss some opportunities to do vectorization, but this is
- * convenient for formats or scenarios for which there was no
- * opportunity or incentive to optimize.
- */
+ /*
+ * Try calling lp_build_fetch_rgba_aos for all pixels.
+ */
+
+ if (util_format_fits_8unorm(format_desc) &&
+ type.floating && type.width == 32 && type.length == 4) {
+ struct lp_type tmp_type;
+ LLVMValueRef tmp;
+
+ memset(&tmp_type, 0, sizeof tmp_type);
+ tmp_type.width = 8;
+ tmp_type.length = type.length * 4;
+ tmp_type.norm = TRUE;
+
+ tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type,
+ base_ptr, offset, i, j);
+
+ lp_build_rgba8_to_f32_soa(builder,
+ type,
+ tmp,
+ rgba_out);
+
+ return;
+ }
+
+ /*
+ * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
+ *
+ * This is not the most efficient way of fetching pixels, as we
+ * miss some opportunities to do vectorization, but this is
+ * convenient for formats or scenarios for which there was no
+ * opportunity or incentive to optimize.
+ */
+
+ {
unsigned k, chan;
+ struct lp_type tmp_type;
- assert(type.floating);
+ tmp_type = type;
+ tmp_type.length = 4;
for (chan = 0; chan < 4; ++chan) {
rgba_out[chan] = lp_build_undef(type);
@@ -334,18 +398,17 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
for(k = 0; k < type.length; ++k) {
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
LLVMValueRef offset_elem;
- LLVMValueRef ptr;
LLVMValueRef i_elem, j_elem;
LLVMValueRef tmp;
offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
- ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
i_elem = LLVMBuildExtractElement(builder, i, index, "");
j_elem = LLVMBuildExtractElement(builder, j, index, "");
/* Get a single float[4]={R,G,B,A} pixel */
- tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr,
+ tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type,
+ base_ptr, offset_elem,
i_elem, j_elem);
/*
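
Reviewer note: lp_build_rgba8_to_f32_soa() above, restated as a scalar C model. Each packed 32-bit pixel contributes one byte to each of the four output channels, giving structure-of-arrays layout (channel order follows however the bytes were packed; R sits in the low byte here):

#include <stdint.h>

static void rgba8_to_f32_soa(const uint32_t *packed, unsigned n,
                             float *r, float *g, float *b, float *a)
{
   float *chan[4] = { r, g, b, a };
   unsigned c, k;

   for (c = 0; c < 4; ++c) {
      unsigned start = c * 8;                       /* bit offset of channel c */
      for (k = 0; k < n; ++k) {
         uint32_t bits = (packed[k] >> start) & 0xff;
         chan[c][k] = (float)bits / 255.0f;         /* unsigned norm -> float */
      }
   }
}
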
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
new file mode 100644
index 0000000000..0a5038bc98
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
@@ -0,0 +1,399 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * YUV pixel format manipulation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_format.h"
+
+#include "lp_bld_arit.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_gather.h"
+#include "lp_bld_format.h"
+
+
+/**
+ * Extract Y, U, V channels from packed UYVY.
+ * @param packed is a <n x i32> vector with the packed UYVY blocks
+ * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
+ */
+static void
+uyvy_to_yuv_soa(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i,
+ LLVMValueRef *y,
+ LLVMValueRef *u,
+ LLVMValueRef *v)
+{
+ struct lp_type type;
+ LLVMValueRef shift, mask;
+
+ memset(&type, 0, sizeof type);
+ type.width = 32;
+ type.length = n;
+
+ assert(lp_check_value(type, packed));
+ assert(lp_check_value(type, i));
+
+ /*
+ * y = (uyvy >> 16*i) & 0xff
+ * u = (uyvy ) & 0xff
+ * v = (uyvy >> 16 ) & 0xff
+ */
+
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
+ shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
+ *y = LLVMBuildLShr(builder, packed, shift, "");
+ *u = packed;
+ *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
+
+ mask = lp_build_const_int_vec(type, 0xff);
+
+ *y = LLVMBuildAnd(builder, *y, mask, "y");
+ *u = LLVMBuildAnd(builder, *u, mask, "u");
+ *v = LLVMBuildAnd(builder, *v, mask, "v");
+}
+
+
+/**
+ * Extract Y, U, V channels from packed YUYV.
+ * @param packed is a <n x i32> vector with the packed YUYV blocks
+ * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
+ */
+static void
+yuyv_to_yuv_soa(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i,
+ LLVMValueRef *y,
+ LLVMValueRef *u,
+ LLVMValueRef *v)
+{
+ struct lp_type type;
+ LLVMValueRef shift, mask;
+
+ memset(&type, 0, sizeof type);
+ type.width = 32;
+ type.length = n;
+
+ assert(lp_check_value(type, packed));
+ assert(lp_check_value(type, i));
+
+ /*
+ * y = (yuyv >> 16*i) & 0xff
+ * u = (yuyv >> 8 ) & 0xff
+ * v = (yuyv >> 24 ) & 0xff
+ */
+
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
+ *y = LLVMBuildLShr(builder, packed, shift, "");
+ *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
+ *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), "");
+
+ mask = lp_build_const_int_vec(type, 0xff);
+
+ *y = LLVMBuildAnd(builder, *y, mask, "y");
+ *u = LLVMBuildAnd(builder, *u, mask, "u");
+ *v = LLVMBuildAnd(builder, *v, mask, "v");
+}
+
+
+static INLINE void
+yuv_to_rgb_soa(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
+ LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
+{
+ struct lp_type type;
+ struct lp_build_context bld;
+
+ LLVMValueRef c0;
+ LLVMValueRef c8;
+ LLVMValueRef c16;
+ LLVMValueRef c128;
+ LLVMValueRef c255;
+
+ LLVMValueRef cy;
+ LLVMValueRef cug;
+ LLVMValueRef cub;
+ LLVMValueRef cvr;
+ LLVMValueRef cvg;
+
+ memset(&type, 0, sizeof type);
+ type.sign = TRUE;
+ type.width = 32;
+ type.length = n;
+
+ lp_build_context_init(&bld, builder, type);
+
+ assert(lp_check_value(type, y));
+ assert(lp_check_value(type, u));
+ assert(lp_check_value(type, v));
+
+ /*
+ * Constants
+ */
+
+ c0 = lp_build_const_int_vec(type, 0);
+ c8 = lp_build_const_int_vec(type, 8);
+ c16 = lp_build_const_int_vec(type, 16);
+ c128 = lp_build_const_int_vec(type, 128);
+ c255 = lp_build_const_int_vec(type, 255);
+
+ cy = lp_build_const_int_vec(type, 298);
+ cug = lp_build_const_int_vec(type, -100);
+ cub = lp_build_const_int_vec(type, 516);
+ cvr = lp_build_const_int_vec(type, 409);
+ cvg = lp_build_const_int_vec(type, -208);
+
+ /*
+ * y -= 16;
+ * u -= 128;
+ * v -= 128;
+ */
+
+ y = LLVMBuildSub(builder, y, c16, "");
+ u = LLVMBuildSub(builder, u, c128, "");
+ v = LLVMBuildSub(builder, v, c128, "");
+
+ /*
+ * r = 298 * _y + 409 * _v + 128;
+ * g = 298 * _y - 100 * _u - 208 * _v + 128;
+ * b = 298 * _y + 516 * _u + 128;
+ */
+
+ y = LLVMBuildMul(builder, y, cy, "");
+ y = LLVMBuildAdd(builder, y, c128, "");
+
+ *r = LLVMBuildMul(builder, v, cvr, "");
+ *g = LLVMBuildAdd(builder,
+ LLVMBuildMul(builder, u, cug, ""),
+ LLVMBuildMul(builder, v, cvg, ""),
+ "");
+ *b = LLVMBuildMul(builder, u, cub, "");
+
+ *r = LLVMBuildAdd(builder, *r, y, "");
+ *g = LLVMBuildAdd(builder, *g, y, "");
+ *b = LLVMBuildAdd(builder, *b, y, "");
+
+ /*
+ * r >>= 8;
+ * g >>= 8;
+ * b >>= 8;
+ */
+
+ *r = LLVMBuildAShr(builder, *r, c8, "r");
+ *g = LLVMBuildAShr(builder, *g, c8, "g");
+ *b = LLVMBuildAShr(builder, *b, c8, "b");
+
+ /*
+ * Clamp
+ */
+
+ *r = lp_build_clamp(&bld, *r, c0, c255);
+ *g = lp_build_clamp(&bld, *g, c0, c255);
+ *b = lp_build_clamp(&bld, *b, c0, c255);
+}
+
+
+static LLVMValueRef
+rgb_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
+{
+ struct lp_type type;
+ LLVMValueRef a;
+ LLVMValueRef rgba;
+
+ memset(&type, 0, sizeof type);
+ type.sign = TRUE;
+ type.width = 32;
+ type.length = n;
+
+ assert(lp_check_value(type, r));
+ assert(lp_check_value(type, g));
+ assert(lp_check_value(type, b));
+
+ /*
+ * Make a 4 x unorm8 vector
+ */
+
+ r = r;
+ g = LLVMBuildShl(builder, g, lp_build_const_int_vec(type, 8), "");
+ b = LLVMBuildShl(builder, b, lp_build_const_int_vec(type, 16), "");
+ a = lp_build_const_int_vec(type, 0xff000000);
+
+ rgba = r;
+ rgba = LLVMBuildOr(builder, rgba, g, "");
+ rgba = LLVMBuildOr(builder, rgba, b, "");
+ rgba = LLVMBuildOr(builder, rgba, a, "");
+
+ rgba = LLVMBuildBitCast(builder, rgba,
+ LLVMVectorType(LLVMInt8Type(), 4*n), "");
+
+ return rgba;
+}
+
+
+/**
+ * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
+ */
+static LLVMValueRef
+uyvy_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef y, u, v;
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ uyvy_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
+ yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
+ rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+
+ return rgba;
+}
+
+
+/**
+ * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
+ */
+static LLVMValueRef
+yuyv_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef y, u, v;
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ yuyv_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
+ yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
+ rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+
+ return rgba;
+}
+
+
+/**
+ * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
+ */
+static LLVMValueRef
+rgbg_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ uyvy_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
+ rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+
+ return rgba;
+}
+
+
+/**
+ * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
+ */
+static LLVMValueRef
+grgb_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ yuyv_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
+ rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+
+ return rgba;
+}
+
+
+/**
+ * @param n is the number of pixels processed
+ * @param packed is a <n x i32> vector with the packed YUYV blocks
+ * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
+ * @return a <4*n x i8> vector with the pixel RGBA values in AoS
+ */
+LLVMValueRef
+lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ unsigned n,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
+ LLVMValueRef i,
+ LLVMValueRef j)
+{
+ LLVMValueRef packed;
+ LLVMValueRef rgba;
+
+ assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
+ assert(format_desc->block.bits == 32);
+ assert(format_desc->block.width == 2);
+ assert(format_desc->block.height == 1);
+
+ packed = lp_build_gather(builder, n, 32, 32, base_ptr, offset);
+
+ (void)j;
+
+ switch (format_desc->format) {
+ case PIPE_FORMAT_UYVY:
+ rgba = uyvy_to_rgba_aos(builder, n, packed, i);
+ break;
+ case PIPE_FORMAT_YUYV:
+ rgba = yuyv_to_rgba_aos(builder, n, packed, i);
+ break;
+ case PIPE_FORMAT_R8G8_B8G8_UNORM:
+ rgba = rgbg_to_rgba_aos(builder, n, packed, i);
+ break;
+ case PIPE_FORMAT_G8R8_G8B8_UNORM:
+ rgba = grgb_to_rgba_aos(builder, n, packed, i);
+ break;
+ default:
+ assert(0);
+ rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8Type(), 4*n));
+ break;
+ }
+
+ return rgba;
+}
+
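
Reviewer note: the integer YUV-to-RGB math in yuv_to_rgb_soa(), checked as a scalar C program. These are the usual BT.601-style fixed-point coefficients with an 8-bit shift and a +128 rounding term, matching the code above:

#include <stdio.h>

static int clamp255(int x) { return x < 0 ? 0 : (x > 255 ? 255 : x); }

static void yuv_to_rgb(int y, int u, int v, int *r, int *g, int *b)
{
   int ybias;

   y -= 16;
   u -= 128;
   v -= 128;

   ybias = 298 * y + 128;   /* shared by all three channels */

   *r = clamp255((ybias + 409 * v) >> 8);
   *g = clamp255((ybias - 100 * u - 208 * v) >> 8);
   *b = clamp255((ybias + 516 * u) >> 8);
}

int main(void)
{
   int r, g, b;
   yuv_to_rgb(235, 128, 128, &r, &g, &b);   /* video white -> (255, 255, 255) */
   printf("%d %d %d\n", r, g, b);
   return 0;
}
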
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
new file mode 100644
index 0000000000..d60472e065
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
@@ -0,0 +1,148 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#include "util/u_debug.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_const.h"
+#include "lp_bld_format.h"
+#include "lp_bld_gather.h"
+
+
+/**
+ * Get the pointer to one element from scatter positions in memory.
+ *
+ * @sa lp_build_gather()
+ */
+LLVMValueRef
+lp_build_gather_elem_ptr(LLVMBuilderRef builder,
+ unsigned length,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ unsigned i)
+{
+ LLVMValueRef offset;
+ LLVMValueRef ptr;
+
+ assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8Type(), 0));
+
+ if (length == 1) {
+ assert(i == 0);
+ offset = offsets;
+ } else {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ offset = LLVMBuildExtractElement(builder, offsets, index, "");
+ }
+
+ ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
+
+ return ptr;
+}
+
+
+/**
+ * Gather one element from scatter positions in memory.
+ *
+ * @sa lp_build_gather()
+ */
+LLVMValueRef
+lp_build_gather_elem(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ unsigned i)
+{
+ LLVMTypeRef src_type = LLVMIntType(src_width);
+ LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
+ LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+ LLVMValueRef ptr;
+ LLVMValueRef res;
+
+ assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8Type(), 0));
+
+ ptr = lp_build_gather_elem_ptr(builder, length, base_ptr, offsets, i);
+ ptr = LLVMBuildBitCast(builder, ptr, src_ptr_type, "");
+ res = LLVMBuildLoad(builder, ptr, "");
+
+ assert(src_width <= dst_width);
+ if (src_width > dst_width)
+ res = LLVMBuildTrunc(builder, res, dst_elem_type, "");
+ if (src_width < dst_width)
+ res = LLVMBuildZExt(builder, res, dst_elem_type, "");
+
+ return res;
+}
+
+
+/**
+ * Gather elements from scatter positions in memory into a single vector.
+ * Use for fetching texels from a texture.
+ * For SSE, typical values are length=4, src_width=32, dst_width=32.
+ *
+ * @param length length of the offsets
+ * @param src_width src element width in bits
+ * @param dst_width result element width in bits (src will be expanded to fit)
+ * @param base_ptr base pointer, should be a i8 pointer type.
+ * @param offsets vector with offsets
+ */
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets)
+{
+ LLVMValueRef res;
+
+ if (length == 1) {
+ /* Scalar */
+ return lp_build_gather_elem(builder, length,
+ src_width, dst_width,
+ base_ptr, offsets, 0);
+ } else {
+ /* Vector */
+
+ LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+ LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
+ unsigned i;
+
+ res = LLVMGetUndef(dst_vec_type);
+ for (i = 0; i < length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef elem;
+ elem = lp_build_gather_elem(builder, length,
+ src_width, dst_width,
+ base_ptr, offsets, i);
+ res = LLVMBuildInsertElement(builder, res, elem, index, "");
+ }
+ }
+
+ return res;
+}
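
Reviewer note: what lp_build_gather() amounts to, as a scalar C model for the common 32-bit texel case (the real code also handles widening from narrower source widths):

#include <stdint.h>
#include <string.h>

/* For each lane, load one 32-bit value from base_ptr + offsets[lane]. */
static void gather32(const uint8_t *base_ptr,
                     const uint32_t *offsets,
                     unsigned length,
                     uint32_t *result)
{
   unsigned lane;
   for (lane = 0; lane < length; ++lane) {
      uint32_t texel;
      memcpy(&texel, base_ptr + offsets[lane], sizeof texel);  /* alignment-safe load */
      result[lane] = texel;
   }
}
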
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.h b/src/gallium/auxiliary/gallivm/lp_bld_gather.h
new file mode 100644
index 0000000000..131af8ea07
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.h
@@ -0,0 +1,61 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_BLD_GATHER_H_
+#define LP_BLD_GATHER_H_
+
+
+#include "gallivm/lp_bld.h"
+
+
+LLVMValueRef
+lp_build_gather_elem_ptr(LLVMBuilderRef builder,
+ unsigned length,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ unsigned i);
+
+LLVMValueRef
+lp_build_gather_elem(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ unsigned i);
+
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets);
+
+
+#endif /* LP_BLD_GATHER_H_ */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 44cfdc4d3f..69353dea09 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -32,6 +32,8 @@
#include "lp_bld_debug.h"
#include "lp_bld_init.h"
+#include <llvm-c/Transforms/Scalar.h>
+
#ifdef DEBUG
unsigned gallivm_debug = 0;
@@ -50,6 +52,7 @@ LLVMModuleRef lp_build_module = NULL;
LLVMExecutionEngineRef lp_build_engine = NULL;
LLVMModuleProviderRef lp_build_provider = NULL;
LLVMTargetDataRef lp_build_target = NULL;
+LLVMPassManagerRef lp_build_pass = NULL;
/*
@@ -127,6 +130,33 @@ lp_build_init(void)
if (!lp_build_target)
lp_build_target = LLVMGetExecutionEngineTargetData(lp_build_engine);
+ if (!lp_build_pass) {
+ lp_build_pass = LLVMCreateFunctionPassManager(lp_build_provider);
+ LLVMAddTargetData(lp_build_target, lp_build_pass);
+
+ if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
+ /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+ * but there are more on SVN. */
+ /* TODO: Add more passes */
+ LLVMAddCFGSimplificationPass(lp_build_pass);
+ LLVMAddPromoteMemoryToRegisterPass(lp_build_pass);
+ LLVMAddConstantPropagationPass(lp_build_pass);
+ if(util_cpu_caps.has_sse4_1) {
+ /* FIXME: There is a bug in this pass, whereby the combination of fptosi
+ * and sitofp (necessary for trunc/floor/ceil/round implementation)
+ * somehow becomes invalid code.
+ */
+ LLVMAddInstructionCombiningPass(lp_build_pass);
+ }
+ LLVMAddGVNPass(lp_build_pass);
+ } else {
+ /* We need at least this pass to prevent the backends to fail in
+ * unexpected ways.
+ */
+ LLVMAddPromoteMemoryToRegisterPass(lp_build_pass);
+ }
+ }
+
util_cpu_detect();
#if 0
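
Reviewer note: for context, a sketch (not part of this patch) of how a function pass manager such as lp_build_pass is typically driven once functions have been built; 'funcs' and 'count' are illustrative placeholders, and the usual lp_bld_init.h / LLVM-C includes are assumed:

/* Initialize once, run the passes added above on each function, finalize. */
static void optimize_functions(LLVMValueRef *funcs, unsigned count)
{
   unsigned i;

   LLVMInitializeFunctionPassManager(lp_build_pass);
   for (i = 0; i < count; ++i)
      LLVMRunFunctionPassManager(lp_build_pass, funcs[i]);
   LLVMFinalizeFunctionPassManager(lp_build_pass);
}
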
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h
index 0ec2afcd1b..a32ced9b4c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h
@@ -38,6 +38,7 @@ extern LLVMModuleRef lp_build_module;
extern LLVMExecutionEngineRef lp_build_engine;
extern LLVMModuleProviderRef lp_build_provider;
extern LLVMTargetDataRef lp_build_target;
+extern LLVMPassManagerRef lp_build_pass;
void
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index d13fa1a5d0..39854e43b1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -34,6 +34,7 @@
#include "util/u_cpu_detect.h"
+#include "util/u_memory.h"
#include "util/u_debug.h"
#include "lp_bld_type.h"
@@ -187,12 +188,10 @@ lp_build_compare(LLVMBuilderRef builder,
return lp_build_undef(type);
}
- /* There are no signed byte and unsigned word/dword comparison
- * instructions. So flip the sign bit so that the results match.
+ /* There are no unsigned comparison instructions. So flip the sign bit
+ * so that the results match.
*/
- if(table[func].gt &&
- ((type.width == 8 && type.sign) ||
- (type.width != 8 && !type.sign))) {
+ if (table[func].gt && !type.sign) {
LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
a = LLVMBuildXor(builder, a, msb, "");
b = LLVMBuildXor(builder, b, msb, "");
@@ -384,6 +383,46 @@ lp_build_select(struct lp_build_context *bld,
mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), "");
res = LLVMBuildSelect(bld->builder, mask, a, b, "");
}
+ else if (util_cpu_caps.has_sse4_1 &&
+ type.width * type.length == 128 &&
+ !LLVMIsConstant(a) &&
+ !LLVMIsConstant(b) &&
+ !LLVMIsConstant(mask)) {
+ const char *intrinsic;
+ LLVMTypeRef arg_type;
+ LLVMValueRef args[3];
+
+ if (type.width == 64) {
+ intrinsic = "llvm.x86.sse41.blendvpd";
+ arg_type = LLVMVectorType(LLVMDoubleType(), 2);
+ } else if (type.width == 32) {
+ intrinsic = "llvm.x86.sse41.blendvps";
+ arg_type = LLVMVectorType(LLVMFloatType(), 4);
+ } else {
+ intrinsic = "llvm.x86.sse41.pblendvb";
+ arg_type = LLVMVectorType(LLVMInt8Type(), 16);
+ }
+
+ if (arg_type != bld->int_vec_type) {
+ mask = LLVMBuildBitCast(bld->builder, mask, arg_type, "");
+ }
+
+ if (arg_type != bld->vec_type) {
+ a = LLVMBuildBitCast(bld->builder, a, arg_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, arg_type, "");
+ }
+
+ args[0] = b;
+ args[1] = a;
+ args[2] = mask;
+
+ res = lp_build_intrinsic(bld->builder, intrinsic,
+ arg_type, args, Elements(args));
+
+ if (arg_type != bld->vec_type) {
+ res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, "");
+ }
+ }
else {
if(type.floating) {
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
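
Reviewer note: the sign-bit flip that lp_build_compare uses above to get unsigned comparisons out of signed compare instructions can be verified with a small scalar model:

#include <assert.h>
#include <stdint.h>

/* XORing the sign bit maps the unsigned ordering 0..255 onto the signed
 * ordering -128..127, so a signed '>' after the flip equals unsigned '>'. */
static int unsigned_gt_via_signed(uint8_t a, uint8_t b)
{
   int8_t sa = (int8_t)(a ^ 0x80);
   int8_t sb = (int8_t)(b ^ 0x80);
   return sa > sb;
}

int main(void)
{
   unsigned a, b;
   for (a = 0; a < 256; ++a)
      for (b = 0; b < 256; ++b)
         assert(unsigned_gt_via_signed((uint8_t)a, (uint8_t)b) == (a > b));
   return 0;
}
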
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index 186f8849b8..7748f8f099 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -427,3 +427,123 @@ lp_build_pack(LLVMBuilderRef builder,
return tmp[0];
}
+
+
+/**
+ * Truncate or expand the bitwidth.
+ *
+ * NOTE: Getting the right sign flags is crucial here, as we employ some
+ * intrinsics that do saturation.
+ */
+void
+lp_build_resize(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ const LLVMValueRef *src, unsigned num_srcs,
+ LLVMValueRef *dst, unsigned num_dsts)
+{
+ LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+
+ /*
+ * We don't support float <-> int conversion here. That must be done
+ * before/after calling this function.
+ */
+ assert(src_type.floating == dst_type.floating);
+
+ /*
+ * We don't support double <-> float conversion yet, although it could be
+ * added with little effort.
+ */
+ assert((!src_type.floating && !dst_type.floating) ||
+ src_type.width == dst_type.width);
+
+ /* We must not loose or gain channels. Only precision */
+ assert(src_type.length * num_srcs == dst_type.length * num_dsts);
+
+ /* We don't support M:N conversion, only 1:N, M:1, or 1:1 */
+ assert(num_srcs == 1 || num_dsts == 1);
+
+ assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
+ assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
+
+ if (src_type.width > dst_type.width) {
+ /*
+ * Truncate bit width.
+ */
+
+ assert(num_dsts == 1);
+
+ if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
+ /*
+ * Register width remains constant -- use vector packing intrinsics
+ */
+
+ tmp[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);
+ }
+ else {
+ /*
+ * Do it element-wise.
+ */
+
+ assert(src_type.length == dst_type.length);
+ tmp[0] = lp_build_undef(dst_type);
+ for (i = 0; i < dst_type.length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, "");
+ val = LLVMBuildTrunc(builder, val, lp_build_elem_type(dst_type), "");
+ tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, "");
+ }
+ }
+ }
+ else if (src_type.width < dst_type.width) {
+ /*
+ * Expand bit width.
+ */
+
+ assert(num_srcs == 1);
+
+ if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
+ /*
+ * Register width remains constant -- use vector unpack intrinsics
+ */
+ lp_build_unpack(builder, src_type, dst_type, src[0], tmp, num_dsts);
+ }
+ else {
+ /*
+ * Do it element-wise.
+ */
+
+ assert(src_type.length == dst_type.length);
+ tmp[0] = lp_build_undef(dst_type);
+ for (i = 0; i < dst_type.length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, "");
+
+ if (src_type.sign && dst_type.sign) {
+ val = LLVMBuildSExt(builder, val, lp_build_elem_type(dst_type), "");
+ } else {
+ val = LLVMBuildZExt(builder, val, lp_build_elem_type(dst_type), "");
+ }
+ tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, "");
+ }
+ }
+ }
+ else {
+ /*
+ * No-op
+ */
+
+ assert(num_srcs == 1);
+ assert(num_dsts == 1);
+
+ tmp[0] = src[0];
+ }
+
+ for(i = 0; i < num_dsts; ++i)
+ dst[i] = tmp[i];
+}
+
+
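
Reviewer note: a scalar model of the element-wise widening branch of lp_build_resize() above; values are unchanged, only the bit width grows, and the source/destination signs decide between sign- and zero-extension:

#include <stdint.h>

static int32_t widen_16_to_32(uint16_t bits, int src_signed, int dst_signed)
{
   if (src_signed && dst_signed)
      return (int32_t)(int16_t)bits;   /* SExt, like LLVMBuildSExt */
   else
      return (int32_t)bits;            /* ZExt, like LLVMBuildZExt */
}
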
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
index 41adeed220..e470082b97 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -92,4 +92,12 @@ lp_build_pack(LLVMBuilderRef builder,
const LLVMValueRef *src, unsigned num_srcs);
+void
+lp_build_resize(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ const LLVMValueRef *src, unsigned num_srcs,
+ LLVMValueRef *dst, unsigned num_dsts);
+
+
#endif /* !LP_BLD_PACK_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
index 38fd5a39ef..ca36046d22 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
@@ -61,8 +61,8 @@ LLVMValueRef
lp_build_ddx(struct lp_build_context *bld,
LLVMValueRef a)
{
- LLVMValueRef a_left = lp_build_swizzle1_aos(bld, a, swizzle_left);
- LLVMValueRef a_right = lp_build_swizzle1_aos(bld, a, swizzle_right);
+ LLVMValueRef a_left = lp_build_swizzle_aos(bld, a, swizzle_left);
+ LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right);
return lp_build_sub(bld, a_right, a_left);
}
@@ -71,8 +71,8 @@ LLVMValueRef
lp_build_ddy(struct lp_build_context *bld,
LLVMValueRef a)
{
- LLVMValueRef a_top = lp_build_swizzle1_aos(bld, a, swizzle_top);
- LLVMValueRef a_bottom = lp_build_swizzle1_aos(bld, a, swizzle_bottom);
+ LLVMValueRef a_top = lp_build_swizzle_aos(bld, a, swizzle_top);
+ LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom);
return lp_build_sub(bld, a_bottom, a_top);
}
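
As background for the rename above: lp_build_ddx()/lp_build_ddy() compute screen-space derivatives by differencing neighbouring pixels inside a 2x2 quad. Here is a scalar model of one such difference, assuming (for illustration only) that the quad is stored as [top-left, top-right, bottom-left, bottom-right]:

#include <stdio.h>

/* One 2x2 quad of a shader input, stored as four floats.  ddx takes the
 * difference between horizontal neighbours, ddy between vertical ones;
 * the vector code does the same thing with swizzles and a subtract.
 */
static float quad_ddx(const float q[4]) { return q[1] - q[0]; }
static float quad_ddy(const float q[4]) { return q[2] - q[0]; }

int main(void)
{
   const float s[4] = { 0.10f, 0.35f, 0.20f, 0.45f };
   printf("ddx = %g, ddy = %g\n", quad_ddx(s), quad_ddy(s));  /* 0.25, 0.1 */
   return 0;
}
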
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 946c23e317..0fd014ab9b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -40,7 +40,6 @@
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_type.h"
-#include "lp_bld_format.h"
#include "lp_bld_sample.h"
@@ -125,73 +124,53 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
/**
- * Gather elements from scatter positions in memory into a single vector.
- * Use for fetching texels from a texture.
- * For SSE, typical values are length=4, src_width=32, dst_width=32.
- *
- * @param length length of the offsets
- * @param src_width src element width in bits
- * @param dst_width result element width in bits (src will be expanded to fit)
- * @param base_ptr base pointer, should be a i8 pointer type.
- * @param offsets vector with offsets
- */
-LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
- unsigned length,
- unsigned src_width,
- unsigned dst_width,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets)
-{
- LLVMTypeRef src_type = LLVMIntType(src_width);
- LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
- LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
- LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
- LLVMValueRef res;
- unsigned i;
-
- res = LLVMGetUndef(dst_vec_type);
- for(i = 0; i < length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef elem_offset;
- LLVMValueRef elem_ptr;
- LLVMValueRef elem;
-
- elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
- elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
- elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
- elem = LLVMBuildLoad(builder, elem_ptr, "");
-
- assert(src_width <= dst_width);
- if(src_width > dst_width)
- elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
- if(src_width < dst_width)
- elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
-
- res = LLVMBuildInsertElement(builder, res, elem, index, "");
- }
-
- return res;
-}
-
-
-/**
* Compute the offset of a pixel block.
*
- * x, y, z, y_stride, z_stride are vectors, and they refer to pixel blocks, as
- * per format description, and not individual pixels.
+ * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
+ *
+ * Returns the relative offset and i,j sub-block coordinates
*/
-LLVMValueRef
+void
lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,
LLVMValueRef x,
LLVMValueRef y,
LLVMValueRef z,
LLVMValueRef y_stride,
- LLVMValueRef z_stride)
+ LLVMValueRef z_stride,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i,
+ LLVMValueRef *out_j)
{
LLVMValueRef x_stride;
LLVMValueRef offset;
+ LLVMValueRef i;
+ LLVMValueRef j;
+
+ /*
+ * Describe the coordinates in terms of pixel blocks.
+ *
+ * TODO: pixel block dimensions are powers of two. LLVM should convert rem/div to
+ * bit arithmetic. Verify this.
+ */
+
+ if (format_desc->block.width == 1) {
+ i = bld->zero;
+ }
+ else {
+ LLVMValueRef block_width = lp_build_const_int_vec(bld->type, format_desc->block.width);
+ i = LLVMBuildURem(bld->builder, x, block_width, "");
+ x = LLVMBuildUDiv(bld->builder, x, block_width, "");
+ }
+
+ if (format_desc->block.height == 1) {
+ j = bld->zero;
+ }
+ else {
+ LLVMValueRef block_height = lp_build_const_int_vec(bld->type, format_desc->block.height);
+ j = LLVMBuildURem(bld->builder, y, block_height, "");
+ y = LLVMBuildUDiv(bld->builder, y, block_height, "");
+ }
x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8);
offset = lp_build_mul(bld, x, x_stride);
@@ -206,5 +185,7 @@ lp_build_sample_offset(struct lp_build_context *bld,
offset = lp_build_add(bld, offset, z_offset);
}
- return offset;
+ *out_offset = offset;
+ *out_i = i;
+ *out_j = j;
}
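
One lane of the computation above can be checked with plain integer arithmetic. A small C sketch using hypothetical block parameters (4x4 blocks, 64 bits per block, a made-up row stride), not values taken from the patch:

#include <stdio.h>

/* Split a pixel coordinate into a block coordinate plus an intra-block
 * (i,j) coordinate, then compute the byte offset of that block, as one
 * lane of lp_build_sample_offset() does.
 */
static void sample_offset(unsigned x, unsigned y,
                          unsigned block_w, unsigned block_h,
                          unsigned block_bits, unsigned row_stride,
                          unsigned *offset, unsigned *i, unsigned *j)
{
   *i = x % block_w;                    /* URem */
   *j = y % block_h;
   x /= block_w;                        /* UDiv */
   y /= block_h;
   *offset = x * (block_bits / 8)       /* x_stride = block.bits/8 */
           + y * row_stride;            /* row_stride in bytes per block row */
}

int main(void)
{
   unsigned offset, i, j;
   sample_offset(13, 6, 4, 4, 64, 256, &offset, &i, &j);
   printf("offset=%u i=%u j=%u\n", offset, i, j);   /* offset=280 i=1 j=2 */
   return 0;
}
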
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 51e98ab2f9..5b8f478094 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -146,23 +146,17 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_sampler_state *sampler);
-LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
- unsigned length,
- unsigned src_width,
- unsigned dst_width,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets);
-
-
-LLVMValueRef
+void
lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,
LLVMValueRef x,
LLVMValueRef y,
LLVMValueRef z,
LLVMValueRef y_stride,
- LLVMValueRef z_stride);
+ LLVMValueRef z_stride,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i,
+ LLVMValueRef *out_j);
void
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 84c04fe272..1a20d74cac 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -50,8 +50,10 @@
#include "lp_bld_swizzle.h"
#include "lp_bld_pack.h"
#include "lp_bld_flow.h"
+#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
+#include "lp_bld_quad.h"
/**
@@ -264,35 +266,11 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
}
}
- /*
- * Describe the coordinates in terms of pixel blocks.
- *
- * TODO: pixel blocks are power of two. LLVM should convert rem/div to
- * bit arithmetic. Verify this.
- */
-
- if (bld->format_desc->block.width == 1) {
- i = bld->uint_coord_bld.zero;
- }
- else {
- LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
- i = LLVMBuildURem(bld->builder, x, block_width, "");
- x = LLVMBuildUDiv(bld->builder, x, block_width, "");
- }
-
- if (bld->format_desc->block.height == 1) {
- j = bld->uint_coord_bld.zero;
- }
- else {
- LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
- j = LLVMBuildURem(bld->builder, y, block_height, "");
- y = LLVMBuildUDiv(bld->builder, y, block_height, "");
- }
-
/* convert x,y,z coords to linear offset from start of texture, in bytes */
- offset = lp_build_sample_offset(&bld->uint_coord_bld,
- bld->format_desc,
- x, y, z, y_stride, z_stride);
+ lp_build_sample_offset(&bld->uint_coord_bld,
+ bld->format_desc,
+ x, y, z, y_stride, z_stride,
+ &offset, &i, &j);
if (use_border) {
/* If we can sample the border color, it means that texcoords may
@@ -344,6 +322,9 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
}
+/**
+ * Fetch the texels as <4n x i8> in AoS form.
+ */
static LLVMValueRef
lp_build_sample_packed(struct lp_build_sample_context *bld,
LLVMValueRef x,
@@ -351,25 +332,46 @@ lp_build_sample_packed(struct lp_build_sample_context *bld,
LLVMValueRef y_stride,
LLVMValueRef data_array)
{
- LLVMValueRef offset;
+ LLVMValueRef offset, i, j;
LLVMValueRef data_ptr;
+ LLVMValueRef res;
- offset = lp_build_sample_offset(&bld->uint_coord_bld,
- bld->format_desc,
- x, y, NULL, y_stride, NULL);
-
- assert(bld->format_desc->block.width == 1);
- assert(bld->format_desc->block.height == 1);
- assert(bld->format_desc->block.bits <= bld->texel_type.width);
+ /* convert x,y,z coords to linear offset from start of texture, in bytes */
+ lp_build_sample_offset(&bld->uint_coord_bld,
+ bld->format_desc,
+ x, y, NULL, y_stride, NULL,
+ &offset, &i, &j);
/* get pointer to mipmap level 0 data */
data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
- return lp_build_gather(bld->builder,
- bld->texel_type.length,
- bld->format_desc->block.bits,
- bld->texel_type.width,
- data_ptr, offset);
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ /* Just fetch the data directly without swizzling */
+ assert(bld->format_desc->block.width == 1);
+ assert(bld->format_desc->block.height == 1);
+ assert(bld->format_desc->block.bits <= bld->texel_type.width);
+
+ res = lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset);
+ }
+ else {
+ struct lp_type type;
+
+ assert(bld->texel_type.width == 32);
+
+ memset(&type, 0, sizeof type);
+ type.width = 8;
+ type.length = bld->texel_type.length*4;
+ type.norm = TRUE;
+
+ res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type,
+ data_ptr, offset, i, j);
+ }
+
+ return res;
}
@@ -817,9 +819,8 @@ lp_build_minify(struct lp_build_sample_context *bld,
/**
* Generate code to compute texture level of detail (lambda).
- * \param s vector of texcoord s values
- * \param t vector of texcoord t values
- * \param r vector of texcoord r values
+ * \param ddx partial derivatives of (s, t, r, q) with respect to X
+ * \param ddy partial derivatives of (s, t, r, q) with respect to Y
* \param lod_bias optional float vector with the shader lod bias
* \param explicit_lod optional float vector with the explicit lod
* \param width scalar int texture width
@@ -831,11 +832,8 @@ lp_build_minify(struct lp_build_sample_context *bld,
*/
static LLVMValueRef
lp_build_lod_selector(struct lp_build_sample_context *bld,
- LLVMValueRef s,
- LLVMValueRef t,
- LLVMValueRef r,
- const LLVMValueRef *ddx,
- const LLVMValueRef *ddy,
+ const LLVMValueRef ddx[4],
+ const LLVMValueRef ddy[4],
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef width,
@@ -870,14 +868,6 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
LLVMValueRef rho;
- /*
- * dsdx = abs(s[1] - s[0]);
- * dsdy = abs(s[2] - s[0]);
- * dtdx = abs(t[1] - t[0]);
- * dtdy = abs(t[2] - t[0]);
- * drdx = abs(r[1] - r[0]);
- * drdy = abs(r[2] - r[0]);
- */
dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
dsdx = lp_build_abs(float_bld, dsdx);
dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
@@ -1287,7 +1277,7 @@ lp_build_cube_face(struct lp_build_sample_context *bld,
/**
- * Generate code to do cube face selection and per-face texcoords.
+ * Generate code to do cube face selection and compute per-face texcoords.
*/
static void
lp_build_cube_lookup(struct lp_build_sample_context *bld,
@@ -1411,7 +1401,6 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
lp_build_endif(&if_ctx2);
lp_build_flow_scope_end(flow_ctx2);
lp_build_flow_destroy(flow_ctx2);
-
*face_s = face_s2;
*face_t = face_t2;
*face = face2;
@@ -1457,13 +1446,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
int chan;
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+ /* sample the first mipmap level */
lp_build_sample_image_nearest(bld,
width0_vec, height0_vec, depth0_vec,
row_stride0_vec, img_stride0_vec,
data_ptr0, s, t, r, colors0);
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level, and interp */
+ /* sample the second mipmap level */
lp_build_sample_image_nearest(bld,
width1_vec, height1_vec, depth1_vec,
row_stride1_vec, img_stride1_vec,
@@ -1473,13 +1463,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
else {
assert(img_filter == PIPE_TEX_FILTER_LINEAR);
+ /* sample the first mipmap level */
lp_build_sample_image_linear(bld,
width0_vec, height0_vec, depth0_vec,
row_stride0_vec, img_stride0_vec,
data_ptr0, s, t, r, colors0);
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level, and interp */
+ /* sample the second mipmap level */
lp_build_sample_image_linear(bld,
width1_vec, height1_vec, depth1_vec,
row_stride1_vec, img_stride1_vec,
@@ -1542,6 +1533,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
LLVMValueRef data_ptr0, data_ptr1 = NULL;
+ LLVMValueRef face_ddx[4], face_ddy[4];
/*
printf("%s mip %d min %d mag %d\n", __FUNCTION__,
@@ -1549,6 +1541,30 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
*/
/*
+ * Choose cube face, recompute texcoords and derivatives for the chosen face.
+ */
+ if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ LLVMValueRef face, face_s, face_t;
+ lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
+ s = face_s; /* vec */
+ t = face_t; /* vec */
+ /* use 'r' to indicate cube face */
+ r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
+
+ /* recompute ddx, ddy using the new (s,t) face texcoords */
+ face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
+ face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
+ face_ddx[2] = NULL;
+ face_ddx[3] = NULL;
+ face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
+ face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
+ face_ddy[2] = NULL;
+ face_ddy[3] = NULL;
+ ddx = face_ddx;
+ ddy = face_ddy;
+ }
+
+ /*
* Compute the level of detail (float).
*/
if (min_filter != mag_filter ||
@@ -1556,7 +1572,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
- lod = lp_build_lod_selector(bld, s, t, r, ddx, ddy,
+ lod = lp_build_lod_selector(bld, ddx, ddy,
lod_bias, explicit_lod,
width, height, depth);
}
@@ -1566,9 +1582,20 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
*/
if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
/* always use mip level 0 */
- ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ /* XXX this is a work-around for an apparent bug in LLVM 2.7.
+ * We should be able to set ilevel0 = const(0) but that causes
+ * bad x86 code to be emitted.
+ */
+ lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
+ lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ }
+ else {
+ ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ }
}
else {
+ assert(lod);
if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
}
@@ -1623,18 +1650,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
}
/*
- * Choose cube face, recompute per-face texcoords.
- */
- if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
- LLVMValueRef face, face_s, face_t;
- lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
- s = face_s; /* vec */
- t = face_t; /* vec */
- /* use 'r' to indicate cube face */
- r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
- }
-
- /*
* Get pointer(s) to image data for mipmap level(s).
*/
data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
@@ -1712,36 +1727,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
static void
-lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
- struct lp_type dst_type,
- LLVMValueRef packed,
- LLVMValueRef *rgba)
-{
- LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
- unsigned chan;
-
- /* Decode the input vector components */
- for (chan = 0; chan < 4; ++chan) {
- unsigned start = chan*8;
- unsigned stop = start + 8;
- LLVMValueRef input;
-
- input = packed;
-
- if(start)
- input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), "");
-
- if(stop < 32)
- input = LLVMBuildAnd(builder, input, mask, "");
-
- input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
-
- rgba[chan] = input;
- }
-}
-
-
-static void
lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef s,
LLVMValueRef t,
@@ -1935,15 +1920,20 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
* Convert to SoA and swizzle.
*/
- packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
-
lp_build_rgba8_to_f32_soa(bld->builder,
bld->texel_type,
packed, unswizzled);
- lp_build_format_swizzle_soa(bld->format_desc,
- &bld->texel_bld,
- unswizzled, texel_out);
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ lp_build_format_swizzle_soa(bld->format_desc,
+ &bld->texel_bld,
+ unswizzled, texel_out);
+ } else {
+ texel_out[0] = unswizzled[0];
+ texel_out[1] = unswizzled[1];
+ texel_out[2] = unswizzled[2];
+ texel_out[3] = unswizzled[3];
+ }
apply_sampler_swizzle(bld, texel_out);
}
@@ -2007,6 +1997,8 @@ lp_build_sample_nop(struct lp_build_sample_context *bld,
* 'texel' will return a vector of four LLVMValueRefs corresponding to
* R, G, B, A.
* \param type vector float type to use for coords, etc.
+ * \param ddx partial derivatives of (s,t,r,q) with respect to x
+ * \param ddy partial derivatives of (s,t,r,q) with respect to y
*/
void
lp_build_sample_soa(LLVMBuilderRef builder,
@@ -2016,8 +2008,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
- const LLVMValueRef *ddx,
- const LLVMValueRef *ddy,
+ const LLVMValueRef ddx[4],
+ const LLVMValueRef ddy[4],
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef texel_out[4])
@@ -2079,7 +2071,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
/* For debug: no-op texture sampling */
lp_build_sample_nop(&bld, texel_out);
}
- else if (util_format_is_rgba8_variant(bld.format_desc) &&
+ else if (util_format_fits_8unorm(bld.format_desc) &&
+ bld.format_desc->nr_channels > 1 &&
static_state->target == PIPE_TEXTURE_2D &&
static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
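
The unorm8 to float conversion used by this AoS path (lp_build_rgba8_to_f32_soa(), deleted above as a file-local static but still called from lp_build_sample_2d_linear_aos) reduces to a shift, a mask and a normalize per channel. A scalar sketch of the same per-texel computation, assuming R lives in the low byte:

#include <stdio.h>
#include <stdint.h>

/* Unpack one packed 8-bit-per-channel texel into four floats in [0,1].
 * The SoA code performs the same shift/mask/normalize on whole vectors
 * of packed texels at once.
 */
static void rgba8_to_f32(uint32_t packed, float rgba[4])
{
   unsigned chan;
   for (chan = 0; chan < 4; ++chan) {
      uint32_t bits = (packed >> (chan * 8)) & 0xff;  /* LShr + And */
      rgba[chan] = (float)bits / 255.0f;              /* unsigned norm -> float */
   }
}

int main(void)
{
   float rgba[4];
   rgba8_to_f32(0xff800000u, rgba);
   printf("%f %f %f %f\n", rgba[0], rgba[1], rgba[2], rgba[3]);
   /* 0.000000 0.000000 0.501961 1.000000 */
   return 0;
}
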
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
index 3c8a7bc09e..20cf96ca66 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
@@ -110,7 +110,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
/* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
* the use of shuffles here actually causes worse results. More investigation is
* needed. */
- if (n <= 4) {
+ if (type.width >= 16) {
/*
* Shuffle.
*/
@@ -132,7 +132,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
* YY00 YY00 .... YY00
* YYYY YYYY .... YYYY <= output
*/
- struct lp_type type4 = type;
+ struct lp_type type4;
const char shifts[4][2] = {
{ 1, 2},
{-1, 2},
@@ -147,6 +147,13 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), "");
+ /*
+ * Build a type where each element is an integer that covers the four
+ * channels.
+ */
+
+ type4 = type;
+ type4.floating = FALSE;
type4.width *= 4;
type4.length /= 4;
@@ -176,80 +183,170 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
LLVMValueRef
-lp_build_swizzle1_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- const unsigned char swizzle[4])
+lp_build_swizzle_aos(struct lp_build_context *bld,
+ LLVMValueRef a,
+ const unsigned char swizzles[4])
{
- const unsigned n = bld->type.length;
+ const struct lp_type type = bld->type;
+ const unsigned n = type.length;
unsigned i, j;
- if(a == bld->undef || a == bld->zero || a == bld->one)
+ if (swizzles[0] == PIPE_SWIZZLE_RED &&
+ swizzles[1] == PIPE_SWIZZLE_GREEN &&
+ swizzles[2] == PIPE_SWIZZLE_BLUE &&
+ swizzles[3] == PIPE_SWIZZLE_ALPHA) {
return a;
+ }
- if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3])
- return lp_build_broadcast_aos(bld, a, swizzle[0]);
+ if (swizzles[0] == swizzles[1] &&
+ swizzles[1] == swizzles[2] &&
+ swizzles[2] == swizzles[3]) {
+ switch (swizzles[0]) {
+ case PIPE_SWIZZLE_RED:
+ case PIPE_SWIZZLE_GREEN:
+ case PIPE_SWIZZLE_BLUE:
+ case PIPE_SWIZZLE_ALPHA:
+ return lp_build_broadcast_aos(bld, a, swizzles[0]);
+ case PIPE_SWIZZLE_ZERO:
+ return bld->zero;
+ case PIPE_SWIZZLE_ONE:
+ return bld->one;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+ }
- {
+ if (type.width >= 16) {
/*
* Shuffle.
*/
- LLVMTypeRef elem_type = LLVMInt32Type();
+ LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(type));
+ LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
+
+ memset(aux, 0, sizeof aux);
+
+ for(j = 0; j < n; j += 4) {
+ for(i = 0; i < 4; ++i) {
+ unsigned shuffle;
+ switch (swizzles[i]) {
+ default:
+ assert(0);
+ /* fall through */
+ case PIPE_SWIZZLE_RED:
+ case PIPE_SWIZZLE_GREEN:
+ case PIPE_SWIZZLE_BLUE:
+ case PIPE_SWIZZLE_ALPHA:
+ shuffle = j + swizzles[i];
+ break;
+ case PIPE_SWIZZLE_ZERO:
+ shuffle = type.length + 0;
+ if (!aux[0]) {
+ aux[0] = lp_build_const_elem(type, 0.0);
+ }
+ break;
+ case PIPE_SWIZZLE_ONE:
+ shuffle = type.length + 1;
+ if (!aux[1]) {
+ aux[1] = lp_build_const_elem(type, 1.0);
+ }
+ break;
+ }
+ shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
+ }
+ }
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
- shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0);
-
- return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
- }
-}
-
+ for (i = 0; i < n; ++i) {
+ if (!aux[i]) {
+ aux[i] = undef;
+ }
+ }
-LLVMValueRef
-lp_build_swizzle2_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- LLVMValueRef b,
- const unsigned char swizzle[4])
-{
- const unsigned n = bld->type.length;
- unsigned i, j;
+ return LLVMBuildShuffleVector(bld->builder, a,
+ LLVMConstVector(aux, n),
+ LLVMConstVector(shuffles, n), "");
+ } else {
+ /*
+ * Bit mask and shifts.
+ *
+ * For example, this will convert BGRA to RGBA by doing
+ *
+ * rgba = (bgra & 0x00ff0000) >> 16
+ * | (bgra & 0xff00ff00)
+ * | (bgra & 0x000000ff) << 16
+ *
+ * This is done not only because it is faster, but also because the x86
+ * backend will refuse to lower shuffles of <4 x i8> vectors.
+ */
+ LLVMValueRef res;
+ struct lp_type type4;
+ boolean cond[4];
+ unsigned chan;
+ int shift;
- if(swizzle[0] < 4 && swizzle[1] < 4 && swizzle[2] < 4 && swizzle[3] < 4)
- return lp_build_swizzle1_aos(bld, a, swizzle);
+ /*
+ * Start with a mixture of 1 and 0.
+ */
+ for (chan = 0; chan < 4; ++chan) {
+ cond[chan] = swizzles[chan] == PIPE_SWIZZLE_ONE ? TRUE : FALSE;
+ }
+ res = lp_build_select_aos(bld, bld->one, bld->zero, cond);
- if(a == b) {
- unsigned char swizzle1[4];
- swizzle1[0] = swizzle[0] % 4;
- swizzle1[1] = swizzle[1] % 4;
- swizzle1[2] = swizzle[2] % 4;
- swizzle1[3] = swizzle[3] % 4;
- return lp_build_swizzle1_aos(bld, a, swizzle1);
- }
+ /*
+ * Build a type where each element is an integer that covers the four
+ * channels.
+ */
+ type4 = type;
+ type4.floating = FALSE;
+ type4.width *= 4;
+ type4.length /= 4;
- if(swizzle[0] % 4 == 0 &&
- swizzle[1] % 4 == 1 &&
- swizzle[2] % 4 == 2 &&
- swizzle[3] % 4 == 3) {
- boolean cond[4];
- cond[0] = swizzle[0] / 4;
- cond[1] = swizzle[1] / 4;
- cond[2] = swizzle[2] / 4;
- cond[3] = swizzle[3] / 4;
- return lp_build_select_aos(bld, a, b, cond);
- }
+ a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
+ res = LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type4), "");
- {
/*
- * Shuffle.
+ * Mask and shift the channels, trying to group as many channels in the
+ * same shift as possible
*/
- LLVMTypeRef elem_type = LLVMInt32Type();
- LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
-
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
- shuffles[j + i] = LLVMConstInt(elem_type, j + (swizzle[i] % 4) + (swizzle[i] / 4 * n), 0);
+ for (shift = -3; shift <= 3; ++shift) {
+ unsigned long long mask = 0;
+
+ assert(type4.width <= sizeof(mask)*8);
+
+ for (chan = 0; chan < 4; ++chan) {
+ /* FIXME: big endian */
+ if (swizzles[chan] < 4 &&
+ chan - swizzles[chan] == shift) {
+ mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
+ }
+ }
+
+ if (mask) {
+ LLVMValueRef masked;
+ LLVMValueRef shifted;
+
+ if (0)
+ debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask);
+
+ masked = LLVMBuildAnd(bld->builder, a,
+ lp_build_const_int_vec(type4, mask), "");
+ if (shift > 0) {
+ shifted = LLVMBuildShl(bld->builder, masked,
+ lp_build_const_int_vec(type4, shift*type.width), "");
+ } else if (shift < 0) {
+ shifted = LLVMBuildLShr(bld->builder, masked,
+ lp_build_const_int_vec(type4, -shift*type.width), "");
+ } else {
+ shifted = masked;
+ }
+
+ res = LLVMBuildOr(bld->builder, res, shifted, "");
+ }
+ }
- return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
+ return LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type), "");
}
}
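
On a single packed 32-bit pixel, the mask-and-shift strategy described in the comment in lp_build_swizzle_aos() is ordinary integer arithmetic; the generated code simply applies it to whole vectors at once. A standalone scalar sketch, little-endian byte order assumed:

#include <stdio.h>
#include <stdint.h>

/* Swizzle one packed BGRA pixel into RGBA with masks and shifts, following
 * the BGRA -> RGBA example from the comment above (shifts are expressed in
 * whole channels, i.e. multiples of 8 bits here).
 */
static uint32_t bgra_to_rgba(uint32_t bgra)
{
   return ((bgra & 0x00ff0000u) >> 16)   /* R moves down two channels */
        |  (bgra & 0xff00ff00u)          /* G and A stay in place     */
        | ((bgra & 0x000000ffu) << 16);  /* B moves up two channels   */
}

int main(void)
{
   /* B=0x33 G=0x22 R=0x11 A=0x44, with B in the low byte. */
   printf("0x%08x\n", bgra_to_rgba(0x44112233u));   /* 0x44332211 */
   return 0;
}
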
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
index 4f4fa777c9..315e1bcb54 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
@@ -68,24 +68,12 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
/**
* Swizzle a vector consisting of an array of XYZW structs.
*
- * @param swizzle is the in [0,4[ range.
+ * @param swizzles are in the [0,4[ range.
*/
LLVMValueRef
-lp_build_swizzle1_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- const unsigned char swizzle[4]);
-
-
-/**
- * Swizzle two vector consisting of an array of XYZW structs.
- *
- * @param swizzle is the in [0,8[ range. Values in [4,8[ range refer to b.
- */
-LLVMValueRef
-lp_build_swizzle2_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- LLVMValueRef b,
- const unsigned char swizzle[4]);
+lp_build_swizzle_aos(struct lp_build_context *bld,
+ LLVMValueRef a,
+ const unsigned char swizzles[4]);
LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index dec7556138..21236839fb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -49,6 +49,7 @@
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
+#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
@@ -132,10 +133,14 @@ struct lp_build_tgsi_soa_context
LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
- /* we allocate an array of temps if we have indirect
- * addressing and then the temps above is unused */
+ /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
+ * set in the indirect_files field.
+ * The temps[] array above is unused then.
+ */
LLVMValueRef temps_array;
- boolean has_indirect_addressing;
+
+ /** bitmask indicating which register files are accessed indirectly */
+ unsigned indirect_files;
struct lp_build_mask_context *mask;
struct lp_exec_mask exec_mask;
@@ -404,25 +409,92 @@ static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
lp_exec_mask_update(mask);
}
+
+/**
+ * Return pointer to a temporary register channel (src or dest).
+ * Note that indirect addressing cannot be handled here.
+ * \param index which temporary register
+ * \param chan which channel of the temp register.
+ */
static LLVMValueRef
get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
unsigned index,
- unsigned chan,
- boolean is_indirect,
- LLVMValueRef addr)
+ unsigned chan)
{
assert(chan < 4);
- if (!bld->has_indirect_addressing) {
- return bld->temps[index][chan];
- } else {
- LLVMValueRef lindex =
- LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0);
- if (is_indirect)
- lindex = lp_build_add(&bld->base, lindex, addr);
+ if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
+ LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
}
+ else {
+ return bld->temps[index][chan];
+ }
}
+
+/**
+ * Gather vector.
+ * XXX the lp_build_gather() function should be capable of doing this
+ * with a little work.
+ */
+static LLVMValueRef
+build_gather(struct lp_build_tgsi_soa_context *bld,
+ LLVMValueRef base_ptr,
+ LLVMValueRef indexes)
+{
+ LLVMValueRef res = bld->base.undef;
+ unsigned i;
+
+ /*
+ * Loop over elements of index_vec, load scalar value, insert it into 'res'.
+ */
+ for (i = 0; i < bld->base.type.length; i++) {
+ LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
+ indexes, ii, "");
+ LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
+ &index, 1, "");
+ LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+
+ res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
+ }
+
+ return res;
+}
+
+
+/**
+ * Read the current value of the ADDR register, convert the floats to
+ * ints, multiply by four and return the vector of offsets.
+ * The offsets will be used to index into the constant buffer or
+ * temporary register file.
+ */
+static LLVMValueRef
+get_indirect_offsets(struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_src_register *indirect_reg)
+{
+ /* always use X component of address register */
+ const int x = indirect_reg->SwizzleX;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
+ uint swizzle = tgsi_util_get_src_register_swizzle(indirect_reg, x);
+ LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4);
+ LLVMValueRef addr_vec;
+
+ addr_vec = LLVMBuildLoad(bld->base.builder,
+ bld->addr[indirect_reg->Index][swizzle],
+ "load addr reg");
+
+ /* for indexing we want integers */
+ addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec,
+ int_vec_type, "");
+
+ /* addr_vec = addr_vec * 4 */
+ addr_vec = lp_build_mul(&bld->base, addr_vec, vec4);
+
+ return addr_vec;
+}
+
+
/**
* Register fetch.
*/
@@ -430,14 +502,14 @@ static LLVMValueRef
emit_fetch(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
- unsigned index,
+ unsigned src_op,
const unsigned chan_index )
{
- const struct tgsi_full_src_register *reg = &inst->Src[index];
+ const struct tgsi_full_src_register *reg = &inst->Src[src_op];
const unsigned swizzle =
tgsi_util_get_full_src_register_swizzle(reg, chan_index);
LLVMValueRef res;
- LLVMValueRef addr = NULL;
+ LLVMValueRef addr_vec = NULL;
if (swizzle > 3) {
assert(0 && "invalid swizzle in emit_fetch()");
@@ -445,32 +517,33 @@ emit_fetch(
}
if (reg->Register.Indirect) {
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
- unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
- addr = LLVMBuildLoad(bld->base.builder,
- bld->addr[reg->Indirect.Index][swizzle],
- "");
- /* for indexing we want integers */
- addr = LLVMBuildFPToSI(bld->base.builder, addr,
- int_vec_type, "");
- addr = LLVMBuildExtractElement(bld->base.builder,
- addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
- "");
- addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
+ assert(bld->indirect_files);
+ addr_vec = get_indirect_offsets(bld, &reg->Indirect);
}
switch (reg->Register.File) {
case TGSI_FILE_CONSTANT:
- {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(),
- reg->Register.Index*4 + swizzle, 0);
+ if (reg->Register.Indirect) {
+ LLVMValueRef index_vec; /* index into the const buffer */
+
+ assert(bld->indirect_files & (1 << TGSI_FILE_CONSTANT));
+
+ /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
+ index_vec = lp_build_const_int_vec(bld->int_bld.type,
+ reg->Register.Index * 4 + swizzle);
+
+ /* index_vec = index_vec + addr_vec */
+ index_vec = lp_build_add(&bld->base, index_vec, addr_vec);
+
+ /* Gather values from the constant buffer */
+ res = build_gather(bld, bld->consts_ptr, index_vec);
+ }
+ else {
+ LLVMValueRef index; /* index into the const buffer */
LLVMValueRef scalar, scalar_ptr;
- if (reg->Register.Indirect) {
- /*lp_build_printf(bld->base.builder,
- "\taddr = %d\n", addr);*/
- index = lp_build_add(&bld->base, index, addr);
- }
+ index = lp_build_const_int32(reg->Register.Index*4 + swizzle);
+
scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
&index, 1, "");
scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
@@ -490,13 +563,38 @@ emit_fetch(
break;
case TGSI_FILE_TEMPORARY:
- {
- LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
- swizzle,
- reg->Register.Indirect,
- addr);
+ if (reg->Register.Indirect) {
+ LLVMValueRef vec_len =
+ lp_build_const_int_vec(bld->int_bld.type, bld->base.type.length);
+ LLVMValueRef index_vec; /* index into the temp register array */
+ LLVMValueRef temps_array;
+ LLVMTypeRef float4_ptr_type;
+
+ assert(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));
+
+ /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
+ index_vec = lp_build_const_int_vec(bld->int_bld.type,
+ reg->Register.Index * 4 + swizzle);
+
+ /* index_vec += addr_vec */
+ index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);
+
+ /* index_vec *= vector_length */
+ index_vec = lp_build_mul(&bld->int_bld, index_vec, vec_len);
+
+ /* cast temps_array pointer to float* */
+ float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
+ temps_array = LLVMBuildBitCast(bld->int_bld.builder, bld->temps_array,
+ float4_ptr_type, "");
+
+ /* Gather values from the temporary register array */
+ res = build_gather(bld, temps_array, index_vec);
+ }
+ else {
+ LLVMValueRef temp_ptr;
+ temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
- if(!res)
+ if (!res)
return bld->base.undef;
}
break;
@@ -660,8 +758,12 @@ emit_store(
}
if (reg->Register.Indirect) {
+ /* XXX use get_indirect_offsets() here eventually */
LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
+
+ assert(bld->indirect_files);
+
addr = LLVMBuildLoad(bld->base.builder,
bld->addr[reg->Indirect.Index][swizzle],
"");
@@ -680,14 +782,18 @@ emit_store(
bld->outputs[reg->Register.Index][chan_index]);
break;
- case TGSI_FILE_TEMPORARY: {
- LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
- chan_index,
- reg->Register.Indirect,
- addr);
- lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
+ case TGSI_FILE_TEMPORARY:
+ if (reg->Register.Indirect) {
+ /* XXX not done yet */
+ debug_printf("WARNING: LLVM scatter store of temp regs"
+ " not implemented\n");
+ }
+ else {
+ LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
+ chan_index);
+ lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
+ }
break;
- }
case TGSI_FILE_ADDRESS:
lp_exec_mask_store(&bld->exec_mask, pred, value,
@@ -905,7 +1011,7 @@ emit_declaration(
switch (decl->Declaration.File) {
case TGSI_FILE_TEMPORARY:
assert(idx < LP_MAX_TGSI_TEMPS);
- if (bld->has_indirect_addressing) {
+ if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
last*4 + 4, 0);
bld->temps_array = lp_build_array_alloca(bld->base.builder,
@@ -1929,8 +2035,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
bld.outputs = outputs;
bld.consts_ptr = consts_ptr;
bld.sampler = sampler;
- bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
- info->opcode_count[TGSI_OPCODE_ARL] > 0;
+ bld.indirect_files = info->indirect_files;
bld.instructions = (struct tgsi_full_instruction *)
MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
bld.max_instructions = LP_MAX_INSTRUCTIONS;
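
The indirect-addressing paths added above boil down to computing a per-lane integer index into a flat float array and then gathering scalar values. A hypothetical scalar model of the indirect constant-buffer fetch (the function name and the 4-lane length are illustrative, not from the patch):

#include <stdio.h>

#define LENGTH 4   /* number of SoA lanes, e.g. one 2x2 quad */

/* Model of emit_fetch() for an indirect TGSI_FILE_CONSTANT operand:
 * index = reg->Register.Index*4 + swizzle + ADDR.x*4, computed per lane,
 * followed by a scalar gather from the flat constant buffer
 * (see build_gather()).
 */
static void fetch_const_indirect(const float *consts,
                                 unsigned reg_index, unsigned swizzle,
                                 const float addr[LENGTH],
                                 float result[LENGTH])
{
   unsigned i;
   for (i = 0; i < LENGTH; i++) {
      int offset = (int)addr[i] * 4;                 /* FPToSI, then *4 */
      int index = reg_index * 4 + swizzle + offset;  /* broadcast + add */
      result[i] = consts[index];                     /* scalar gather   */
   }
}

int main(void)
{
   float consts[16];
   float addr[LENGTH] = { 0.0f, 1.0f, 2.0f, 1.0f };
   float res[LENGTH];
   unsigned i;

   for (i = 0; i < 16; i++)
      consts[i] = (float)i;

   fetch_const_indirect(consts, 1, 2, addr, res);    /* CONST[1 + ADDR.x].z */
   for (i = 0; i < LENGTH; i++)
      printf("%g ", res[i]);                         /* 6 10 14 10 */
   printf("\n");
   return 0;
}
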
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index df77ef2155..3ffe916f8e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -316,6 +316,54 @@ LLVMTypeRef
lp_build_int32_vec4_type(void);
+static INLINE struct lp_type
+lp_float32_vec4_type(void)
+{
+ struct lp_type type;
+
+ memset(&type, 0, sizeof(type));
+ type.floating = TRUE;
+ type.sign = TRUE;
+ type.norm = FALSE;
+ type.width = 32;
+ type.length = 4;
+
+ return type;
+}
+
+
+static INLINE struct lp_type
+lp_int32_vec4_type(void)
+{
+ struct lp_type type;
+
+ memset(&type, 0, sizeof(type));
+ type.floating = FALSE;
+ type.sign = TRUE;
+ type.norm = FALSE;
+ type.width = 32;
+ type.length = 4;
+
+ return type;
+}
+
+
+static INLINE struct lp_type
+lp_unorm8_vec4_type(void)
+{
+ struct lp_type type;
+
+ memset(&type, 0, sizeof(type));
+ type.floating = FALSE;
+ type.sign = FALSE;
+ type.norm = TRUE;
+ type.width = 8;
+ type.length = 4;
+
+ return type;
+}
+
+
struct lp_type
lp_uint_type(struct lp_type type);
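
A usage sketch for the new helpers: lp_unorm8_vec4_type() describes four 8-bit unsigned normalized values, i.e. one packed RGBA8 texel in 32 bits. The struct below is a simplified stand-in for the real struct lp_type, only so the example is self-contained:

#include <stdio.h>
#include <string.h>

/* Simplified stand-in for struct lp_type; the real definition lives in
 * lp_bld_type.h and carries more fields.
 */
struct lp_type {
   unsigned floating:1;
   unsigned sign:1;
   unsigned norm:1;
   unsigned width;    /* bits per element           */
   unsigned length;   /* number of elements (lanes) */
};

static struct lp_type
lp_unorm8_vec4_type(void)
{
   struct lp_type type;
   memset(&type, 0, sizeof type);
   type.norm = 1;
   type.width = 8;
   type.length = 4;
   return type;
}

int main(void)
{
   /* 4 x unorm8 describes one packed RGBA8 texel: 4*8 = 32 bits total. */
   struct lp_type t = lp_unorm8_vec4_type();
   printf("total bits = %u\n", t.width * t.length);
   return 0;
}
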