summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian Paul <brianp@vmware.com>2010-09-15 17:04:26 -0600
committerBrian Paul <brianp@vmware.com>2010-09-15 17:04:31 -0600
commit0a7824862eb753878fa79b153b2a111884ff1197 (patch)
tree103c33d4cadc6fe323c810c81be91616c3a0019b
parent95254bbd2ddf0c6207a642604cc218ac9d711501 (diff)
gallivm: expand AoS sampling to cover all filtering modes
...and all texture targets (1D/2D/3D/CUBE).
-rw-r--r--src/gallium/auxiliary/Makefile1
-rw-r--r--src/gallium/auxiliary/SConscript1
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c513
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.h164
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c1145
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h65
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c1059
7 files changed, 1919 insertions, 1029 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 5388f4ecd5..2a69294e15 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -168,6 +168,7 @@ GALLIVM_SOURCES = \
gallivm/lp_bld_printf.c \
gallivm/lp_bld_quad.c \
gallivm/lp_bld_sample.c \
+ gallivm/lp_bld_sample_aos.c \
gallivm/lp_bld_sample_soa.c \
gallivm/lp_bld_struct.c \
gallivm/lp_bld_swizzle.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index ba8be2efd1..cea2d7db58 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -219,6 +219,7 @@ if env['llvm']:
'gallivm/lp_bld_printf.c',
'gallivm/lp_bld_quad.c',
'gallivm/lp_bld_sample.c',
+ 'gallivm/lp_bld_sample_aos.c',
'gallivm/lp_bld_sample_soa.c',
'gallivm/lp_bld_struct.c',
'gallivm/lp_bld_swizzle.c',
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 259b1142e3..e89ee7c230 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -36,11 +36,13 @@
#include "pipe/p_state.h"
#include "util/u_format.h"
#include "util/u_math.h"
-#include "lp_bld_debug.h"
-#include "lp_bld_const.h"
#include "lp_bld_arit.h"
-#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_flow.h"
#include "lp_bld_sample.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_type.h"
/**
@@ -124,6 +126,511 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
/**
+ * Generate code to compute texture level of detail (lambda).
+ * \param ddx partial derivatives of (s, t, r, q) with respect to X
+ * \param ddy partial derivatives of (s, t, r, q) with respect to Y
+ * \param lod_bias optional float vector with the shader lod bias
+ * \param explicit_lod optional float vector with the explicit lod
+ * \param width scalar int texture width
+ * \param height scalar int texture height
+ * \param depth scalar int texture depth
+ *
+ * XXX: The resulting lod is scalar, so ignore all but the first element of
+ * derivatives, lod_bias, etc that are passed by the shader.
+ */
+LLVMValueRef
+lp_build_lod_selector(struct lp_build_sample_context *bld,
+ const LLVMValueRef ddx[4],
+ const LLVMValueRef ddy[4],
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef depth)
+
+{
+ if (bld->static_state->min_lod == bld->static_state->max_lod) {
+ /* User is forcing sampling from a particular mipmap level.
+ * This is hit during mipmap generation.
+ */
+ return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
+ }
+ else {
+ struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
+ bld->static_state->lod_bias);
+ LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
+ bld->static_state->min_lod);
+ LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
+ bld->static_state->max_lod);
+ LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ LLVMValueRef lod;
+
+ if (explicit_lod) {
+ lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
+ index0, "");
+ }
+ else {
+ const int dims = texture_dims(bld->static_state->target);
+ LLVMValueRef dsdx, dsdy;
+ LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
+ LLVMValueRef rho;
+
+ dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
+ dsdx = lp_build_abs(float_bld, dsdx);
+ dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
+ dsdy = lp_build_abs(float_bld, dsdy);
+ if (dims > 1) {
+ dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
+ dtdx = lp_build_abs(float_bld, dtdx);
+ dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
+ dtdy = lp_build_abs(float_bld, dtdy);
+ if (dims > 2) {
+ drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
+ drdx = lp_build_abs(float_bld, drdx);
+ drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
+ drdy = lp_build_abs(float_bld, drdy);
+ }
+ }
+
+ /* Compute rho = max of all partial derivatives scaled by texture size.
+ * XXX this could be vectorized somewhat
+ */
+ rho = LLVMBuildFMul(bld->builder,
+ lp_build_max(float_bld, dsdx, dsdy),
+ lp_build_int_to_float(float_bld, width), "");
+ if (dims > 1) {
+ LLVMValueRef max;
+ max = LLVMBuildFMul(bld->builder,
+ lp_build_max(float_bld, dtdx, dtdy),
+ lp_build_int_to_float(float_bld, height), "");
+ rho = lp_build_max(float_bld, rho, max);
+ if (dims > 2) {
+ max = LLVMBuildFMul(bld->builder,
+ lp_build_max(float_bld, drdx, drdy),
+ lp_build_int_to_float(float_bld, depth), "");
+ rho = lp_build_max(float_bld, rho, max);
+ }
+ }
+
+ /* compute lod = log2(rho) */
+ lod = lp_build_log2(float_bld, rho);
+
+ /* add shader lod bias */
+ if (lod_bias) {
+ lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
+ index0, "");
+ lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
+ }
+ }
+
+ /* add sampler lod bias */
+ lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
+
+ /* clamp lod */
+ lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
+
+ return lod;
+ }
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
+ * mipmap level index.
+ * Note: this is all scalar code.
+ * \param lod scalar float texture level of detail
+ * \param level_out returns integer
+ */
+void
+lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef lod,
+ LLVMValueRef *level_out)
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *int_bld = &bld->int_bld;
+ LLVMValueRef last_level, level;
+
+ LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
+
+ last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+ bld->builder, unit);
+
+ /* convert float lod to integer */
+ level = lp_build_iround(float_bld, lod);
+
+ /* clamp level to legal range of levels */
+ *level_out = lp_build_clamp(int_bld, level, zero, last_level);
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
+ * two (adjacent) mipmap level indexes. Later, we'll sample from those
+ * two mipmap levels and interpolate between them.
+ */
+void
+lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef lod,
+ LLVMValueRef *level0_out,
+ LLVMValueRef *level1_out,
+ LLVMValueRef *weight_out)
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *int_bld = &bld->int_bld;
+ LLVMValueRef last_level, level;
+
+ last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+ bld->builder, unit);
+
+ /* convert float lod to integer */
+ level = lp_build_ifloor(float_bld, lod);
+
+ /* compute level 0 and clamp to legal range of levels */
+ *level0_out = lp_build_clamp(int_bld, level,
+ int_bld->zero,
+ last_level);
+ /* compute level 1 and clamp to legal range of levels */
+ level = lp_build_add(int_bld, level, int_bld->one);
+ *level1_out = lp_build_clamp(int_bld, level,
+ int_bld->zero,
+ last_level);
+
+ *weight_out = lp_build_fract(float_bld, lod);
+}
+
+
+LLVMValueRef
+lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
+ LLVMValueRef data_array, LLVMValueRef level)
+{
+ LLVMValueRef indexes[2], data_ptr;
+ indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indexes[1] = level;
+ data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
+ data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
+ return data_ptr;
+}
+
+
+LLVMValueRef
+lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
+ LLVMValueRef data_array, int level)
+{
+ LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
+ return lp_build_get_mipmap_level(bld, data_array, lvl);
+}
+
+
+/**
+ * Codegen equivalent for u_minify().
+ * Return max(1, base_size >> level);
+ */
+static LLVMValueRef
+lp_build_minify(struct lp_build_sample_context *bld,
+ LLVMValueRef base_size,
+ LLVMValueRef level)
+{
+ LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
+ size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
+ return size;
+}
+
+
+/**
+ * Dereference stride_array[mipmap_level] array to get a stride.
+ * Return stride as a vector.
+ */
+static LLVMValueRef
+lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
+ LLVMValueRef stride_array, LLVMValueRef level)
+{
+ LLVMValueRef indexes[2], stride;
+ indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indexes[1] = level;
+ stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
+ stride = LLVMBuildLoad(bld->builder, stride, "");
+ stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
+ return stride;
+}
+
+
+/**
+ * When sampling a mipmap, we need to compute the width, height, depth
+ * of the source levels from the level indexes. This helper function
+ * does that.
+ */
+void
+lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
+ unsigned dims,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef depth_vec,
+ LLVMValueRef ilevel0,
+ LLVMValueRef ilevel1,
+ LLVMValueRef row_stride_array,
+ LLVMValueRef img_stride_array,
+ LLVMValueRef *width0_vec,
+ LLVMValueRef *width1_vec,
+ LLVMValueRef *height0_vec,
+ LLVMValueRef *height1_vec,
+ LLVMValueRef *depth0_vec,
+ LLVMValueRef *depth1_vec,
+ LLVMValueRef *row_stride0_vec,
+ LLVMValueRef *row_stride1_vec,
+ LLVMValueRef *img_stride0_vec,
+ LLVMValueRef *img_stride1_vec)
+{
+ const unsigned mip_filter = bld->static_state->min_mip_filter;
+ LLVMValueRef ilevel0_vec, ilevel1_vec;
+
+ ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
+ ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
+
+ /*
+ * Compute width, height, depth at mipmap level 'ilevel0'
+ */
+ *width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
+ if (dims >= 2) {
+ *height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
+ *row_stride0_vec = lp_build_get_level_stride_vec(bld,
+ row_stride_array,
+ ilevel0);
+ if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ *img_stride0_vec = lp_build_get_level_stride_vec(bld,
+ img_stride_array,
+ ilevel0);
+ if (dims == 3) {
+ *depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
+ }
+ }
+ }
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ /* compute width, height, depth for second mipmap level at 'ilevel1' */
+ *width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
+ if (dims >= 2) {
+ *height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
+ *row_stride1_vec = lp_build_get_level_stride_vec(bld,
+ row_stride_array,
+ ilevel1);
+ if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ *img_stride1_vec = lp_build_get_level_stride_vec(bld,
+ img_stride_array,
+ ilevel1);
+ if (dims == 3) {
+ *depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
+ }
+ }
+ }
+ }
+}
+
+
+
+/** Helper used by lp_build_cube_lookup() */
+static LLVMValueRef
+lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
+{
+ /* ima = -0.5 / abs(coord); */
+ LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
+ LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
+ LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
+ return ima;
+}
+
+
+/**
+ * Helper used by lp_build_cube_lookup()
+ * \param sign scalar +1 or -1
+ * \param coord float vector
+ * \param ima float vector
+ */
+static LLVMValueRef
+lp_build_cube_coord(struct lp_build_context *coord_bld,
+ LLVMValueRef sign, int negate_coord,
+ LLVMValueRef coord, LLVMValueRef ima)
+{
+ /* return negate(coord) * ima * sign + 0.5; */
+ LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
+ LLVMValueRef res;
+
+ assert(negate_coord == +1 || negate_coord == -1);
+
+ if (negate_coord == -1) {
+ coord = lp_build_negate(coord_bld, coord);
+ }
+
+ res = lp_build_mul(coord_bld, coord, ima);
+ if (sign) {
+ sign = lp_build_broadcast_scalar(coord_bld, sign);
+ res = lp_build_mul(coord_bld, res, sign);
+ }
+ res = lp_build_add(coord_bld, res, half);
+
+ return res;
+}
+
+
+/** Helper used by lp_build_cube_lookup()
+ * Return (major_coord >= 0) ? pos_face : neg_face;
+ */
+static LLVMValueRef
+lp_build_cube_face(struct lp_build_sample_context *bld,
+ LLVMValueRef major_coord,
+ unsigned pos_face, unsigned neg_face)
+{
+ LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
+ major_coord,
+ bld->float_bld.zero, "");
+ LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
+ LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
+ LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
+ return res;
+}
+
+
+
+/**
+ * Generate code to do cube face selection and compute per-face texcoords.
+ */
+void
+lp_build_cube_lookup(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef *face,
+ LLVMValueRef *face_s,
+ LLVMValueRef *face_t)
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *coord_bld = &bld->coord_bld;
+ LLVMValueRef rx, ry, rz;
+ LLVMValueRef arx, ary, arz;
+ LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
+ LLVMValueRef arx_ge_ary, arx_ge_arz;
+ LLVMValueRef ary_ge_arx, ary_ge_arz;
+ LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
+ LLVMValueRef rx_pos, ry_pos, rz_pos;
+
+ assert(bld->coord_bld.type.length == 4);
+
+ /*
+ * Use the average of the four pixel's texcoords to choose the face.
+ */
+ rx = lp_build_mul(float_bld, c25,
+ lp_build_sum_vector(&bld->coord_bld, s));
+ ry = lp_build_mul(float_bld, c25,
+ lp_build_sum_vector(&bld->coord_bld, t));
+ rz = lp_build_mul(float_bld, c25,
+ lp_build_sum_vector(&bld->coord_bld, r));
+
+ arx = lp_build_abs(float_bld, rx);
+ ary = lp_build_abs(float_bld, ry);
+ arz = lp_build_abs(float_bld, rz);
+
+ /*
+ * Compare sign/magnitude of rx,ry,rz to determine face
+ */
+ arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
+ arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
+ ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
+ ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
+
+ arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
+ ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
+
+ rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
+ ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
+ rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
+
+ {
+ struct lp_build_flow_context *flow_ctx;
+ struct lp_build_if_state if_ctx;
+
+ flow_ctx = lp_build_flow_create(bld->builder);
+ lp_build_flow_scope_begin(flow_ctx);
+
+ *face_s = bld->coord_bld.undef;
+ *face_t = bld->coord_bld.undef;
+ *face = bld->int_bld.undef;
+
+ lp_build_name(*face_s, "face_s");
+ lp_build_name(*face_t, "face_t");
+ lp_build_name(*face, "face");
+
+ lp_build_flow_scope_declare(flow_ctx, face_s);
+ lp_build_flow_scope_declare(flow_ctx, face_t);
+ lp_build_flow_scope_declare(flow_ctx, face);
+
+ lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
+ {
+ /* +/- X face */
+ LLVMValueRef sign = lp_build_sgn(float_bld, rx);
+ LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
+ *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
+ *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
+ *face = lp_build_cube_face(bld, rx,
+ PIPE_TEX_FACE_POS_X,
+ PIPE_TEX_FACE_NEG_X);
+ }
+ lp_build_else(&if_ctx);
+ {
+ struct lp_build_flow_context *flow_ctx2;
+ struct lp_build_if_state if_ctx2;
+
+ LLVMValueRef face_s2 = bld->coord_bld.undef;
+ LLVMValueRef face_t2 = bld->coord_bld.undef;
+ LLVMValueRef face2 = bld->int_bld.undef;
+
+ flow_ctx2 = lp_build_flow_create(bld->builder);
+ lp_build_flow_scope_begin(flow_ctx2);
+ lp_build_flow_scope_declare(flow_ctx2, &face_s2);
+ lp_build_flow_scope_declare(flow_ctx2, &face_t2);
+ lp_build_flow_scope_declare(flow_ctx2, &face2);
+
+ ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
+
+ lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
+ {
+ /* +/- Y face */
+ LLVMValueRef sign = lp_build_sgn(float_bld, ry);
+ LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
+ face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
+ face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
+ face2 = lp_build_cube_face(bld, ry,
+ PIPE_TEX_FACE_POS_Y,
+ PIPE_TEX_FACE_NEG_Y);
+ }
+ lp_build_else(&if_ctx2);
+ {
+ /* +/- Z face */
+ LLVMValueRef sign = lp_build_sgn(float_bld, rz);
+ LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
+ face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
+ face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
+ face2 = lp_build_cube_face(bld, rz,
+ PIPE_TEX_FACE_POS_Z,
+ PIPE_TEX_FACE_NEG_Z);
+ }
+ lp_build_endif(&if_ctx2);
+ lp_build_flow_scope_end(flow_ctx2);
+ lp_build_flow_destroy(flow_ctx2);
+ *face_s = face_s2;
+ *face_t = face_t2;
+ *face = face2;
+ }
+
+ lp_build_endif(&if_ctx);
+ lp_build_flow_scope_end(flow_ctx);
+ lp_build_flow_destroy(flow_ctx);
+ }
+}
+
+
+/**
* Compute the partial offset of a pixel block along an arbitrary axis.
*
* @param coord coordinate in pixels
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index caafc4eca0..ff72b8e9f1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -37,8 +37,11 @@
#include "pipe/p_format.h"
-
+#include "util/u_debug.h"
#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_swizzle.h"
+
struct pipe_resource;
struct pipe_sampler_view;
@@ -81,6 +84,10 @@ struct lp_sampler_static_state
unsigned normalized_coords:1;
float lod_bias, min_lod, max_lod;
float border_color[4];
+
+ /* Aero hacks */
+ unsigned force_nearest_s:1;
+ unsigned force_nearest_t:1;
};
@@ -140,6 +147,96 @@ struct lp_sampler_dynamic_state
/**
+ * Keep all information for sampling code generation in a single place.
+ */
+struct lp_build_sample_context
+{
+ LLVMBuilderRef builder;
+
+ const struct lp_sampler_static_state *static_state;
+
+ struct lp_sampler_dynamic_state *dynamic_state;
+
+ const struct util_format_description *format_desc;
+
+ /** regular scalar float type */
+ struct lp_type float_type;
+ struct lp_build_context float_bld;
+
+ /** regular scalar float type */
+ struct lp_type int_type;
+ struct lp_build_context int_bld;
+
+ /** Incoming coordinates type and build context */
+ struct lp_type coord_type;
+ struct lp_build_context coord_bld;
+
+ /** Unsigned integer coordinates */
+ struct lp_type uint_coord_type;
+ struct lp_build_context uint_coord_bld;
+
+ /** Signed integer coordinates */
+ struct lp_type int_coord_type;
+ struct lp_build_context int_coord_bld;
+
+ /** Output texels type and build context */
+ struct lp_type texel_type;
+ struct lp_build_context texel_bld;
+};
+
+
+
+/**
+ * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at
+ * this time. Return whether the given mode is supported by that function.
+ */
+static INLINE boolean
+lp_is_simple_wrap_mode(unsigned mode)
+{
+ switch (mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+
+static INLINE void
+apply_sampler_swizzle(struct lp_build_sample_context *bld,
+ LLVMValueRef *texel)
+{
+ unsigned char swizzles[4];
+
+ swizzles[0] = bld->static_state->swizzle_r;
+ swizzles[1] = bld->static_state->swizzle_g;
+ swizzles[2] = bld->static_state->swizzle_b;
+ swizzles[3] = bld->static_state->swizzle_a;
+
+ lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles);
+}
+
+
+static INLINE int
+texture_dims(enum pipe_texture_target tex)
+{
+ switch (tex) {
+ case PIPE_TEXTURE_1D:
+ return 1;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_CUBE:
+ return 2;
+ case PIPE_TEXTURE_3D:
+ return 3;
+ default:
+ assert(0 && "bad texture target in texture_dims()");
+ return 2;
+ }
+}
+
+
+/**
* Derive the sampler static state.
*/
void
@@ -148,6 +245,71 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_sampler_state *sampler);
+LLVMValueRef
+lp_build_lod_selector(struct lp_build_sample_context *bld,
+ const LLVMValueRef ddx[4],
+ const LLVMValueRef ddy[4],
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef depth);
+
+void
+lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef lod,
+ LLVMValueRef *level_out);
+
+void
+lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef lod,
+ LLVMValueRef *level0_out,
+ LLVMValueRef *level1_out,
+ LLVMValueRef *weight_out);
+
+LLVMValueRef
+lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
+ LLVMValueRef data_array, LLVMValueRef level);
+
+LLVMValueRef
+lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
+ LLVMValueRef data_array, int level);
+
+
+void
+lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
+ unsigned dims,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef depth_vec,
+ LLVMValueRef ilevel0,
+ LLVMValueRef ilevel1,
+ LLVMValueRef row_stride_array,
+ LLVMValueRef img_stride_array,
+ LLVMValueRef *width0_vec,
+ LLVMValueRef *width1_vec,
+ LLVMValueRef *height0_vec,
+ LLVMValueRef *height1_vec,
+ LLVMValueRef *depth0_vec,
+ LLVMValueRef *depth1_vec,
+ LLVMValueRef *row_stride0_vec,
+ LLVMValueRef *row_stride1_vec,
+ LLVMValueRef *img_stride0_vec,
+ LLVMValueRef *img_stride1_vec);
+
+
+void
+lp_build_cube_lookup(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef *face,
+ LLVMValueRef *face_s,
+ LLVMValueRef *face_t);
+
+
void
lp_build_sample_partial_offset(struct lp_build_context *bld,
unsigned block_length,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
new file mode 100644
index 0000000000..a9a4e7b121
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -0,0 +1,1145 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling -- SoA.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ * @author Brian Paul <brianp@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+#include "util/u_dump.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_format.h"
+#include "util/u_cpu_detect.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_pack.h"
+#include "lp_bld_flow.h"
+#include "lp_bld_gather.h"
+#include "lp_bld_format.h"
+#include "lp_bld_sample.h"
+#include "lp_bld_sample_aos.h"
+#include "lp_bld_quad.h"
+
+
+/**
+ * Build LLVM code for texture coord wrapping, for nearest filtering,
+ * for scaled integer texcoords.
+ * \param block_length is the length of the pixel block along the
+ * coordinate axis
+ * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
+ * \param length the texture size along one dimension
+ * \param stride pixel stride along the coordinate axis (in bytes)
+ * \param is_pot if TRUE, length is a power of two
+ * \param wrap_mode one of PIPE_TEX_WRAP_x
+ * \param out_offset byte offset for the wrapped coordinate
+ * \param out_i resulting sub-block pixel coordinate for coord0
+ */
+static void
+lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord,
+ LLVMValueRef length,
+ LLVMValueRef stride,
+ boolean is_pot,
+ unsigned wrap_mode,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i)
+{
+ struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef length_minus_one;
+
+ length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+
+ switch(wrap_mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ if(is_pot)
+ coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
+ else
+ /* Signed remainder won't give the right results for negative
+ * dividends but unsigned remainder does.*/
+ coord = LLVMBuildURem(bld->builder, coord, length, "");
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
+ coord = lp_build_min(int_coord_bld, coord, length_minus_one);
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ default:
+ assert(0);
+ }
+
+ lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
+ out_offset, out_i);
+}
+
+
+/**
+ * Build LLVM code for texture coord wrapping, for linear filtering,
+ * for scaled integer texcoords.
+ * \param block_length is the length of the pixel block along the
+ * coordinate axis
+ * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
+ * \param length the texture size along one dimension
+ * \param stride pixel stride along the coordinate axis (in bytes)
+ * \param is_pot if TRUE, length is a power of two
+ * \param wrap_mode one of PIPE_TEX_WRAP_x
+ * \param offset0 resulting relative offset for coord0
+ * \param offset1 resulting relative offset for coord0 + 1
+ * \param i0 resulting sub-block pixel coordinate for coord0
+ * \param i1 resulting sub-block pixel coordinate for coord0 + 1
+ */
+static void
+lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord0,
+ LLVMValueRef length,
+ LLVMValueRef stride,
+ boolean is_pot,
+ unsigned wrap_mode,
+ LLVMValueRef *offset0,
+ LLVMValueRef *offset1,
+ LLVMValueRef *i0,
+ LLVMValueRef *i1)
+{
+ struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef length_minus_one;
+ LLVMValueRef lmask, umask, mask;
+
+ if (block_length != 1) {
+ /*
+ * If the pixel block covers more than one pixel then there is no easy
+ * way to calculate offset1 relative to offset0. Instead, compute them
+ * independently.
+ */
+
+ LLVMValueRef coord1;
+
+ lp_build_sample_wrap_nearest_int(bld,
+ block_length,
+ coord0,
+ length,
+ stride,
+ is_pot,
+ wrap_mode,
+ offset0, i0);
+
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+
+ lp_build_sample_wrap_nearest_int(bld,
+ block_length,
+ coord1,
+ length,
+ stride,
+ is_pot,
+ wrap_mode,
+ offset1, i1);
+
+ return;
+ }
+
+ /*
+ * Scalar pixels -- try to compute offset0 and offset1 with a single stride
+ * multiplication.
+ */
+
+ *i0 = uint_coord_bld->zero;
+ *i1 = uint_coord_bld->zero;
+
+ length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
+
+ switch(wrap_mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ if (is_pot) {
+ coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
+ }
+ else {
+ /* Signed remainder won't give the right results for negative
+ * dividends but unsigned remainder does.*/
+ coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
+ }
+
+ mask = lp_build_compare(bld->builder, int_coord_bld->type,
+ PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
+
+ *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+ *offset1 = LLVMBuildAnd(bld->builder,
+ lp_build_add(uint_coord_bld, *offset0, stride),
+ mask, "");
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
+ PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
+ umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
+ PIPE_FUNC_LESS, coord0, length_minus_one);
+
+ coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
+ coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
+
+ mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
+
+ *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+ *offset1 = lp_build_add(uint_coord_bld,
+ *offset0,
+ LLVMBuildAnd(bld->builder, stride, mask, ""));
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ default:
+ assert(0);
+ *offset0 = uint_coord_bld->zero;
+ *offset1 = uint_coord_bld->zero;
+ break;
+ }
+}
+
+
+/**
+ * Sample a single texture image with nearest sampling.
+ * If sampling a cube texture, r = cube face in [0,5].
+ * Return filtered color as two vectors of 16-bit fixed point values.
+ */
+static void
+lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef depth_vec,
+ LLVMValueRef row_stride_vec,
+ LLVMValueRef img_stride_vec,
+ LLVMValueRef data_ptr,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef *colors_lo,
+ LLVMValueRef *colors_hi)
+{
+ const int dims = texture_dims(bld->static_state->target);
+ LLVMBuilderRef builder = bld->builder;
+ struct lp_build_context i32, h16, u8n;
+ LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
+ LLVMValueRef i32_c8;
+ LLVMValueRef s_ipart, t_ipart, r_ipart;
+ LLVMValueRef x_stride;
+ LLVMValueRef x_offset, offset;
+ LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
+
+ lp_build_context_init(&i32, builder, lp_type_int_vec(32));
+ lp_build_context_init(&h16, builder, lp_type_ufixed(16));
+ lp_build_context_init(&u8n, builder, lp_type_unorm(8));
+
+ i32_vec_type = lp_build_vec_type(i32.type);
+ h16_vec_type = lp_build_vec_type(h16.type);
+ u8n_vec_type = lp_build_vec_type(u8n.type);
+
+ if (bld->static_state->normalized_coords) {
+ /* s = s * width, t = t * height */
+ LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
+ LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
+ coord_vec_type, "");
+ s = lp_build_mul(&bld->coord_bld, s, fp_width);
+ if (dims >= 2) {
+ LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
+ coord_vec_type, "");
+ t = lp_build_mul(&bld->coord_bld, t, fp_height);
+ if (dims >= 3) {
+ LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
+ coord_vec_type, "");
+ r = lp_build_mul(&bld->coord_bld, r, fp_depth);
+ }
+ }
+ }
+
+ /* scale coords by 256 (8 fractional bits) */
+ s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+ if (dims >= 2)
+ t = lp_build_mul_imm(&bld->coord_bld, t, 256);
+ if (dims >= 3)
+ r = lp_build_mul_imm(&bld->coord_bld, r, 256);
+
+ /* convert float to int */
+ s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
+ if (dims >= 2)
+ t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
+ if (dims >= 3)
+ r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
+
+ /* compute floor (shift right 8) */
+ i32_c8 = lp_build_const_int_vec(i32.type, 8);
+ s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
+ if (dims >= 2)
+ t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
+ if (dims >= 3)
+ r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
+
+ /* get pixel, row, image strides */
+ x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+ bld->format_desc->block.bits/8);
+
+ /* Do texcoord wrapping, compute texel offset */
+ lp_build_sample_wrap_nearest_int(bld,
+ bld->format_desc->block.width,
+ s_ipart, width_vec, x_stride,
+ bld->static_state->pot_width,
+ bld->static_state->wrap_s,
+ &x_offset, &x_subcoord);
+ offset = x_offset;
+ if (dims >= 2) {
+ LLVMValueRef y_offset;
+ lp_build_sample_wrap_nearest_int(bld,
+ bld->format_desc->block.height,
+ t_ipart, height_vec, row_stride_vec,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_t,
+ &y_offset, &y_subcoord);
+ offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
+ if (dims >= 3) {
+ LLVMValueRef z_offset;
+ lp_build_sample_wrap_nearest_int(bld,
+ 1, /* block length (depth) */
+ r_ipart, depth_vec, img_stride_vec,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_r,
+ &z_offset, &z_subcoord);
+ offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
+ }
+ else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ LLVMValueRef z_offset;
+ /* The r coord is the cube face in [0,5] */
+ z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
+ offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
+ }
+ }
+
+ /*
+ * Fetch the pixels as 4 x 32bit (rgba order might differ):
+ *
+ * rgba0 rgba1 rgba2 rgba3
+ *
+ * bit cast them into 16 x u8
+ *
+ * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
+ *
+ * unpack them into two 8 x i16:
+ *
+ * r0 g0 b0 a0 r1 g1 b1 a1
+ * r2 g2 b2 a2 r3 g3 b3 a3
+ *
+ * The higher 8 bits of the resulting elements will be zero.
+ */
+ {
+ LLVMValueRef rgba8;
+
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ /*
+ * Given the format is a rgba8, just read the pixels as is,
+ * without any swizzling. Swizzling will be done later.
+ */
+ rgba8 = lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset);
+
+ rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
+ }
+ else {
+ rgba8 = lp_build_fetch_rgba_aos(bld->builder,
+ bld->format_desc,
+ u8n.type,
+ data_ptr, offset,
+ x_subcoord,
+ y_subcoord);
+ }
+
+ /* Expand one 4*rgba8 to two 2*rgba16 */
+ lp_build_unpack2(builder, u8n.type, h16.type,
+ rgba8,
+ colors_lo, colors_hi);
+ }
+}
+
+
+/**
+ * Sample a single texture image with (bi-)(tri-)linear sampling.
+ * Return filtered color as two vectors of 16-bit fixed point values.
+ */
+static void
+lp_build_sample_image_linear(struct lp_build_sample_context *bld,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef depth_vec,
+ LLVMValueRef row_stride_vec,
+ LLVMValueRef img_stride_vec,
+ LLVMValueRef data_ptr,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef *colors_lo,
+ LLVMValueRef *colors_hi)
+{
+ const int dims = texture_dims(bld->static_state->target);
+ LLVMBuilderRef builder = bld->builder;
+ struct lp_build_context i32, h16, u8n;
+ LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
+ LLVMValueRef i32_c8, i32_c128, i32_c255;
+ LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
+ LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
+ LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi;
+ LLVMValueRef x_stride, y_stride, z_stride;
+ LLVMValueRef x_offset0, x_offset1;
+ LLVMValueRef y_offset0, y_offset1;
+ LLVMValueRef z_offset0, z_offset1;
+ LLVMValueRef offset[2][2][2]; /* [z][y][x] */
+ LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
+ LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
+ LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
+ LLVMValueRef packed_lo, packed_hi;
+ unsigned x, y, z;
+ unsigned i, j, k;
+ unsigned numj, numk;
+
+ lp_build_context_init(&i32, builder, lp_type_int_vec(32));
+ lp_build_context_init(&h16, builder, lp_type_ufixed(16));
+ lp_build_context_init(&u8n, builder, lp_type_unorm(8));
+
+ i32_vec_type = lp_build_vec_type(i32.type);
+ h16_vec_type = lp_build_vec_type(h16.type);
+ u8n_vec_type = lp_build_vec_type(u8n.type);
+
+ if (bld->static_state->normalized_coords) {
+ /* s = s * width, t = t * height */
+ LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
+ LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
+ coord_vec_type, "");
+ s = lp_build_mul(&bld->coord_bld, s, fp_width);
+ if (dims >= 2) {
+ LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
+ coord_vec_type, "");
+ t = lp_build_mul(&bld->coord_bld, t, fp_height);
+ }
+ if (dims >= 3) {
+ LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
+ coord_vec_type, "");
+ r = lp_build_mul(&bld->coord_bld, r, fp_depth);
+ }
+ }
+
+ /* scale coords by 256 (8 fractional bits) */
+ s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+ if (dims >= 2)
+ t = lp_build_mul_imm(&bld->coord_bld, t, 256);
+ if (dims >= 3)
+ r = lp_build_mul_imm(&bld->coord_bld, r, 256);
+
+ /* convert float to int */
+ s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
+ if (dims >= 2)
+ t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
+ if (dims >= 3)
+ r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
+
+ /* subtract 0.5 (add -128) */
+ i32_c128 = lp_build_const_int_vec(i32.type, -128);
+ if (!bld->static_state->force_nearest_s) {
+ s = LLVMBuildAdd(builder, s, i32_c128, "");
+ }
+ if (dims >= 2 && !bld->static_state->force_nearest_t) {
+ t = LLVMBuildAdd(builder, t, i32_c128, "");
+ }
+ if (dims >= 3) {
+ r = LLVMBuildAdd(builder, r, i32_c128, "");
+ }
+
+ /* compute floor (shift right 8) */
+ i32_c8 = lp_build_const_int_vec(i32.type, 8);
+ s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
+ if (dims >= 2)
+ t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
+ if (dims >= 3)
+ r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
+
+ /* compute fractional part (AND with 0xff) */
+ i32_c255 = lp_build_const_int_vec(i32.type, 255);
+ s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
+ if (dims >= 2)
+ t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
+ if (dims >= 3)
+ r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
+
+ /* get pixel, row and image strides */
+ x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+ bld->format_desc->block.bits/8);
+ y_stride = row_stride_vec;
+ z_stride = img_stride_vec;
+
+ /* do texcoord wrapping and compute texel offsets */
+ lp_build_sample_wrap_linear_int(bld,
+ bld->format_desc->block.width,
+ s_ipart, width_vec, x_stride,
+ bld->static_state->pot_width,
+ bld->static_state->wrap_s,
+ &x_offset0, &x_offset1,
+ &x_subcoord[0], &x_subcoord[1]);
+ for (z = 0; z < 2; z++) {
+ for (y = 0; y < 2; y++) {
+ offset[z][y][0] = x_offset0;
+ offset[z][y][1] = x_offset1;
+ }
+ }
+
+ if (dims >= 2) {
+ lp_build_sample_wrap_linear_int(bld,
+ bld->format_desc->block.height,
+ t_ipart, height_vec, y_stride,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_t,
+ &y_offset0, &y_offset1,
+ &y_subcoord[0], &y_subcoord[1]);
+
+ for (z = 0; z < 2; z++) {
+ for (x = 0; x < 2; x++) {
+ offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[z][0][x], y_offset0);
+ offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[z][1][x], y_offset1);
+ }
+ }
+ }
+
+ if (dims >= 3) {
+ lp_build_sample_wrap_linear_int(bld,
+ bld->format_desc->block.height,
+ r_ipart, depth_vec, z_stride,
+ bld->static_state->pot_depth,
+ bld->static_state->wrap_r,
+ &z_offset0, &z_offset1,
+ &z_subcoord[0], &z_subcoord[1]);
+ for (y = 0; y < 2; y++) {
+ for (x = 0; x < 2; x++) {
+ offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[0][y][x], z_offset0);
+ offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[1][y][x], z_offset1);
+ }
+ }
+ }
+ else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ LLVMValueRef z_offset;
+ z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
+ for (y = 0; y < 2; y++) {
+ for (x = 0; x < 2; x++) {
+ /* The r coord is the cube face in [0,5] */
+ offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[0][y][x], z_offset);
+ }
+ }
+ }
+
+ /*
+ * Transform 4 x i32 in
+ *
+ * s_fpart = {s0, s1, s2, s3}
+ *
+ * into 8 x i16
+ *
+ * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
+ *
+ * into two 8 x i16
+ *
+ * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
+ * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
+ *
+ * and likewise for t_fpart. There is no risk of loosing precision here
+ * since the fractional parts only use the lower 8bits.
+ */
+ s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
+ if (dims >= 2)
+ t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
+ if (dims >= 3)
+ r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
+
+ {
+ LLVMTypeRef elem_type = LLVMInt32Type();
+ LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef shuffle_lo;
+ LLVMValueRef shuffle_hi;
+
+ for (j = 0; j < h16.type.length; j += 4) {
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ unsigned subindex = 0;
+#else
+ unsigned subindex = 1;
+#endif
+ LLVMValueRef index;
+
+ index = LLVMConstInt(elem_type, j/2 + subindex, 0);
+ for (i = 0; i < 4; ++i)
+ shuffles_lo[j + i] = index;
+
+ index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
+ for (i = 0; i < 4; ++i)
+ shuffles_hi[j + i] = index;
+ }
+
+ shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
+ shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
+
+ s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
+ shuffle_lo, "");
+ s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
+ shuffle_hi, "");
+ if (dims >= 2) {
+ t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
+ shuffle_lo, "");
+ t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
+ shuffle_hi, "");
+ }
+ if (dims >= 3) {
+ r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
+ shuffle_lo, "");
+ r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
+ shuffle_hi, "");
+ }
+ }
+
+ /*
+ * Fetch the pixels as 4 x 32bit (rgba order might differ):
+ *
+ * rgba0 rgba1 rgba2 rgba3
+ *
+ * bit cast them into 16 x u8
+ *
+ * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
+ *
+ * unpack them into two 8 x i16:
+ *
+ * r0 g0 b0 a0 r1 g1 b1 a1
+ * r2 g2 b2 a2 r3 g3 b3 a3
+ *
+ * The higher 8 bits of the resulting elements will be zero.
+ */
+ numj = 1 + (dims >= 2);
+ numk = 1 + (dims >= 3);
+
+ for (k = 0; k < numk; k++) {
+ for (j = 0; j < numj; j++) {
+ for (i = 0; i < 2; i++) {
+ LLVMValueRef rgba8;
+
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ /*
+ * Given the format is a rgba8, just read the pixels as is,
+ * without any swizzling. Swizzling will be done later.
+ */
+ rgba8 = lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset[k][j][i]);
+
+ rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
+ }
+ else {
+ rgba8 = lp_build_fetch_rgba_aos(bld->builder,
+ bld->format_desc,
+ u8n.type,
+ data_ptr, offset[k][j][i],
+ x_subcoord[i],
+ y_subcoord[j]);
+ }
+
+ /* Expand one 4*rgba8 to two 2*rgba16 */
+ lp_build_unpack2(builder, u8n.type, h16.type,
+ rgba8,
+ &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
+ }
+ }
+ }
+
+ /*
+ * Linear interpolation with 8.8 fixed point.
+ */
+ if (bld->static_state->force_nearest_s) {
+ /* special case 1-D lerp */
+ packed_lo = lp_build_lerp(&h16,
+ t_fpart_lo,
+ neighbors_lo[0][0][0],
+ neighbors_lo[0][0][1]);
+
+ packed_hi = lp_build_lerp(&h16,
+ t_fpart_hi,
+ neighbors_hi[0][1][0],
+ neighbors_hi[0][1][0]);
+ }
+ else if (bld->static_state->force_nearest_t) {
+ /* special case 1-D lerp */
+ packed_lo = lp_build_lerp(&h16,
+ s_fpart_lo,
+ neighbors_lo[0][0][0],
+ neighbors_lo[0][0][1]);
+
+ packed_hi = lp_build_lerp(&h16,
+ s_fpart_hi,
+ neighbors_hi[0][0][0],
+ neighbors_hi[0][0][1]);
+ }
+ else {
+ /* general 1/2/3-D lerping */
+ if (dims == 1) {
+ packed_lo = lp_build_lerp(&h16,
+ s_fpart_lo,
+ neighbors_lo[0][0][0],
+ neighbors_lo[0][0][1]);
+
+ packed_hi = lp_build_lerp(&h16,
+ s_fpart_hi,
+ neighbors_hi[0][0][0],
+ neighbors_hi[0][0][1]);
+ }
+ else {
+ /* 2-D lerp */
+ packed_lo = lp_build_lerp_2d(&h16,
+ s_fpart_lo, t_fpart_lo,
+ neighbors_lo[0][0][0],
+ neighbors_lo[0][0][1],
+ neighbors_lo[0][1][0],
+ neighbors_lo[0][1][1]);
+
+ packed_hi = lp_build_lerp_2d(&h16,
+ s_fpart_hi, t_fpart_hi,
+ neighbors_hi[0][0][0],
+ neighbors_hi[0][0][1],
+ neighbors_hi[0][1][0],
+ neighbors_hi[0][1][1]);
+
+ if (dims >= 3) {
+ LLVMValueRef packed_lo2, packed_hi2;
+
+ /* lerp in the second z slice */
+ packed_lo2 = lp_build_lerp_2d(&h16,
+ s_fpart_lo, t_fpart_lo,
+ neighbors_lo[1][0][0],
+ neighbors_lo[1][0][1],
+ neighbors_lo[1][1][0],
+ neighbors_lo[1][1][1]);
+
+ packed_hi2 = lp_build_lerp_2d(&h16,
+ s_fpart_hi, t_fpart_hi,
+ neighbors_hi[1][0][0],
+ neighbors_hi[1][0][1],
+ neighbors_hi[1][1][0],
+ neighbors_hi[1][1][1]);
+ /* interp between two z slices */
+ packed_lo = lp_build_lerp(&h16, r_fpart_lo,
+ packed_lo, packed_lo2);
+ packed_hi = lp_build_lerp(&h16, r_fpart_hi,
+ packed_hi, packed_hi2);
+ }
+ }
+ }
+
+ *colors_lo = packed_lo;
+ *colors_hi = packed_hi;
+}
+
+
+/**
+ * Sample the texture/mipmap using given image filter and mip filter.
+ * data0_ptr and data1_ptr point to the two mipmap levels to sample
+ * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
+ * If we're using nearest miplevel sampling the '1' values will be null/unused.
+ */
+static void
+lp_build_sample_mipmap(struct lp_build_sample_context *bld,
+ unsigned img_filter,
+ unsigned mip_filter,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ LLVMValueRef lod_fpart,
+ LLVMValueRef width0_vec,
+ LLVMValueRef width1_vec,
+ LLVMValueRef height0_vec,
+ LLVMValueRef height1_vec,
+ LLVMValueRef depth0_vec,
+ LLVMValueRef depth1_vec,
+ LLVMValueRef row_stride0_vec,
+ LLVMValueRef row_stride1_vec,
+ LLVMValueRef img_stride0_vec,
+ LLVMValueRef img_stride1_vec,
+ LLVMValueRef data_ptr0,
+ LLVMValueRef data_ptr1,
+ LLVMValueRef *colors_lo,
+ LLVMValueRef *colors_hi)
+{
+ LLVMValueRef colors0_lo, colors0_hi;
+ LLVMValueRef colors1_lo, colors1_hi;
+
+ if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+ /* sample the first mipmap level */
+ lp_build_sample_image_nearest(bld,
+ width0_vec, height0_vec, depth0_vec,
+ row_stride0_vec, img_stride0_vec,
+ data_ptr0, s, t, r,
+ &colors0_lo, &colors0_hi);
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ /* sample the second mipmap level */
+ lp_build_sample_image_nearest(bld,
+ width1_vec, height1_vec, depth1_vec,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r,
+ &colors1_lo, &colors1_hi);
+ }
+ }
+ else {
+ assert(img_filter == PIPE_TEX_FILTER_LINEAR);
+
+ /* sample the first mipmap level */
+ lp_build_sample_image_linear(bld,
+ width0_vec, height0_vec, depth0_vec,
+ row_stride0_vec, img_stride0_vec,
+ data_ptr0, s, t, r,
+ &colors0_lo, &colors0_hi);
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ /* sample the second mipmap level */
+ lp_build_sample_image_linear(bld,
+ width1_vec, height1_vec, depth1_vec,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r,
+ &colors1_lo, &colors1_hi);
+ }
+ }
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ /* interpolate samples from the two mipmap levels */
+ struct lp_build_context h16;
+ lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16));
+
+ *colors_lo = lp_build_lerp(&h16, lod_fpart,
+ colors0_lo, colors1_lo);
+ *colors_hi = lp_build_lerp(&h16, lod_fpart,
+ colors0_hi, colors1_hi);
+ }
+ else {
+ /* use first/only level's colors */
+ *colors_lo = colors0_lo;
+ *colors_hi = colors0_hi;
+ }
+}
+
+
+
+/**
+ * Texture sampling in AoS format. Used when sampling common 32-bit/texel
+ * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
+ * but only limited texture coord wrap modes.
+ */
+void
+lp_build_sample_aos(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef depth,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef depth_vec,
+ LLVMValueRef row_stride_array,
+ LLVMValueRef img_stride_array,
+ LLVMValueRef data_array,
+ LLVMValueRef texel_out[4])
+{
+ struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMBuilderRef builder = bld->builder;
+ const unsigned mip_filter = bld->static_state->min_mip_filter;
+ const unsigned min_filter = bld->static_state->min_img_filter;
+ const unsigned mag_filter = bld->static_state->mag_img_filter;
+ const int dims = texture_dims(bld->static_state->target);
+ LLVMValueRef lod = NULL, lod_fpart = NULL;
+ LLVMValueRef ilevel0, ilevel1 = NULL;
+ LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
+ LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
+ LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
+ LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
+ LLVMValueRef data_ptr0, data_ptr1 = NULL;
+ LLVMValueRef packed, packed_lo, packed_hi;
+ LLVMValueRef unswizzled[4];
+ LLVMValueRef face_ddx[4], face_ddy[4];
+ struct lp_build_context h16;
+ LLVMTypeRef h16_vec_type;
+
+ /* we only support the common/simple wrap modes at this time */
+ assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
+ if (dims >= 2)
+ assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
+ if (dims >= 3)
+ assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));
+
+
+ /* make 16-bit fixed-pt builder context */
+ lp_build_context_init(&h16, builder, lp_type_ufixed(16));
+ h16_vec_type = lp_build_vec_type(h16.type);
+
+
+ /* cube face selection, compute pre-face coords, etc. */
+ if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ LLVMValueRef face, face_s, face_t;
+ lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
+ s = face_s; /* vec */
+ t = face_t; /* vec */
+ /* use 'r' to indicate cube face */
+ r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
+
+ /* recompute ddx, ddy using the new (s,t) face texcoords */
+ face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
+ face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
+ face_ddx[2] = NULL;
+ face_ddx[3] = NULL;
+ face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
+ face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
+ face_ddy[2] = NULL;
+ face_ddy[3] = NULL;
+ ddx = face_ddx;
+ ddy = face_ddy;
+ }
+
+
+ /*
+ * Compute the level of detail (float).
+ */
+ if (min_filter != mag_filter ||
+ mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ /* Need to compute lod either to choose mipmap levels or to
+ * distinguish between minification/magnification with one mipmap level.
+ */
+ lod = lp_build_lod_selector(bld, ddx, ddy,
+ lod_bias, explicit_lod,
+ width, height, depth);
+ }
+
+ /*
+ * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
+ * If mipfilter=linear, also compute the weight between the two
+ * mipmap levels: lod_fpart
+ */
+ switch (mip_filter) {
+ default:
+ assert(0 && "bad mip_filter value in lp_build_sample_aos()");
+ /* fall-through */
+ case PIPE_TEX_MIPFILTER_NONE:
+ /* always use mip level 0 */
+ if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ /* XXX this is a work-around for an apparent bug in LLVM 2.7.
+ * We should be able to set ilevel0 = const(0) but that causes
+ * bad x86 code to be emitted.
+ */
+ lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
+ lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ }
+ else {
+ ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ }
+ break;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ assert(lod);
+ lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ {
+ LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0);
+ LLVMValueRef i255 = lp_build_const_int32(255);
+ LLVMTypeRef i16_type = LLVMIntType(16);
+
+ assert(lod);
+
+ lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
+ &lod_fpart);
+ lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, "");
+ lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart);
+ lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, "");
+ lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, "");
+ lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart);
+
+ /* the lod_fpart values will be fixed pt values in [0,1) */
+ }
+ break;
+ }
+
+ /* compute image size(s) of source mipmap level(s) */
+ lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec,
+ ilevel0, ilevel1,
+ row_stride_array, img_stride_array,
+ &width0_vec, &width1_vec,
+ &height0_vec, &height1_vec,
+ &depth0_vec, &depth1_vec,
+ &row_stride0_vec, &row_stride1_vec,
+ &img_stride0_vec, &img_stride1_vec);
+
+ /*
+ * Get pointer(s) to image data for mipmap level(s).
+ */
+ data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
+ }
+
+
+ /*
+ * Get/interpolate texture colors.
+ */
+ if (min_filter == mag_filter) {
+ /* no need to distinquish between minification and magnification */
+ lp_build_sample_mipmap(bld, min_filter, mip_filter,
+ s, t, r, lod_fpart,
+ width0_vec, width1_vec,
+ height0_vec, height1_vec,
+ depth0_vec, depth1_vec,
+ row_stride0_vec, row_stride1_vec,
+ img_stride0_vec, img_stride1_vec,
+ data_ptr0, data_ptr1,
+ &packed_lo, &packed_hi);
+ }
+ else {
+ /* Emit conditional to choose min image filter or mag image filter
+ * depending on the lod being > 0 or <= 0, respectively.
+ */
+ struct lp_build_flow_context *flow_ctx;
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef minify;
+
+ flow_ctx = lp_build_flow_create(builder);
+ lp_build_flow_scope_begin(flow_ctx);
+
+ packed_lo = LLVMGetUndef(h16_vec_type);
+ packed_hi = LLVMGetUndef(h16_vec_type);
+
+ lp_build_flow_scope_declare(flow_ctx, &packed_lo);
+ lp_build_flow_scope_declare(flow_ctx, &packed_hi);
+
+ /* minify = lod > 0.0 */
+ minify = LLVMBuildFCmp(builder, LLVMRealUGE,
+ lod, float_bld->zero, "");
+
+ lp_build_if(&if_ctx, flow_ctx, builder, minify);
+ {
+ /* Use the minification filter */
+ lp_build_sample_mipmap(bld, min_filter, mip_filter,
+ s, t, r, lod_fpart,
+ width0_vec, width1_vec,
+ height0_vec, height1_vec,
+ depth0_vec, depth1_vec,
+ row_stride0_vec, row_stride1_vec,
+ img_stride0_vec, img_stride1_vec,
+ data_ptr0, data_ptr1,
+ &packed_lo, &packed_hi);
+ }
+ lp_build_else(&if_ctx);
+ {
+ /* Use the magnification filter */
+ lp_build_sample_mipmap(bld, mag_filter, mip_filter,
+ s, t, r, lod_fpart,
+ width0_vec, width1_vec,
+ height0_vec, height1_vec,
+ depth0_vec, depth1_vec,
+ row_stride0_vec, row_stride1_vec,
+ img_stride0_vec, img_stride1_vec,
+ data_ptr0, data_ptr1,
+ &packed_lo, &packed_hi);
+ }
+ lp_build_endif(&if_ctx);
+
+ lp_build_flow_scope_end(flow_ctx);
+ lp_build_flow_destroy(flow_ctx);
+ }
+
+ /* combine 'packed_lo', 'packed_hi' into 'packed' */
+ {
+ struct lp_build_context h16, u8n;
+
+ lp_build_context_init(&h16, builder, lp_type_ufixed(16));
+ lp_build_context_init(&u8n, builder, lp_type_unorm(8));
+
+ packed = lp_build_pack2(builder, h16.type, u8n.type,
+ packed_lo, packed_hi);
+ }
+
+ /*
+ * Convert to SoA and swizzle.
+ */
+ lp_build_rgba8_to_f32_soa(builder,
+ bld->texel_type,
+ packed, unswizzled);
+
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ lp_build_format_swizzle_soa(bld->format_desc,
+ &bld->texel_bld,
+ unswizzled, texel_out);
+ }
+ else {
+ texel_out[0] = unswizzled[0];
+ texel_out[1] = unswizzled[1];
+ texel_out[2] = unswizzled[2];
+ texel_out[3] = unswizzled[3];
+ }
+
+ apply_sampler_swizzle(bld, texel_out);
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
new file mode 100644
index 0000000000..e1045bbbc2
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
@@ -0,0 +1,65 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling -- SoA.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ * @author Brian Paul <brianp@vmware.com>
+ */
+
+#ifndef LP_BLD_SAMPLE_AOS_H
+#define LP_BLD_SAMPLE_AOS_H
+
+
+#include "lp_bld_sample.h"
+
+
+void
+lp_build_sample_aos(struct lp_build_sample_context *bld,
+ unsigned unit,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef depth,
+ LLVMValueRef width_vec,
+ LLVMValueRef height_vec,
+ LLVMValueRef depth_vec,
+ LLVMValueRef row_stride_array,
+ LLVMValueRef img_stride_array,
+ LLVMValueRef data_array,
+ LLVMValueRef texel_out[4]);
+
+
+#endif /* LP_BLD_SAMPLE_AOS_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index baf0402f56..f61f23efd1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -40,6 +40,7 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
@@ -52,49 +53,11 @@
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
+#include "lp_bld_sample_aos.h"
#include "lp_bld_quad.h"
/**
- * Keep all information for sampling code generation in a single place.
- */
-struct lp_build_sample_context
-{
- LLVMBuilderRef builder;
-
- const struct lp_sampler_static_state *static_state;
-
- struct lp_sampler_dynamic_state *dynamic_state;
-
- const struct util_format_description *format_desc;
-
- /** regular scalar float type */
- struct lp_type float_type;
- struct lp_build_context float_bld;
-
- /** regular scalar float type */
- struct lp_type int_type;
- struct lp_build_context int_bld;
-
- /** Incoming coordinates type and build context */
- struct lp_type coord_type;
- struct lp_build_context coord_bld;
-
- /** Unsigned integer coordinates */
- struct lp_type uint_coord_type;
- struct lp_build_context uint_coord_bld;
-
- /** Signed integer coordinates */
- struct lp_type int_coord_type;
- struct lp_build_context int_coord_bld;
-
- /** Output texels type and build context */
- struct lp_type texel_type;
- struct lp_build_context texel_bld;
-};
-
-
-/**
* Does the given texture wrap mode allow sampling the texture border color?
* XXX maybe move this into gallium util code.
*/
@@ -119,95 +82,10 @@ wrap_mode_uses_border_color(unsigned mode)
}
-static LLVMValueRef
-lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
- LLVMValueRef data_array, LLVMValueRef level)
-{
- LLVMValueRef indexes[2], data_ptr;
- indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
- indexes[1] = level;
- data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
- data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
- return data_ptr;
-}
-
-
-static LLVMValueRef
-lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
- LLVMValueRef data_array, int level)
-{
- LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
- return lp_build_get_mipmap_level(bld, data_array, lvl);
-}
-
-
-/**
- * Dereference stride_array[mipmap_level] array to get a stride.
- * Return stride as a vector.
- */
-static LLVMValueRef
-lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
- LLVMValueRef stride_array, LLVMValueRef level)
-{
- LLVMValueRef indexes[2], stride;
- indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
- indexes[1] = level;
- stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
- stride = LLVMBuildLoad(bld->builder, stride, "");
- stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
- return stride;
-}
-
-
-/** Dereference stride_array[0] array to get a stride (as vector). */
-static LLVMValueRef
-lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
- LLVMValueRef stride_array, int level)
-{
- LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
- return lp_build_get_level_stride_vec(bld, stride_array, lvl);
-}
-
-
-static int
-texture_dims(enum pipe_texture_target tex)
-{
- switch (tex) {
- case PIPE_TEXTURE_1D:
- return 1;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_CUBE:
- return 2;
- case PIPE_TEXTURE_3D:
- return 3;
- default:
- assert(0 && "bad texture target in texture_dims()");
- return 2;
- }
-}
-
-
-static void
-apply_sampler_swizzle(struct lp_build_sample_context *bld,
- LLVMValueRef *texel)
-{
- unsigned char swizzles[4];
-
- swizzles[0] = bld->static_state->swizzle_r;
- swizzles[1] = bld->static_state->swizzle_g;
- swizzles[2] = bld->static_state->swizzle_b;
- swizzles[3] = bld->static_state->swizzle_a;
-
- lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles);
-}
-
-
-
/**
* Generate code to fetch a texel from a texture at int coords (x, y, z).
* The computation depends on whether the texture is 1D, 2D or 3D.
- * The result, texel, will be:
+ * The result, texel, will be float vectors:
* texel[0] = red values
* texel[1] = green values
* texel[2] = blue values
@@ -356,204 +234,6 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld,
/**
- * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at this time.
- * Return whether the given mode is supported by that function.
- */
-static boolean
-is_simple_wrap_mode(unsigned mode)
-{
- switch (mode) {
- case PIPE_TEX_WRAP_REPEAT:
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-
-/**
- * Build LLVM code for texture wrap mode, for scaled integer texcoords.
- * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
- * \param length the texture size along one dimension
- * \param is_pot if TRUE, length is a power of two
- * \param wrap_mode one of PIPE_TEX_WRAP_x
- * \param i0 resulting sub-block pixel coordinate for coord0
- */
-static void
-lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
- unsigned block_length,
- LLVMValueRef coord,
- LLVMValueRef length,
- LLVMValueRef stride,
- boolean is_pot,
- unsigned wrap_mode,
- LLVMValueRef *out_offset,
- LLVMValueRef *out_i)
-{
- struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
- struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- LLVMValueRef length_minus_one;
-
- length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
-
- switch(wrap_mode) {
- case PIPE_TEX_WRAP_REPEAT:
- if(is_pot)
- coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
- else
- /* Signed remainder won't give the right results for negative
- * dividends but unsigned remainder does.*/
- coord = LLVMBuildURem(bld->builder, coord, length, "");
- break;
-
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
- coord = lp_build_min(int_coord_bld, coord, length_minus_one);
- break;
-
- case PIPE_TEX_WRAP_CLAMP:
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- default:
- assert(0);
- }
-
- lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
- out_offset, out_i);
-}
-
-
-/**
- * Build LLVM code for texture wrap mode, for scaled integer texcoords.
- * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
- * \param length the texture size along one dimension
- * \param stride pixel stride along the coordinate axis
- * \param block_length is the length of the pixel block along the
- * coordinate axis
- * \param is_pot if TRUE, length is a power of two
- * \param wrap_mode one of PIPE_TEX_WRAP_x
- * \param offset0 resulting relative offset for coord0
- * \param offset1 resulting relative offset for coord0 + 1
- * \param i0 resulting sub-block pixel coordinate for coord0
- * \param i1 resulting sub-block pixel coordinate for coord0 + 1
- */
-static void
-lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
- unsigned block_length,
- LLVMValueRef coord0,
- LLVMValueRef length,
- LLVMValueRef stride,
- boolean is_pot,
- unsigned wrap_mode,
- LLVMValueRef *offset0,
- LLVMValueRef *offset1,
- LLVMValueRef *i0,
- LLVMValueRef *i1)
-{
- struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
- struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- LLVMValueRef length_minus_one;
- LLVMValueRef lmask, umask, mask;
-
- if (block_length != 1) {
- /*
- * If the pixel block covers more than one pixel then there is no easy
- * way to calculate offset1 relative to offset0. Instead, compute them
- * independently.
- */
-
- LLVMValueRef coord1;
-
- lp_build_sample_wrap_nearest_int(bld,
- block_length,
- coord0,
- length,
- stride,
- is_pot,
- wrap_mode,
- offset0, i0);
-
- coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
-
- lp_build_sample_wrap_nearest_int(bld,
- block_length,
- coord1,
- length,
- stride,
- is_pot,
- wrap_mode,
- offset1, i1);
-
- return;
- }
-
- /*
- * Scalar pixels -- try to compute offset0 and offset1 with a single stride
- * multiplication.
- */
-
- *i0 = uint_coord_bld->zero;
- *i1 = uint_coord_bld->zero;
-
- length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
-
- switch(wrap_mode) {
- case PIPE_TEX_WRAP_REPEAT:
- if (is_pot) {
- coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
- }
- else {
- /* Signed remainder won't give the right results for negative
- * dividends but unsigned remainder does.*/
- coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
- }
-
- mask = lp_build_compare(bld->builder, int_coord_bld->type,
- PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
-
- *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
- *offset1 = LLVMBuildAnd(bld->builder,
- lp_build_add(uint_coord_bld, *offset0, stride),
- mask, "");
- break;
-
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
- PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
- umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
- PIPE_FUNC_LESS, coord0, length_minus_one);
-
- coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
- coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
-
- mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
-
- *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
- *offset1 = lp_build_add(uint_coord_bld,
- *offset0,
- LLVMBuildAnd(bld->builder, stride, mask, ""));
- break;
-
- case PIPE_TEX_WRAP_CLAMP:
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- default:
- assert(0);
- *offset0 = uint_coord_bld->zero;
- *offset1 = uint_coord_bld->zero;
- break;
- }
-}
-
-
-/**
* Build LLVM code for texture wrap mode for linear filtering.
* \param x0_out returns first integer texcoord
* \param x1_out returns second integer texcoord
@@ -765,7 +445,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
/**
* Build LLVM code for texture wrap mode for nearest filtering.
* \param coord the incoming texcoord (nominally in [0,1])
- * \param length the texture size along one dimension, as int
+ * \param length the texture size along one dimension, as int vector
* \param is_pot if TRUE, length is a power of two
* \param wrap_mode one of PIPE_TEX_WRAP_x
*/
@@ -882,198 +562,6 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
/**
- * Codegen equivalent for u_minify().
- * Return max(1, base_size >> level);
- */
-static LLVMValueRef
-lp_build_minify(struct lp_build_sample_context *bld,
- LLVMValueRef base_size,
- LLVMValueRef level)
-{
- LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
- size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
- return size;
-}
-
-
-/**
- * Generate code to compute texture level of detail (lambda).
- * \param ddx partial derivatives of (s, t, r, q) with respect to X
- * \param ddy partial derivatives of (s, t, r, q) with respect to Y
- * \param lod_bias optional float vector with the shader lod bias
- * \param explicit_lod optional float vector with the explicit lod
- * \param width scalar int texture width
- * \param height scalar int texture height
- * \param depth scalar int texture depth
- *
- * XXX: The resulting lod is scalar, so ignore all but the first element of
- * derivatives, lod_bias, etc that are passed by the shader.
- */
-static LLVMValueRef
-lp_build_lod_selector(struct lp_build_sample_context *bld,
- const LLVMValueRef ddx[4],
- const LLVMValueRef ddy[4],
- LLVMValueRef lod_bias, /* optional */
- LLVMValueRef explicit_lod, /* optional */
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth)
-
-{
- if (bld->static_state->min_lod == bld->static_state->max_lod) {
- /* User is forcing sampling from a particular mipmap level.
- * This is hit during mipmap generation.
- */
- return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
- }
- else {
- struct lp_build_context *float_bld = &bld->float_bld;
- LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
- bld->static_state->lod_bias);
- LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
- bld->static_state->min_lod);
- LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
- bld->static_state->max_lod);
- LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
- LLVMValueRef lod;
-
- if (explicit_lod) {
- lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
- index0, "");
- }
- else {
- const int dims = texture_dims(bld->static_state->target);
- LLVMValueRef dsdx, dsdy;
- LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
- LLVMValueRef rho;
-
- dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
- dsdx = lp_build_abs(float_bld, dsdx);
- dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
- dsdy = lp_build_abs(float_bld, dsdy);
- if (dims > 1) {
- dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
- dtdx = lp_build_abs(float_bld, dtdx);
- dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
- dtdy = lp_build_abs(float_bld, dtdy);
- if (dims > 2) {
- drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
- drdx = lp_build_abs(float_bld, drdx);
- drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
- drdy = lp_build_abs(float_bld, drdy);
- }
- }
-
- /* Compute rho = max of all partial derivatives scaled by texture size.
- * XXX this could be vectorized somewhat
- */
- rho = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, dsdx, dsdy),
- lp_build_int_to_float(float_bld, width), "");
- if (dims > 1) {
- LLVMValueRef max;
- max = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, dtdx, dtdy),
- lp_build_int_to_float(float_bld, height), "");
- rho = lp_build_max(float_bld, rho, max);
- if (dims > 2) {
- max = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, drdx, drdy),
- lp_build_int_to_float(float_bld, depth), "");
- rho = lp_build_max(float_bld, rho, max);
- }
- }
-
- /* compute lod = log2(rho) */
- lod = lp_build_log2(float_bld, rho);
-
- /* add shader lod bias */
- if (lod_bias) {
- lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
- index0, "");
- lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
- }
- }
-
- /* add sampler lod bias */
- lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
-
- /* clamp lod */
- lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
-
- return lod;
- }
-}
-
-
-/**
- * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
- * mipmap level index.
- * Note: this is all scalar code.
- * \param lod scalar float texture level of detail
- * \param level_out returns integer
- */
-static void
-lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
- unsigned unit,
- LLVMValueRef lod,
- LLVMValueRef *level_out)
-{
- struct lp_build_context *float_bld = &bld->float_bld;
- struct lp_build_context *int_bld = &bld->int_bld;
- LLVMValueRef last_level, level;
-
- LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
-
- last_level = bld->dynamic_state->last_level(bld->dynamic_state,
- bld->builder, unit);
-
- /* convert float lod to integer */
- level = lp_build_iround(float_bld, lod);
-
- /* clamp level to legal range of levels */
- *level_out = lp_build_clamp(int_bld, level, zero, last_level);
-}
-
-
-/**
- * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
- * two (adjacent) mipmap level indexes. Later, we'll sample from those
- * two mipmap levels and interpolate between them.
- */
-static void
-lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
- unsigned unit,
- LLVMValueRef lod,
- LLVMValueRef *level0_out,
- LLVMValueRef *level1_out,
- LLVMValueRef *weight_out)
-{
- struct lp_build_context *float_bld = &bld->float_bld;
- struct lp_build_context *int_bld = &bld->int_bld;
- LLVMValueRef last_level, level;
-
- last_level = bld->dynamic_state->last_level(bld->dynamic_state,
- bld->builder, unit);
-
- /* convert float lod to integer */
- level = lp_build_ifloor(float_bld, lod);
-
- /* compute level 0 and clamp to legal range of levels */
- *level0_out = lp_build_clamp(int_bld, level,
- int_bld->zero,
- last_level);
- /* compute level 1 and clamp to legal range of levels */
- level = lp_build_add(int_bld, level, int_bld->one);
- *level1_out = lp_build_clamp(int_bld, level,
- int_bld->zero,
- last_level);
-
- *weight_out = lp_build_fract(float_bld, lod);
-}
-
-
-/**
* Generate code to sample a mipmap level with nearest filtering.
* If sampling a cube texture, r = cube face in [0,5].
*/
@@ -1291,207 +779,6 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
}
-/** Helper used by lp_build_cube_lookup() */
-static LLVMValueRef
-lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
-{
- /* ima = -0.5 / abs(coord); */
- LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
- LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
- LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
- return ima;
-}
-
-
-/**
- * Helper used by lp_build_cube_lookup()
- * \param sign scalar +1 or -1
- * \param coord float vector
- * \param ima float vector
- */
-static LLVMValueRef
-lp_build_cube_coord(struct lp_build_context *coord_bld,
- LLVMValueRef sign, int negate_coord,
- LLVMValueRef coord, LLVMValueRef ima)
-{
- /* return negate(coord) * ima * sign + 0.5; */
- LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
- LLVMValueRef res;
-
- assert(negate_coord == +1 || negate_coord == -1);
-
- if (negate_coord == -1) {
- coord = lp_build_negate(coord_bld, coord);
- }
-
- res = lp_build_mul(coord_bld, coord, ima);
- if (sign) {
- sign = lp_build_broadcast_scalar(coord_bld, sign);
- res = lp_build_mul(coord_bld, res, sign);
- }
- res = lp_build_add(coord_bld, res, half);
-
- return res;
-}
-
-
-/** Helper used by lp_build_cube_lookup()
- * Return (major_coord >= 0) ? pos_face : neg_face;
- */
-static LLVMValueRef
-lp_build_cube_face(struct lp_build_sample_context *bld,
- LLVMValueRef major_coord,
- unsigned pos_face, unsigned neg_face)
-{
- LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
- major_coord,
- bld->float_bld.zero, "");
- LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
- LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
- LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
- return res;
-}
-
-
-
-/**
- * Generate code to do cube face selection and compute per-face texcoords.
- */
-static void
-lp_build_cube_lookup(struct lp_build_sample_context *bld,
- LLVMValueRef s,
- LLVMValueRef t,
- LLVMValueRef r,
- LLVMValueRef *face,
- LLVMValueRef *face_s,
- LLVMValueRef *face_t)
-{
- struct lp_build_context *float_bld = &bld->float_bld;
- struct lp_build_context *coord_bld = &bld->coord_bld;
- LLVMValueRef rx, ry, rz;
- LLVMValueRef arx, ary, arz;
- LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
- LLVMValueRef arx_ge_ary, arx_ge_arz;
- LLVMValueRef ary_ge_arx, ary_ge_arz;
- LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
- LLVMValueRef rx_pos, ry_pos, rz_pos;
-
- assert(bld->coord_bld.type.length == 4);
-
- /*
- * Use the average of the four pixel's texcoords to choose the face.
- */
- rx = lp_build_mul(float_bld, c25,
- lp_build_sum_vector(&bld->coord_bld, s));
- ry = lp_build_mul(float_bld, c25,
- lp_build_sum_vector(&bld->coord_bld, t));
- rz = lp_build_mul(float_bld, c25,
- lp_build_sum_vector(&bld->coord_bld, r));
-
- arx = lp_build_abs(float_bld, rx);
- ary = lp_build_abs(float_bld, ry);
- arz = lp_build_abs(float_bld, rz);
-
- /*
- * Compare sign/magnitude of rx,ry,rz to determine face
- */
- arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
- arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
- ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
- ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
-
- arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
- ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
-
- rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
- ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
- rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
-
- {
- struct lp_build_flow_context *flow_ctx;
- struct lp_build_if_state if_ctx;
-
- flow_ctx = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx);
-
- *face_s = bld->coord_bld.undef;
- *face_t = bld->coord_bld.undef;
- *face = bld->int_bld.undef;
-
- lp_build_name(*face_s, "face_s");
- lp_build_name(*face_t, "face_t");
- lp_build_name(*face, "face");
-
- lp_build_flow_scope_declare(flow_ctx, face_s);
- lp_build_flow_scope_declare(flow_ctx, face_t);
- lp_build_flow_scope_declare(flow_ctx, face);
-
- lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
- {
- /* +/- X face */
- LLVMValueRef sign = lp_build_sgn(float_bld, rx);
- LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
- *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
- *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
- *face = lp_build_cube_face(bld, rx,
- PIPE_TEX_FACE_POS_X,
- PIPE_TEX_FACE_NEG_X);
- }
- lp_build_else(&if_ctx);
- {
- struct lp_build_flow_context *flow_ctx2;
- struct lp_build_if_state if_ctx2;
-
- LLVMValueRef face_s2 = bld->coord_bld.undef;
- LLVMValueRef face_t2 = bld->coord_bld.undef;
- LLVMValueRef face2 = bld->int_bld.undef;
-
- flow_ctx2 = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx2);
- lp_build_flow_scope_declare(flow_ctx2, &face_s2);
- lp_build_flow_scope_declare(flow_ctx2, &face_t2);
- lp_build_flow_scope_declare(flow_ctx2, &face2);
-
- ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
-
- lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
- {
- /* +/- Y face */
- LLVMValueRef sign = lp_build_sgn(float_bld, ry);
- LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
- face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
- face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
- face2 = lp_build_cube_face(bld, ry,
- PIPE_TEX_FACE_POS_Y,
- PIPE_TEX_FACE_NEG_Y);
- }
- lp_build_else(&if_ctx2);
- {
- /* +/- Z face */
- LLVMValueRef sign = lp_build_sgn(float_bld, rz);
- LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
- face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
- face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
- face2 = lp_build_cube_face(bld, rz,
- PIPE_TEX_FACE_POS_Z,
- PIPE_TEX_FACE_NEG_Z);
- }
- lp_build_endif(&if_ctx2);
- lp_build_flow_scope_end(flow_ctx2);
- lp_build_flow_destroy(flow_ctx2);
- *face_s = face_s2;
- *face_t = face_t2;
- *face = face2;
- }
-
- lp_build_endif(&if_ctx);
- lp_build_flow_scope_end(flow_ctx);
- lp_build_flow_destroy(flow_ctx);
- }
-}
-
-
-
/**
* Sample the texture/mipmap using given image filter and mip filter.
* data0_ptr and data1_ptr point to the two mipmap levels to sample
@@ -1605,7 +892,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
const unsigned mag_filter = bld->static_state->mag_img_filter;
const int dims = texture_dims(bld->static_state->target);
LLVMValueRef lod = NULL, lod_fpart = NULL;
- LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
+ LLVMValueRef ilevel0, ilevel1 = NULL;
LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
@@ -1685,47 +972,15 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
}
}
- /*
- * Convert scalar integer mipmap levels into vectors.
- */
- ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
- ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
-
- /*
- * Compute width, height at mipmap level 'ilevel0'
- */
- width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
- if (dims >= 2) {
- height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
- row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
- ilevel0);
- if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
- img_stride0_vec = lp_build_get_level_stride_vec(bld,
- img_stride_array,
- ilevel0);
- if (dims == 3) {
- depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
- }
- }
- }
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* compute width, height, depth for second mipmap level at 'ilevel1' */
- width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
- if (dims >= 2) {
- height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
- row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
- ilevel1);
- if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
- img_stride1_vec = lp_build_get_level_stride_vec(bld,
- img_stride_array,
- ilevel1);
- if (dims ==3) {
- depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
- }
- }
- }
- }
+ /* compute image size(s) of source mipmap level(s) */
+ lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec,
+ ilevel0, ilevel1,
+ row_stride_array, img_stride_array,
+ &width0_vec, &width1_vec,
+ &height0_vec, &height1_vec,
+ &depth0_vec, &depth1_vec,
+ &row_stride0_vec, &row_stride1_vec,
+ &img_stride0_vec, &img_stride1_vec);
/*
* Get pointer(s) to image data for mipmap level(s).
@@ -1803,258 +1058,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
}
-
-static void
-lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
- LLVMValueRef s,
- LLVMValueRef t,
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef stride_array,
- LLVMValueRef data_array,
- LLVMValueRef texel_out[4])
-{
- LLVMBuilderRef builder = bld->builder;
- struct lp_build_context i32, h16, u8n;
- LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
- LLVMValueRef i32_c8, i32_c128, i32_c255;
- LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
- LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
- LLVMValueRef data_ptr;
- LLVMValueRef x_stride, y_stride;
- LLVMValueRef x_offset0, x_offset1;
- LLVMValueRef y_offset0, y_offset1;
- LLVMValueRef offset[2][2];
- LLVMValueRef x_subcoord[2], y_subcoord[2];
- LLVMValueRef neighbors_lo[2][2];
- LLVMValueRef neighbors_hi[2][2];
- LLVMValueRef packed, packed_lo, packed_hi;
- LLVMValueRef unswizzled[4];
- const unsigned level = 0;
- unsigned i, j;
-
- assert(bld->static_state->target == PIPE_TEXTURE_2D
- || bld->static_state->target == PIPE_TEXTURE_RECT);
- assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
- assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
- assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
-
- lp_build_context_init(&i32, builder, lp_type_int_vec(32));
- lp_build_context_init(&h16, builder, lp_type_ufixed(16));
- lp_build_context_init(&u8n, builder, lp_type_unorm(8));
-
- i32_vec_type = lp_build_vec_type(i32.type);
- h16_vec_type = lp_build_vec_type(h16.type);
- u8n_vec_type = lp_build_vec_type(u8n.type);
-
- if (bld->static_state->normalized_coords) {
- LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
- LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
- LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
- s = lp_build_mul(&bld->coord_bld, s, fp_width);
- t = lp_build_mul(&bld->coord_bld, t, fp_height);
- }
-
- /* scale coords by 256 (8 fractional bits) */
- s = lp_build_mul_imm(&bld->coord_bld, s, 256);
- t = lp_build_mul_imm(&bld->coord_bld, t, 256);
-
- /* convert float to int */
- s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
- t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
-
- /* subtract 0.5 (add -128) */
- i32_c128 = lp_build_const_int_vec(i32.type, -128);
- s = LLVMBuildAdd(builder, s, i32_c128, "");
- t = LLVMBuildAdd(builder, t, i32_c128, "");
-
- /* compute floor (shift right 8) */
- i32_c8 = lp_build_const_int_vec(i32.type, 8);
- s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
- t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
-
- /* compute fractional part (AND with 0xff) */
- i32_c255 = lp_build_const_int_vec(i32.type, 255);
- s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
- t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
-
- x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
- bld->format_desc->block.bits/8);
-
- y_stride = lp_build_get_const_level_stride_vec(bld, stride_array, level);
-
- lp_build_sample_wrap_linear_int(bld,
- bld->format_desc->block.width,
- s_ipart, width, x_stride,
- bld->static_state->pot_width,
- bld->static_state->wrap_s,
- &x_offset0, &x_offset1,
- &x_subcoord[0], &x_subcoord[1]);
- lp_build_sample_wrap_linear_int(bld,
- bld->format_desc->block.height,
- t_ipart, height, y_stride,
- bld->static_state->pot_height,
- bld->static_state->wrap_t,
- &y_offset0, &y_offset1,
- &y_subcoord[0], &y_subcoord[1]);
-
- offset[0][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset0);
- offset[0][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset0);
- offset[1][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset1);
- offset[1][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset1);
-
- /*
- * Transform 4 x i32 in
- *
- * s_fpart = {s0, s1, s2, s3}
- *
- * into 8 x i16
- *
- * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
- *
- * into two 8 x i16
- *
- * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
- * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
- *
- * and likewise for t_fpart. There is no risk of loosing precision here
- * since the fractional parts only use the lower 8bits.
- */
-
- s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
- t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
-
- {
- LLVMTypeRef elem_type = LLVMInt32Type();
- LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
- LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
- LLVMValueRef shuffle_lo;
- LLVMValueRef shuffle_hi;
-
- for(j = 0; j < h16.type.length; j += 4) {
-#ifdef PIPE_ARCH_LITTLE_ENDIAN
- unsigned subindex = 0;
-#else
- unsigned subindex = 1;
-#endif
- LLVMValueRef index;
-
- index = LLVMConstInt(elem_type, j/2 + subindex, 0);
- for(i = 0; i < 4; ++i)
- shuffles_lo[j + i] = index;
-
- index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
- for(i = 0; i < 4; ++i)
- shuffles_hi[j + i] = index;
- }
-
- shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
- shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
-
- s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
- t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
- s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
- t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
- }
-
- /*
- * get pointer to mipmap level 0 data
- */
- data_ptr = lp_build_get_const_mipmap_level(bld, data_array, level);
-
- /*
- * Fetch the pixels as 4 x 32bit (rgba order might differ):
- *
- * rgba0 rgba1 rgba2 rgba3
- *
- * bit cast them into 16 x u8
- *
- * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
- *
- * unpack them into two 8 x i16:
- *
- * r0 g0 b0 a0 r1 g1 b1 a1
- * r2 g2 b2 a2 r3 g3 b3 a3
- *
- * The higher 8 bits of the resulting elements will be zero.
- */
-
- for (j = 0; j < 2; ++j) {
- for (i = 0; i < 2; ++i) {
- LLVMValueRef rgba8;
-
- if (util_format_is_rgba8_variant(bld->format_desc)) {
- /*
- * Given the format is a rgba8, just read the pixels as is,
- * without any swizzling. Swizzling will be done later.
- */
- rgba8 = lp_build_gather(bld->builder,
- bld->texel_type.length,
- bld->format_desc->block.bits,
- bld->texel_type.width,
- data_ptr, offset[j][i]);
-
- rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
-
- }
- else {
- rgba8 = lp_build_fetch_rgba_aos(bld->builder,
- bld->format_desc,
- u8n.type,
- data_ptr, offset[j][i],
- x_subcoord[i],
- y_subcoord[j]);
- }
-
- lp_build_unpack2(builder, u8n.type, h16.type,
- rgba8,
- &neighbors_lo[j][i], &neighbors_hi[j][i]);
- }
- }
-
- /*
- * Linear interpolate with 8.8 fixed point.
- */
-
- packed_lo = lp_build_lerp_2d(&h16,
- s_fpart_lo, t_fpart_lo,
- neighbors_lo[0][0],
- neighbors_lo[0][1],
- neighbors_lo[1][0],
- neighbors_lo[1][1]);
-
- packed_hi = lp_build_lerp_2d(&h16,
- s_fpart_hi, t_fpart_hi,
- neighbors_hi[0][0],
- neighbors_hi[0][1],
- neighbors_hi[1][0],
- neighbors_hi[1][1]);
-
- packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
-
- /*
- * Convert to SoA and swizzle.
- */
-
- lp_build_rgba8_to_f32_soa(bld->builder,
- bld->texel_type,
- packed, unswizzled);
-
- if (util_format_is_rgba8_variant(bld->format_desc)) {
- lp_build_format_swizzle_soa(bld->format_desc,
- &bld->texel_bld,
- unswizzled, texel_out);
- } else {
- texel_out[0] = unswizzled[0];
- texel_out[1] = unswizzled[1];
- texel_out[2] = unswizzled[2];
- texel_out[3] = unswizzled[3];
- }
-
- apply_sampler_swizzle(bld, texel_out);
-}
-
-
static void
lp_build_sample_compare(struct lp_build_sample_context *bld,
LLVMValueRef p,
@@ -2181,6 +1184,7 @@ lp_build_sample_soa(LLVMBuilderRef builder,
t = coords[1];
r = coords[2];
+ /* width, height, depth as uint vectors */
width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
@@ -2190,27 +1194,32 @@ lp_build_sample_soa(LLVMBuilderRef builder,
lp_build_sample_nop(&bld, texel_out);
}
else if (util_format_fits_8unorm(bld.format_desc) &&
- (static_state->target == PIPE_TEXTURE_2D ||
- static_state->target == PIPE_TEXTURE_RECT) &&
- static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
- static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
- static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
- is_simple_wrap_mode(static_state->wrap_s) &&
- is_simple_wrap_mode(static_state->wrap_t)) {
- /* special case */
- lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
- row_stride_array, data_array, texel_out);
+ lp_is_simple_wrap_mode(static_state->wrap_s) &&
+ lp_is_simple_wrap_mode(static_state->wrap_t)) {
+ /* do sampling/filtering with fixed pt arithmetic */
+ printf("new sample\n");
+ lp_build_sample_aos(&bld, unit, s, t, r, ddx, ddy,
+ lod_bias, explicit_lod,
+ width, height, depth,
+ width_vec, height_vec, depth_vec,
+ row_stride_array, img_stride_array,
+ data_array, texel_out);
}
+
else {
- if (gallivm_debug & GALLIVM_DEBUG_PERF &&
- (static_state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
- static_state->mag_img_filter != PIPE_TEX_FILTER_NEAREST ||
- static_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) &&
+ if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
util_format_fits_8unorm(bld.format_desc)) {
debug_printf("%s: using floating point linear filtering for %s\n",
__FUNCTION__, bld.format_desc->short_name);
+ debug_printf(" min_img %d mag_img %d mip %d wraps %d wrapt %d\n",
+ static_state->min_img_filter,
+ static_state->mag_img_filter,
+ static_state->min_mip_filter,
+ static_state->wrap_s,
+ static_state->wrap_t);
}
+ printf("old sample\n");
lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
lod_bias, explicit_lod,
width, height, depth,