summary refs log tree commit diff
path: root/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
diff options
context:
space:
mode:
author Brian Paul <brianp@vmware.com> 2010-09-15 17:04:26 -0600
committer Brian Paul <brianp@vmware.com> 2010-09-15 17:04:31 -0600
commit 0a7824862eb753878fa79b153b2a111884ff1197 (patch)
tree 103c33d4cadc6fe323c810c81be91616c3a0019b /src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
parent 95254bbd2ddf0c6207a642604cc218ac9d711501 (diff)
gallivm: expand AoS sampling to cover all filtering modes
...and all texture targets (1D/2D/3D/CUBE).
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c')
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c 1059
1 files changed, 34 insertions, 1025 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index baf0402f56..f61f23efd1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -40,6 +40,7 @@
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
@@ -52,49 +53,11 @@
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
+#include "lp_bld_sample_aos.h"
#include "lp_bld_quad.h"
/**
- * Keep all information for sampling code generation in a single place.
- */
-struct lp_build_sample_context
-{
- LLVMBuilderRef builder;
-
- const struct lp_sampler_static_state *static_state;
-
- struct lp_sampler_dynamic_state *dynamic_state;
-
- const struct util_format_description *format_desc;
-
- /** regular scalar float type */
- struct lp_type float_type;
- struct lp_build_context float_bld;
-
- /** regular scalar int type */
- struct lp_type int_type;
- struct lp_build_context int_bld;
-
- /** Incoming coordinates type and build context */
- struct lp_type coord_type;
- struct lp_build_context coord_bld;
-
- /** Unsigned integer coordinates */
- struct lp_type uint_coord_type;
- struct lp_build_context uint_coord_bld;
-
- /** Signed integer coordinates */
- struct lp_type int_coord_type;
- struct lp_build_context int_coord_bld;
-
- /** Output texels type and build context */
- struct lp_type texel_type;
- struct lp_build_context texel_bld;
-};
-
-
-/**
* Does the given texture wrap mode allow sampling the texture border color?
* XXX maybe move this into gallium util code.
*/
@@ -119,95 +82,10 @@ wrap_mode_uses_border_color(unsigned mode)
}
-static LLVMValueRef
-lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
- LLVMValueRef data_array, LLVMValueRef level)
-{
- LLVMValueRef indexes[2], data_ptr;
- indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
- indexes[1] = level;
- data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
- data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
- return data_ptr;
-}
-
-
-static LLVMValueRef
-lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
- LLVMValueRef data_array, int level)
-{
- LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
- return lp_build_get_mipmap_level(bld, data_array, lvl);
-}
-
-
-/**
- * Dereference stride_array[mipmap_level] array to get a stride.
- * Return stride as a vector.
- */
-static LLVMValueRef
-lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
- LLVMValueRef stride_array, LLVMValueRef level)
-{
- LLVMValueRef indexes[2], stride;
- indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
- indexes[1] = level;
- stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
- stride = LLVMBuildLoad(bld->builder, stride, "");
- stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
- return stride;
-}
-
-
-/** Dereference stride_array[level], for a constant level, to get a stride (as vector). */
-static LLVMValueRef
-lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
- LLVMValueRef stride_array, int level)
-{
- LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
- return lp_build_get_level_stride_vec(bld, stride_array, lvl);
-}
-
-
-static int
-texture_dims(enum pipe_texture_target tex)
-{
- switch (tex) {
- case PIPE_TEXTURE_1D:
- return 1;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_CUBE:
- return 2;
- case PIPE_TEXTURE_3D:
- return 3;
- default:
- assert(0 && "bad texture target in texture_dims()");
- return 2;
- }
-}
-
-
-static void
-apply_sampler_swizzle(struct lp_build_sample_context *bld,
- LLVMValueRef *texel)
-{
- unsigned char swizzles[4];
-
- swizzles[0] = bld->static_state->swizzle_r;
- swizzles[1] = bld->static_state->swizzle_g;
- swizzles[2] = bld->static_state->swizzle_b;
- swizzles[3] = bld->static_state->swizzle_a;
-
- lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles);
-}
-
-
-
/**
* Generate code to fetch a texel from a texture at int coords (x, y, z).
* The computation depends on whether the texture is 1D, 2D or 3D.
- * The result, texel, will be:
+ * The result, texel, will be float vectors:
* texel[0] = red values
* texel[1] = green values
* texel[2] = blue values
@@ -356,204 +234,6 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld,
/**
- * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at this time.
- * Return whether the given mode is supported by that function.
- */
-static boolean
-is_simple_wrap_mode(unsigned mode)
-{
- switch (mode) {
- case PIPE_TEX_WRAP_REPEAT:
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-
-/**
- * Build LLVM code for texture wrap mode, for scaled integer texcoords.
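- * \param block_length is the length of the pixel block along the
- * coordinate axis
- * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
- * \param length the texture size along one dimension
- * \param stride pixel stride along the coordinate axis
- * \param is_pot if TRUE, length is a power of two
- * \param wrap_mode one of PIPE_TEX_WRAP_x
- * \param out_offset resulting relative offset for coord
- * \param out_i resulting sub-block pixel coordinate for coord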
- */
-static void
-lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
- unsigned block_length,
- LLVMValueRef coord,
- LLVMValueRef length,
- LLVMValueRef stride,
- boolean is_pot,
- unsigned wrap_mode,
- LLVMValueRef *out_offset,
- LLVMValueRef *out_i)
-{
- struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
- struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- LLVMValueRef length_minus_one;
-
- length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
-
- switch(wrap_mode) {
- case PIPE_TEX_WRAP_REPEAT:
- if(is_pot)
- coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
- else
- /* Signed remainder won't give the right results for negative
- * dividends but unsigned remainder does.*/
- coord = LLVMBuildURem(bld->builder, coord, length, "");
- break;
-
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
- coord = lp_build_min(int_coord_bld, coord, length_minus_one);
- break;
-
- case PIPE_TEX_WRAP_CLAMP:
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- default:
- assert(0);
- }
-
- lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
- out_offset, out_i);
-}
-
-
-/**
- * Build LLVM code for texture wrap mode, for scaled integer texcoords.
- * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
- * \param length the texture size along one dimension
- * \param stride pixel stride along the coordinate axis
- * \param block_length is the length of the pixel block along the
- * coordinate axis
- * \param is_pot if TRUE, length is a power of two
- * \param wrap_mode one of PIPE_TEX_WRAP_x
- * \param offset0 resulting relative offset for coord0
- * \param offset1 resulting relative offset for coord0 + 1
- * \param i0 resulting sub-block pixel coordinate for coord0
- * \param i1 resulting sub-block pixel coordinate for coord0 + 1
- */
-static void
-lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
- unsigned block_length,
- LLVMValueRef coord0,
- LLVMValueRef length,
- LLVMValueRef stride,
- boolean is_pot,
- unsigned wrap_mode,
- LLVMValueRef *offset0,
- LLVMValueRef *offset1,
- LLVMValueRef *i0,
- LLVMValueRef *i1)
-{
- struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
- struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- LLVMValueRef length_minus_one;
- LLVMValueRef lmask, umask, mask;
-
- if (block_length != 1) {
- /*
- * If the pixel block covers more than one pixel then there is no easy
- * way to calculate offset1 relative to offset0. Instead, compute them
- * independently.
- */
-
- LLVMValueRef coord1;
-
- lp_build_sample_wrap_nearest_int(bld,
- block_length,
- coord0,
- length,
- stride,
- is_pot,
- wrap_mode,
- offset0, i0);
-
- coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
-
- lp_build_sample_wrap_nearest_int(bld,
- block_length,
- coord1,
- length,
- stride,
- is_pot,
- wrap_mode,
- offset1, i1);
-
- return;
- }
-
- /*
- * Scalar pixels -- try to compute offset0 and offset1 with a single stride
- * multiplication.
- */
-
- *i0 = uint_coord_bld->zero;
- *i1 = uint_coord_bld->zero;
-
- length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
-
- switch(wrap_mode) {
- case PIPE_TEX_WRAP_REPEAT:
- if (is_pot) {
- coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
- }
- else {
- /* Signed remainder won't give the right results for negative
- * dividends but unsigned remainder does.*/
- coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
- }
-
- mask = lp_build_compare(bld->builder, int_coord_bld->type,
- PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
-
- *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
- *offset1 = LLVMBuildAnd(bld->builder,
- lp_build_add(uint_coord_bld, *offset0, stride),
- mask, "");
- break;
-
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
- PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
- umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
- PIPE_FUNC_LESS, coord0, length_minus_one);
-
- coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
- coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
-
- mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
-
- *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
- *offset1 = lp_build_add(uint_coord_bld,
- *offset0,
- LLVMBuildAnd(bld->builder, stride, mask, ""));
- break;
-
- case PIPE_TEX_WRAP_CLAMP:
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- default:
- assert(0);
- *offset0 = uint_coord_bld->zero;
- *offset1 = uint_coord_bld->zero;
- break;
- }
-}
-
-
-/**
* Build LLVM code for texture wrap mode for linear filtering.
* \param x0_out returns first integer texcoord
* \param x1_out returns second integer texcoord
@@ -765,7 +445,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
/**
* Build LLVM code for texture wrap mode for nearest filtering.
* \param coord the incoming texcoord (nominally in [0,1])
- * \param length the texture size along one dimension, as int
+ * \param length the texture size along one dimension, as int vector
* \param is_pot if TRUE, length is a power of two
* \param wrap_mode one of PIPE_TEX_WRAP_x
*/
@@ -882,198 +562,6 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
/**
- * Codegen equivalent for u_minify().
- * Return max(1, base_size >> level);
- */
-static LLVMValueRef
-lp_build_minify(struct lp_build_sample_context *bld,
- LLVMValueRef base_size,
- LLVMValueRef level)
-{
- LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
- size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
- return size;
-}
-
-
-/**
- * Generate code to compute texture level of detail (lambda).
- * \param ddx partial derivatives of (s, t, r, q) with respect to X
- * \param ddy partial derivatives of (s, t, r, q) with respect to Y
- * \param lod_bias optional float vector with the shader lod bias
- * \param explicit_lod optional float vector with the explicit lod
- * \param width scalar int texture width
- * \param height scalar int texture height
- * \param depth scalar int texture depth
- *
- * XXX: The resulting lod is scalar, so ignore all but the first element of
- * derivatives, lod_bias, etc that are passed by the shader.
- */
-static LLVMValueRef
-lp_build_lod_selector(struct lp_build_sample_context *bld,
- const LLVMValueRef ddx[4],
- const LLVMValueRef ddy[4],
- LLVMValueRef lod_bias, /* optional */
- LLVMValueRef explicit_lod, /* optional */
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth)
-
-{
- if (bld->static_state->min_lod == bld->static_state->max_lod) {
- /* User is forcing sampling from a particular mipmap level.
- * This is hit during mipmap generation.
- */
- return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
- }
- else {
- struct lp_build_context *float_bld = &bld->float_bld;
- LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
- bld->static_state->lod_bias);
- LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
- bld->static_state->min_lod);
- LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
- bld->static_state->max_lod);
- LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
- LLVMValueRef lod;
-
- if (explicit_lod) {
- lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
- index0, "");
- }
- else {
- const int dims = texture_dims(bld->static_state->target);
- LLVMValueRef dsdx, dsdy;
- LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
- LLVMValueRef rho;
-
- dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
- dsdx = lp_build_abs(float_bld, dsdx);
- dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
- dsdy = lp_build_abs(float_bld, dsdy);
- if (dims > 1) {
- dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
- dtdx = lp_build_abs(float_bld, dtdx);
- dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
- dtdy = lp_build_abs(float_bld, dtdy);
- if (dims > 2) {
- drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
- drdx = lp_build_abs(float_bld, drdx);
- drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
- drdy = lp_build_abs(float_bld, drdy);
- }
- }
-
- /* Compute rho = max of all partial derivatives scaled by texture size.
- * XXX this could be vectorized somewhat
- */
- rho = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, dsdx, dsdy),
- lp_build_int_to_float(float_bld, width), "");
- if (dims > 1) {
- LLVMValueRef max;
- max = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, dtdx, dtdy),
- lp_build_int_to_float(float_bld, height), "");
- rho = lp_build_max(float_bld, rho, max);
- if (dims > 2) {
- max = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, drdx, drdy),
- lp_build_int_to_float(float_bld, depth), "");
- rho = lp_build_max(float_bld, rho, max);
- }
- }
-
- /* compute lod = log2(rho) */
- lod = lp_build_log2(float_bld, rho);
-
- /* add shader lod bias */
- if (lod_bias) {
- lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
- index0, "");
- lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
- }
- }
-
- /* add sampler lod bias */
- lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
-
- /* clamp lod */
- lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
-
- return lod;
- }
-}
-
-
-/**
- * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
- * mipmap level index.
- * Note: this is all scalar code.
- * \param lod scalar float texture level of detail
- * \param level_out returns integer
- */
-static void
-lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
- unsigned unit,
- LLVMValueRef lod,
- LLVMValueRef *level_out)
-{
- struct lp_build_context *float_bld = &bld->float_bld;
- struct lp_build_context *int_bld = &bld->int_bld;
- LLVMValueRef last_level, level;
-
- LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
-
- last_level = bld->dynamic_state->last_level(bld->dynamic_state,
- bld->builder, unit);
-
- /* convert float lod to integer */
- level = lp_build_iround(float_bld, lod);
-
- /* clamp level to legal range of levels */
- *level_out = lp_build_clamp(int_bld, level, zero, last_level);
-}
-
-
-/**
- * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to two (adjacent)
- * integer mipmap level indexes. Later, we'll sample from those
- * two mipmap levels and interpolate between them.
- */
-static void
-lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
- unsigned unit,
- LLVMValueRef lod,
- LLVMValueRef *level0_out,
- LLVMValueRef *level1_out,
- LLVMValueRef *weight_out)
-{
- struct lp_build_context *float_bld = &bld->float_bld;
- struct lp_build_context *int_bld = &bld->int_bld;
- LLVMValueRef last_level, level;
-
- last_level = bld->dynamic_state->last_level(bld->dynamic_state,
- bld->builder, unit);
-
- /* convert float lod to integer */
- level = lp_build_ifloor(float_bld, lod);
-
- /* compute level 0 and clamp to legal range of levels */
- *level0_out = lp_build_clamp(int_bld, level,
- int_bld->zero,
- last_level);
- /* compute level 1 and clamp to legal range of levels */
- level = lp_build_add(int_bld, level, int_bld->one);
- *level1_out = lp_build_clamp(int_bld, level,
- int_bld->zero,
- last_level);
-
- *weight_out = lp_build_fract(float_bld, lod);
-}
-
-
-/**
* Generate code to sample a mipmap level with nearest filtering.
* If sampling a cube texture, r = cube face in [0,5].
*/
@@ -1291,207 +779,6 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
}
-/** Helper used by lp_build_cube_lookup() */
-static LLVMValueRef
-lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
-{
- /* ima = -0.5 / abs(coord); */
- LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
- LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
- LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
- return ima;
-}
-
-
-/**
- * Helper used by lp_build_cube_lookup()
- * \param sign scalar +1 or -1
- * \param coord float vector
- * \param ima float vector
- */
-static LLVMValueRef
-lp_build_cube_coord(struct lp_build_context *coord_bld,
- LLVMValueRef sign, int negate_coord,
- LLVMValueRef coord, LLVMValueRef ima)
-{
- /* return negate(coord) * ima * sign + 0.5; */
- LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
- LLVMValueRef res;
-
- assert(negate_coord == +1 || negate_coord == -1);
-
- if (negate_coord == -1) {
- coord = lp_build_negate(coord_bld, coord);
- }
-
- res = lp_build_mul(coord_bld, coord, ima);
- if (sign) {
- sign = lp_build_broadcast_scalar(coord_bld, sign);
- res = lp_build_mul(coord_bld, res, sign);
- }
- res = lp_build_add(coord_bld, res, half);
-
- return res;
-}
-
-
-/** Helper used by lp_build_cube_lookup()
- * Return (major_coord >= 0) ? pos_face : neg_face;
- */
-static LLVMValueRef
-lp_build_cube_face(struct lp_build_sample_context *bld,
- LLVMValueRef major_coord,
- unsigned pos_face, unsigned neg_face)
-{
- LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
- major_coord,
- bld->float_bld.zero, "");
- LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
- LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
- LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
- return res;
-}
-
-
-
-/**
- * Generate code to do cube face selection and compute per-face texcoords.
- */
-static void
-lp_build_cube_lookup(struct lp_build_sample_context *bld,
- LLVMValueRef s,
- LLVMValueRef t,
- LLVMValueRef r,
- LLVMValueRef *face,
- LLVMValueRef *face_s,
- LLVMValueRef *face_t)
-{
- struct lp_build_context *float_bld = &bld->float_bld;
- struct lp_build_context *coord_bld = &bld->coord_bld;
- LLVMValueRef rx, ry, rz;
- LLVMValueRef arx, ary, arz;
- LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
- LLVMValueRef arx_ge_ary, arx_ge_arz;
- LLVMValueRef ary_ge_arx, ary_ge_arz;
- LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
- LLVMValueRef rx_pos, ry_pos, rz_pos;
-
- assert(bld->coord_bld.type.length == 4);
-
- /*
- * Use the average of the four pixel's texcoords to choose the face.
- */
- rx = lp_build_mul(float_bld, c25,
- lp_build_sum_vector(&bld->coord_bld, s));
- ry = lp_build_mul(float_bld, c25,
- lp_build_sum_vector(&bld->coord_bld, t));
- rz = lp_build_mul(float_bld, c25,
- lp_build_sum_vector(&bld->coord_bld, r));
-
- arx = lp_build_abs(float_bld, rx);
- ary = lp_build_abs(float_bld, ry);
- arz = lp_build_abs(float_bld, rz);
-
- /*
- * Compare sign/magnitude of rx,ry,rz to determine face
- */
- arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
- arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
- ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
- ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
-
- arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
- ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
-
- rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
- ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
- rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
-
- {
- struct lp_build_flow_context *flow_ctx;
- struct lp_build_if_state if_ctx;
-
- flow_ctx = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx);
-
- *face_s = bld->coord_bld.undef;
- *face_t = bld->coord_bld.undef;
- *face = bld->int_bld.undef;
-
- lp_build_name(*face_s, "face_s");
- lp_build_name(*face_t, "face_t");
- lp_build_name(*face, "face");
-
- lp_build_flow_scope_declare(flow_ctx, face_s);
- lp_build_flow_scope_declare(flow_ctx, face_t);
- lp_build_flow_scope_declare(flow_ctx, face);
-
- lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
- {
- /* +/- X face */
- LLVMValueRef sign = lp_build_sgn(float_bld, rx);
- LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
- *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
- *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
- *face = lp_build_cube_face(bld, rx,
- PIPE_TEX_FACE_POS_X,
- PIPE_TEX_FACE_NEG_X);
- }
- lp_build_else(&if_ctx);
- {
- struct lp_build_flow_context *flow_ctx2;
- struct lp_build_if_state if_ctx2;
-
- LLVMValueRef face_s2 = bld->coord_bld.undef;
- LLVMValueRef face_t2 = bld->coord_bld.undef;
- LLVMValueRef face2 = bld->int_bld.undef;
-
- flow_ctx2 = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx2);
- lp_build_flow_scope_declare(flow_ctx2, &face_s2);
- lp_build_flow_scope_declare(flow_ctx2, &face_t2);
- lp_build_flow_scope_declare(flow_ctx2, &face2);
-
- ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
-
- lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
- {
- /* +/- Y face */
- LLVMValueRef sign = lp_build_sgn(float_bld, ry);
- LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
- face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
- face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
- face2 = lp_build_cube_face(bld, ry,
- PIPE_TEX_FACE_POS_Y,
- PIPE_TEX_FACE_NEG_Y);
- }
- lp_build_else(&if_ctx2);
- {
- /* +/- Z face */
- LLVMValueRef sign = lp_build_sgn(float_bld, rz);
- LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
- face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
- face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
- face2 = lp_build_cube_face(bld, rz,
- PIPE_TEX_FACE_POS_Z,
- PIPE_TEX_FACE_NEG_Z);
- }
- lp_build_endif(&if_ctx2);
- lp_build_flow_scope_end(flow_ctx2);
- lp_build_flow_destroy(flow_ctx2);
- *face_s = face_s2;
- *face_t = face_t2;
- *face = face2;
- }
-
- lp_build_endif(&if_ctx);
- lp_build_flow_scope_end(flow_ctx);
- lp_build_flow_destroy(flow_ctx);
- }
-}
-
-
-
/**
* Sample the texture/mipmap using given image filter and mip filter.
* data0_ptr and data1_ptr point to the two mipmap levels to sample
@@ -1605,7 +892,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
const unsigned mag_filter = bld->static_state->mag_img_filter;
const int dims = texture_dims(bld->static_state->target);
LLVMValueRef lod = NULL, lod_fpart = NULL;
- LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
+ LLVMValueRef ilevel0, ilevel1 = NULL;
LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
@@ -1685,47 +972,15 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
}
}
- /*
- * Convert scalar integer mipmap levels into vectors.
- */
- ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
- ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
-
- /*
- * Compute width, height at mipmap level 'ilevel0'
- */
- width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
- if (dims >= 2) {
- height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
- row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
- ilevel0);
- if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
- img_stride0_vec = lp_build_get_level_stride_vec(bld,
- img_stride_array,
- ilevel0);
- if (dims == 3) {
- depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
- }
- }
- }
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* compute width, height, depth for second mipmap level at 'ilevel1' */
- width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
- if (dims >= 2) {
- height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
- row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
- ilevel1);
- if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
- img_stride1_vec = lp_build_get_level_stride_vec(bld,
- img_stride_array,
- ilevel1);
- if (dims ==3) {
- depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
- }
- }
- }
- }
+ /* compute image size(s) of source mipmap level(s) */
+ lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec,
+ ilevel0, ilevel1,
+ row_stride_array, img_stride_array,
+ &width0_vec, &width1_vec,
+ &height0_vec, &height1_vec,
+ &depth0_vec, &depth1_vec,
+ &row_stride0_vec, &row_stride1_vec,
+ &img_stride0_vec, &img_stride1_vec);
/*
* Get pointer(s) to image data for mipmap level(s).
@@ -1803,258 +1058,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
}
-
-static void
-lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
- LLVMValueRef s,
- LLVMValueRef t,
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef stride_array,
- LLVMValueRef data_array,
- LLVMValueRef texel_out[4])
-{
- LLVMBuilderRef builder = bld->builder;
- struct lp_build_context i32, h16, u8n;
- LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
- LLVMValueRef i32_c8, i32_c128, i32_c255;
- LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
- LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
- LLVMValueRef data_ptr;
- LLVMValueRef x_stride, y_stride;
- LLVMValueRef x_offset0, x_offset1;
- LLVMValueRef y_offset0, y_offset1;
- LLVMValueRef offset[2][2];
- LLVMValueRef x_subcoord[2], y_subcoord[2];
- LLVMValueRef neighbors_lo[2][2];
- LLVMValueRef neighbors_hi[2][2];
- LLVMValueRef packed, packed_lo, packed_hi;
- LLVMValueRef unswizzled[4];
- const unsigned level = 0;
- unsigned i, j;
-
- assert(bld->static_state->target == PIPE_TEXTURE_2D
- || bld->static_state->target == PIPE_TEXTURE_RECT);
- assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
- assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
- assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
-
- lp_build_context_init(&i32, builder, lp_type_int_vec(32));
- lp_build_context_init(&h16, builder, lp_type_ufixed(16));
- lp_build_context_init(&u8n, builder, lp_type_unorm(8));
-
- i32_vec_type = lp_build_vec_type(i32.type);
- h16_vec_type = lp_build_vec_type(h16.type);
- u8n_vec_type = lp_build_vec_type(u8n.type);
-
- if (bld->static_state->normalized_coords) {
- LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
- LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
- LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
- s = lp_build_mul(&bld->coord_bld, s, fp_width);
- t = lp_build_mul(&bld->coord_bld, t, fp_height);
- }
-
- /* scale coords by 256 (8 fractional bits) */
- s = lp_build_mul_imm(&bld->coord_bld, s, 256);
- t = lp_build_mul_imm(&bld->coord_bld, t, 256);
-
- /* convert float to int */
- s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
- t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
-
- /* subtract 0.5 (add -128) */
- i32_c128 = lp_build_const_int_vec(i32.type, -128);
- s = LLVMBuildAdd(builder, s, i32_c128, "");
- t = LLVMBuildAdd(builder, t, i32_c128, "");
-
- /* compute floor (shift right 8) */
- i32_c8 = lp_build_const_int_vec(i32.type, 8);
- s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
- t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
-
- /* compute fractional part (AND with 0xff) */
- i32_c255 = lp_build_const_int_vec(i32.type, 255);
- s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
- t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
-
- x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
- bld->format_desc->block.bits/8);
-
- y_stride = lp_build_get_const_level_stride_vec(bld, stride_array, level);
-
- lp_build_sample_wrap_linear_int(bld,
- bld->format_desc->block.width,
- s_ipart, width, x_stride,
- bld->static_state->pot_width,
- bld->static_state->wrap_s,
- &x_offset0, &x_offset1,
- &x_subcoord[0], &x_subcoord[1]);
- lp_build_sample_wrap_linear_int(bld,
- bld->format_desc->block.height,
- t_ipart, height, y_stride,
- bld->static_state->pot_height,
- bld->static_state->wrap_t,
- &y_offset0, &y_offset1,
- &y_subcoord[0], &y_subcoord[1]);
-
- offset[0][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset0);
- offset[0][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset0);
- offset[1][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset1);
- offset[1][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset1);
-
- /*
- * Transform 4 x i32 in
- *
- * s_fpart = {s0, s1, s2, s3}
- *
- * into 8 x i16
- *
- * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
- *
- * into two 8 x i16
- *
- * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
- * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
- *
- * and likewise for t_fpart. There is no risk of losing precision here
- * since the fractional parts only use the lower 8 bits.
- */
-
- s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
- t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
-
- {
- LLVMTypeRef elem_type = LLVMInt32Type();
- LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
- LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
- LLVMValueRef shuffle_lo;
- LLVMValueRef shuffle_hi;
-
- for(j = 0; j < h16.type.length; j += 4) {
-#ifdef PIPE_ARCH_LITTLE_ENDIAN
- unsigned subindex = 0;
-#else
- unsigned subindex = 1;
-#endif
- LLVMValueRef index;
-
- index = LLVMConstInt(elem_type, j/2 + subindex, 0);
- for(i = 0; i < 4; ++i)
- shuffles_lo[j + i] = index;
-
- index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
- for(i = 0; i < 4; ++i)
- shuffles_hi[j + i] = index;
- }
-
- shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
- shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
-
- s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
- t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
- s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
- t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
- }
-
- /*
- * get pointer to mipmap level 0 data
- */
- data_ptr = lp_build_get_const_mipmap_level(bld, data_array, level);
-
- /*
- * Fetch the pixels as 4 x 32bit (rgba order might differ):
- *
- * rgba0 rgba1 rgba2 rgba3
- *
- * bit cast them into 16 x u8
- *
- * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
- *
- * unpack them into two 8 x i16:
- *
- * r0 g0 b0 a0 r1 g1 b1 a1
- * r2 g2 b2 a2 r3 g3 b3 a3
- *
- * The higher 8 bits of the resulting elements will be zero.
- */
-
- for (j = 0; j < 2; ++j) {
- for (i = 0; i < 2; ++i) {
- LLVMValueRef rgba8;
-
- if (util_format_is_rgba8_variant(bld->format_desc)) {
- /*
- * Given the format is a rgba8, just read the pixels as is,
- * without any swizzling. Swizzling will be done later.
- */
- rgba8 = lp_build_gather(bld->builder,
- bld->texel_type.length,
- bld->format_desc->block.bits,
- bld->texel_type.width,
- data_ptr, offset[j][i]);
-
- rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
-
- }
- else {
- rgba8 = lp_build_fetch_rgba_aos(bld->builder,
- bld->format_desc,
- u8n.type,
- data_ptr, offset[j][i],
- x_subcoord[i],
- y_subcoord[j]);
- }
-
- lp_build_unpack2(builder, u8n.type, h16.type,
- rgba8,
- &neighbors_lo[j][i], &neighbors_hi[j][i]);
- }
- }
-
- /*
- * Linear interpolate with 8.8 fixed point.
- */
-
- packed_lo = lp_build_lerp_2d(&h16,
- s_fpart_lo, t_fpart_lo,
- neighbors_lo[0][0],
- neighbors_lo[0][1],
- neighbors_lo[1][0],
- neighbors_lo[1][1]);
-
- packed_hi = lp_build_lerp_2d(&h16,
- s_fpart_hi, t_fpart_hi,
- neighbors_hi[0][0],
- neighbors_hi[0][1],
- neighbors_hi[1][0],
- neighbors_hi[1][1]);
-
- packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
-
- /*
- * Convert to SoA and swizzle.
- */
-
- lp_build_rgba8_to_f32_soa(bld->builder,
- bld->texel_type,
- packed, unswizzled);
-
- if (util_format_is_rgba8_variant(bld->format_desc)) {
- lp_build_format_swizzle_soa(bld->format_desc,
- &bld->texel_bld,
- unswizzled, texel_out);
- } else {
- texel_out[0] = unswizzled[0];
- texel_out[1] = unswizzled[1];
- texel_out[2] = unswizzled[2];
- texel_out[3] = unswizzled[3];
- }
-
- apply_sampler_swizzle(bld, texel_out);
-}
-
-
static void
lp_build_sample_compare(struct lp_build_sample_context *bld,
LLVMValueRef p,
@@ -2181,6 +1184,7 @@ lp_build_sample_soa(LLVMBuilderRef builder,
t = coords[1];
r = coords[2];
+ /* width, height, depth as uint vectors */
width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
@@ -2190,27 +1194,32 @@ lp_build_sample_soa(LLVMBuilderRef builder,
lp_build_sample_nop(&bld, texel_out);
}
else if (util_format_fits_8unorm(bld.format_desc) &&
- (static_state->target == PIPE_TEXTURE_2D ||
- static_state->target == PIPE_TEXTURE_RECT) &&
- static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
- static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
- static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
- is_simple_wrap_mode(static_state->wrap_s) &&
- is_simple_wrap_mode(static_state->wrap_t)) {
- /* special case */
- lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
- row_stride_array, data_array, texel_out);
+ lp_is_simple_wrap_mode(static_state->wrap_s) &&
+ lp_is_simple_wrap_mode(static_state->wrap_t)) {
+ /* do sampling/filtering with fixed pt arithmetic */
+ printf("new sample\n");
+ lp_build_sample_aos(&bld, unit, s, t, r, ddx, ddy,
+ lod_bias, explicit_lod,
+ width, height, depth,
+ width_vec, height_vec, depth_vec,
+ row_stride_array, img_stride_array,
+ data_array, texel_out);
}
+
else {
- if (gallivm_debug & GALLIVM_DEBUG_PERF &&
- (static_state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
- static_state->mag_img_filter != PIPE_TEX_FILTER_NEAREST ||
- static_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) &&
+ if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
util_format_fits_8unorm(bld.format_desc)) {
debug_printf("%s: using floating point linear filtering for %s\n",
__FUNCTION__, bld.format_desc->short_name);
+ debug_printf(" min_img %d mag_img %d mip %d wraps %d wrapt %d\n",
+ static_state->min_img_filter,
+ static_state->mag_img_filter,
+ static_state->min_mip_filter,
+ static_state->wrap_s,
+ static_state->wrap_t);
}
+ printf("old sample\n");
lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
lod_bias, explicit_lod,
width, height, depth,