From 0a7824862eb753878fa79b153b2a111884ff1197 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Sep 2010 17:04:26 -0600 Subject: gallivm: expand AoS sampling to cover all filtering modes ...and all texture targets (1D/2D/3D/CUBE). --- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 1059 +-------------------- 1 file changed, 34 insertions(+), 1025 deletions(-) (limited to 'src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index baf0402f56..f61f23efd1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -40,6 +40,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_format.h" +#include "util/u_cpu_detect.h" #include "lp_bld_debug.h" #include "lp_bld_type.h" #include "lp_bld_const.h" @@ -52,48 +53,10 @@ #include "lp_bld_gather.h" #include "lp_bld_format.h" #include "lp_bld_sample.h" +#include "lp_bld_sample_aos.h" #include "lp_bld_quad.h" -/** - * Keep all information for sampling code generation in a single place. - */ -struct lp_build_sample_context -{ - LLVMBuilderRef builder; - - const struct lp_sampler_static_state *static_state; - - struct lp_sampler_dynamic_state *dynamic_state; - - const struct util_format_description *format_desc; - - /** regular scalar float type */ - struct lp_type float_type; - struct lp_build_context float_bld; - - /** regular scalar float type */ - struct lp_type int_type; - struct lp_build_context int_bld; - - /** Incoming coordinates type and build context */ - struct lp_type coord_type; - struct lp_build_context coord_bld; - - /** Unsigned integer coordinates */ - struct lp_type uint_coord_type; - struct lp_build_context uint_coord_bld; - - /** Signed integer coordinates */ - struct lp_type int_coord_type; - struct lp_build_context int_coord_bld; - - /** Output texels type and build context */ - struct lp_type texel_type; - struct lp_build_context texel_bld; -}; - - /** * Does the given texture wrap mode allow sampling the texture border color? * XXX maybe move this into gallium util code. @@ -119,95 +82,10 @@ wrap_mode_uses_border_color(unsigned mode) } -static LLVMValueRef -lp_build_get_mipmap_level(struct lp_build_sample_context *bld, - LLVMValueRef data_array, LLVMValueRef level) -{ - LLVMValueRef indexes[2], data_ptr; - indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); - indexes[1] = level; - data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, ""); - data_ptr = LLVMBuildLoad(bld->builder, data_ptr, ""); - return data_ptr; -} - - -static LLVMValueRef -lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld, - LLVMValueRef data_array, int level) -{ - LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); - return lp_build_get_mipmap_level(bld, data_array, lvl); -} - - -/** - * Dereference stride_array[mipmap_level] array to get a stride. - * Return stride as a vector. - */ -static LLVMValueRef -lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, - LLVMValueRef stride_array, LLVMValueRef level) -{ - LLVMValueRef indexes[2], stride; - indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); - indexes[1] = level; - stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, ""); - stride = LLVMBuildLoad(bld->builder, stride, ""); - stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride); - return stride; -} - - -/** Dereference stride_array[0] array to get a stride (as vector). */ -static LLVMValueRef -lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld, - LLVMValueRef stride_array, int level) -{ - LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0); - return lp_build_get_level_stride_vec(bld, stride_array, lvl); -} - - -static int -texture_dims(enum pipe_texture_target tex) -{ - switch (tex) { - case PIPE_TEXTURE_1D: - return 1; - case PIPE_TEXTURE_2D: - case PIPE_TEXTURE_RECT: - case PIPE_TEXTURE_CUBE: - return 2; - case PIPE_TEXTURE_3D: - return 3; - default: - assert(0 && "bad texture target in texture_dims()"); - return 2; - } -} - - -static void -apply_sampler_swizzle(struct lp_build_sample_context *bld, - LLVMValueRef *texel) -{ - unsigned char swizzles[4]; - - swizzles[0] = bld->static_state->swizzle_r; - swizzles[1] = bld->static_state->swizzle_g; - swizzles[2] = bld->static_state->swizzle_b; - swizzles[3] = bld->static_state->swizzle_a; - - lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles); -} - - - /** * Generate code to fetch a texel from a texture at int coords (x, y, z). * The computation depends on whether the texture is 1D, 2D or 3D. - * The result, texel, will be: + * The result, texel, will be float vectors: * texel[0] = red values * texel[1] = green values * texel[2] = blue values @@ -355,204 +233,6 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld, } -/** - * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at this time. - * Return whether the given mode is supported by that function. - */ -static boolean -is_simple_wrap_mode(unsigned mode) -{ - switch (mode) { - case PIPE_TEX_WRAP_REPEAT: - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - return TRUE; - default: - return FALSE; - } -} - - -/** - * Build LLVM code for texture wrap mode, for scaled integer texcoords. - * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size - * \param length the texture size along one dimension - * \param is_pot if TRUE, length is a power of two - * \param wrap_mode one of PIPE_TEX_WRAP_x - * \param i0 resulting sub-block pixel coordinate for coord0 - */ -static void -lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, - unsigned block_length, - LLVMValueRef coord, - LLVMValueRef length, - LLVMValueRef stride, - boolean is_pot, - unsigned wrap_mode, - LLVMValueRef *out_offset, - LLVMValueRef *out_i) -{ - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef length_minus_one; - - length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); - - switch(wrap_mode) { - case PIPE_TEX_WRAP_REPEAT: - if(is_pot) - coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); - else - /* Signed remainder won't give the right results for negative - * dividends but unsigned remainder does.*/ - coord = LLVMBuildURem(bld->builder, coord, length, ""); - break; - - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); - coord = lp_build_min(int_coord_bld, coord, length_minus_one); - break; - - case PIPE_TEX_WRAP_CLAMP: - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - case PIPE_TEX_WRAP_MIRROR_REPEAT: - case PIPE_TEX_WRAP_MIRROR_CLAMP: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - default: - assert(0); - } - - lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride, - out_offset, out_i); -} - - -/** - * Build LLVM code for texture wrap mode, for scaled integer texcoords. - * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size - * \param length the texture size along one dimension - * \param stride pixel stride along the coordinate axis - * \param block_length is the length of the pixel block along the - * coordinate axis - * \param is_pot if TRUE, length is a power of two - * \param wrap_mode one of PIPE_TEX_WRAP_x - * \param offset0 resulting relative offset for coord0 - * \param offset1 resulting relative offset for coord0 + 1 - * \param i0 resulting sub-block pixel coordinate for coord0 - * \param i1 resulting sub-block pixel coordinate for coord0 + 1 - */ -static void -lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, - unsigned block_length, - LLVMValueRef coord0, - LLVMValueRef length, - LLVMValueRef stride, - boolean is_pot, - unsigned wrap_mode, - LLVMValueRef *offset0, - LLVMValueRef *offset1, - LLVMValueRef *i0, - LLVMValueRef *i1) -{ - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; - struct lp_build_context *int_coord_bld = &bld->int_coord_bld; - LLVMValueRef length_minus_one; - LLVMValueRef lmask, umask, mask; - - if (block_length != 1) { - /* - * If the pixel block covers more than one pixel then there is no easy - * way to calculate offset1 relative to offset0. Instead, compute them - * independently. - */ - - LLVMValueRef coord1; - - lp_build_sample_wrap_nearest_int(bld, - block_length, - coord0, - length, - stride, - is_pot, - wrap_mode, - offset0, i0); - - coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); - - lp_build_sample_wrap_nearest_int(bld, - block_length, - coord1, - length, - stride, - is_pot, - wrap_mode, - offset1, i1); - - return; - } - - /* - * Scalar pixels -- try to compute offset0 and offset1 with a single stride - * multiplication. - */ - - *i0 = uint_coord_bld->zero; - *i1 = uint_coord_bld->zero; - - length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); - - switch(wrap_mode) { - case PIPE_TEX_WRAP_REPEAT: - if (is_pot) { - coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); - } - else { - /* Signed remainder won't give the right results for negative - * dividends but unsigned remainder does.*/ - coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); - } - - mask = lp_build_compare(bld->builder, int_coord_bld->type, - PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); - - *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); - *offset1 = LLVMBuildAnd(bld->builder, - lp_build_add(uint_coord_bld, *offset0, stride), - mask, ""); - break; - - case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type, - PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero); - umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type, - PIPE_FUNC_LESS, coord0, length_minus_one); - - coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero); - coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one); - - mask = LLVMBuildAnd(bld->builder, lmask, umask, ""); - - *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); - *offset1 = lp_build_add(uint_coord_bld, - *offset0, - LLVMBuildAnd(bld->builder, stride, mask, "")); - break; - - case PIPE_TEX_WRAP_CLAMP: - case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - case PIPE_TEX_WRAP_MIRROR_REPEAT: - case PIPE_TEX_WRAP_MIRROR_CLAMP: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - default: - assert(0); - *offset0 = uint_coord_bld->zero; - *offset1 = uint_coord_bld->zero; - break; - } -} - - /** * Build LLVM code for texture wrap mode for linear filtering. * \param x0_out returns first integer texcoord @@ -765,7 +445,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, /** * Build LLVM code for texture wrap mode for nearest filtering. * \param coord the incoming texcoord (nominally in [0,1]) - * \param length the texture size along one dimension, as int + * \param length the texture size along one dimension, as int vector * \param is_pot if TRUE, length is a power of two * \param wrap_mode one of PIPE_TEX_WRAP_x */ @@ -881,198 +561,6 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, } -/** - * Codegen equivalent for u_minify(). - * Return max(1, base_size >> level); - */ -static LLVMValueRef -lp_build_minify(struct lp_build_sample_context *bld, - LLVMValueRef base_size, - LLVMValueRef level) -{ - LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify"); - size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one); - return size; -} - - -/** - * Generate code to compute texture level of detail (lambda). - * \param ddx partial derivatives of (s, t, r, q) with respect to X - * \param ddy partial derivatives of (s, t, r, q) with respect to Y - * \param lod_bias optional float vector with the shader lod bias - * \param explicit_lod optional float vector with the explicit lod - * \param width scalar int texture width - * \param height scalar int texture height - * \param depth scalar int texture depth - * - * XXX: The resulting lod is scalar, so ignore all but the first element of - * derivatives, lod_bias, etc that are passed by the shader. - */ -static LLVMValueRef -lp_build_lod_selector(struct lp_build_sample_context *bld, - const LLVMValueRef ddx[4], - const LLVMValueRef ddy[4], - LLVMValueRef lod_bias, /* optional */ - LLVMValueRef explicit_lod, /* optional */ - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth) - -{ - if (bld->static_state->min_lod == bld->static_state->max_lod) { - /* User is forcing sampling from a particular mipmap level. - * This is hit during mipmap generation. - */ - return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod); - } - else { - struct lp_build_context *float_bld = &bld->float_bld; - LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(), - bld->static_state->lod_bias); - LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), - bld->static_state->min_lod); - LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), - bld->static_state->max_lod); - LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef lod; - - if (explicit_lod) { - lod = LLVMBuildExtractElement(bld->builder, explicit_lod, - index0, ""); - } - else { - const int dims = texture_dims(bld->static_state->target); - LLVMValueRef dsdx, dsdy; - LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL; - LLVMValueRef rho; - - dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx"); - dsdx = lp_build_abs(float_bld, dsdx); - dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy"); - dsdy = lp_build_abs(float_bld, dsdy); - if (dims > 1) { - dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx"); - dtdx = lp_build_abs(float_bld, dtdx); - dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy"); - dtdy = lp_build_abs(float_bld, dtdy); - if (dims > 2) { - drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx"); - drdx = lp_build_abs(float_bld, drdx); - drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy"); - drdy = lp_build_abs(float_bld, drdy); - } - } - - /* Compute rho = max of all partial derivatives scaled by texture size. - * XXX this could be vectorized somewhat - */ - rho = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, dsdx, dsdy), - lp_build_int_to_float(float_bld, width), ""); - if (dims > 1) { - LLVMValueRef max; - max = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, dtdx, dtdy), - lp_build_int_to_float(float_bld, height), ""); - rho = lp_build_max(float_bld, rho, max); - if (dims > 2) { - max = LLVMBuildFMul(bld->builder, - lp_build_max(float_bld, drdx, drdy), - lp_build_int_to_float(float_bld, depth), ""); - rho = lp_build_max(float_bld, rho, max); - } - } - - /* compute lod = log2(rho) */ - lod = lp_build_log2(float_bld, rho); - - /* add shader lod bias */ - if (lod_bias) { - lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias, - index0, ""); - lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias"); - } - } - - /* add sampler lod bias */ - lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias"); - - /* clamp lod */ - lod = lp_build_clamp(float_bld, lod, min_lod, max_lod); - - return lod; - } -} - - -/** - * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer - * mipmap level index. - * Note: this is all scalar code. - * \param lod scalar float texture level of detail - * \param level_out returns integer - */ -static void -lp_build_nearest_mip_level(struct lp_build_sample_context *bld, - unsigned unit, - LLVMValueRef lod, - LLVMValueRef *level_out) -{ - struct lp_build_context *float_bld = &bld->float_bld; - struct lp_build_context *int_bld = &bld->int_bld; - LLVMValueRef last_level, level; - - LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0); - - last_level = bld->dynamic_state->last_level(bld->dynamic_state, - bld->builder, unit); - - /* convert float lod to integer */ - level = lp_build_iround(float_bld, lod); - - /* clamp level to legal range of levels */ - *level_out = lp_build_clamp(int_bld, level, zero, last_level); -} - - -/** - * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to - * two (adjacent) mipmap level indexes. Later, we'll sample from those - * two mipmap levels and interpolate between them. - */ -static void -lp_build_linear_mip_levels(struct lp_build_sample_context *bld, - unsigned unit, - LLVMValueRef lod, - LLVMValueRef *level0_out, - LLVMValueRef *level1_out, - LLVMValueRef *weight_out) -{ - struct lp_build_context *float_bld = &bld->float_bld; - struct lp_build_context *int_bld = &bld->int_bld; - LLVMValueRef last_level, level; - - last_level = bld->dynamic_state->last_level(bld->dynamic_state, - bld->builder, unit); - - /* convert float lod to integer */ - level = lp_build_ifloor(float_bld, lod); - - /* compute level 0 and clamp to legal range of levels */ - *level0_out = lp_build_clamp(int_bld, level, - int_bld->zero, - last_level); - /* compute level 1 and clamp to legal range of levels */ - level = lp_build_add(int_bld, level, int_bld->one); - *level1_out = lp_build_clamp(int_bld, level, - int_bld->zero, - last_level); - - *weight_out = lp_build_fract(float_bld, lod); -} - - /** * Generate code to sample a mipmap level with nearest filtering. * If sampling a cube texture, r = cube face in [0,5]. @@ -1291,207 +779,6 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, } -/** Helper used by lp_build_cube_lookup() */ -static LLVMValueRef -lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord) -{ - /* ima = -0.5 / abs(coord); */ - LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5); - LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); - LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord); - return ima; -} - - -/** - * Helper used by lp_build_cube_lookup() - * \param sign scalar +1 or -1 - * \param coord float vector - * \param ima float vector - */ -static LLVMValueRef -lp_build_cube_coord(struct lp_build_context *coord_bld, - LLVMValueRef sign, int negate_coord, - LLVMValueRef coord, LLVMValueRef ima) -{ - /* return negate(coord) * ima * sign + 0.5; */ - LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5); - LLVMValueRef res; - - assert(negate_coord == +1 || negate_coord == -1); - - if (negate_coord == -1) { - coord = lp_build_negate(coord_bld, coord); - } - - res = lp_build_mul(coord_bld, coord, ima); - if (sign) { - sign = lp_build_broadcast_scalar(coord_bld, sign); - res = lp_build_mul(coord_bld, res, sign); - } - res = lp_build_add(coord_bld, res, half); - - return res; -} - - -/** Helper used by lp_build_cube_lookup() - * Return (major_coord >= 0) ? pos_face : neg_face; - */ -static LLVMValueRef -lp_build_cube_face(struct lp_build_sample_context *bld, - LLVMValueRef major_coord, - unsigned pos_face, unsigned neg_face) -{ - LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE, - major_coord, - bld->float_bld.zero, ""); - LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0); - LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0); - LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, ""); - return res; -} - - - -/** - * Generate code to do cube face selection and compute per-face texcoords. - */ -static void -lp_build_cube_lookup(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef r, - LLVMValueRef *face, - LLVMValueRef *face_s, - LLVMValueRef *face_t) -{ - struct lp_build_context *float_bld = &bld->float_bld; - struct lp_build_context *coord_bld = &bld->coord_bld; - LLVMValueRef rx, ry, rz; - LLVMValueRef arx, ary, arz; - LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25); - LLVMValueRef arx_ge_ary, arx_ge_arz; - LLVMValueRef ary_ge_arx, ary_ge_arz; - LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz; - LLVMValueRef rx_pos, ry_pos, rz_pos; - - assert(bld->coord_bld.type.length == 4); - - /* - * Use the average of the four pixel's texcoords to choose the face. - */ - rx = lp_build_mul(float_bld, c25, - lp_build_sum_vector(&bld->coord_bld, s)); - ry = lp_build_mul(float_bld, c25, - lp_build_sum_vector(&bld->coord_bld, t)); - rz = lp_build_mul(float_bld, c25, - lp_build_sum_vector(&bld->coord_bld, r)); - - arx = lp_build_abs(float_bld, rx); - ary = lp_build_abs(float_bld, ry); - arz = lp_build_abs(float_bld, rz); - - /* - * Compare sign/magnitude of rx,ry,rz to determine face - */ - arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, ""); - arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, ""); - ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, ""); - ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, ""); - - arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, ""); - ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); - - rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, ""); - ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, ""); - rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, ""); - - { - struct lp_build_flow_context *flow_ctx; - struct lp_build_if_state if_ctx; - - flow_ctx = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx); - - *face_s = bld->coord_bld.undef; - *face_t = bld->coord_bld.undef; - *face = bld->int_bld.undef; - - lp_build_name(*face_s, "face_s"); - lp_build_name(*face_t, "face_t"); - lp_build_name(*face, "face"); - - lp_build_flow_scope_declare(flow_ctx, face_s); - lp_build_flow_scope_declare(flow_ctx, face_t); - lp_build_flow_scope_declare(flow_ctx, face); - - lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz); - { - /* +/- X face */ - LLVMValueRef sign = lp_build_sgn(float_bld, rx); - LLVMValueRef ima = lp_build_cube_ima(coord_bld, s); - *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima); - *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); - *face = lp_build_cube_face(bld, rx, - PIPE_TEX_FACE_POS_X, - PIPE_TEX_FACE_NEG_X); - } - lp_build_else(&if_ctx); - { - struct lp_build_flow_context *flow_ctx2; - struct lp_build_if_state if_ctx2; - - LLVMValueRef face_s2 = bld->coord_bld.undef; - LLVMValueRef face_t2 = bld->coord_bld.undef; - LLVMValueRef face2 = bld->int_bld.undef; - - flow_ctx2 = lp_build_flow_create(bld->builder); - lp_build_flow_scope_begin(flow_ctx2); - lp_build_flow_scope_declare(flow_ctx2, &face_s2); - lp_build_flow_scope_declare(flow_ctx2, &face_t2); - lp_build_flow_scope_declare(flow_ctx2, &face2); - - ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, ""); - - lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz); - { - /* +/- Y face */ - LLVMValueRef sign = lp_build_sgn(float_bld, ry); - LLVMValueRef ima = lp_build_cube_ima(coord_bld, t); - face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); - face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima); - face2 = lp_build_cube_face(bld, ry, - PIPE_TEX_FACE_POS_Y, - PIPE_TEX_FACE_NEG_Y); - } - lp_build_else(&if_ctx2); - { - /* +/- Z face */ - LLVMValueRef sign = lp_build_sgn(float_bld, rz); - LLVMValueRef ima = lp_build_cube_ima(coord_bld, r); - face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima); - face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); - face2 = lp_build_cube_face(bld, rz, - PIPE_TEX_FACE_POS_Z, - PIPE_TEX_FACE_NEG_Z); - } - lp_build_endif(&if_ctx2); - lp_build_flow_scope_end(flow_ctx2); - lp_build_flow_destroy(flow_ctx2); - *face_s = face_s2; - *face_t = face_t2; - *face = face2; - } - - lp_build_endif(&if_ctx); - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); - } -} - - - /** * Sample the texture/mipmap using given image filter and mip filter. * data0_ptr and data1_ptr point to the two mipmap levels to sample @@ -1605,7 +892,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, const unsigned mag_filter = bld->static_state->mag_img_filter; const int dims = texture_dims(bld->static_state->target); LLVMValueRef lod = NULL, lod_fpart = NULL; - LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL; + LLVMValueRef ilevel0, ilevel1 = NULL; LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; @@ -1685,47 +972,15 @@ lp_build_sample_general(struct lp_build_sample_context *bld, } } - /* - * Convert scalar integer mipmap levels into vectors. - */ - ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0); - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) - ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1); - - /* - * Compute width, height at mipmap level 'ilevel0' - */ - width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec); - if (dims >= 2) { - height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec); - row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array, - ilevel0); - if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { - img_stride0_vec = lp_build_get_level_stride_vec(bld, - img_stride_array, - ilevel0); - if (dims == 3) { - depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec); - } - } - } - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* compute width, height, depth for second mipmap level at 'ilevel1' */ - width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec); - if (dims >= 2) { - height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec); - row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array, - ilevel1); - if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) { - img_stride1_vec = lp_build_get_level_stride_vec(bld, - img_stride_array, - ilevel1); - if (dims ==3) { - depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec); - } - } - } - } + /* compute image size(s) of source mipmap level(s) */ + lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec, + ilevel0, ilevel1, + row_stride_array, img_stride_array, + &width0_vec, &width1_vec, + &height0_vec, &height1_vec, + &depth0_vec, &depth1_vec, + &row_stride0_vec, &row_stride1_vec, + &img_stride0_vec, &img_stride1_vec); /* * Get pointer(s) to image data for mipmap level(s). @@ -1803,258 +1058,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld, } - -static void -lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld, - LLVMValueRef s, - LLVMValueRef t, - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef stride_array, - LLVMValueRef data_array, - LLVMValueRef texel_out[4]) -{ - LLVMBuilderRef builder = bld->builder; - struct lp_build_context i32, h16, u8n; - LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; - LLVMValueRef i32_c8, i32_c128, i32_c255; - LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; - LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; - LLVMValueRef data_ptr; - LLVMValueRef x_stride, y_stride; - LLVMValueRef x_offset0, x_offset1; - LLVMValueRef y_offset0, y_offset1; - LLVMValueRef offset[2][2]; - LLVMValueRef x_subcoord[2], y_subcoord[2]; - LLVMValueRef neighbors_lo[2][2]; - LLVMValueRef neighbors_hi[2][2]; - LLVMValueRef packed, packed_lo, packed_hi; - LLVMValueRef unswizzled[4]; - const unsigned level = 0; - unsigned i, j; - - assert(bld->static_state->target == PIPE_TEXTURE_2D - || bld->static_state->target == PIPE_TEXTURE_RECT); - assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR); - assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR); - assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE); - - lp_build_context_init(&i32, builder, lp_type_int_vec(32)); - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - lp_build_context_init(&u8n, builder, lp_type_unorm(8)); - - i32_vec_type = lp_build_vec_type(i32.type); - h16_vec_type = lp_build_vec_type(h16.type); - u8n_vec_type = lp_build_vec_type(u8n.type); - - if (bld->static_state->normalized_coords) { - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, ""); - LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, ""); - s = lp_build_mul(&bld->coord_bld, s, fp_width); - t = lp_build_mul(&bld->coord_bld, t, fp_height); - } - - /* scale coords by 256 (8 fractional bits) */ - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - t = lp_build_mul_imm(&bld->coord_bld, t, 256); - - /* convert float to int */ - s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); - t = LLVMBuildFPToSI(builder, t, i32_vec_type, ""); - - /* subtract 0.5 (add -128) */ - i32_c128 = lp_build_const_int_vec(i32.type, -128); - s = LLVMBuildAdd(builder, s, i32_c128, ""); - t = LLVMBuildAdd(builder, t, i32_c128, ""); - - /* compute floor (shift right 8) */ - i32_c8 = lp_build_const_int_vec(i32.type, 8); - s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); - t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); - - /* compute fractional part (AND with 0xff) */ - i32_c255 = lp_build_const_int_vec(i32.type, 255); - s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); - t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); - - x_stride = lp_build_const_vec(bld->uint_coord_bld.type, - bld->format_desc->block.bits/8); - - y_stride = lp_build_get_const_level_stride_vec(bld, stride_array, level); - - lp_build_sample_wrap_linear_int(bld, - bld->format_desc->block.width, - s_ipart, width, x_stride, - bld->static_state->pot_width, - bld->static_state->wrap_s, - &x_offset0, &x_offset1, - &x_subcoord[0], &x_subcoord[1]); - lp_build_sample_wrap_linear_int(bld, - bld->format_desc->block.height, - t_ipart, height, y_stride, - bld->static_state->pot_height, - bld->static_state->wrap_t, - &y_offset0, &y_offset1, - &y_subcoord[0], &y_subcoord[1]); - - offset[0][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset0); - offset[0][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset0); - offset[1][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset1); - offset[1][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset1); - - /* - * Transform 4 x i32 in - * - * s_fpart = {s0, s1, s2, s3} - * - * into 8 x i16 - * - * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3} - * - * into two 8 x i16 - * - * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1} - * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3} - * - * and likewise for t_fpart. There is no risk of loosing precision here - * since the fractional parts only use the lower 8bits. - */ - - s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, ""); - t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, ""); - - { - LLVMTypeRef elem_type = LLVMInt32Type(); - LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef shuffle_lo; - LLVMValueRef shuffle_hi; - - for(j = 0; j < h16.type.length; j += 4) { -#ifdef PIPE_ARCH_LITTLE_ENDIAN - unsigned subindex = 0; -#else - unsigned subindex = 1; -#endif - LLVMValueRef index; - - index = LLVMConstInt(elem_type, j/2 + subindex, 0); - for(i = 0; i < 4; ++i) - shuffles_lo[j + i] = index; - - index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0); - for(i = 0; i < 4; ++i) - shuffles_hi[j + i] = index; - } - - shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length); - shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length); - - s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, ""); - t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, ""); - s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, ""); - t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, ""); - } - - /* - * get pointer to mipmap level 0 data - */ - data_ptr = lp_build_get_const_mipmap_level(bld, data_array, level); - - /* - * Fetch the pixels as 4 x 32bit (rgba order might differ): - * - * rgba0 rgba1 rgba2 rgba3 - * - * bit cast them into 16 x u8 - * - * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 - * - * unpack them into two 8 x i16: - * - * r0 g0 b0 a0 r1 g1 b1 a1 - * r2 g2 b2 a2 r3 g3 b3 a3 - * - * The higher 8 bits of the resulting elements will be zero. - */ - - for (j = 0; j < 2; ++j) { - for (i = 0; i < 2; ++i) { - LLVMValueRef rgba8; - - if (util_format_is_rgba8_variant(bld->format_desc)) { - /* - * Given the format is a rgba8, just read the pixels as is, - * without any swizzling. Swizzling will be done later. - */ - rgba8 = lp_build_gather(bld->builder, - bld->texel_type.length, - bld->format_desc->block.bits, - bld->texel_type.width, - data_ptr, offset[j][i]); - - rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); - - } - else { - rgba8 = lp_build_fetch_rgba_aos(bld->builder, - bld->format_desc, - u8n.type, - data_ptr, offset[j][i], - x_subcoord[i], - y_subcoord[j]); - } - - lp_build_unpack2(builder, u8n.type, h16.type, - rgba8, - &neighbors_lo[j][i], &neighbors_hi[j][i]); - } - } - - /* - * Linear interpolate with 8.8 fixed point. - */ - - packed_lo = lp_build_lerp_2d(&h16, - s_fpart_lo, t_fpart_lo, - neighbors_lo[0][0], - neighbors_lo[0][1], - neighbors_lo[1][0], - neighbors_lo[1][1]); - - packed_hi = lp_build_lerp_2d(&h16, - s_fpart_hi, t_fpart_hi, - neighbors_hi[0][0], - neighbors_hi[0][1], - neighbors_hi[1][0], - neighbors_hi[1][1]); - - packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi); - - /* - * Convert to SoA and swizzle. - */ - - lp_build_rgba8_to_f32_soa(bld->builder, - bld->texel_type, - packed, unswizzled); - - if (util_format_is_rgba8_variant(bld->format_desc)) { - lp_build_format_swizzle_soa(bld->format_desc, - &bld->texel_bld, - unswizzled, texel_out); - } else { - texel_out[0] = unswizzled[0]; - texel_out[1] = unswizzled[1]; - texel_out[2] = unswizzled[2]; - texel_out[3] = unswizzled[3]; - } - - apply_sampler_swizzle(bld, texel_out); -} - - static void lp_build_sample_compare(struct lp_build_sample_context *bld, LLVMValueRef p, @@ -2181,6 +1184,7 @@ lp_build_sample_soa(LLVMBuilderRef builder, t = coords[1]; r = coords[2]; + /* width, height, depth as uint vectors */ width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width); height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height); depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth); @@ -2190,27 +1194,32 @@ lp_build_sample_soa(LLVMBuilderRef builder, lp_build_sample_nop(&bld, texel_out); } else if (util_format_fits_8unorm(bld.format_desc) && - (static_state->target == PIPE_TEXTURE_2D || - static_state->target == PIPE_TEXTURE_RECT) && - static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR && - static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR && - static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && - is_simple_wrap_mode(static_state->wrap_s) && - is_simple_wrap_mode(static_state->wrap_t)) { - /* special case */ - lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec, - row_stride_array, data_array, texel_out); + lp_is_simple_wrap_mode(static_state->wrap_s) && + lp_is_simple_wrap_mode(static_state->wrap_t)) { + /* do sampling/filtering with fixed pt arithmetic */ + printf("new sample\n"); + lp_build_sample_aos(&bld, unit, s, t, r, ddx, ddy, + lod_bias, explicit_lod, + width, height, depth, + width_vec, height_vec, depth_vec, + row_stride_array, img_stride_array, + data_array, texel_out); } + else { - if (gallivm_debug & GALLIVM_DEBUG_PERF && - (static_state->min_img_filter != PIPE_TEX_FILTER_NEAREST || - static_state->mag_img_filter != PIPE_TEX_FILTER_NEAREST || - static_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) && + if ((gallivm_debug & GALLIVM_DEBUG_PERF) && util_format_fits_8unorm(bld.format_desc)) { debug_printf("%s: using floating point linear filtering for %s\n", __FUNCTION__, bld.format_desc->short_name); + debug_printf(" min_img %d mag_img %d mip %d wraps %d wrapt %d\n", + static_state->min_img_filter, + static_state->mag_img_filter, + static_state->min_mip_filter, + static_state->wrap_s, + static_state->wrap_t); } + printf("old sample\n"); lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy, lod_bias, explicit_lod, width, height, depth, -- cgit v1.2.3