diff options
author | José Fonseca <jfonseca@vmware.com> | 2010-07-01 13:57:48 +0100 |
---|---|---|
committer | José Fonseca <jfonseca@vmware.com> | 2010-07-01 15:02:17 +0100 |
commit | 8d93f360c582297b9ced11c234ab4bd53103a8a6 (patch) | |
tree | 01a3baa4c00d6e5202cc59a0430985ff6f194475 /src/gallium/auxiliary | |
parent | b919bb7f6119d59751fe846cabe5b0d587f46edc (diff) |
gallivm: Support 4 x unorm8 in lp_build_fetch_rgba_aos().
Uses code and ideas from Brian Paul.
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- | src/gallium/auxiliary/draw/draw_llvm_translate.c | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format.h | 9 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format_aos.c | 218 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 4 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_type.h | 48 |
5 files changed, 213 insertions, 70 deletions
diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c index d7da7ed357..ec7d0a455c 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_translate.c +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -7,6 +7,7 @@ #include "gallivm/lp_bld_struct.h" #include "gallivm/lp_bld_format.h" #include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_type.h" #include "util/u_memory.h" #include "util/u_format.h" @@ -466,6 +467,7 @@ draw_llvm_translate_from(LLVMBuilderRef builder, const struct util_format_description *format_desc; LLVMValueRef zero; int i; + struct lp_type type = lp_float32_vec4_type(); /* * The above can only cope with straight arrays: no bitfields, @@ -493,5 +495,5 @@ draw_llvm_translate_from(LLVMBuilderRef builder, format_desc = util_format_description(from_format); zero = LLVMConstNull(LLVMInt32Type()); - return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer, zero, zero); + return lp_build_fetch_rgba_aos(builder, format_desc, type, vbuffer, zero, zero); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 5f5036e7bd..c335ca46a7 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -48,9 +48,9 @@ struct lp_build_context; */ LLVMValueRef -lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, - LLVMValueRef packed); +lp_build_format_swizzle_aos(const struct util_format_description *desc, + struct lp_build_context *bld, + LLVMValueRef unswizzled); LLVMValueRef lp_build_pack_rgba_aos(LLVMBuilderRef builder, @@ -60,6 +60,7 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, LLVMValueRef lp_build_fetch_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *format_desc, + struct lp_type type, LLVMValueRef ptr, LLVMValueRef i, LLVMValueRef j); @@ -72,7 +73,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, void lp_build_format_swizzle_soa(const struct util_format_description *format_desc, struct lp_build_context *bld, - const LLVMValueRef *unswizzled, + const LLVMValueRef unswizzled[4], LLVMValueRef swizzled_out[4]); void diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 87e3e72a6e..bec2a80d76 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -38,33 +38,122 @@ #include "util/u_math.h" #include "util/u_string.h" +#include "lp_bld_arit.h" #include "lp_bld_init.h" #include "lp_bld_type.h" #include "lp_bld_flow.h" +#include "lp_bld_const.h" +#include "lp_bld_conv.h" +#include "lp_bld_swizzle.h" #include "lp_bld_format.h" /** + * Basic swizzling. Rearrange the order of the unswizzled array elements + * according to the format description. PIPE_SWIZZLE_ZERO/ONE are supported + * too. + * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}. + */ +LLVMValueRef +lp_build_format_swizzle_aos(const struct util_format_description *desc, + struct lp_build_context *bld, + LLVMValueRef unswizzled) +{ + unsigned char swizzles[4]; + unsigned chan; + + assert(bld->type.length % 4 == 0); + + for (chan = 0; chan < 4; ++chan) { + enum util_format_swizzle swizzle; + + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { + /* + * For ZS formats do RGBA = ZZZ1 + */ + if (chan == 3) { + swizzle = UTIL_FORMAT_SWIZZLE_1; + } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { + swizzle = UTIL_FORMAT_SWIZZLE_0; + } else { + swizzle = desc->swizzle[0]; + } + } else { + swizzle = desc->swizzle[chan]; + } + swizzles[chan] = swizzle; + } + + return lp_build_swizzle_aos(bld, unswizzled, swizzles); +} + + +/** + * Whether the format matches the vector type, apart of swizzles. + */ +static INLINE boolean +format_matches_type(const struct util_format_description *desc, + struct lp_type type) +{ + enum util_format_type chan_type; + unsigned chan; + + assert(type.length % 4 == 0); + + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || + desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB) { + return FALSE; + } + + if (type.floating) { + chan_type = UTIL_FORMAT_TYPE_FLOAT; + } else if (type.fixed) { + chan_type = UTIL_FORMAT_TYPE_FIXED; + } else if (type.sign) { + chan_type = UTIL_FORMAT_TYPE_SIGNED; + } else { + chan_type = UTIL_FORMAT_TYPE_UNSIGNED; + } + + for (chan = 0; chan < desc->nr_channels; ++chan) { + if (desc->channel[chan].size != type.width) { + return FALSE; + } + + if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) { + if (desc->channel[chan].type != chan_type || + desc->channel[chan].normalized != type.norm) { + return FALSE; + } + } + } + + return TRUE; +} + + +/** * Unpack a single pixel into its RGBA components. * * @param desc the pixel format for the packed pixel value + * @param type the desired return type (float[4] vs. ubyte[4]) * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM * - * @return RGBA in a 4 floats vector. + * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector. */ -LLVMValueRef -lp_build_unpack_rgba_aos(LLVMBuilderRef builder, - const struct util_format_description *desc, +static INLINE LLVMValueRef +lp_build_unpack_rgba_aos(const struct util_format_description *desc, + struct lp_build_context *bld, LLVMValueRef packed) { + LLVMBuilderRef builder = bld->builder; + struct lp_type type = bld->type; LLVMValueRef shifted, casted, scaled, masked; LLVMValueRef shifts[4]; LLVMValueRef masks[4]; LLVMValueRef scales[4]; - LLVMValueRef swizzles[4]; - LLVMValueRef aux[4]; + boolean normalized; - int empty_channel; boolean needs_uitofp; unsigned shift; unsigned i; @@ -98,7 +187,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, /* Initialize vector constants */ normalized = FALSE; needs_uitofp = FALSE; - empty_channel = -1; shift = 0; /* Loop over 4 color components */ @@ -109,7 +197,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, shifts[i] = LLVMGetUndef(LLVMInt32Type()); masks[i] = LLVMConstNull(LLVMInt32Type()); scales[i] = LLVMConstNull(LLVMFloatType()); - empty_channel = i; } else { unsigned long long mask = (1ULL << bits) - 1; @@ -158,52 +245,21 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder, else scaled = casted; - for (i = 0; i < 4; ++i) - aux[i] = LLVMGetUndef(LLVMFloatType()); + /* + * Type conversion. + * + * TODO: We could avoid floating conversion for integer to + * integer conversions. + */ - /* Build swizzles vector to put components into R,G,B,A order */ - for (i = 0; i < 4; ++i) { - enum util_format_swizzle swizzle; + lp_build_conv(builder, + lp_float32_vec4_type(), + type, + &scaled, 1, &scaled, 1); - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { - /* - * For ZS formats do RGBA = ZZZ1 - */ - if (i == 3) { - swizzle = UTIL_FORMAT_SWIZZLE_1; - } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { - swizzle = UTIL_FORMAT_SWIZZLE_0; - } else { - swizzle = desc->swizzle[0]; - } - } else { - swizzle = desc->swizzle[i]; - } - - switch (swizzle) { - case UTIL_FORMAT_SWIZZLE_X: - case UTIL_FORMAT_SWIZZLE_Y: - case UTIL_FORMAT_SWIZZLE_Z: - case UTIL_FORMAT_SWIZZLE_W: - swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0); - break; - case UTIL_FORMAT_SWIZZLE_0: - assert(empty_channel >= 0); - swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0); - break; - case UTIL_FORMAT_SWIZZLE_1: - swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0); - aux[0] = LLVMConstReal(LLVMFloatType(), 1.0); - break; - case UTIL_FORMAT_SWIZZLE_NONE: - swizzles[i] = LLVMGetUndef(LLVMFloatType()); - assert(0); - break; - } - } + scaled = lp_build_format_swizzle_aos(desc, bld, scaled); - return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4), - LLVMConstVector(swizzles, 4), ""); + return scaled; } @@ -316,16 +372,23 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, * \param format_desc describes format of the image we're fetching from * \param ptr address of the pixel block (or the texel if uncompressed) * \param i, j the sub-block pixel coordinates. For non-compressed formats - * these will always be (0,). - * \return valueRef with the float[4] RGBA pixel + * these will always be (0, 0). + * \return a 4 element vector with the pixel's RGBA values. */ LLVMValueRef lp_build_fetch_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *format_desc, + struct lp_type type, LLVMValueRef ptr, LLVMValueRef i, LLVMValueRef j) { + struct lp_build_context bld; + + /* XXX: For now we only support one pixel at a time */ + assert(type.length == 4); + + lp_build_context_init(&bld, builder, type); if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || @@ -347,7 +410,24 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, packed = LLVMBuildLoad(builder, ptr, "packed"); - return lp_build_unpack_rgba_aos(builder, format_desc, packed); + if (format_matches_type(format_desc, type)) { + /* + * The format matches the type (apart of a swizzle) so no need for + * scaling or converting. + */ + + assert(format_desc->block.bits <= type.width * type.length); + if (format_desc->block.bits < type.width * type.length) { + packed = LLVMBuildZExt(builder, packed, + LLVMIntType(type.width * type.length), ""); + } + + packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(type), ""); + + return lp_build_format_swizzle_aos(format_desc, &bld, packed); + } else { + return lp_build_unpack_rgba_aos(format_desc, &bld, packed); + } } else if (format_desc->fetch_rgba_float) { /* @@ -361,8 +441,12 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); char name[256]; + LLVMTypeRef f32t = LLVMFloatType(); + LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4); + LLVMTypeRef pf32t = LLVMPointerType(f32t, 0); LLVMValueRef function; - LLVMValueRef tmp; + LLVMValueRef tmp_ptr; + LLVMValueRef tmp_val; LLVMValueRef args[4]; util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", @@ -379,7 +463,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, LLVMTypeRef function_type; ret_type = LLVMVoidType(); - arg_types[0] = LLVMPointerType(LLVMFloatType(), 0); + arg_types[0] = pf32t; arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); @@ -394,25 +478,35 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, func_to_pointer((func_pointer)format_desc->fetch_rgba_float)); } - tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); + tmp_ptr = lp_build_alloca(builder, f32x4t, ""); /* * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result * in the SoA vectors. */ - args[0] = LLVMBuildBitCast(builder, tmp, - LLVMPointerType(LLVMFloatType(), 0), ""); + args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, ""); args[1] = ptr; args[2] = i; args[3] = j; LLVMBuildCall(builder, function, args, Elements(args), ""); - return LLVMBuildLoad(builder, tmp, ""); + tmp_val = LLVMBuildLoad(builder, tmp_ptr, ""); + + if (type.floating) { + /* No further conversion necessary */ + } else { + lp_build_conv(builder, + lp_float32_vec4_type(), + type, + &tmp_val, 1, &tmp_val, 1); + } + + return tmp_val; } else { assert(0); - return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); + return lp_build_undef(type); } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index e1b94adc85..a4a36a090d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -324,8 +324,6 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, unsigned k, chan; - assert(type.floating); - for (chan = 0; chan < 4; ++chan) { rgba_out[chan] = lp_build_undef(type); } @@ -345,7 +343,7 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, j_elem = LLVMBuildExtractElement(builder, j, index, ""); /* Get a single float[4]={R,G,B,A} pixel */ - tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, + tmp = lp_build_fetch_rgba_aos(builder, format_desc, type, ptr, i_elem, j_elem); /* diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h index df77ef2155..3ffe916f8e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_type.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h @@ -316,6 +316,54 @@ LLVMTypeRef lp_build_int32_vec4_type(void); +static INLINE struct lp_type +lp_float32_vec4_type(void) +{ + struct lp_type type; + + memset(&type, 0, sizeof(type)); + type.floating = TRUE; + type.sign = TRUE; + type.norm = FALSE; + type.width = 32; + type.length = 4; + + return type; +} + + +static INLINE struct lp_type +lp_int32_vec4_type(void) +{ + struct lp_type type; + + memset(&type, 0, sizeof(type)); + type.floating = FALSE; + type.sign = TRUE; + type.norm = FALSE; + type.width = 32; + type.length = 4; + + return type; +} + + +static INLINE struct lp_type +lp_unorm8_vec4_type(void) +{ + struct lp_type type; + + memset(&type, 0, sizeof(type)); + type.floating = FALSE; + type.sign = FALSE; + type.norm = TRUE; + type.width = 8; + type.length = 4; + + return type; +} + + struct lp_type lp_uint_type(struct lp_type type); |