From ec8d9523d465554e3ffaa1aeef46bfff868281d3 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 20 Apr 2010 16:21:08 +0200 Subject: gallivm: Universal format support on lp_build_fetch_rgba_aos via util_format_description::fetch_rgba_float This therefore adds support to half float vertex buffers. --- src/gallium/auxiliary/gallivm/lp_bld_format.h | 4 +- src/gallium/auxiliary/gallivm/lp_bld_format_aos.c | 79 ++++++++++++++++++++++- src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 62 +++--------------- 3 files changed, 89 insertions(+), 56 deletions(-) (limited to 'src/gallium/auxiliary/gallivm') diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index ecf2cfd62c..085937588f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -59,7 +59,9 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, LLVMValueRef lp_build_fetch_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *format_desc, - LLVMValueRef ptr); + LLVMValueRef ptr, + LLVMValueRef i, + LLVMValueRef j); /* diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 191562d460..5cd5b93bdf 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -34,8 +34,11 @@ #include "util/u_format.h" +#include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_string.h" +#include "lp_bld_init.h" #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_swizzle.h" @@ -295,12 +298,17 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, /** * Fetch a pixel into a 4 float AoS. + * + * i and j are the sub-block pixel coordinates. 
*/ LLVMValueRef lp_build_fetch_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *format_desc, - LLVMValueRef ptr) + LLVMValueRef ptr, + LLVMValueRef i, + LLVMValueRef j) { + if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && @@ -309,7 +317,9 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, util_is_pot(format_desc->block.bits) && format_desc->block.bits <= 32 && format_desc->is_bitmask && - !format_desc->is_mixed) + !format_desc->is_mixed && + (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || + format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) { LLVMValueRef packed; @@ -321,6 +331,71 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, return lp_build_unpack_rgba_aos(builder, format_desc, packed); } + else if (format_desc->fetch_rgba_float) { + /* + * Fallback to calling util_format_description::fetch_rgba_float. + * + * This is definitely not the most efficient way of fetching pixels, as + * we miss the opportunity to do vectorization, but it is + * convenient for formats or scenarios for which there was no opportunity + * or incentive to optimize. + */ + + LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); + char name[256]; + LLVMValueRef function; + LLVMValueRef tmp; + LLVMValueRef args[4]; + + util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", + format_desc->short_name); + + /* + * Declare and bind format_desc->fetch_rgba_float(). 
+ */ + + function = LLVMGetNamedFunction(module, name); + if (!function) { + LLVMTypeRef ret_type; + LLVMTypeRef arg_types[4]; + LLVMTypeRef function_type; + + ret_type = LLVMVoidType(); + arg_types[0] = LLVMPointerType(LLVMFloatType(), 0); + arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); + arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); + function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); + function = LLVMAddFunction(module, name, function_type); + + LLVMSetFunctionCallConv(function, LLVMCCallConv); + LLVMSetLinkage(function, LLVMExternalLinkage); + + assert(LLVMIsDeclaration(function)); + + LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float); + } + + /* + * XXX: this alloca would be better placed in the first block of the function + */ + + tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); + + /* + * Invoke format_desc->fetch_rgba_float() for this pixel, writing the result + * into the temporary, which is then loaded as a 4 x float AoS vector. + */ + + args[0] = LLVMBuildBitCast(builder, tmp, + LLVMPointerType(LLVMFloatType(), 0), ""); + args[1] = ptr; + args[2] = i; + args[3] = j; + + LLVMBuildCall(builder, function, args, 4, ""); + + return LLVMBuildLoad(builder, tmp, ""); + } else { assert(0); return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 2b66162eb4..c7b20f4201 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -307,70 +307,28 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, } else { /* - * Fallback to calling util_format_description::fetch_rgba_float for each - * pixel. + * Fallback to calling lp_build_fetch_rgba_aos for each pixel. 
* - * This is definitely not the most efficient way of fetching pixels, as - * we miss the opportunity to do vectorization, but this it is a + * This is not the most efficient way of fetching pixels, as + * we miss some opportunities to do vectorization, but it is * convenient for formats or scenarios for which there was no opportunity * or incentive to optimize. */ - LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); - char name[256]; - LLVMValueRef function; - LLVMValueRef tmp; unsigned k, chan; assert(type.floating); - util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", format_desc->short_name); - - /* - * Declare and bind format_desc->fetch_rgba_float(). - */ - - function = LLVMGetNamedFunction(module, name); - if (!function) { - LLVMTypeRef ret_type; - LLVMTypeRef arg_types[4]; - LLVMTypeRef function_type; - - ret_type = LLVMVoidType(); - arg_types[0] = LLVMPointerType(LLVMFloatType(), 0); - arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); - arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); - function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); - function = LLVMAddFunction(module, name, function_type); - - LLVMSetFunctionCallConv(function, LLVMCCallConv); - LLVMSetLinkage(function, LLVMExternalLinkage); - - assert(LLVMIsDeclaration(function)); - - LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float); - } - for (chan = 0; chan < 4; ++chan) { rgba[chan] = lp_build_undef(type); } - tmp = LLVMBuildArrayAlloca(builder, - LLVMFloatType(), - LLVMConstInt(LLVMInt32Type(), 4, 0), - ""); - - /* - * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result - * in the SoA vectors. 
- */ - for(k = 0; k < type.length; ++k) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); LLVMValueRef offset_elem; LLVMValueRef ptr; LLVMValueRef i_elem, j_elem; - LLVMValueRef args[4]; + LLVMValueRef tmp; offset_elem = LLVMBuildExtractElement(builder, offset, index, ""); ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, ""); @@ -378,17 +336,15 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, i_elem = LLVMBuildExtractElement(builder, i, index, ""); j_elem = LLVMBuildExtractElement(builder, j, index, ""); - args[0] = tmp; - args[1] = ptr; - args[2] = i_elem; - args[3] = j_elem; + tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, i_elem, j_elem); - LLVMBuildCall(builder, function, args, 4, ""); + /* + * AoS to SoA + */ for (chan = 0; chan < 4; ++chan) { LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0), - tmp_chan = LLVMBuildGEP(builder, tmp, &chan_val, 1, ""); - tmp_chan = LLVMBuildLoad(builder, tmp_chan, ""); + tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, ""); rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, ""); } } -- cgit v1.2.3