/************************************************************************** * * Copyright 2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ /** * @file * AoS pixel format manipulation. * * @author Jose Fonseca */ #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_math.h" #include "util/u_string.h" #include "lp_bld_arit.h" #include "lp_bld_init.h" #include "lp_bld_type.h" #include "lp_bld_flow.h" #include "lp_bld_const.h" #include "lp_bld_conv.h" #include "lp_bld_swizzle.h" #include "lp_bld_format.h" /** * Basic swizzling. Rearrange the order of the unswizzled array elements * according to the format description. PIPE_SWIZZLE_ZERO/ONE are supported * too. * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}. */ LLVMValueRef lp_build_format_swizzle_aos(const struct util_format_description *desc, struct lp_build_context *bld, LLVMValueRef unswizzled) { unsigned char swizzles[4]; unsigned chan; assert(bld->type.length % 4 == 0); for (chan = 0; chan < 4; ++chan) { enum util_format_swizzle swizzle; if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { /* * For ZS formats do RGBA = ZZZ1 */ if (chan == 3) { swizzle = UTIL_FORMAT_SWIZZLE_1; } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { swizzle = UTIL_FORMAT_SWIZZLE_0; } else { swizzle = desc->swizzle[0]; } } else { swizzle = desc->swizzle[chan]; } swizzles[chan] = swizzle; } return lp_build_swizzle_aos(bld, unswizzled, swizzles); } /** * Whether the format matches the vector type, apart of swizzles. */ static INLINE boolean format_matches_type(const struct util_format_description *desc, struct lp_type type) { enum util_format_type chan_type; unsigned chan; assert(type.length % 4 == 0); if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB) { return FALSE; } if (type.floating) { chan_type = UTIL_FORMAT_TYPE_FLOAT; } else if (type.fixed) { chan_type = UTIL_FORMAT_TYPE_FIXED; } else if (type.sign) { chan_type = UTIL_FORMAT_TYPE_SIGNED; } else { chan_type = UTIL_FORMAT_TYPE_UNSIGNED; } for (chan = 0; chan < desc->nr_channels; ++chan) { if (desc->channel[chan].size != type.width) { return FALSE; } if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) { if (desc->channel[chan].type != chan_type || desc->channel[chan].normalized != type.norm) { return FALSE; } } } return TRUE; } /** * Unpack a single pixel into its RGBA components. * * @param desc the pixel format for the packed pixel value * @param type the desired return type (float[4] vs. ubyte[4]) * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM * * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector. */ static INLINE LLVMValueRef lp_build_unpack_rgba_aos(const struct util_format_description *desc, struct lp_build_context *bld, LLVMValueRef packed) { LLVMBuilderRef builder = bld->builder; struct lp_type type = bld->type; LLVMValueRef shifted, casted, scaled, masked; LLVMValueRef shifts[4]; LLVMValueRef masks[4]; LLVMValueRef scales[4]; boolean normalized; boolean needs_uitofp; unsigned shift; unsigned i; /* TODO: Support more formats */ assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(desc->block.width == 1); assert(desc->block.height == 1); assert(desc->block.bits <= 32); /* Do the intermediate integer computations with 32bit integers since it * matches floating point size */ if (desc->block.bits < 32) packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), ""); /* Broadcast the packed value to all four channels * before: packed = BGRA * after: packed = {BGRA, BGRA, BGRA, BGRA} */ packed = LLVMBuildInsertElement(builder, LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)), packed, LLVMConstNull(LLVMInt32Type()), ""); packed = LLVMBuildShuffleVector(builder, packed, LLVMGetUndef(LLVMVectorType(LLVMInt32Type(), 4)), LLVMConstNull(LLVMVectorType(LLVMInt32Type(), 4)), ""); /* Initialize vector constants */ normalized = FALSE; needs_uitofp = FALSE; shift = 0; /* Loop over 4 color components */ for (i = 0; i < 4; ++i) { unsigned bits = desc->channel[i].size; if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { shifts[i] = LLVMGetUndef(LLVMInt32Type()); masks[i] = LLVMConstNull(LLVMInt32Type()); scales[i] = LLVMConstNull(LLVMFloatType()); } else { unsigned long long mask = (1ULL << bits) - 1; assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); if (bits == 32) { needs_uitofp = TRUE; } shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0); masks[i] = LLVMConstInt(LLVMInt32Type(), mask, 0); if (desc->channel[i].normalized) { scales[i] = LLVMConstReal(LLVMFloatType(), 1.0/mask); normalized = TRUE; } else scales[i] = LLVMConstReal(LLVMFloatType(), 1.0); } shift += bits; } /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA} * into masked = {B, G, R, A} */ shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); if (!needs_uitofp) { /* UIToFP can't be expressed in SSE2 */ casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); } else { casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), ""); } /* At this point 'casted' may be a vector of floats such as * {255.0, 255.0, 255.0, 255.0}. Next, if the pixel values are normalized * we'll scale this to {1.0, 1.0, 1.0, 1.0}. */ if (normalized) scaled = LLVMBuildMul(builder, casted, LLVMConstVector(scales, 4), ""); else scaled = casted; /* * Type conversion. * * TODO: We could avoid floating conversion for integer to * integer conversions. */ lp_build_conv(builder, lp_float32_vec4_type(), type, &scaled, 1, &scaled, 1); scaled = lp_build_format_swizzle_aos(desc, bld, scaled); return scaled; } /** * Pack a single pixel. * * @param rgba 4 float vector with the unpacked components. * * XXX: This is mostly for reference and testing -- operating a single pixel at * a time is rarely if ever needed. */ LLVMValueRef lp_build_pack_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *desc, LLVMValueRef rgba) { LLVMTypeRef type; LLVMValueRef packed = NULL; LLVMValueRef swizzles[4]; LLVMValueRef shifted, casted, scaled, unswizzled; LLVMValueRef shifts[4]; LLVMValueRef scales[4]; boolean normalized; unsigned shift; unsigned i, j; assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(desc->block.width == 1); assert(desc->block.height == 1); type = LLVMIntType(desc->block.bits); /* Unswizzle the color components into the source vector. */ for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) { if (desc->swizzle[j] == i) break; } if (j < 4) swizzles[i] = LLVMConstInt(LLVMInt32Type(), j, 0); else swizzles[i] = LLVMGetUndef(LLVMInt32Type()); } unswizzled = LLVMBuildShuffleVector(builder, rgba, LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)), LLVMConstVector(swizzles, 4), ""); normalized = FALSE; shift = 0; for (i = 0; i < 4; ++i) { unsigned bits = desc->channel[i].size; if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { shifts[i] = LLVMGetUndef(LLVMInt32Type()); scales[i] = LLVMGetUndef(LLVMFloatType()); } else { unsigned mask = (1 << bits) - 1; assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); assert(bits < 32); shifts[i] = LLVMConstInt(LLVMInt32Type(), shift, 0); if (desc->channel[i].normalized) { scales[i] = LLVMConstReal(LLVMFloatType(), mask); normalized = TRUE; } else scales[i] = LLVMConstReal(LLVMFloatType(), 1.0); } shift += bits; } if (normalized) scaled = LLVMBuildMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); else scaled = unswizzled; casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32Type(), 4), ""); shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), ""); /* Bitwise or all components */ for (i = 0; i < 4; ++i) { if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, LLVMConstInt(LLVMInt32Type(), i, 0), ""); if (packed) packed = LLVMBuildOr(builder, packed, component, ""); else packed = component; } } if (!packed) packed = LLVMGetUndef(LLVMInt32Type()); if (desc->block.bits < 32) packed = LLVMBuildTrunc(builder, packed, type, ""); return packed; } /** * Fetch a pixel into a 4 float AoS. * * \param format_desc describes format of the image we're fetching from * \param ptr address of the pixel block (or the texel if uncompressed) * \param i, j the sub-block pixel coordinates. For non-compressed formats * these will always be (0, 0). * \return a 4 element vector with the pixel's RGBA values. */ LLVMValueRef lp_build_fetch_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *format_desc, struct lp_type type, LLVMValueRef ptr, LLVMValueRef i, LLVMValueRef j) { struct lp_build_context bld; /* XXX: For now we only support one pixel at a time */ assert(type.length == 4); lp_build_context_init(&bld, builder, type); if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && format_desc->block.width == 1 && format_desc->block.height == 1 && util_is_pot(format_desc->block.bits) && format_desc->block.bits <= 32 && format_desc->is_bitmask && !format_desc->is_mixed && (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) { LLVMValueRef packed; ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) , ""); packed = LLVMBuildLoad(builder, ptr, "packed"); if (format_matches_type(format_desc, type)) { /* * The format matches the type (apart of a swizzle) so no need for * scaling or converting. */ assert(format_desc->block.bits <= type.width * type.length); if (format_desc->block.bits < type.width * type.length) { packed = LLVMBuildZExt(builder, packed, LLVMIntType(type.width * type.length), ""); } packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(type), ""); return lp_build_format_swizzle_aos(format_desc, &bld, packed); } else { return lp_build_unpack_rgba_aos(format_desc, &bld, packed); } } else if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { LLVMValueRef packed; LLVMValueRef rgba; ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(LLVMInt32Type(), 0), "packed_ptr"); packed = LLVMBuildLoad(builder, ptr, "packed"); rgba = lp_build_unpack_subsampled_to_rgba_aos(builder, format_desc, 1, packed, i, j); lp_build_conv(builder, lp_unorm8_vec4_type(), type, &rgba, 1, &rgba, 1); return rgba; } else if (format_desc->fetch_rgba_float) { /* * Fallback to calling util_format_description::fetch_rgba_float. * * This is definitely not the most efficient way of fetching pixels, as * we miss the opportunity to do vectorization, but this it is a * convenient for formats or scenarios for which there was no opportunity * or incentive to optimize. */ LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); char name[256]; LLVMTypeRef f32t = LLVMFloatType(); LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4); LLVMTypeRef pf32t = LLVMPointerType(f32t, 0); LLVMValueRef function; LLVMValueRef tmp_ptr; LLVMValueRef tmp_val; LLVMValueRef args[4]; util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", format_desc->short_name); /* * Declare and bind format_desc->fetch_rgba_float(). */ function = LLVMGetNamedFunction(module, name); if (!function) { LLVMTypeRef ret_type; LLVMTypeRef arg_types[4]; LLVMTypeRef function_type; ret_type = LLVMVoidType(); arg_types[0] = pf32t; arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); function = LLVMAddFunction(module, name, function_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); LLVMSetLinkage(function, LLVMExternalLinkage); assert(LLVMIsDeclaration(function)); LLVMAddGlobalMapping(lp_build_engine, function, func_to_pointer((func_pointer)format_desc->fetch_rgba_float)); } tmp_ptr = lp_build_alloca(builder, f32x4t, ""); /* * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result * in the SoA vectors. */ args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, ""); args[1] = ptr; args[2] = i; args[3] = j; LLVMBuildCall(builder, function, args, Elements(args), ""); tmp_val = LLVMBuildLoad(builder, tmp_ptr, ""); if (type.floating) { /* No further conversion necessary */ } else { lp_build_conv(builder, lp_float32_vec4_type(), type, &tmp_val, 1, &tmp_val, 1); } return tmp_val; } else { assert(0); return lp_build_undef(type); } }