From 8988424ee8de7948e55279fe622ffdacdb6e5f8a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 7 Aug 2009 09:51:48 +0100 Subject: llvmpipe: Bootstrap type conversions. --- src/gallium/drivers/llvmpipe/SConscript | 6 + src/gallium/drivers/llvmpipe/lp_bld_conv.c | 190 ++++++++++++++ src/gallium/drivers/llvmpipe/lp_bld_conv.h | 54 ++++ src/gallium/drivers/llvmpipe/lp_test_conv.c | 392 ++++++++++++++++++++++++++++ 4 files changed, 642 insertions(+) create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_conv.c create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_conv.h create mode 100644 src/gallium/drivers/llvmpipe/lp_test_conv.c (limited to 'src') diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript index 7982e4219a..71e4e6a03f 100644 --- a/src/gallium/drivers/llvmpipe/SConscript +++ b/src/gallium/drivers/llvmpipe/SConscript @@ -12,6 +12,7 @@ llvmpipe = env.ConvenienceLibrary( 'lp_fs_llvm.c', 'lp_bld_arit.c', 'lp_bld_const.c', + 'lp_bld_conv.c', 'lp_bld_intr.c', 'lp_bld_pack.c', 'lp_bld_unpack.c', @@ -67,4 +68,9 @@ env.Program( source = ['lp_test_blend.c', 'lp_test_main.c'], ) +env.Program( + target = 'lp_test_conv', + source = ['lp_test_conv.c', 'lp_test_main.c'], +) + Export('llvmpipe') diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c new file mode 100644 index 0000000000..aea1ac2526 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c @@ -0,0 +1,190 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Helper + * + * LLVM IR doesn't support all basic arithmetic operations we care about (most + * notably min/max and saturated operations), and it is often necessary to + * resort machine-specific intrinsics directly. The functions here hide all + * these implementation details from the other modules. + * + * We also do simple expressions simplification here. Reasons are: + * - it is very easy given we have all necessary information readily available + * - LLVM optimization passes fail to simplify several vector expressions + * - We often know value constraints which the optimization passes have no way + * of knowing, such as when source arguments are known to be in [0, 1] range. + * + * @author Jose Fonseca + */ + + +#include "util/u_debug.h" + +#include "lp_bld_type.h" +#include "lp_bld_const.h" +#include "lp_bld_intr.h" +#include "lp_bld_conv.h" + + +static LLVMValueRef +lp_build_trunc(LLVMBuilderRef builder, + union lp_type src_type, + union lp_type dst_type, + LLVMValueRef *src, unsigned num_srcs) +{ + LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + /* Register width must remain constant */ + assert(src_type.width * src_type.length == dst_type.width * dst_type.length); + + /* We must not loose or gain channels. Only precision */ + assert(src_type.length * num_srcs == dst_type.length); + + for(i = 0; i < num_srcs; ++i) + tmp[i] = src[i]; + + while(src_type.width > dst_type.width) { + LLVMTypeRef tmp_vec_type = lp_build_vec_type(src_type); + union lp_type new_type = src_type; + LLVMTypeRef new_vec_type; + + new_type.width /= 2; + new_type.length *= 2; + new_vec_type = lp_build_vec_type(new_type); + + for(i = 0; i < num_srcs/2; ++i) { + LLVMValueRef lo = tmp[2*i + 0]; + LLVMValueRef hi = tmp[2*i + 1]; + LLVMValueRef packed = NULL; + + if(src_type.width == 32) { + /* FIXME: we only have a packed signed intrinsic */ + packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", tmp_vec_type, lo, hi); + } + else if(src_type.width == 16) { + if(dst_type.sign) + packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", tmp_vec_type, lo, hi); + else + packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", tmp_vec_type, lo, hi); + } + else + assert(0); + + tmp[i] = LLVMBuildBitCast(builder, packed, new_vec_type, ""); + } + + src_type = new_type; + + num_srcs /= 2; + } + + assert(num_srcs == 1); + + return tmp[0]; +} + + +/** + * Convert between two SIMD types. + * + * Converting between SIMD types of different element width poses a problem: + * SIMD registers have a fixed number of bits, so different element widths + * imply different vector lengths. Therefore we must multiplex the multiple + * incoming sources into a single destination vector, or demux a single incoming + * vector into multiple vectors. + */ +void +lp_build_conv(LLVMBuilderRef builder, + union lp_type src_type, + union lp_type dst_type, + LLVMValueRef *src, unsigned num_srcs, + LLVMValueRef *dst, unsigned num_dsts) +{ + unsigned i; + + /* Register width must remain constant */ + assert(src_type.width * src_type.length == dst_type.width * dst_type.length); + + /* We must not loose or gain channels. Only precision */ + assert(src_type.length * num_srcs == dst_type.length * num_dsts); + + if(!src_type.norm && dst_type.norm) { + /* FIXME: clamp */ + } + + if(src_type.floating && !dst_type.floating) { + double dscale; + LLVMTypeRef tmp; + + /* Rescale */ + dscale = lp_const_scale(dst_type); + if (dscale != 1.0) { + LLVMValueRef scale = lp_build_const_uni(src_type, dscale); + for(i = 0; i < num_srcs; ++i) + src[i] = LLVMBuildMul(builder, src[i], scale, ""); + } + + /* Use an equally sized integer for intermediate computations */ + src_type.floating = FALSE; + tmp = lp_build_vec_type(src_type); + for(i = 0; i < num_srcs; ++i) { +#if 0 + if(dst_type.sign) + src[i] = LLVMBuildFPToSI(builder, src[i], tmp, ""); + else + src[i] = LLVMBuildFPToUI(builder, src[i], tmp, ""); +#else + /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */ + src[i] = LLVMBuildFPToSI(builder, src[i], tmp, ""); +#endif + } + } + else { + unsigned src_shift = lp_const_shift(src_type); + unsigned dst_shift = lp_const_shift(dst_type); + + if(src_shift > dst_shift) { + LLVMValueRef shift = lp_build_int_const_uni(src_type, src_shift - dst_shift); + for(i = 0; i < num_srcs; ++i) + if(dst_type.sign) + src[i] = LLVMBuildAShr(builder, src[i], shift, ""); + else + src[i] = LLVMBuildLShr(builder, src[i], shift, ""); + } + } + + if(src_type.width > dst_type.width) { + assert(num_dsts == 1); + dst[0] = lp_build_trunc(builder, src_type, dst_type, src, num_srcs); + } + else + assert(0); +} diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h new file mode 100644 index 0000000000..03be8f28ca --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h @@ -0,0 +1,54 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Helper functions for type conversions. + * + * @author Jose Fonseca + */ + + +#ifndef LP_BLD_CONV_H +#define LP_BLD_CONV_H + + +#include + + +union lp_type type; + + +void +lp_build_conv(LLVMBuilderRef builder, + union lp_type src_type, + union lp_type dst_type, + LLVMValueRef *srcs, unsigned num_srcs, + LLVMValueRef *dsts, unsigned num_dsts); + + +#endif /* !LP_BLD_CONV_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c new file mode 100644 index 0000000000..6b43279ae5 --- /dev/null +++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c @@ -0,0 +1,392 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Unit tests for type conversion. + * + * @author Jose Fonseca + */ + + +#include "lp_bld_type.h" +#include "lp_bld_conv.h" +#include "lp_test.h" + + +typedef void (*conv_test_ptr_t)(const void *src, const void *dst); + + +void +write_tsv_header(FILE *fp) +{ + fprintf(fp, + "result\t" + "cycles\t" + "type\t" + "src_type\t" + "dst_type\n"); + + fflush(fp); +} + + +static void +write_tsv_row(FILE *fp, + union lp_type src_type, + union lp_type dst_type, + double cycles, + boolean success) +{ + fprintf(fp, "%s\t", success ? "pass" : "fail"); + + fprintf(fp, "%.1f\t", cycles + 0.5); + + dump_type(fp, src_type); + fprintf(fp, "\t"); + + dump_type(fp, dst_type); + fprintf(fp, "\t"); + + fflush(fp); +} + + +static void +dump_conv_types(FILE *fp, + union lp_type src_type, + union lp_type dst_type) +{ + fprintf(fp, "src_type="); + dump_type(fp, src_type); + + fprintf(fp, " dst_type="); + dump_type(fp, dst_type); + + fflush(fp); +} + + +static LLVMValueRef +add_conv_test(LLVMModuleRef module, + union lp_type src_type, unsigned num_srcs, + union lp_type dst_type, unsigned num_dsts) +{ + LLVMTypeRef args[2]; + LLVMValueRef func; + LLVMValueRef src_ptr; + LLVMValueRef dst_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef src[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef dst[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + args[0] = LLVMPointerType(lp_build_vec_type(src_type), 0); + args[1] = LLVMPointerType(lp_build_vec_type(dst_type), 0); + + func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); + LLVMSetFunctionCallConv(func, LLVMCCallConv); + src_ptr = LLVMGetParam(func, 0); + dst_ptr = LLVMGetParam(func, 1); + + block = LLVMAppendBasicBlock(func, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + for(i = 0; i < num_srcs; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef ptr = LLVMBuildGEP(builder, src_ptr, &index, 1, ""); + src[i] = LLVMBuildLoad(builder, ptr, ""); + } + + lp_build_conv(builder, src_type, dst_type, src, num_srcs, dst, num_dsts); + + for(i = 0; i < num_dsts; ++i) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); + LLVMValueRef ptr = LLVMBuildGEP(builder, dst_ptr, &index, 1, ""); + LLVMBuildStore(builder, dst[i], ptr); + } + + LLVMBuildRetVoid(builder);; + + LLVMDisposeBuilder(builder); + return func; +} + + +static boolean +test_one(unsigned verbose, + FILE *fp, + union lp_type src_type, + union lp_type dst_type) +{ + LLVMModuleRef module = NULL; + LLVMValueRef func = NULL; + LLVMExecutionEngineRef engine = NULL; + LLVMModuleProviderRef provider = NULL; + LLVMPassManagerRef pass = NULL; + char *error = NULL; + conv_test_ptr_t conv_test_ptr; + boolean success; + const unsigned n = 32; + int64_t cycles[n]; + double cycles_avg = 0.0; + unsigned num_srcs; + unsigned num_dsts; + unsigned i, j; + + if(verbose >= 1) + dump_conv_types(stdout, src_type, dst_type); + + if(src_type.length > dst_type.length) { + num_srcs = 1; + num_dsts = src_type.length/dst_type.length; + } + else { + num_dsts = 1; + num_srcs = dst_type.length/src_type.length; + } + + assert(src_type.width * src_type.length == dst_type.width * dst_type.length); + + /* We must not loose or gain channels. Only precision */ + assert(src_type.length * num_srcs == dst_type.length * num_dsts); + + + module = LLVMModuleCreateWithName("test"); + + func = add_conv_test(module, src_type, num_srcs, dst_type, num_dsts); + + if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { + LLVMDumpModule(module); + abort(); + } + LLVMDisposeMessage(error); + + provider = LLVMCreateModuleProviderForExistingModule(module); + if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { + dump_conv_types(stderr, src_type, dst_type); + fprintf(stderr, "\n"); + fprintf(stderr, "%s\n", error); + LLVMDisposeMessage(error); + abort(); + } + +#if 0 + pass = LLVMCreatePassManager(); + LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); + /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, + * but there are more on SVN. */ + LLVMAddConstantPropagationPass(pass); + LLVMAddInstructionCombiningPass(pass); + LLVMAddPromoteMemoryToRegisterPass(pass); + LLVMAddGVNPass(pass); + LLVMAddCFGSimplificationPass(pass); + LLVMRunPassManager(pass, module); +#else + (void)pass; +#endif + + if(verbose >= 2) + LLVMDumpModule(module); + + conv_test_ptr = (conv_test_ptr_t)LLVMGetPointerToGlobal(engine, func); + + success = TRUE; + for(i = 0; i < n && success; ++i) { + unsigned src_stride = src_type.length*src_type.width/8; + unsigned dst_stride = dst_type.length*dst_type.width/8; + uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH]; + int64_t start_counter = 0; + int64_t end_counter = 0; + + for(j = 0; j < num_srcs; ++j) { + random_vec(src_type, src + j*src_stride); + read_vec(src_type, src + j*src_stride, fref + j*src_type.length); + } + + for(j = 0; j < num_dsts; ++j) { + write_vec(dst_type, ref + j*dst_stride, fref + j*dst_type.length); + } + + start_counter = rdtsc(); + conv_test_ptr(src, dst); + end_counter = rdtsc(); + + cycles[i] = end_counter - start_counter; + + for(j = 0; j < num_dsts; ++j) { + if(!compare_vec(dst_type, dst + j*dst_stride, ref + j*dst_stride)) + success = FALSE; + } + + if (!success) { + dump_conv_types(stderr, src_type, dst_type); + fprintf(stderr, "\n"); + fprintf(stderr, "MISMATCH\n"); + + for(j = 0; j < num_srcs; ++j) { + fprintf(stderr, " Src%u: ", j); + dump_vec(stderr, src_type, src + j*src_stride); + fprintf(stderr, "\n"); + } + + for(j = 0; j < src_type.length*num_srcs; ++j) + fprintf(stderr, " %f", fref[j]); + fprintf(stderr, "\n"); + + for(j = 0; j < num_dsts; ++j) { + fprintf(stderr, " Dst%u: ", j); + dump_vec(stderr, dst_type, dst + j*dst_stride); + fprintf(stderr, "\n"); + + fprintf(stderr, " Ref%u: ", j); + dump_vec(stderr, dst_type, ref + j*dst_stride); + fprintf(stderr, "\n"); + } + } + } + + /* + * Unfortunately the output of cycle counter is not very reliable as it comes + * -- sometimes we get outliers (due IRQs perhaps?) which are + * better removed to avoid random or biased data. + */ + { + double sum = 0.0, sum2 = 0.0; + double avg, std; + unsigned m; + + for(i = 0; i < n; ++i) { + sum += cycles[i]; + sum2 += cycles[i]*cycles[i]; + } + + avg = sum/n; + std = sqrtf((sum2 - n*avg*avg)/n); + + m = 0; + sum = 0.0; + for(i = 0; i < n; ++i) { + if(fabs(cycles[i] - avg) <= 4.0*std) { + sum += cycles[i]; + ++m; + } + } + + cycles_avg = sum/m; + + } + + if(verbose >= 1) { + fprintf(stdout, " cycles=%.1f", cycles_avg); + } + + if(verbose >= 1) { + fprintf(stdout, " result=%s\n", success ? "pass" : "fail"); + fflush(stdout); + } + + if(fp) + write_tsv_row(fp, src_type, dst_type, cycles_avg, success); + + if (!success) { + LLVMDumpModule(module); + LLVMWriteBitcodeToFile(module, "conv.bc"); + fprintf(stderr, "conv.bc written\n"); + abort(); + } + + LLVMFreeMachineCodeForFunction(engine, func); + + LLVMDisposeExecutionEngine(engine); + if(pass) + LLVMDisposePassManager(pass); + + return success; +} + + +const union lp_type conv_types[] = { + /* float, fixed, sign, norm, width, len */ + {{ TRUE, FALSE, TRUE, TRUE, 32, 4 }}, /* f32 x 4 */ + {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, /* u8n x 16 */ +}; + + +const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]); + + +boolean +test_all(unsigned verbose, FILE *fp) +{ + const union lp_type *src_type; + const union lp_type *dst_type; + bool success = TRUE; + + for(src_type = conv_types; src_type < &conv_types[1 /* num_types */]; ++src_type) { + for(dst_type = conv_types; dst_type < &conv_types[num_types]; ++dst_type) { + + if(src_type == dst_type) + continue; + + if(!test_one(verbose, fp, *src_type, *dst_type)) + success = FALSE; + + } + } + + return success; +} + + +boolean +test_some(unsigned verbose, FILE *fp, unsigned long n) +{ + const union lp_type *src_type; + const union lp_type *dst_type; + unsigned long i; + bool success = TRUE; + + for(i = 0; i < n; ++i) { + src_type = &conv_types[0 /* random() % num_types */]; + + do { + dst_type = &conv_types[random() % num_types]; + } while (src_type == dst_type); + + if(!test_one(verbose, fp, *src_type, *dst_type)) + success = FALSE; + } + + return success; +} -- cgit v1.2.3