summary refs log tree commit diff
path: root/src/gallium/drivers/llvmpipe/lp_test_blend.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/llvmpipe/lp_test_blend.c')
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_test_blend.c 881
1 files changed, 881 insertions, 0 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
new file mode 100644
index 0000000000..8dfad468e3
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -0,0 +1,881 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Unit tests for blend LLVM IR generation
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ *
+ * Blend computation code derived from code written by
+ * @author Brian Paul <brian@vmware.com>
+ */
+
+
+#include "lp_bld_type.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_blend.h"
+#include "lp_bld_debug.h"
+#include "lp_test.h"
+
+
+/**
+ * Data layout exercised by a generated blend test.
+ *
+ * The numeric values are relied upon: test_all() iterates the mode over
+ * 0..1 and dump_blend_type() treats any non-zero mode as SoA.
+ */
+enum vector_mode
+{
+ /* One vector per operand; add_blend_test() loads a single vector each
+  * for src, dst and const and calls lp_build_blend_aos(). */
+ AoS = 0,
+ /* Four consecutive vectors per operand, named r, g, b and a by
+  * add_blend_test(), which then calls lp_build_blend_soa(). */
+ SoA = 1
+};
+
+
+/**
+ * Signature of the JIT-compiled function produced by add_blend_test():
+ * it reads the source, destination and constant colors through the first
+ * three pointers and stores the blended result through res.
+ */
+typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);
+
+
+/**
+ * Emit the column-name line of the tab-separated results table.
+ *
+ * The columns are listed in the same order in which write_tsv_row()
+ * writes its fields.
+ *
+ * @param fp  stream to write to; it is flushed before returning
+ */
+void
+write_tsv_header(FILE *fp)
+{
+   static const char header[] =
+      "result\t"
+      "cycles_per_channel\t"
+      "mode\t"
+      "type\t"
+      "sep_func\t"
+      "sep_src_factor\t"
+      "sep_dst_factor\t"
+      "rgb_func\t"
+      "rgb_src_factor\t"
+      "rgb_dst_factor\t"
+      "alpha_func\t"
+      "alpha_src_factor\t"
+      "alpha_dst_factor\n";
+
+   fputs(header, fp);
+   fflush(fp);
+}
+
+
+/**
+ * Append one result line to the tab-separated results table.
+ *
+ * Fields, in order: pass/fail, cycles per channel, vector mode, vector
+ * type, three true/false flags telling whether the RGB and alpha func,
+ * source factor and destination factor differ, and finally the six blend
+ * state names.
+ *
+ * @param fp       output stream; flushed before returning
+ * @param blend    blend state that was tested
+ * @param mode     AoS divides cycles by the vector length, SoA by four
+ *                 times the vector length
+ * @param type     vector type that was tested
+ * @param cycles   cycle count measured for one call of the generated code
+ * @param success  whether the generated code matched the reference
+ */
+static void
+write_tsv_row(FILE *fp,
+              const struct pipe_blend_state *blend,
+              enum vector_mode mode,
+              union lp_type type,
+              double cycles,
+              boolean success)
+{
+   const char *verdict = success ? "pass" : "fail";
+   const char *kind;
+   const char *sep_func;
+   const char *sep_src_factor;
+   const char *sep_dst_factor;
+
+   fprintf(fp, "%s\t", verdict);
+
+   switch (mode) {
+   case AoS:
+      fprintf(fp, "%.1f\t", cycles / type.length);
+      fprintf(fp, "aos\t");
+      break;
+   case SoA:
+      fprintf(fp, "%.1f\t", cycles / (4 * type.length));
+      fprintf(fp, "soa\t");
+      break;
+   }
+
+   /* Single-letter type class: float, fixed ("h"), signed or unsigned. */
+   if (type.floating)
+      kind = "f";
+   else if (type.fixed)
+      kind = "h";
+   else if (type.sign)
+      kind = "s";
+   else
+      kind = "u";
+
+   fprintf(fp, "%s%u%sx%u\t",
+           kind,
+           type.width,
+           type.norm ? "n" : "",
+           type.length);
+
+   sep_func = blend->rgb_func != blend->alpha_func ? "true" : "false";
+   sep_src_factor = blend->rgb_src_factor != blend->alpha_src_factor ? "true" : "false";
+   sep_dst_factor = blend->rgb_dst_factor != blend->alpha_dst_factor ? "true" : "false";
+
+   fprintf(fp, "%s\t%s\t%s\t", sep_func, sep_src_factor, sep_dst_factor);
+
+   fprintf(fp,
+           "%s\t%s\t%s\t%s\t%s\t%s\n",
+           debug_dump_blend_func(blend->rgb_func, TRUE),
+           debug_dump_blend_factor(blend->rgb_src_factor, TRUE),
+           debug_dump_blend_factor(blend->rgb_dst_factor, TRUE),
+           debug_dump_blend_func(blend->alpha_func, TRUE),
+           debug_dump_blend_factor(blend->alpha_src_factor, TRUE),
+           debug_dump_blend_factor(blend->alpha_dst_factor, TRUE));
+
+   fflush(fp);
+}
+
+
+/**
+ * Print a one-line, human-readable description of a test variant: the
+ * vector mode, the vector type and the six blend state fields, followed
+ * by " ..." and a newline.
+ *
+ * @param fp     output stream; flushed before returning
+ * @param blend  blend state being tested
+ * @param mode   AoS prints "aos", anything else prints "soa"
+ * @param type   vector type being tested
+ */
+static void
+dump_blend_type(FILE *fp,
+                const struct pipe_blend_state *blend,
+                enum vector_mode mode,
+                union lp_type type)
+{
+   const char *kind;
+
+   fputs(mode == AoS ? "aos" : "soa", fp);
+
+   /* Single-letter type class: float, fixed ("h"), signed or unsigned. */
+   if (type.floating)
+      kind = "f";
+   else if (type.fixed)
+      kind = "h";
+   else if (type.sign)
+      kind = "s";
+   else
+      kind = "u";
+
+   fprintf(fp, " type=%s%u%sx%u",
+           kind,
+           type.width,
+           type.norm ? "n" : "",
+           type.length);
+
+   fprintf(fp,
+           " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
+           "rgb_func", debug_dump_blend_func(blend->rgb_func, TRUE),
+           "rgb_src_factor", debug_dump_blend_factor(blend->rgb_src_factor, TRUE),
+           "rgb_dst_factor", debug_dump_blend_factor(blend->rgb_dst_factor, TRUE),
+           "alpha_func", debug_dump_blend_func(blend->alpha_func, TRUE),
+           "alpha_src_factor", debug_dump_blend_factor(blend->alpha_src_factor, TRUE),
+           "alpha_dst_factor", debug_dump_blend_factor(blend->alpha_dst_factor, TRUE));
+
+   fputs(" ...\n", fp);
+   fflush(fp);
+}
+
+
+/**
+ * Add to the module an LLVM function named "test" that blends the colors
+ * it is given and stores the result.
+ *
+ * The function returns void, uses the C calling convention and takes four
+ * pointers to the vector type derived from @p type: source, destination,
+ * constant color and result.
+ *
+ * - AoS: one vector is loaded from each input pointer, blended with
+ *   lp_build_blend_aos() and one vector is stored.
+ * - SoA: four consecutive vectors (r, g, b, a) are loaded from each input
+ *   pointer, blended with lp_build_blend_soa() and four vectors are stored.
+ *
+ * @param module  module that receives the function
+ * @param blend   blend state to generate code for
+ * @param mode    data layout to exercise
+ * @param type    element type and length of the vectors
+ * @return the newly added function
+ */
+static LLVMValueRef
+add_blend_test(LLVMModuleRef module,
+               const struct pipe_blend_state *blend,
+               enum vector_mode mode,
+               union lp_type type)
+{
+   LLVMTypeRef vec_type;
+   LLVMTypeRef args[4];
+   LLVMValueRef func;
+   LLVMValueRef src_ptr;
+   LLVMValueRef dst_ptr;
+   LLVMValueRef const_ptr;
+   LLVMValueRef res_ptr;
+   LLVMBasicBlockRef block;
+   LLVMBuilderRef builder;
+
+   vec_type = lp_build_vec_type(type);
+
+   args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
+   func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
+   LLVMSetFunctionCallConv(func, LLVMCCallConv);
+   src_ptr = LLVMGetParam(func, 0);
+   dst_ptr = LLVMGetParam(func, 1);
+   const_ptr = LLVMGetParam(func, 2);
+   res_ptr = LLVMGetParam(func, 3);
+
+   block = LLVMAppendBasicBlock(func, "entry");
+   builder = LLVMCreateBuilder();
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   if (mode == AoS) {
+      LLVMValueRef src;
+      LLVMValueRef dst;
+      LLVMValueRef con;
+      LLVMValueRef res;
+
+      src = LLVMBuildLoad(builder, src_ptr, "src");
+      dst = LLVMBuildLoad(builder, dst_ptr, "dst");
+      con = LLVMBuildLoad(builder, const_ptr, "const");
+
+      /* NOTE(review): the trailing 3 is presumably the index of the alpha
+       * channel within each pixel -- confirm against lp_bld_blend.h. */
+      res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3);
+
+      lp_build_name(res, "res");
+
+      LLVMBuildStore(builder, res, res_ptr);
+   }
+
+   if (mode == SoA) {
+      LLVMValueRef src[4];
+      LLVMValueRef dst[4];
+      LLVMValueRef con[4];
+      LLVMValueRef res[4];
+      unsigned i;
+
+      /* Channel i lives in the i-th vector behind each pointer. */
+      for (i = 0; i < 4; ++i) {
+         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+         src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
+         dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
+         con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
+         lp_build_name(src[i], "src.%c", "rgba"[i]);
+         lp_build_name(con[i], "con.%c", "rgba"[i]);
+         lp_build_name(dst[i], "dst.%c", "rgba"[i]);
+      }
+
+      lp_build_blend_soa(builder, blend, type, src, dst, con, res);
+
+      for (i = 0; i < 4; ++i) {
+         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+         lp_build_name(res[i], "res.%c", "rgba"[i]);
+         LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
+      }
+   }
+
+   LLVMBuildRetVoid(builder);
+
+   LLVMDisposeBuilder(builder);
+   return func;
+}
+
+
+/**
+ * Store A + B in R, clamping the result to a ceiling of 1.0.
+ *
+ * R must be an lvalue and is evaluated more than once, so it must not
+ * have side effects.
+ */
+#define ADD_SAT(R, A, B) \
+   do { \
+      (R) = (A) + (B); \
+      if ((R) > 1.0f) \
+         (R) = 1.0f; \
+   } while (0)
+
+/**
+ * Store A - B in R, clamping the result to a floor of 0.0.
+ *
+ * R must be an lvalue and is evaluated more than once, so it must not
+ * have side effects.
+ */
+#define SUB_SAT(R, A, B) \
+   do { \
+      (R) = (A) - (B); \
+      if ((R) < 0.0f) \
+         (R) = 0.0f; \
+   } while (0)
+
+
+/**
+ * Reference computation of one blend term: a color multiplied, channel by
+ * channel, by the blend factor selected for it.
+ *
+ * compute_blend_ref() calls this twice, once with factor == src to obtain
+ * the source term and once with factor == dst to obtain the destination
+ * term.
+ *
+ * The SRC1 factors are not implemented and hit assert(0).
+ *
+ * @param rgb_factor    PIPE_BLENDFACTOR_x applied to the R, G and B channels
+ * @param alpha_factor  PIPE_BLENDFACTOR_x applied to the A channel
+ * @param factor        RGBA color being scaled
+ * @param src           RGBA source color
+ * @param dst           RGBA destination color
+ * @param con           RGBA constant blend color
+ * @param term          receives the four scaled channels
+ */
+static void
+compute_blend_ref_term(unsigned rgb_factor,
+                       unsigned alpha_factor,
+                       const double *factor,
+                       const double *src,
+                       const double *dst,
+                       const double *con,
+                       double *term)
+{
+   double temp;
+
+   /*
+    * Compute the R, G and B channels of the term
+    */
+   switch (rgb_factor) {
+   case PIPE_BLENDFACTOR_ONE:
+      term[0] = factor[0]; /* R */
+      term[1] = factor[1]; /* G */
+      term[2] = factor[2]; /* B */
+      break;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+      term[0] = factor[0] * src[0]; /* R */
+      term[1] = factor[1] * src[1]; /* G */
+      term[2] = factor[2] * src[2]; /* B */
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      term[0] = factor[0] * src[3]; /* R */
+      term[1] = factor[1] * src[3]; /* G */
+      term[2] = factor[2] * src[3]; /* B */
+      break;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+      term[0] = factor[0] * dst[0]; /* R */
+      term[1] = factor[1] * dst[1]; /* G */
+      term[2] = factor[2] * dst[2]; /* B */
+      break;
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      term[0] = factor[0] * dst[3]; /* R */
+      term[1] = factor[1] * dst[3]; /* G */
+      term[2] = factor[2] * dst[3]; /* B */
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      temp = MIN2(src[3], 1.0f - dst[3]);
+      term[0] = factor[0] * temp; /* R */
+      term[1] = factor[1] * temp; /* G */
+      term[2] = factor[2] * temp; /* B */
+      break;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+      term[0] = factor[0] * con[0]; /* R */
+      term[1] = factor[1] * con[1]; /* G */
+      term[2] = factor[2] * con[2]; /* B */
+      break;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      term[0] = factor[0] * con[3]; /* R */
+      term[1] = factor[1] * con[3]; /* G */
+      term[2] = factor[2] * con[3]; /* B */
+      break;
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+      assert(0); /* to do */
+      break;
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+      assert(0); /* to do */
+      break;
+   case PIPE_BLENDFACTOR_ZERO:
+      term[0] = 0.0f; /* R */
+      term[1] = 0.0f; /* G */
+      term[2] = 0.0f; /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+      term[0] = factor[0] * (1.0f - src[0]); /* R */
+      term[1] = factor[1] * (1.0f - src[1]); /* G */
+      term[2] = factor[2] * (1.0f - src[2]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      term[0] = factor[0] * (1.0f - src[3]); /* R */
+      term[1] = factor[1] * (1.0f - src[3]); /* G */
+      term[2] = factor[2] * (1.0f - src[3]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      term[0] = factor[0] * (1.0f - dst[3]); /* R */
+      term[1] = factor[1] * (1.0f - dst[3]); /* G */
+      term[2] = factor[2] * (1.0f - dst[3]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+      term[0] = factor[0] * (1.0f - dst[0]); /* R */
+      term[1] = factor[1] * (1.0f - dst[1]); /* G */
+      term[2] = factor[2] * (1.0f - dst[2]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      term[0] = factor[0] * (1.0f - con[0]); /* R */
+      term[1] = factor[1] * (1.0f - con[1]); /* G */
+      term[2] = factor[2] * (1.0f - con[2]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      term[0] = factor[0] * (1.0f - con[3]); /* R */
+      term[1] = factor[1] * (1.0f - con[3]); /* G */
+      term[2] = factor[2] * (1.0f - con[3]); /* B */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      assert(0); /* to do */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      assert(0); /* to do */
+      break;
+   default:
+      assert(0);
+   }
+
+   /*
+    * Compute the A channel of the term.  For alpha the COLOR and ALPHA
+    * variants of a factor select the same value.
+    */
+   switch (alpha_factor) {
+   case PIPE_BLENDFACTOR_ONE:
+      term[3] = factor[3]; /* A */
+      break;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      term[3] = factor[3] * src[3]; /* A */
+      break;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      term[3] = factor[3] * dst[3]; /* A */
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      /* The alpha component of the saturate factor is 1, so the term is
+       * the scaled color's own alpha -- not unconditionally src[3], which
+       * is only right when factor == src. */
+      term[3] = factor[3]; /* A */
+      break;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      term[3] = factor[3] * con[3]; /* A */
+      break;
+   case PIPE_BLENDFACTOR_ZERO:
+      term[3] = 0.0f; /* A */
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      term[3] = factor[3] * (1.0f - src[3]); /* A */
+      break;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      term[3] = factor[3] * (1.0f - dst[3]); /* A */
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      term[3] = factor[3] * (1.0f - con[3]); /* A */
+      break;
+   default:
+      assert(0);
+   }
+}
+
+
+/**
+ * Combine the source and destination terms of a single channel with a
+ * PIPE_BLEND_x equation and store the outcome in *res.
+ *
+ * ADD clamps to 1.0, SUBTRACT and REVERSE_SUBTRACT clamp to 0.0, MIN and
+ * MAX are not clamped.  Any other value hits assert(0).
+ */
+static void
+combine_blend_ref_terms(unsigned func,
+                        double src_term,
+                        double dst_term,
+                        double *res)
+{
+   switch (func) {
+   case PIPE_BLEND_ADD:
+      ADD_SAT(*res, src_term, dst_term);
+      break;
+   case PIPE_BLEND_SUBTRACT:
+      SUB_SAT(*res, src_term, dst_term);
+      break;
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      SUB_SAT(*res, dst_term, src_term);
+      break;
+   case PIPE_BLEND_MIN:
+      *res = MIN2(src_term, dst_term);
+      break;
+   case PIPE_BLEND_MAX:
+      *res = MAX2(src_term, dst_term);
+      break;
+   default:
+      assert(0);
+   }
+}
+
+
+/**
+ * Reference blend of one RGBA pixel in double precision.
+ *
+ * The source and destination terms are obtained from
+ * compute_blend_ref_term() and then combined per channel: R, G and B with
+ * blend->rgb_func, A with blend->alpha_func.
+ *
+ * @param blend  blend state supplying the factors and functions
+ * @param src    RGBA source color
+ * @param dst    RGBA destination color
+ * @param con    RGBA constant blend color
+ * @param res    receives the four blended channels
+ */
+static void
+compute_blend_ref(const struct pipe_blend_state *blend,
+                  const double *src,
+                  const double *dst,
+                  const double *con,
+                  double *res)
+{
+   double src_term[4];
+   double dst_term[4];
+   unsigned chan;
+
+   compute_blend_ref_term(blend->rgb_src_factor, blend->alpha_src_factor, src, src, dst, con, src_term);
+   compute_blend_ref_term(blend->rgb_dst_factor, blend->alpha_dst_factor, dst, src, dst, con, dst_term);
+
+   /* Channels 0..2 are R, G, B; channel 3 is A. */
+   for (chan = 0; chan < 4; ++chan) {
+      const unsigned func = chan < 3 ? blend->rgb_func : blend->alpha_func;
+
+      combine_blend_ref_terms(func, src_term[chan], dst_term[chan], &res[chan]);
+   }
+}
+
+
+/**
+ * Average of the cycle samples after discarding outliers.
+ *
+ * The cycle counter readings are not very reliable -- now and then a
+ * sample is far off (due to IRQs perhaps?) -- so samples further than four
+ * standard deviations from the mean are dropped before averaging.
+ *
+ * @param cycles  samples; only the first @p count entries are read
+ * @param count   number of valid samples
+ * @return the filtered mean; the plain mean if the filter rejects every
+ *         sample; 0.0 if there are no samples
+ */
+static double
+average_cycles(const int64_t *cycles, unsigned count)
+{
+   double sum = 0.0, sum2 = 0.0;
+   double avg, var, std;
+   unsigned kept = 0;
+   unsigned i;
+
+   if (count == 0)
+      return 0.0;
+
+   for (i = 0; i < count; ++i) {
+      /* Square in double precision so large readings cannot overflow. */
+      const double c = (double)cycles[i];
+      sum += c;
+      sum2 += c * c;
+   }
+
+   avg = sum / count;
+   var = (sum2 - count * avg * avg) / count;
+   /* Rounding can leave the variance marginally negative. */
+   std = var > 0.0 ? sqrt(var) : 0.0;
+
+   sum = 0.0;
+   for (i = 0; i < count; ++i) {
+      const double c = (double)cycles[i];
+      if (fabs(c - avg) <= 4.0 * std) {
+         sum += c;
+         ++kept;
+      }
+   }
+
+   return kept ? sum / kept : avg;
+}
+
+
+/**
+ * Generate, JIT-compile and run the blend code for one variant, comparing
+ * its output with compute_blend_ref() on random inputs.
+ *
+ * Up to 32 random samples are run and timed with rdtsc(); sampling stops
+ * at the first mismatch.  When @p fp is given, one row is appended to the
+ * results table.  On a mismatch the inputs, the result and the reference
+ * are printed to stderr, the module is written to "blend.bc" and the
+ * process aborts, so in practice the function only returns on success.
+ *
+ * @param verbose  >= 1 prints the variant being tested, >= 2 also dumps
+ *                 the module and its disassembly
+ * @param fp       results table stream, or NULL
+ * @param blend    blend state to test
+ * @param mode     data layout to test
+ * @param type     vector type to test
+ * @return TRUE if every sample matched the reference
+ */
+static boolean
+test_one(unsigned verbose,
+         FILE *fp,
+         const struct pipe_blend_state *blend,
+         enum vector_mode mode,
+         union lp_type type)
+{
+   enum { NUM_SAMPLES = 32 };
+   LLVMModuleRef module = NULL;
+   LLVMValueRef func = NULL;
+   LLVMExecutionEngineRef engine = NULL;
+   LLVMModuleProviderRef provider = NULL;
+   LLVMPassManagerRef pass = NULL;
+   char *error = NULL;
+   blend_test_ptr_t blend_test_ptr;
+   boolean success;
+   int64_t cycles[NUM_SAMPLES];
+   double cycles_avg = 0.0;
+   unsigned num_samples;
+   unsigned i, j;
+
+   if (verbose >= 1)
+      dump_blend_type(stdout, blend, mode, type);
+
+   module = LLVMModuleCreateWithName("test");
+
+   func = add_blend_test(module, blend, mode, type);
+
+   if (LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
+      LLVMDumpModule(module);
+      abort();
+   }
+   LLVMDisposeMessage(error);
+
+   provider = LLVMCreateModuleProviderForExistingModule(module);
+   if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
+      if (verbose < 1)
+         dump_blend_type(stderr, blend, mode, type);
+      fprintf(stderr, "%s\n", error);
+      LLVMDisposeMessage(error);
+      abort();
+   }
+
+#if 0
+   pass = LLVMCreatePassManager();
+   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+    * but there are more on SVN. */
+   LLVMAddConstantPropagationPass(pass);
+   LLVMAddInstructionCombiningPass(pass);
+   LLVMAddPromoteMemoryToRegisterPass(pass);
+   LLVMAddGVNPass(pass);
+   LLVMAddCFGSimplificationPass(pass);
+   LLVMRunPassManager(pass, module);
+#else
+   (void)pass;
+#endif
+
+   if (verbose >= 2)
+      LLVMDumpModule(module);
+
+   blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func);
+
+   if (verbose >= 2)
+      lp_disassemble(blend_test_ptr);
+
+   success = TRUE;
+   for (i = 0; i < NUM_SAMPLES && success; ++i) {
+      if (mode == AoS) {
+         uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         int64_t start_counter = 0;
+         int64_t end_counter = 0;
+
+         random_vec(type, src);
+         random_vec(type, dst);
+         random_vec(type, con);
+
+         {
+            double fsrc[LP_MAX_VECTOR_LENGTH];
+            double fdst[LP_MAX_VECTOR_LENGTH];
+            double fcon[LP_MAX_VECTOR_LENGTH];
+            double fref[LP_MAX_VECTOR_LENGTH];
+
+            read_vec(type, src, fsrc);
+            read_vec(type, dst, fdst);
+            read_vec(type, con, fcon);
+
+            /* Every group of four consecutive elements is one RGBA pixel. */
+            for (j = 0; j < type.length; j += 4)
+               compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
+
+            write_vec(type, ref, fref);
+         }
+
+         start_counter = rdtsc();
+         blend_test_ptr(src, dst, con, res);
+         end_counter = rdtsc();
+
+         cycles[i] = end_counter - start_counter;
+
+         if (!compare_vec(type, res, ref)) {
+            success = FALSE;
+
+            if (verbose < 1)
+               dump_blend_type(stderr, blend, mode, type);
+            fprintf(stderr, "MISMATCH\n");
+
+            fprintf(stderr, " Src: ");
+            dump_vec(stderr, type, src);
+            fprintf(stderr, "\n");
+
+            fprintf(stderr, " Dst: ");
+            dump_vec(stderr, type, dst);
+            fprintf(stderr, "\n");
+
+            fprintf(stderr, " Con: ");
+            dump_vec(stderr, type, con);
+            fprintf(stderr, "\n");
+
+            fprintf(stderr, " Res: ");
+            dump_vec(stderr, type, res);
+            fprintf(stderr, "\n");
+
+            fprintf(stderr, " Ref: ");
+            dump_vec(stderr, type, ref);
+            fprintf(stderr, "\n");
+         }
+      }
+
+      if (mode == SoA) {
+         /* Size in bytes of one channel vector. */
+         const unsigned stride = type.length*type.width/8;
+         uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         int64_t start_counter = 0;
+         int64_t end_counter = 0;
+         boolean mismatch;
+
+         for (j = 0; j < 4; ++j) {
+            random_vec(type, src + j*stride);
+            random_vec(type, dst + j*stride);
+            random_vec(type, con + j*stride);
+         }
+
+         {
+            double fsrc[4];
+            double fdst[4];
+            double fcon[4];
+            double fref[4];
+            unsigned k;
+
+            /* Gather pixel k from the four channel vectors, blend it and
+             * scatter the reference back. */
+            for (k = 0; k < type.length; ++k) {
+               for (j = 0; j < 4; ++j) {
+                  fsrc[j] = read_elem(type, src + j*stride, k);
+                  fdst[j] = read_elem(type, dst + j*stride, k);
+                  fcon[j] = read_elem(type, con + j*stride, k);
+               }
+
+               compute_blend_ref(blend, fsrc, fdst, fcon, fref);
+
+               for (j = 0; j < 4; ++j)
+                  write_elem(type, ref + j*stride, k, fref[j]);
+            }
+         }
+
+         start_counter = rdtsc();
+         blend_test_ptr(src, dst, con, res);
+         end_counter = rdtsc();
+
+         cycles[i] = end_counter - start_counter;
+
+         mismatch = FALSE;
+         for (j = 0; j < 4; ++j)
+            if (!compare_vec(type, res + j*stride, ref + j*stride))
+               mismatch = TRUE;
+
+         if (mismatch) {
+            success = FALSE;
+
+            if (verbose < 1)
+               dump_blend_type(stderr, blend, mode, type);
+            fprintf(stderr, "MISMATCH\n");
+            for (j = 0; j < 4; ++j) {
+               char channel = "RGBA"[j];
+               fprintf(stderr, " Src%c: ", channel);
+               dump_vec(stderr, type, src + j*stride);
+               fprintf(stderr, "\n");
+
+               fprintf(stderr, " Dst%c: ", channel);
+               dump_vec(stderr, type, dst + j*stride);
+               fprintf(stderr, "\n");
+
+               fprintf(stderr, " Con%c: ", channel);
+               dump_vec(stderr, type, con + j*stride);
+               fprintf(stderr, "\n");
+
+               fprintf(stderr, " Res%c: ", channel);
+               dump_vec(stderr, type, res + j*stride);
+               fprintf(stderr, "\n");
+
+               fprintf(stderr, " Ref%c: ", channel);
+               dump_vec(stderr, type, ref + j*stride);
+               fprintf(stderr, "\n");
+            }
+         }
+      }
+   }
+
+   /* The loop stops early on a mismatch, so only the first i entries of
+    * cycles[] have been written. */
+   num_samples = i;
+   cycles_avg = average_cycles(cycles, num_samples);
+
+   if (fp)
+      write_tsv_row(fp, blend, mode, type, cycles_avg, success);
+
+   if (!success) {
+      if (verbose < 2)
+         LLVMDumpModule(module);
+      LLVMWriteBitcodeToFile(module, "blend.bc");
+      fprintf(stderr, "blend.bc written\n");
+      fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
+      abort();
+   }
+
+   LLVMFreeMachineCodeForFunction(engine, func);
+
+   LLVMDisposeExecutionEngine(engine);
+   if (pass)
+      LLVMDisposePassManager(pass);
+
+   return success;
+}
+
+
+/**
+ * Blend factors exercised by test_all() and test_some().
+ *
+ * The SRC1 factors are compiled out; the reference implementation in
+ * compute_blend_ref_term() does not handle them.
+ */
+const unsigned
+blend_factors[] = {
+ PIPE_BLENDFACTOR_ZERO,
+ PIPE_BLENDFACTOR_ONE,
+ PIPE_BLENDFACTOR_SRC_COLOR,
+ PIPE_BLENDFACTOR_SRC_ALPHA,
+ PIPE_BLENDFACTOR_DST_COLOR,
+ PIPE_BLENDFACTOR_DST_ALPHA,
+ PIPE_BLENDFACTOR_CONST_COLOR,
+ PIPE_BLENDFACTOR_CONST_ALPHA,
+#if 0
+ PIPE_BLENDFACTOR_SRC1_COLOR,
+ PIPE_BLENDFACTOR_SRC1_ALPHA,
+#endif
+ /* Only ever used as a source factor: both test drivers skip it as a
+  * destination factor. */
+ PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
+ PIPE_BLENDFACTOR_INV_SRC_COLOR,
+ PIPE_BLENDFACTOR_INV_SRC_ALPHA,
+ PIPE_BLENDFACTOR_INV_DST_COLOR,
+ PIPE_BLENDFACTOR_INV_DST_ALPHA,
+ PIPE_BLENDFACTOR_INV_CONST_COLOR,
+ PIPE_BLENDFACTOR_INV_CONST_ALPHA,
+#if 0
+ PIPE_BLENDFACTOR_INV_SRC1_COLOR,
+ PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
+#endif
+};
+
+
+/**
+ * Blend equations exercised by test_all() and test_some(), used for both
+ * the RGB and the alpha function.
+ */
+const unsigned
+blend_funcs[] = {
+ PIPE_BLEND_ADD,
+ PIPE_BLEND_SUBTRACT,
+ PIPE_BLEND_REVERSE_SUBTRACT,
+ PIPE_BLEND_MIN,
+ PIPE_BLEND_MAX
+};
+
+
+/**
+ * Vector types exercised by test_all() and test_some().  The initializer
+ * fields follow the order given in the column comment below.
+ */
+const union lp_type blend_types[] = {
+ /* float, fixed, sign, norm, width, len */
+ {{ TRUE, FALSE, FALSE, TRUE, 32, 4 }}, /* f32 x 4 */
+ {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, /* u8n x 16 */
+};
+
+
+/* Element counts of the three tables above, used as loop bounds by
+ * test_all() and as moduli for random selection by test_some(). */
+const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
+const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
+const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
+
+
+/**
+ * Run test_one() over the systematic set of blend variants.
+ *
+ * Every RGB and alpha function is combined with every source factor.  The
+ * destination factor only runs from the start of blend_factors up to and
+ * including the entry used as source factor, and
+ * PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE is never used as a destination
+ * factor.  Each combination is run for both vector modes and for every
+ * entry of blend_types.
+ *
+ * @param verbose  verbosity level forwarded to test_one()
+ * @param fp       results table stream forwarded to test_one(), or NULL
+ * @return TRUE if every variant passed
+ */
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+   const unsigned *rgb_func;
+   const unsigned *rgb_src_factor;
+   const unsigned *rgb_dst_factor;
+   const unsigned *alpha_func;
+   const unsigned *alpha_src_factor;
+   const unsigned *alpha_dst_factor;
+   struct pipe_blend_state blend;
+   enum vector_mode mode;
+   const union lp_type *type;
+   boolean success = TRUE;
+
+   for (rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
+      for (alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
+         for (rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
+            for (rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
+
+               /* Skipping here drops exactly the iterations the innermost
+                * loop would otherwise have rejected one by one. */
+               if (*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
+                  continue;
+
+               for (alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
+                  for (alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
+
+                     if (*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
+                        continue;
+
+                     /* The blend state does not depend on mode or type. */
+                     memset(&blend, 0, sizeof blend);
+                     blend.blend_enable = 1;
+                     blend.rgb_func = *rgb_func;
+                     blend.rgb_src_factor = *rgb_src_factor;
+                     blend.rgb_dst_factor = *rgb_dst_factor;
+                     blend.alpha_func = *alpha_func;
+                     blend.alpha_src_factor = *alpha_src_factor;
+                     blend.alpha_dst_factor = *alpha_dst_factor;
+                     blend.colormask = PIPE_MASK_RGBA;
+
+                     for (mode = 0; mode < 2; ++mode) {
+                        for (type = blend_types; type < &blend_types[num_types]; ++type) {
+                           if (!test_one(verbose, fp, &blend, mode, *type))
+                              success = FALSE;
+                        }
+                     }
+                  }
+               }
+            }
+         }
+      }
+   }
+
+   return success;
+}
+
+
+/**
+ * Run test_one() on @p n randomly chosen blend variants.
+ *
+ * Functions, factors, vector mode and vector type are drawn with
+ * random().  Destination factors are redrawn until they are not
+ * PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE.
+ *
+ * @param verbose  verbosity level forwarded to test_one()
+ * @param fp       results table stream forwarded to test_one(), or NULL
+ * @param n        number of variants to run
+ * @return TRUE if every variant passed
+ */
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+   const unsigned *rgb_func;
+   const unsigned *rgb_src_factor;
+   const unsigned *rgb_dst_factor;
+   const unsigned *alpha_func;
+   const unsigned *alpha_src_factor;
+   const unsigned *alpha_dst_factor;
+   struct pipe_blend_state blend;
+   enum vector_mode mode;
+   const union lp_type *type;
+   unsigned long i;
+   boolean success = TRUE;
+
+   for (i = 0; i < n; ++i) {
+      /* The order of the random() calls determines which variants a given
+       * seed produces, so it is kept as is. */
+      rgb_func = &blend_funcs[random() % num_funcs];
+      alpha_func = &blend_funcs[random() % num_funcs];
+      rgb_src_factor = &blend_factors[random() % num_factors];
+      alpha_src_factor = &blend_factors[random() % num_factors];
+
+      do {
+         rgb_dst_factor = &blend_factors[random() % num_factors];
+      } while (*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
+
+      do {
+         alpha_dst_factor = &blend_factors[random() % num_factors];
+      } while (*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
+
+      mode = (random() & 1) ? SoA : AoS;
+
+      type = &blend_types[random() % num_types];
+
+      memset(&blend, 0, sizeof blend);
+      blend.blend_enable = 1;
+      blend.rgb_func = *rgb_func;
+      blend.rgb_src_factor = *rgb_src_factor;
+      blend.rgb_dst_factor = *rgb_dst_factor;
+      blend.alpha_func = *alpha_func;
+      blend.alpha_src_factor = *alpha_src_factor;
+      blend.alpha_dst_factor = *alpha_dst_factor;
+      blend.colormask = PIPE_MASK_RGBA;
+
+      if (!test_one(verbose, fp, &blend, mode, *type))
+         success = FALSE;
+   }
+
+   return success;
+}