diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_quad.c | 8 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_swizzle.c | 176 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_swizzle.h | 8 |
3 files changed, 169 insertions, 23 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c index 38fd5a39ef..ca36046d22 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c @@ -61,8 +61,8 @@ LLVMValueRef lp_build_ddx(struct lp_build_context *bld, LLVMValueRef a) { - LLVMValueRef a_left = lp_build_swizzle1_aos(bld, a, swizzle_left); - LLVMValueRef a_right = lp_build_swizzle1_aos(bld, a, swizzle_right); + LLVMValueRef a_left = lp_build_swizzle_aos(bld, a, swizzle_left); + LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right); return lp_build_sub(bld, a_right, a_left); } @@ -71,8 +71,8 @@ LLVMValueRef lp_build_ddy(struct lp_build_context *bld, LLVMValueRef a) { - LLVMValueRef a_top = lp_build_swizzle1_aos(bld, a, swizzle_top); - LLVMValueRef a_bottom = lp_build_swizzle1_aos(bld, a, swizzle_bottom); + LLVMValueRef a_top = lp_build_swizzle_aos(bld, a, swizzle_top); + LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom); return lp_build_sub(bld, a_bottom, a_top); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 53705fa082..20cf96ca66 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -110,7 +110,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing * using shuffles here actually causes worst results. More investigation is * needed. */ - if (n <= 4) { + if (type.width >= 16) { /* * Shuffle. */ @@ -132,7 +132,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld, * YY00 YY00 .... YY00 * YYYY YYYY .... YYYY <= output */ - struct lp_type type4 = type; + struct lp_type type4; const char shifts[4][2] = { { 1, 2}, {-1, 2}, @@ -147,6 +147,13 @@ lp_build_broadcast_aos(struct lp_build_context *bld, a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), ""); + /* + * Build a type where each element is an integer that cover the four + * channels. + */ + + type4 = type; + type4.floating = FALSE; type4.width *= 4; type4.length /= 4; @@ -176,31 +183,170 @@ lp_build_broadcast_aos(struct lp_build_context *bld, LLVMValueRef -lp_build_swizzle1_aos(struct lp_build_context *bld, - LLVMValueRef a, - const unsigned char swizzle[4]) +lp_build_swizzle_aos(struct lp_build_context *bld, + LLVMValueRef a, + const unsigned char swizzles[4]) { - const unsigned n = bld->type.length; + const struct lp_type type = bld->type; + const unsigned n = type.length; unsigned i, j; - if(a == bld->undef || a == bld->zero || a == bld->one) + if (swizzles[0] == PIPE_SWIZZLE_RED && + swizzles[1] == PIPE_SWIZZLE_GREEN && + swizzles[2] == PIPE_SWIZZLE_BLUE && + swizzles[3] == PIPE_SWIZZLE_ALPHA) { return a; + } - if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3]) - return lp_build_broadcast_aos(bld, a, swizzle[0]); + if (swizzles[0] == swizzles[1] && + swizzles[1] == swizzles[2] && + swizzles[2] == swizzles[3]) { + switch (swizzles[0]) { + case PIPE_SWIZZLE_RED: + case PIPE_SWIZZLE_GREEN: + case PIPE_SWIZZLE_BLUE: + case PIPE_SWIZZLE_ALPHA: + return lp_build_broadcast_aos(bld, a, swizzles[0]); + case PIPE_SWIZZLE_ZERO: + return bld->zero; + case PIPE_SWIZZLE_ONE: + return bld->one; + default: + assert(0); + return bld->undef; + } + } - { + if (type.width >= 16) { /* * Shuffle. */ - LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(type)); + LLVMTypeRef i32t = LLVMInt32Type(); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef aux[LP_MAX_VECTOR_LENGTH]; + + memset(aux, 0, sizeof aux); + + for(j = 0; j < n; j += 4) { + for(i = 0; i < 4; ++i) { + unsigned shuffle; + switch (swizzles[i]) { + default: + assert(0); + /* fall through */ + case PIPE_SWIZZLE_RED: + case PIPE_SWIZZLE_GREEN: + case PIPE_SWIZZLE_BLUE: + case PIPE_SWIZZLE_ALPHA: + shuffle = j + swizzles[i]; + break; + case PIPE_SWIZZLE_ZERO: + shuffle = type.length + 0; + if (!aux[0]) { + aux[0] = lp_build_const_elem(type, 0.0); + } + break; + case PIPE_SWIZZLE_ONE: + shuffle = type.length + 1; + if (!aux[1]) { + aux[1] = lp_build_const_elem(type, 1.0); + } + break; + } + shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); + } + } - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0); + for (i = 0; i < n; ++i) { + if (!aux[i]) { + aux[i] = undef; + } + } - return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); + return LLVMBuildShuffleVector(bld->builder, a, + LLVMConstVector(aux, n), + LLVMConstVector(shuffles, n), ""); + } else { + /* + * Bit mask and shifts. + * + * For example, this will convert BGRA to RGBA by doing + * + * rgba = (bgra & 0x00ff0000) >> 16 + * | (bgra & 0xff00ff00) + * | (bgra & 0x000000ff) << 16 + * + * This is necessary not only for faster cause, but because X86 backend + * will refuse shuffles of <4 x i8> vectors + */ + LLVMValueRef res; + struct lp_type type4; + boolean cond[4]; + unsigned chan; + int shift; + + /* + * Start with a mixture of 1 and 0. + */ + for (chan = 0; chan < 4; ++chan) { + cond[chan] = swizzles[chan] == PIPE_SWIZZLE_ONE ? TRUE : FALSE; + } + res = lp_build_select_aos(bld, bld->one, bld->zero, cond); + + /* + * Build a type where each element is an integer that cover the four + * channels. + */ + type4 = type; + type4.floating = FALSE; + type4.width *= 4; + type4.length /= 4; + + a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), ""); + res = LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type4), ""); + + /* + * Mask and shift the channels, trying to group as many channels in the + * same shift as possible + */ + for (shift = -3; shift <= 3; ++shift) { + unsigned long long mask = 0; + + assert(type4.width <= sizeof(mask)*8); + + for (chan = 0; chan < 4; ++chan) { + /* FIXME: big endian */ + if (swizzles[chan] < 4 && + chan - swizzles[chan] == shift) { + mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width); + } + } + + if (mask) { + LLVMValueRef masked; + LLVMValueRef shifted; + + if (0) + debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask); + + masked = LLVMBuildAnd(bld->builder, a, + lp_build_const_int_vec(type4, mask), ""); + if (shift > 0) { + shifted = LLVMBuildShl(bld->builder, masked, + lp_build_const_int_vec(type4, shift*type.width), ""); + } else if (shift < 0) { + shifted = LLVMBuildLShr(bld->builder, masked, + lp_build_const_int_vec(type4, -shift*type.width), ""); + } else { + shifted = masked; + } + + res = LLVMBuildOr(bld->builder, res, shifted, ""); + } + } + + return LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type), ""); } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index 509e97c0ae..315e1bcb54 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -68,12 +68,12 @@ lp_build_broadcast_aos(struct lp_build_context *bld, /** * Swizzle a vector consisting of an array of XYZW structs. * - * @param swizzle is the in [0,4[ range. + * @param swizzles is the in [0,4[ range. */ LLVMValueRef -lp_build_swizzle1_aos(struct lp_build_context *bld, - LLVMValueRef a, - const unsigned char swizzle[4]); +lp_build_swizzle_aos(struct lp_build_context *bld, + LLVMValueRef a, + const unsigned char swizzles[4]); LLVMValueRef |