summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/llvmpipe
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
-rw-r--r--src/gallium/drivers/llvmpipe/.gitignore1
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile9
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.c466
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.h20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_const.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_conv.c243
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_conv.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.c3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format.h80
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_aos.c205
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_query.c72
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_soa.c119
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c30
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_logic.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_logic.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_misc.cpp62
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_misc.h50
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_pack.c418
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_pack.h95
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample.c190
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample.h20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c375
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_swizzle.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c35
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_type.c29
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_type.h98
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_draw_arrays.c50
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.c8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c21
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c17
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c60
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_surface.c5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test.h20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_blend.c24
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_conv.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_format.c98
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_main.c19
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.c28
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_cache.c82
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.c931
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.h16
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.py278
45 files changed, 2418 insertions, 1902 deletions
diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore
new file mode 100644
index 0000000000..257b72d7b2
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/.gitignore
@@ -0,0 +1 @@
+lp_tile_soa.c
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index cd7b6356d2..cdf318844c 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -17,10 +17,13 @@ C_SOURCES = \
lp_bld_depth.c \
lp_bld_flow.c \
lp_bld_format_aos.c \
+ lp_bld_format_query.c \
lp_bld_format_soa.c \
lp_bld_interp.c \
lp_bld_intr.c \
lp_bld_logic.c \
+ lp_bld_pack.c \
+ lp_bld_sample.c \
lp_bld_sample_soa.c \
lp_bld_swizzle.c \
lp_bld_struct.c \
@@ -54,4 +57,10 @@ C_SOURCES = \
lp_tile_cache.c \
lp_tile_soa.c
+CPP_SOURCES = \
+ lp_bld_misc.cpp
+
include ../../Makefile.template
+
+lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv
+ python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index f4a9a3b22e..f4410f8201 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -9,6 +9,13 @@ if not env.has_key('LLVM_VERSION'):
env.Tool('udis86')
+env.CodeGenerate(
+ target = 'lp_tile_soa.c',
+ script = 'lp_tile_soa.py',
+ source = ['#src/gallium/auxiliary/util/u_format.csv'],
+ command = 'python $SCRIPT $SOURCE > $TARGET'
+)
+
llvmpipe = env.ConvenienceLibrary(
target = 'llvmpipe',
source = [
@@ -23,9 +30,13 @@ llvmpipe = env.ConvenienceLibrary(
'lp_bld_depth.c',
'lp_bld_flow.c',
'lp_bld_format_aos.c',
+ 'lp_bld_format_query.c',
'lp_bld_format_soa.c',
'lp_bld_interp.c',
'lp_bld_intr.c',
+ 'lp_bld_misc.cpp',
+ 'lp_bld_pack.c',
+ 'lp_bld_sample.c',
'lp_bld_sample_soa.c',
'lp_bld_struct.c',
'lp_bld_logic.c',
@@ -68,7 +79,7 @@ env.Prepend(LIBS = [llvmpipe] + auxiliaries)
env.Program(
target = 'lp_test_format',
- source = ['lp_test_format.c'],
+ source = ['lp_test_format.c', 'lp_test_main.c'],
)
env.Program(
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 31433318a7..9c59677a74 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -47,12 +47,16 @@
#include "util/u_memory.h"
#include "util/u_debug.h"
+#include "util/u_math.h"
#include "util/u_string.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_intr.h"
#include "lp_bld_logic.h"
+#include "lp_bld_pack.h"
+#include "lp_bld_debug.h"
#include "lp_bld_arit.h"
@@ -71,30 +75,28 @@ lp_build_min_simple(struct lp_build_context *bld,
/* TODO: optimize the constant case */
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
if(type.width * type.length == 128) {
if(type.floating) {
- if(type.width == 32)
+ if(type.width == 32 && util_cpu_caps.has_sse)
intrinsic = "llvm.x86.sse.min.ps";
- if(type.width == 64)
+ if(type.width == 64 && util_cpu_caps.has_sse2)
intrinsic = "llvm.x86.sse2.min.pd";
}
else {
- if(type.width == 8 && !type.sign)
+ if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2)
intrinsic = "llvm.x86.sse2.pminu.b";
- if(type.width == 8 && type.sign)
+ if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pminsb";
- if(type.width == 16 && !type.sign)
+ if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pminuw";
- if(type.width == 16 && type.sign)
+ if(type.width == 16 && type.sign && util_cpu_caps.has_sse2)
intrinsic = "llvm.x86.sse2.pmins.w";
- if(type.width == 32 && !type.sign)
+ if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pminud";
- if(type.width == 32 && type.sign)
+ if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pminsd";
}
}
-#endif
if(intrinsic)
return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
@@ -119,30 +121,28 @@ lp_build_max_simple(struct lp_build_context *bld,
/* TODO: optimize the constant case */
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
if(type.width * type.length == 128) {
if(type.floating) {
- if(type.width == 32)
+ if(type.width == 32 && util_cpu_caps.has_sse)
intrinsic = "llvm.x86.sse.max.ps";
- if(type.width == 64)
+ if(type.width == 64 && util_cpu_caps.has_sse2)
intrinsic = "llvm.x86.sse2.max.pd";
}
else {
- if(type.width == 8 && !type.sign)
+ if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2)
intrinsic = "llvm.x86.sse2.pmaxu.b";
- if(type.width == 8 && type.sign)
+ if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pmaxsb";
- if(type.width == 16 && !type.sign)
+ if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pmaxuw";
- if(type.width == 16 && type.sign)
+ if(type.width == 16 && type.sign && util_cpu_caps.has_sse2)
intrinsic = "llvm.x86.sse2.pmaxs.w";
- if(type.width == 32 && !type.sign)
+ if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pmaxud";
- if(type.width == 32 && type.sign)
+ if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pmaxsd";
}
}
-#endif
if(intrinsic)
return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
@@ -204,15 +204,14 @@ lp_build_add(struct lp_build_context *bld,
if(a == bld->one || b == bld->one)
return bld->one;
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width * type.length == 128 &&
+ if(util_cpu_caps.has_sse2 &&
+ type.width * type.length == 128 &&
!type.floating && !type.fixed) {
if(type.width == 8)
intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
if(type.width == 16)
intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
}
-#endif
if(intrinsic)
return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
@@ -257,15 +256,14 @@ lp_build_sub(struct lp_build_context *bld,
if(b == bld->one)
return bld->zero;
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width * type.length == 128 &&
+ if(util_cpu_caps.has_sse2 &&
+ type.width * type.length == 128 &&
!type.floating && !type.fixed) {
if(type.width == 8)
intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
if(type.width == 16)
intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
}
-#endif
if(intrinsic)
return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
@@ -284,45 +282,6 @@ lp_build_sub(struct lp_build_context *bld,
/**
- * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
- */
-static LLVMValueRef
-lp_build_unpack_shuffle(unsigned n, unsigned lo_hi)
-{
- LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
- unsigned i, j;
-
- assert(n <= LP_MAX_VECTOR_LENGTH);
- assert(lo_hi < 2);
-
- for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
- elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
- elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
- }
-
- return LLVMConstVector(elems, n);
-}
-
-
-/**
- * Build constant int vector of width 'n' and value 'c'.
- */
-static LLVMValueRef
-lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c)
-{
- LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
- unsigned i;
-
- assert(n <= LP_MAX_VECTOR_LENGTH);
-
- for(i = 0; i < n; ++i)
- elems[i] = LLVMConstInt(type, c, 0);
-
- return LLVMConstVector(elems, n);
-}
-
-
-/**
* Normalized 8bit multiplication.
*
* - alpha plus one
@@ -365,33 +324,30 @@ lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c)
*/
static LLVMValueRef
lp_build_mul_u8n(LLVMBuilderRef builder,
+ struct lp_type i16_type,
LLVMValueRef a, LLVMValueRef b)
{
- static LLVMValueRef c01 = NULL;
- static LLVMValueRef c08 = NULL;
- static LLVMValueRef c80 = NULL;
+ LLVMValueRef c8;
LLVMValueRef ab;
- if(!c01) c01 = lp_build_const_vec(LLVMInt16Type(), 8, 0x01);
- if(!c08) c08 = lp_build_const_vec(LLVMInt16Type(), 8, 0x08);
- if(!c80) c80 = lp_build_const_vec(LLVMInt16Type(), 8, 0x80);
+ c8 = lp_build_int_const_scalar(i16_type, 8);
#if 0
/* a*b/255 ~= (a*(b + 1)) >> 256 */
- b = LLVMBuildAdd(builder, b, c01, "");
+ b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), "");
ab = LLVMBuildMul(builder, a, b, "");
#else
- /* t/255 ~= (t + (t >> 8) + 0x80) >> 8 */
+ /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */
ab = LLVMBuildMul(builder, a, b, "");
- ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c08, ""), "");
- ab = LLVMBuildAdd(builder, ab, c80, "");
+ ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), "");
+ ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), "");
#endif
- ab = LLVMBuildLShr(builder, ab, c08, "");
+ ab = LLVMBuildLShr(builder, ab, c8, "");
return ab;
}
@@ -406,6 +362,8 @@ lp_build_mul(struct lp_build_context *bld,
LLVMValueRef b)
{
const struct lp_type type = bld->type;
+ LLVMValueRef shift;
+ LLVMValueRef res;
if(a == bld->zero)
return bld->zero;
@@ -419,53 +377,104 @@ lp_build_mul(struct lp_build_context *bld,
return bld->undef;
if(!type.floating && !type.fixed && type.norm) {
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width == 8 && type.length == 16) {
- LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8);
- LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16);
- static LLVMValueRef ml = NULL;
- static LLVMValueRef mh = NULL;
- LLVMValueRef al, ah, bl, bh;
- LLVMValueRef abl, abh;
- LLVMValueRef ab;
-
- if(!ml) ml = lp_build_unpack_shuffle(16, 0);
- if(!mh) mh = lp_build_unpack_shuffle(16, 1);
-
- /* PUNPCKLBW, PUNPCKHBW */
- al = LLVMBuildShuffleVector(bld->builder, a, bld->zero, ml, "");
- bl = LLVMBuildShuffleVector(bld->builder, b, bld->zero, ml, "");
- ah = LLVMBuildShuffleVector(bld->builder, a, bld->zero, mh, "");
- bh = LLVMBuildShuffleVector(bld->builder, b, bld->zero, mh, "");
+ if(type.width == 8) {
+ struct lp_type i16_type = lp_wider_type(type);
+ LLVMValueRef al, ah, bl, bh, abl, abh, ab;
- /* NOP */
- al = LLVMBuildBitCast(bld->builder, al, i16x8, "");
- bl = LLVMBuildBitCast(bld->builder, bl, i16x8, "");
- ah = LLVMBuildBitCast(bld->builder, ah, i16x8, "");
- bh = LLVMBuildBitCast(bld->builder, bh, i16x8, "");
+ lp_build_unpack2(bld->builder, type, i16_type, a, &al, &ah);
+ lp_build_unpack2(bld->builder, type, i16_type, b, &bl, &bh);
/* PMULLW, PSRLW, PADDW */
- abl = lp_build_mul_u8n(bld->builder, al, bl);
- abh = lp_build_mul_u8n(bld->builder, ah, bh);
+ abl = lp_build_mul_u8n(bld->builder, i16_type, al, bl);
+ abh = lp_build_mul_u8n(bld->builder, i16_type, ah, bh);
- /* PACKUSWB */
- ab = lp_build_intrinsic_binary(bld->builder, "llvm.x86.sse2.packuswb.128" , i16x8, abl, abh);
-
- /* NOP */
- ab = LLVMBuildBitCast(bld->builder, ab, i8x16, "");
+ ab = lp_build_pack2(bld->builder, i16_type, type, abl, abh);
return ab;
}
-#endif
/* FIXME */
assert(0);
}
- if(LLVMIsConstant(a) && LLVMIsConstant(b))
- return LLVMConstMul(a, b);
+ if(type.fixed)
+ shift = lp_build_int_const_scalar(type, type.width/2);
+ else
+ shift = NULL;
+
+ if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
+ res = LLVMConstMul(a, b);
+ if(shift) {
+ if(type.sign)
+ res = LLVMConstAShr(res, shift);
+ else
+ res = LLVMConstLShr(res, shift);
+ }
+ }
+ else {
+ res = LLVMBuildMul(bld->builder, a, b, "");
+ if(shift) {
+ if(type.sign)
+ res = LLVMBuildAShr(bld->builder, res, shift, "");
+ else
+ res = LLVMBuildLShr(bld->builder, res, shift, "");
+ }
+ }
+
+ return res;
+}
+
+
+/**
+ * Small vector x scale multiplication optimization.
+ */
+LLVMValueRef
+lp_build_mul_imm(struct lp_build_context *bld,
+ LLVMValueRef a,
+ int b)
+{
+ LLVMValueRef factor;
+
+ if(b == 0)
+ return bld->zero;
+
+ if(b == 1)
+ return a;
- return LLVMBuildMul(bld->builder, a, b, "");
+ if(b == -1)
+ return LLVMBuildNeg(bld->builder, a, "");
+
+ if(b == 2 && bld->type.floating)
+ return lp_build_add(bld, a, a);
+
+ if(util_is_pot(b)) {
+ unsigned shift = ffs(b) - 1;
+
+ if(bld->type.floating) {
+#if 0
+ /*
+ * Power of two multiplication by directly manipulating the mantissa.
+ *
+ * XXX: This might not be always faster, it will introduce a small error
+ * for multiplication by zero, and it will produce wrong results
+ * for Inf and NaN.
+ */
+ unsigned mantissa = lp_mantissa(bld->type);
+ factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa);
+ a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), "");
+ a = LLVMBuildAdd(bld->builder, a, factor, "");
+ a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), "");
+ return a;
+#endif
+ }
+ else {
+ factor = lp_build_const_scalar(bld->type, shift);
+ return LLVMBuildShl(bld->builder, a, factor, "");
+ }
+ }
+
+ factor = lp_build_const_scalar(bld->type, (double)b);
+ return lp_build_mul(bld, a, factor);
}
@@ -493,22 +502,43 @@ lp_build_div(struct lp_build_context *bld,
if(LLVMIsConstant(a) && LLVMIsConstant(b))
return LLVMConstFDiv(a, b);
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width == 32 && type.length == 4)
+ if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
return lp_build_mul(bld, a, lp_build_rcp(bld, b));
-#endif
return LLVMBuildFDiv(bld->builder, a, b, "");
}
+/**
+ * Linear interpolation.
+ *
+ * This also works for integer values with a few caveats.
+ *
+ * @sa http://www.stereopsis.com/doubleblend.html
+ */
LLVMValueRef
lp_build_lerp(struct lp_build_context *bld,
LLVMValueRef x,
LLVMValueRef v0,
LLVMValueRef v1)
{
- return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0)));
+ LLVMValueRef delta;
+ LLVMValueRef res;
+
+ delta = lp_build_sub(bld, v1, v0);
+
+ res = lp_build_mul(bld, x, delta);
+
+ res = lp_build_add(bld, v0, res);
+
+ if(bld->type.fixed)
+ /* XXX: This step is necessary for lerping 8bit colors stored on 16bits,
+ * but it will be wrong for other uses. Basically we need a more
+ * powerful lp_type, capable of further distinguishing the values
+ * interpretation from the value storage. */
+ res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), "");
+
+ return res;
}
@@ -606,8 +636,7 @@ lp_build_abs(struct lp_build_context *bld,
return a;
}
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width*type.length == 128) {
+ if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
switch(type.width) {
case 8:
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
@@ -617,7 +646,6 @@ lp_build_abs(struct lp_build_context *bld,
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
}
}
-#endif
return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, ""));
}
@@ -684,6 +712,8 @@ lp_build_round_sse41(struct lp_build_context *bld,
assert(type.floating);
assert(type.width*type.length == 128);
+ assert(lp_check_value(type, a));
+ assert(util_cpu_caps.has_sse4_1);
switch(type.width) {
case 32:
@@ -703,20 +733,45 @@ lp_build_round_sse41(struct lp_build_context *bld,
LLVMValueRef
-lp_build_round(struct lp_build_context *bld,
+lp_build_trunc(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
assert(type.floating);
+ assert(lp_check_value(type, a));
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
-#endif
+ if(util_cpu_caps.has_sse4_1)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
+ else {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMValueRef res;
+ res = LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+ res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
+ return res;
+ }
+}
- /* FIXME */
- assert(0);
- return bld->undef;
+
+LLVMValueRef
+lp_build_round(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+
+ assert(type.floating);
+ assert(lp_check_value(type, a));
+
+ if(util_cpu_caps.has_sse4_1)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
+ else {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMValueRef res;
+ res = lp_build_iround(bld, a);
+ res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
+ return res;
+ }
}
@@ -728,13 +783,15 @@ lp_build_floor(struct lp_build_context *bld,
assert(type.floating);
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
-#endif
-
- /* FIXME */
- assert(0);
- return bld->undef;
+ if(util_cpu_caps.has_sse4_1)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
+ else {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMValueRef res;
+ res = lp_build_ifloor(bld, a);
+ res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
+ return res;
+ }
}
@@ -745,59 +802,143 @@ lp_build_ceil(struct lp_build_context *bld,
const struct lp_type type = bld->type;
assert(type.floating);
+ assert(lp_check_value(type, a));
+
+ if(util_cpu_caps.has_sse4_1)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
+ else {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMValueRef res;
+ res = lp_build_iceil(bld, a);
+ res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
+ return res;
+ }
+}
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
-#endif
- /* FIXME */
- assert(0);
- return bld->undef;
+/**
+ * Convert to integer, through whichever rounding method that's fastest,
+ * typically truncating to zero.
+ */
+LLVMValueRef
+lp_build_itrunc(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+
+ assert(type.floating);
+ assert(lp_check_value(type, a));
+
+ return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
}
LLVMValueRef
-lp_build_trunc(struct lp_build_context *bld,
- LLVMValueRef a)
+lp_build_iround(struct lp_build_context *bld,
+ LLVMValueRef a)
{
const struct lp_type type = bld->type;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMValueRef res;
assert(type.floating);
+ assert(lp_check_value(type, a));
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
-#endif
+ if(util_cpu_caps.has_sse4_1) {
+ res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
+ }
+ else {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+ LLVMValueRef half;
+
+ /* get sign bit */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+
+ /* sign * 0.5 */
+ half = lp_build_const_scalar(type, 0.5);
+ half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
+ half = LLVMBuildOr(bld->builder, sign, half, "");
+ half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
+
+ res = LLVMBuildAdd(bld->builder, a, half, "");
+ }
+
+ res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
- /* FIXME */
- assert(0);
- return bld->undef;
+ return res;
}
-/**
- * Convert to integer, through whichever rounding method that's fastest,
- * typically truncating to zero.
- */
LLVMValueRef
-lp_build_int(struct lp_build_context *bld,
- LLVMValueRef a)
+lp_build_ifloor(struct lp_build_context *bld,
+ LLVMValueRef a)
{
const struct lp_type type = bld->type;
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMValueRef res;
assert(type.floating);
+ assert(lp_check_value(type, a));
- return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+ if(util_cpu_caps.has_sse4_1) {
+ res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
+ }
+ else {
+ /* Take the sign bit and add it to 1 constant */
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+ LLVMValueRef offset;
+
+ /* sign = a < 0 ? ~0 : 0 */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), "");
+
+ /* offset = -0.99999(9)f */
+ offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa));
+ offset = LLVMConstBitCast(offset, int_vec_type);
+
+ /* offset = a < 0 ? -0.99999(9)f : 0.0f */
+ offset = LLVMBuildAnd(bld->builder, offset, sign, "");
+ offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "");
+
+ res = LLVMBuildAdd(bld->builder, a, offset, "");
+ }
+
+ res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
+
+ return res;
}
LLVMValueRef
-lp_build_ifloor(struct lp_build_context *bld,
- LLVMValueRef a)
+lp_build_iceil(struct lp_build_context *bld,
+ LLVMValueRef a)
{
- a = lp_build_floor(bld, a);
- a = lp_build_int(bld, a);
- return a;
+ const struct lp_type type = bld->type;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMValueRef res;
+
+ assert(type.floating);
+ assert(lp_check_value(type, a));
+
+ if(util_cpu_caps.has_sse4_1) {
+ res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
+ }
+ else {
+ assert(0);
+ res = bld->undef;
+ }
+
+ res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
+
+ return res;
}
@@ -837,11 +978,9 @@ lp_build_rcp(struct lp_build_context *bld,
if(LLVMIsConstant(a))
return LLVMConstFDiv(bld->one, a);
- /* XXX: is this really necessary? */
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width == 32 && type.length == 4)
+ if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
+ /* FIXME: improve precision */
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
-#endif
return LLVMBuildFDiv(bld->builder, bld->one, a, "");
}
@@ -858,11 +997,8 @@ lp_build_rsqrt(struct lp_build_context *bld,
assert(type.floating);
- /* XXX: is this really necessary? */
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width == 32 && type.length == 4)
+ if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a);
-#endif
return lp_build_rcp(bld, lp_build_sqrt(bld, a));
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index d68a97c4b8..62be4b9aee 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -40,7 +40,7 @@
#include <llvm-c/Core.h>
-struct lp_type type;
+struct lp_type;
struct lp_build_context;
@@ -67,6 +67,11 @@ lp_build_mul(struct lp_build_context *bld,
LLVMValueRef b);
LLVMValueRef
+lp_build_mul_imm(struct lp_build_context *bld,
+ LLVMValueRef a,
+ int b);
+
+LLVMValueRef
lp_build_div(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
@@ -126,11 +131,18 @@ lp_build_trunc(struct lp_build_context *bld,
LLVMValueRef a);
LLVMValueRef
-lp_build_int(struct lp_build_context *bld,
- LLVMValueRef a);
+lp_build_ifloor(struct lp_build_context *bld,
+ LLVMValueRef a);
+LLVMValueRef
+lp_build_iceil(struct lp_build_context *bld,
+ LLVMValueRef a);
LLVMValueRef
-lp_build_ifloor(struct lp_build_context *bld,
+lp_build_iround(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_itrunc(struct lp_build_context *bld,
LLVMValueRef a);
LLVMValueRef
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h
index ffb302f736..cb8e1c7b00 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_const.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h
@@ -42,7 +42,7 @@
#include <pipe/p_compiler.h>
-struct lp_type type;
+struct lp_type;
unsigned
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
index 186cac70f6..9935209437 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
@@ -63,11 +63,13 @@
#include "util/u_debug.h"
#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_intr.h"
#include "lp_bld_arit.h"
+#include "lp_bld_pack.h"
#include "lp_bld_conv.h"
@@ -198,243 +200,6 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
/**
- * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
- */
-static LLVMValueRef
-lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi)
-{
- LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
- unsigned i, j;
-
- assert(n <= LP_MAX_VECTOR_LENGTH);
- assert(lo_hi < 2);
-
- /* TODO: cache results in a static table */
-
- for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
- elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
- elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
- }
-
- return LLVMConstVector(elems, n);
-}
-
-
-/**
- * Build shuffle vectors that match PACKxx instructions.
- */
-static LLVMValueRef
-lp_build_const_pack_shuffle(unsigned n)
-{
- LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
- unsigned i;
-
- assert(n <= LP_MAX_VECTOR_LENGTH);
-
- /* TODO: cache results in a static table */
-
- for(i = 0; i < n; ++i)
- elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0);
-
- return LLVMConstVector(elems, n);
-}
-
-
-/**
- * Expand the bit width.
- *
- * This will only change the number of bits the values are represented, not the
- * values themselves.
- */
-static void
-lp_build_expand(LLVMBuilderRef builder,
- struct lp_type src_type,
- struct lp_type dst_type,
- LLVMValueRef src,
- LLVMValueRef *dst, unsigned num_dsts)
-{
- unsigned num_tmps;
- unsigned i;
-
- /* Register width must remain constant */
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
- /* We must not loose or gain channels. Only precision */
- assert(src_type.length == dst_type.length * num_dsts);
-
- num_tmps = 1;
- dst[0] = src;
-
- while(src_type.width < dst_type.width) {
- struct lp_type new_type = src_type;
- LLVMTypeRef new_vec_type;
-
- new_type.width *= 2;
- new_type.length /= 2;
- new_vec_type = lp_build_vec_type(new_type);
-
- for(i = num_tmps; i--; ) {
- LLVMValueRef zero;
- LLVMValueRef shuffle_lo;
- LLVMValueRef shuffle_hi;
- LLVMValueRef lo;
- LLVMValueRef hi;
-
- zero = lp_build_zero(src_type);
- shuffle_lo = lp_build_const_unpack_shuffle(src_type.length, 0);
- shuffle_hi = lp_build_const_unpack_shuffle(src_type.length, 1);
-
- /* PUNPCKLBW, PUNPCKHBW */
- lo = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_lo, "");
- hi = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_hi, "");
-
- dst[2*i + 0] = LLVMBuildBitCast(builder, lo, new_vec_type, "");
- dst[2*i + 1] = LLVMBuildBitCast(builder, hi, new_vec_type, "");
- }
-
- src_type = new_type;
-
- num_tmps *= 2;
- }
-
- assert(num_tmps == num_dsts);
-}
-
-
-/**
- * Non-interleaved pack.
- *
- * This will move values as
- *
- * lo = __ l0 __ l1 __ l2 __.. __ ln
- * hi = __ h0 __ h1 __ h2 __.. __ hn
- * res = l0 l1 l2 .. ln h0 h1 h2 .. hn
- *
- * TODO: handle saturation consistently.
- */
-static LLVMValueRef
-lp_build_pack2(LLVMBuilderRef builder,
- struct lp_type src_type,
- struct lp_type dst_type,
- boolean clamped,
- LLVMValueRef lo,
- LLVMValueRef hi)
-{
- LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
- LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
- LLVMValueRef shuffle;
- LLVMValueRef res;
-
- /* Register width must remain constant */
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
- /* We must not loose or gain channels. Only precision */
- assert(src_type.length * 2 == dst_type.length);
-
- assert(!src_type.floating);
- assert(!dst_type.floating);
-
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(src_type.width * src_type.length == 128) {
- /* All X86 non-interleaved pack instructions all take signed inputs and
- * saturate them, so saturate beforehand. */
- if(!src_type.sign && !clamped) {
- struct lp_build_context bld;
- unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width;
- LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1);
- lp_build_context_init(&bld, builder, src_type);
- lo = lp_build_min(&bld, lo, dst_max);
- hi = lp_build_min(&bld, hi, dst_max);
- }
-
- switch(src_type.width) {
- case 32:
- if(dst_type.sign)
- res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
- else
- /* PACKUSDW is the only instrinsic with a consistent signature */
- return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
- break;
-
- case 16:
- if(dst_type.sign)
- res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
- else
- res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
- break;
-
- default:
- assert(0);
- return LLVMGetUndef(dst_vec_type);
- break;
- }
-
- res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
- return res;
- }
-#endif
-
- lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
- hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
-
- shuffle = lp_build_const_pack_shuffle(dst_type.length);
-
- res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, "");
-
- return res;
-}
-
-
-/**
- * Truncate the bit width.
- *
- * TODO: Handle saturation consistently.
- */
-static LLVMValueRef
-lp_build_pack(LLVMBuilderRef builder,
- struct lp_type src_type,
- struct lp_type dst_type,
- boolean clamped,
- const LLVMValueRef *src, unsigned num_srcs)
-{
- LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
- unsigned i;
-
- /* Register width must remain constant */
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
- /* We must not loose or gain channels. Only precision */
- assert(src_type.length * num_srcs == dst_type.length);
-
- for(i = 0; i < num_srcs; ++i)
- tmp[i] = src[i];
-
- while(src_type.width > dst_type.width) {
- struct lp_type new_type = src_type;
-
- new_type.width /= 2;
- new_type.length *= 2;
-
- /* Take in consideration the sign changes only in the last step */
- if(new_type.width == dst_type.width)
- new_type.sign = dst_type.sign;
-
- num_srcs /= 2;
-
- for(i = 0; i < num_srcs; ++i)
- tmp[i] = lp_build_pack2(builder, src_type, new_type, clamped,
- tmp[2*i + 0], tmp[2*i + 1]);
-
- src_type = new_type;
- }
-
- assert(num_srcs == 1);
-
- return tmp[0];
-}
-
-
-/**
* Generic type conversion.
*
* TODO: Take a precision argument, or even better, add a new precision member
@@ -573,7 +338,7 @@ lp_build_conv(LLVMBuilderRef builder,
if(tmp_type.width < dst_type.width) {
assert(num_tmps == 1);
- lp_build_expand(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
+ lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
tmp_type.width = dst_type.width;
tmp_type.length = dst_type.length;
num_tmps = num_dsts;
@@ -693,7 +458,7 @@ lp_build_conv_mask(LLVMBuilderRef builder,
}
else if(src_type.width < dst_type.width) {
assert(num_srcs == 1);
- lp_build_expand(builder, src_type, dst_type, src[0], dst, num_dsts);
+ lp_build_unpack(builder, src_type, dst_type, src[0], dst, num_dsts);
}
else {
assert(num_srcs == num_dsts);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
index ca378804d2..948e68fae4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
@@ -40,7 +40,7 @@
#include <llvm-c/Core.h>
-struct lp_type type;
+struct lp_type;
LLVMValueRef
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 21c665c4d4..98ec1cb1b9 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -210,7 +210,4 @@ lp_build_depth_test(LLVMBuilderRef builder,
dst = lp_build_select(&bld, z_bitmask, src, dst);
LLVMBuildStore(builder, dst, dst_ptr);
}
-
- /* FIXME */
- assert(!state->occlusion_count);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index 6d3f692619..970bee379f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -25,8 +25,8 @@
*
**************************************************************************/
-#ifndef LP_BLD_H
-#define LP_BLD_H
+#ifndef LP_BLD_FORMAT_H
+#define LP_BLD_FORMAT_H
/**
@@ -42,62 +42,34 @@ struct util_format_description;
struct lp_type;
-/**
- * Unpack a pixel into its RGBA components.
- *
- * @param packed integer.
- *
- * @return RGBA in a 4 floats vector.
- */
-LLVMValueRef
-lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef packed);
+boolean
+lp_format_is_rgba8(const struct util_format_description *desc);
-/**
- * Pack a pixel.
- *
- * @param rgba 4 float vector with the unpacked components.
- */
-LLVMValueRef
-lp_build_pack_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef rgba);
+void
+lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
+ struct lp_type type,
+ const LLVMValueRef *unswizzled,
+ LLVMValueRef *swizzled);
-/**
- * Load a pixel into its RGBA components.
- *
- * @param ptr value with the pointer to the packed pixel. Pointer type is
- * irrelevant.
- *
- * @return RGBA in a 4 floats vector.
- */
LLVMValueRef
-lp_build_load_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr);
+lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ LLVMValueRef packed);
-/**
- * Store a pixel.
- *
- * @param rgba 4 float vector with the unpacked components.
- */
-void
-lp_build_store_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr,
- LLVMValueRef rgba);
+LLVMValueRef
+lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ struct lp_type type,
+ LLVMValueRef packed);
+
LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
- unsigned length,
- unsigned src_width,
- unsigned dst_width,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets);
+lp_build_pack_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ LLVMValueRef rgba);
void
@@ -108,12 +80,4 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
LLVMValueRef *rgba);
-void
-lp_build_load_rgba_soa(LLVMBuilderRef builder,
- const struct util_format_description *format_desc,
- struct lp_type type,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets,
- LLVMValueRef *rgba);
-
-#endif /* !LP_BLD_H */
+#endif /* !LP_BLD_FORMAT_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index b9b5d84bed..5836e0173f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -25,18 +25,39 @@
*
**************************************************************************/
+/**
+ * @file
+ * AoS pixel format manipulation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "util/u_cpu_detect.h"
#include "util/u_format.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
#include "lp_bld_format.h"
+/**
+ * Unpack a single pixel into its RGBA components.
+ *
+ * @param packed integer.
+ *
+ * @return RGBA in a 4 floats vector.
+ *
+ * XXX: This is mostly for reference and testing -- operating a single pixel at
+ * a time is rarely if ever needed.
+ */
LLVMValueRef
lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
+ const struct util_format_description *desc,
LLVMValueRef packed)
{
- const struct util_format_description *desc;
LLVMTypeRef type;
LLVMValueRef shifted, casted, scaled, masked;
LLVMValueRef shifts[4];
@@ -49,8 +70,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
unsigned shift;
unsigned i;
- desc = util_format_description(format);
-
/* FIXME: Support more formats */
assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
assert(desc->block.width == 1);
@@ -151,12 +170,130 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
}
+/**
+ * Take a vector with packed pixels and unpack into a rgba8 vector.
+ *
+ * Formats with bit depth smaller than 32bits are accepted, but they must be
+ * padded to 32bits.
+ */
+LLVMValueRef
+lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ struct lp_type type,
+ LLVMValueRef packed)
+{
+ struct lp_build_context bld;
+ bool rgba8;
+ LLVMValueRef res;
+ unsigned i;
+
+ lp_build_context_init(&bld, builder, type);
+
+ /* FIXME: Support more formats */
+ assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(desc->block.width == 1);
+ assert(desc->block.height == 1);
+ assert(desc->block.bits <= 32);
+
+ assert(!type.floating);
+ assert(!type.fixed);
+ assert(type.norm);
+ assert(type.width == 8);
+ assert(type.length % 4 == 0);
+
+ rgba8 = TRUE;
+ for(i = 0; i < 4; ++i) {
+ assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
+ desc->channel[i].type == UTIL_FORMAT_TYPE_VOID);
+ if(desc->channel[0].size != 8)
+ rgba8 = FALSE;
+ }
+
+ if(rgba8) {
+ /*
+ * The pixel is already in a rgba8 format variant. All it is necessary
+ * is to swizzle the channels.
+ */
+
+ unsigned char swizzles[4];
+ boolean zeros[4]; /* bitwise AND mask */
+ boolean ones[4]; /* bitwise OR mask */
+ boolean swizzles_needed = FALSE;
+ boolean zeros_needed = FALSE;
+ boolean ones_needed = FALSE;
+
+ for(i = 0; i < 4; ++i) {
+ enum util_format_swizzle swizzle = desc->swizzle[i];
+
+ /* Initialize with the no-op case */
+ swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i;
+ zeros[i] = TRUE;
+ ones[i] = FALSE;
+
+ switch (swizzle) {
+ case UTIL_FORMAT_SWIZZLE_X:
+ case UTIL_FORMAT_SWIZZLE_Y:
+ case UTIL_FORMAT_SWIZZLE_Z:
+ case UTIL_FORMAT_SWIZZLE_W:
+ if(swizzle != swizzles[i]) {
+ swizzles[i] = swizzle;
+ swizzles_needed = TRUE;
+ }
+ break;
+ case UTIL_FORMAT_SWIZZLE_0:
+ zeros[i] = FALSE;
+ zeros_needed = TRUE;
+ break;
+ case UTIL_FORMAT_SWIZZLE_1:
+ ones[i] = TRUE;
+ ones_needed = TRUE;
+ break;
+ case UTIL_FORMAT_SWIZZLE_NONE:
+ assert(0);
+ break;
+ }
+ }
+
+ res = packed;
+
+ if(swizzles_needed)
+ res = lp_build_swizzle1_aos(&bld, res, swizzles);
+
+ if(zeros_needed) {
+ /* Mask out zero channels */
+ LLVMValueRef mask = lp_build_const_mask_aos(type, zeros);
+ res = LLVMBuildAnd(builder, res, mask, "");
+ }
+
+ if(ones_needed) {
+ /* Or one channels */
+ LLVMValueRef mask = lp_build_const_mask_aos(type, ones);
+ res = LLVMBuildOr(builder, res, mask, "");
+ }
+ }
+ else {
+ /* FIXME */
+ assert(0);
+ res = lp_build_undef(type);
+ }
+
+ return res;
+}
+
+
+/**
+ * Pack a single pixel.
+ *
+ * @param rgba 4 float vector with the unpacked components.
+ *
+ * XXX: This is mostly for reference and testing -- operating a single pixel at
+ * a time is rarely if ever needed.
+ */
LLVMValueRef
lp_build_pack_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
+ const struct util_format_description *desc,
LLVMValueRef rgba)
{
- const struct util_format_description *desc;
LLVMTypeRef type;
LLVMValueRef packed = NULL;
LLVMValueRef swizzles[4];
@@ -167,8 +304,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
unsigned shift;
unsigned i, j;
- desc = util_format_description(format);
-
assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
assert(desc->block.width == 1);
assert(desc->block.height == 1);
@@ -247,57 +382,3 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
return packed;
}
-
-
-LLVMValueRef
-lp_build_load_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr)
-{
- const struct util_format_description *desc;
- LLVMTypeRef type;
- LLVMValueRef packed;
-
- desc = util_format_description(format);
-
- /* FIXME: Support more formats */
- assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
- assert(desc->block.width == 1);
- assert(desc->block.height == 1);
- assert(desc->block.bits <= 32);
-
- type = LLVMIntType(desc->block.bits);
-
- ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
-
- packed = LLVMBuildLoad(builder, ptr, "");
-
- return lp_build_unpack_rgba_aos(builder, format, packed);
-}
-
-
-void
-lp_build_store_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr,
- LLVMValueRef rgba)
-{
- const struct util_format_description *desc;
- LLVMTypeRef type;
- LLVMValueRef packed;
-
- desc = util_format_description(format);
-
- assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
- assert(desc->block.width == 1);
- assert(desc->block.height == 1);
-
- type = LLVMIntType(desc->block.bits);
-
- packed = lp_build_pack_rgba_aos(builder, format, rgba);
-
- ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
-
- LLVMBuildStore(builder, packed, ptr);
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_query.c b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c
new file mode 100644
index 0000000000..f3832d07ff
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c
@@ -0,0 +1,72 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Utility functions to make assertions about formats.
+ *
+ * This module centralizes most of logic used when determining what algorithm
+ * is most suitable (i.e., most efficient yet correct) for a given format.
+ *
+ * It might be possible to move some of these functions to u_format module,
+ * but since tiny differences in the format my render it more/less
+ * appropriate to a given algorithm it is impossible to make any long term
+ * guarantee about the semantics of these functions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_format.h"
+
+#include "lp_bld_format.h"
+
+
+/**
+ * Whether this format is a 4 rgba8 variant
+ */
+boolean
+lp_format_is_rgba8(const struct util_format_description *desc)
+{
+ unsigned chan;
+
+ if(desc->block.width != 1 ||
+ desc->block.height != 1 ||
+ desc->block.bits != 32)
+ return FALSE;
+
+ for(chan = 0; chan < 4; ++chan) {
+ if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED &&
+ desc->channel[chan].type != UTIL_FORMAT_TYPE_SIGNED &&
+ desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID)
+ return FALSE;
+ if(desc->channel[chan].size != 8)
+ return FALSE;
+ }
+
+ return TRUE;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
index b5ff434e1a..64151d169d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
@@ -34,66 +34,17 @@
#include "lp_bld_format.h"
-/**
- * Gather elements from scatter positions in memory into a single vector.
- *
- * @param src_width src element width
- * @param dst_width result element width (source will be expanded to fit)
- * @param length length of the offsets,
- * @param base_ptr base pointer, should be a i8 pointer type.
- * @param offsets vector with offsets
- */
-LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
- unsigned length,
- unsigned src_width,
- unsigned dst_width,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets)
-{
- LLVMTypeRef src_type = LLVMIntType(src_width);
- LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
- LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
- LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
- LLVMValueRef res;
- unsigned i;
-
- res = LLVMGetUndef(dst_vec_type);
- for(i = 0; i < length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef elem_offset;
- LLVMValueRef elem_ptr;
- LLVMValueRef elem;
-
- elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
- elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
- elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
- elem = LLVMBuildLoad(builder, elem_ptr, "");
-
- assert(src_width <= dst_width);
- if(src_width > dst_width)
- elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
- if(src_width < dst_width)
- elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
-
- res = LLVMBuildInsertElement(builder, res, elem, index, "");
- }
-
- return res;
-}
-
-
static LLVMValueRef
-lp_build_format_swizzle(struct lp_type type,
- const LLVMValueRef *inputs,
- enum util_format_swizzle swizzle)
+lp_build_format_swizzle_chan_soa(struct lp_type type,
+ const LLVMValueRef *unswizzled,
+ enum util_format_swizzle swizzle)
{
switch (swizzle) {
case UTIL_FORMAT_SWIZZLE_X:
case UTIL_FORMAT_SWIZZLE_Y:
case UTIL_FORMAT_SWIZZLE_Z:
case UTIL_FORMAT_SWIZZLE_W:
- return inputs[swizzle];
+ return unswizzled[swizzle];
case UTIL_FORMAT_SWIZZLE_0:
return lp_build_zero(type);
case UTIL_FORMAT_SWIZZLE_1:
@@ -108,6 +59,28 @@ lp_build_format_swizzle(struct lp_type type,
void
+lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
+ struct lp_type type,
+ const LLVMValueRef *unswizzled,
+ LLVMValueRef *swizzled)
+{
+ if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[0];
+ LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
+ swizzled[2] = swizzled[1] = swizzled[0] = depth;
+ swizzled[3] = lp_build_one(type);
+ }
+ else {
+ unsigned chan;
+ for (chan = 0; chan < 4; ++chan) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+ swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
+ }
+ }
+}
+
+
+void
lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
const struct util_format_description *format_desc,
struct lp_type type,
@@ -119,7 +92,9 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
unsigned chan;
/* FIXME: Support more formats */
- assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH ||
+ (format_desc->layout == UTIL_FORMAT_LAYOUT_ARRAY &&
+ format_desc->block.bits == format_desc->channel[0].size));
assert(format_desc->block.width == 1);
assert(format_desc->block.height == 1);
assert(format_desc->block.bits <= 32);
@@ -170,39 +145,5 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
start = stop;
}
- if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- enum util_format_swizzle swizzle = format_desc->swizzle[0];
- LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle);
- rgba[2] = rgba[1] = rgba[0] = depth;
- rgba[3] = lp_build_one(type);
- }
- else {
- for (chan = 0; chan < 4; ++chan) {
- enum util_format_swizzle swizzle = format_desc->swizzle[chan];
- rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle);
- }
- }
-}
-
-
-void
-lp_build_load_rgba_soa(LLVMBuilderRef builder,
- const struct util_format_description *format_desc,
- struct lp_type type,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets,
- LLVMValueRef *rgba)
-{
- LLVMValueRef packed;
-
- assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
- assert(format_desc->block.width == 1);
- assert(format_desc->block.height == 1);
- assert(format_desc->block.bits <= 32);
-
- packed = lp_build_gather(builder,
- type.length, format_desc->block.bits, type.width,
- base_ptr, offsets);
-
- lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba);
+ lp_build_format_swizzle_soa(format_desc, type, inputs, rgba);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 338dbca6d1..818c0e943e 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -109,32 +109,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
/**
- * Small vector x scale multiplication optimization.
- *
- * TODO: Should be elsewhere.
- */
-static LLVMValueRef
-coeff_multiply(struct lp_build_interp_soa_context *bld,
- LLVMValueRef coeff,
- int step)
-{
- LLVMValueRef factor;
-
- switch(step) {
- case 0:
- return bld->base.zero;
- case 1:
- return coeff;
- case 2:
- return lp_build_add(&bld->base, coeff, coeff);
- default:
- factor = lp_build_const_scalar(bld->base.type, (double)step);
- return lp_build_mul(&bld->base, coeff, factor);
- }
-}
-
-
-/**
* Multiply the dadx and dady with the xstep and ystep respectively.
*/
static void
@@ -149,8 +123,8 @@ coeffs_update(struct lp_build_interp_soa_context *bld)
if (mode != TGSI_INTERPOLATE_CONSTANT) {
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
if(mask & (1 << chan)) {
- bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep);
- bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep);
+ bld->dadx[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dadx[attrib][chan], bld->xstep);
+ bld->dady[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dady[attrib][chan], bld->ystep);
}
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
index 6b6f820769..db22a8028a 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
@@ -33,6 +33,8 @@
*/
+#include "util/u_cpu_detect.h"
+
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_intr.h"
@@ -65,7 +67,7 @@ lp_build_cmp(struct lp_build_context *bld,
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
if(type.width * type.length == 128) {
- if(type.floating) {
+ if(type.floating && util_cpu_caps.has_sse) {
LLVMValueRef args[3];
unsigned cc;
boolean swap;
@@ -114,7 +116,7 @@ lp_build_cmp(struct lp_build_context *bld,
res = LLVMBuildBitCast(bld->builder, res, int_vec_type, "");
return res;
}
- else {
+ else if(util_cpu_caps.has_sse2) {
static const struct {
unsigned swap:1;
unsigned eq:1;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
index a4ee7723b5..d67500ef70 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
@@ -42,7 +42,7 @@
#include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */
-struct lp_type type;
+struct lp_type;
struct lp_build_context;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
new file mode 100644
index 0000000000..c9acaf1f16
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
@@ -0,0 +1,62 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "llvm/Config/config.h"
+
+#include "pipe/p_config.h"
+
+#include "lp_bld_misc.h"
+
+
+#ifndef LLVM_NATIVE_ARCH
+
+namespace llvm {
+ extern void LinkInJIT();
+}
+
+
+void
+LLVMLinkInJIT(void)
+{
+ llvm::LinkInJIT();
+}
+
+
+extern "C" int X86TargetMachineModule;
+
+
+void
+LLVMInitializeNativeTarget(void)
+{
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ X86TargetMachineModule = 1;
+#endif
+}
+
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.h b/src/gallium/drivers/llvmpipe/lp_bld_misc.h
new file mode 100644
index 0000000000..51a84c5e25
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.h
@@ -0,0 +1,50 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_BLD_MISC_H
+#define LP_BLD_MISC_H
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+void
+LLVMLinkInJIT(void);
+
+void
+LLVMInitializeNativeTarget(void);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* !LP_BLD_MISC_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
new file mode 100644
index 0000000000..bc360ad77a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
@@ -0,0 +1,418 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Helper functions for packing/unpacking.
+ *
+ * Pack/unpacking is necessary for conversion between types of different
+ * bit width.
+ *
+ * They are also commonly used when an computation needs higher
+ * precision for the intermediate values. For example, if one needs the
+ * function:
+ *
+ * c = compute(a, b);
+ *
+ * to use more precision for intermediate results then one should implement it
+ * as:
+ *
+ * LLVMValueRef
+ * compute(LLVMBuilderRef builder struct lp_type type, LLVMValueRef a, LLVMValueRef b)
+ * {
+ * struct lp_type wide_type = lp_wider_type(type);
+ * LLVMValueRef al, ah, bl, bh, cl, ch, c;
+ *
+ * lp_build_unpack2(builder, type, wide_type, a, &al, &ah);
+ * lp_build_unpack2(builder, type, wide_type, b, &bl, &bh);
+ *
+ * cl = compute_half(al, bl);
+ * ch = compute_half(ah, bh);
+ *
+ * c = lp_build_pack2(bld->builder, wide_type, type, cl, ch);
+ *
+ * return c;
+ * }
+ *
+ * where compute_half() would do the computation for half the elements with
+ * twice the precision.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_pack.h"
+
+
+/**
+ * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
+ */
+static LLVMValueRef
+lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi)
+{
+ LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+ unsigned i, j;
+
+ assert(n <= LP_MAX_VECTOR_LENGTH);
+ assert(lo_hi < 2);
+
+ /* TODO: cache results in a static table */
+
+ for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
+ elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
+ elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
+ }
+
+ return LLVMConstVector(elems, n);
+}
+
+
+/**
+ * Build shuffle vectors that match PACKxx instructions.
+ */
+static LLVMValueRef
+lp_build_const_pack_shuffle(unsigned n)
+{
+ LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+
+ assert(n <= LP_MAX_VECTOR_LENGTH);
+
+ /* TODO: cache results in a static table */
+
+ for(i = 0; i < n; ++i)
+ elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0);
+
+ return LLVMConstVector(elems, n);
+}
+
+
+/**
+ * Interleave vector elements.
+ *
+ * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions.
+ */
+LLVMValueRef
+lp_build_interleave2(LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ unsigned lo_hi)
+{
+ LLVMValueRef shuffle;
+
+ shuffle = lp_build_const_unpack_shuffle(type.length, lo_hi);
+
+ return LLVMBuildShuffleVector(builder, a, b, shuffle, "");
+}
+
+
+/**
+ * Double the bit width.
+ *
+ * This will only change the number of bits the values are represented, not the
+ * values themselves.
+ */
+void
+lp_build_unpack2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef src,
+ LLVMValueRef *dst_lo,
+ LLVMValueRef *dst_hi)
+{
+ LLVMValueRef msb;
+ LLVMTypeRef dst_vec_type;
+
+ assert(!src_type.floating);
+ assert(!dst_type.floating);
+ assert(dst_type.width == src_type.width * 2);
+ assert(dst_type.length * 2 == src_type.length);
+
+ if(dst_type.sign && src_type.sign) {
+ /* Replicate the sign bit in the most significant bits */
+ msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), "");
+ }
+ else
+ /* Most significant bits always zero */
+ msb = lp_build_zero(src_type);
+
+ /* Interleave bits */
+ if(util_cpu_caps.little_endian) {
+ *dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0);
+ *dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1);
+ }
+ else {
+ *dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0);
+ *dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1);
+ }
+
+ /* Cast the result into the new type (twice as wide) */
+
+ dst_vec_type = lp_build_vec_type(dst_type);
+
+ *dst_lo = LLVMBuildBitCast(builder, *dst_lo, dst_vec_type, "");
+ *dst_hi = LLVMBuildBitCast(builder, *dst_hi, dst_vec_type, "");
+}
+
+
+/**
+ * Expand the bit width.
+ *
+ * This will only change the number of bits the values are represented, not the
+ * values themselves.
+ */
+void
+lp_build_unpack(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef src,
+ LLVMValueRef *dst, unsigned num_dsts)
+{
+ unsigned num_tmps;
+ unsigned i;
+
+ /* Register width must remain constant */
+ assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+ /* We must not loose or gain channels. Only precision */
+ assert(src_type.length == dst_type.length * num_dsts);
+
+ num_tmps = 1;
+ dst[0] = src;
+
+ while(src_type.width < dst_type.width) {
+ struct lp_type tmp_type = src_type;
+
+ tmp_type.width *= 2;
+ tmp_type.length /= 2;
+
+ for(i = num_tmps; i--; ) {
+ lp_build_unpack2(builder, src_type, tmp_type, dst[i], &dst[2*i + 0], &dst[2*i + 1]);
+ }
+
+ src_type = tmp_type;
+
+ num_tmps *= 2;
+ }
+
+ assert(num_tmps == num_dsts);
+}
+
+
+/**
+ * Non-interleaved pack.
+ *
+ * This will move values as
+ *
+ * lo = __ l0 __ l1 __ l2 __.. __ ln
+ * hi = __ h0 __ h1 __ h2 __.. __ hn
+ * res = l0 l1 l2 .. ln h0 h1 h2 .. hn
+ *
+ * This will only change the number of bits the values are represented, not the
+ * values themselves.
+ *
+ * It is assumed the values are already clamped into the destination type range.
+ * Values outside that range will produce undefined results. Use
+ * lp_build_packs2 instead.
+ */
+LLVMValueRef
+lp_build_pack2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi)
+{
+ LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
+ LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
+ LLVMValueRef shuffle;
+ LLVMValueRef res;
+
+ dst_vec_type = lp_build_vec_type(dst_type);
+
+ assert(!src_type.floating);
+ assert(!dst_type.floating);
+ assert(src_type.width == dst_type.width * 2);
+ assert(src_type.length * 2 == dst_type.length);
+
+ if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
+ switch(src_type.width) {
+ case 32:
+ if(dst_type.sign) {
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
+ }
+ else {
+ if (util_cpu_caps.has_sse4_1) {
+ /* PACKUSDW is the only instrinsic with a consistent signature */
+ return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
+ }
+ else {
+ assert(0);
+ return LLVMGetUndef(dst_vec_type);
+ }
+ }
+ break;
+
+ case 16:
+ if(dst_type.sign)
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
+ else
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
+ break;
+
+ default:
+ assert(0);
+ return LLVMGetUndef(dst_vec_type);
+ break;
+ }
+
+ res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
+ return res;
+ }
+
+ lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
+ hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
+
+ shuffle = lp_build_const_pack_shuffle(dst_type.length);
+
+ res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, "");
+
+ return res;
+}
+
+
+
+/**
+ * Non-interleaved pack and saturate.
+ *
+ * Same as lp_build_pack2 but will saturate values so that they fit into the
+ * destination type.
+ */
+LLVMValueRef
+lp_build_packs2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi)
+{
+ boolean clamp;
+
+ assert(!src_type.floating);
+ assert(!dst_type.floating);
+ assert(src_type.sign == dst_type.sign);
+ assert(src_type.width == dst_type.width * 2);
+ assert(src_type.length * 2 == dst_type.length);
+
+ clamp = TRUE;
+
+ /* All X86 SSE non-interleaved pack instructions take signed inputs and
+ * saturate them, so no need to clamp for those cases. */
+ if(util_cpu_caps.has_sse2 &&
+ src_type.width * src_type.length == 128 &&
+ src_type.sign)
+ clamp = FALSE;
+
+ if(clamp) {
+ struct lp_build_context bld;
+ unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width;
+ LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1);
+ lp_build_context_init(&bld, builder, src_type);
+ lo = lp_build_min(&bld, lo, dst_max);
+ hi = lp_build_min(&bld, hi, dst_max);
+ /* FIXME: What about lower bound? */
+ }
+
+ return lp_build_pack2(builder, src_type, dst_type, lo, hi);
+}
+
+
+/**
+ * Truncate the bit width.
+ *
+ * TODO: Handle saturation consistently.
+ */
+LLVMValueRef
+lp_build_pack(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ boolean clamped,
+ const LLVMValueRef *src, unsigned num_srcs)
+{
+ LLVMValueRef (*pack2)(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi);
+ LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+
+
+ /* Register width must remain constant */
+ assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+ /* We must not loose or gain channels. Only precision */
+ assert(src_type.length * num_srcs == dst_type.length);
+
+ if(clamped)
+ pack2 = &lp_build_pack2;
+ else
+ pack2 = &lp_build_packs2;
+
+ for(i = 0; i < num_srcs; ++i)
+ tmp[i] = src[i];
+
+ while(src_type.width > dst_type.width) {
+ struct lp_type tmp_type = src_type;
+
+ tmp_type.width /= 2;
+ tmp_type.length *= 2;
+
+ /* Take in consideration the sign changes only in the last step */
+ if(tmp_type.width == dst_type.width)
+ tmp_type.sign = dst_type.sign;
+
+ num_srcs /= 2;
+
+ for(i = 0; i < num_srcs; ++i)
+ tmp[i] = pack2(builder, src_type, tmp_type, tmp[2*i + 0], tmp[2*i + 1]);
+
+ src_type = tmp_type;
+ }
+
+ assert(num_srcs == 1);
+
+ return tmp[0];
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.h b/src/gallium/drivers/llvmpipe/lp_bld_pack.h
new file mode 100644
index 0000000000..fb2a34984a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.h
@@ -0,0 +1,95 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for packing/unpacking conversions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_BLD_PACK_H
+#define LP_BLD_PACK_H
+
+
+#include <llvm-c/Core.h>
+
+
+struct lp_type;
+
+
+LLVMValueRef
+lp_build_interleave2(LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ unsigned lo_hi);
+
+
+void
+lp_build_unpack2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef src,
+ LLVMValueRef *dst_lo,
+ LLVMValueRef *dst_hi);
+
+
+void
+lp_build_unpack(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef src,
+ LLVMValueRef *dst, unsigned num_dsts);
+
+
+LLVMValueRef
+lp_build_packs2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi);
+
+
+LLVMValueRef
+lp_build_pack2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi);
+
+
+LLVMValueRef
+lp_build_pack(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ boolean clamped,
+ const LLVMValueRef *src, unsigned num_srcs);
+
+
+#endif /* !LP_BLD_PACK_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
new file mode 100644
index 0000000000..4d272bea87
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
@@ -0,0 +1,190 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling -- common code.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_type.h"
+#include "lp_bld_format.h"
+#include "lp_bld_sample.h"
+
+
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+ const struct pipe_texture *texture,
+ const struct pipe_sampler_state *sampler)
+{
+ memset(state, 0, sizeof *state);
+
+ if(!texture)
+ return;
+
+ if(!sampler)
+ return;
+
+ state->format = texture->format;
+ state->target = texture->target;
+ state->pot_width = util_is_pot(texture->width[0]);
+ state->pot_height = util_is_pot(texture->height[0]);
+ state->pot_depth = util_is_pot(texture->depth[0]);
+
+ state->wrap_s = sampler->wrap_s;
+ state->wrap_t = sampler->wrap_t;
+ state->wrap_r = sampler->wrap_r;
+ state->min_img_filter = sampler->min_img_filter;
+ state->min_mip_filter = sampler->min_mip_filter;
+ state->mag_img_filter = sampler->mag_img_filter;
+ if(sampler->compare_mode) {
+ state->compare_mode = sampler->compare_mode;
+ state->compare_func = sampler->compare_func;
+ }
+ state->normalized_coords = sampler->normalized_coords;
+ state->prefilter = sampler->prefilter;
+}
+
+
+/**
+ * Gather elements from scatter positions in memory into a single vector.
+ *
+ * @param src_width src element width
+ * @param dst_width result element width (source will be expanded to fit)
+ * @param length length of the offsets,
+ * @param base_ptr base pointer, should be a i8 pointer type.
+ * @param offsets vector with offsets
+ */
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets)
+{
+ LLVMTypeRef src_type = LLVMIntType(src_width);
+ LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
+ LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+ LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
+ LLVMValueRef res;
+ unsigned i;
+
+ res = LLVMGetUndef(dst_vec_type);
+ for(i = 0; i < length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef elem_offset;
+ LLVMValueRef elem_ptr;
+ LLVMValueRef elem;
+
+ elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
+ elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
+ elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
+ elem = LLVMBuildLoad(builder, elem_ptr, "");
+
+ assert(src_width <= dst_width);
+ if(src_width > dst_width)
+ elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
+ if(src_width < dst_width)
+ elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
+
+ res = LLVMBuildInsertElement(builder, res, elem, index, "");
+ }
+
+ return res;
+}
+
+
+/**
+ * Compute the offset of a pixel.
+ *
+ * x, y, y_stride are vectors
+ */
+LLVMValueRef
+lp_build_sample_offset(struct lp_build_context *bld,
+ const struct util_format_description *format_desc,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr)
+{
+ LLVMValueRef x_stride;
+ LLVMValueRef offset;
+
+ x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8);
+
+ if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ LLVMValueRef x_lo, x_hi;
+ LLVMValueRef y_lo, y_hi;
+ LLVMValueRef x_stride_lo, x_stride_hi;
+ LLVMValueRef y_stride_lo, y_stride_hi;
+ LLVMValueRef x_offset_lo, x_offset_hi;
+ LLVMValueRef y_offset_lo, y_offset_hi;
+ LLVMValueRef offset_lo, offset_hi;
+
+ x_lo = LLVMBuildAnd(bld->builder, x, bld->one, "");
+ y_lo = LLVMBuildAnd(bld->builder, y, bld->one, "");
+
+ x_hi = LLVMBuildLShr(bld->builder, x, bld->one, "");
+ y_hi = LLVMBuildLShr(bld->builder, y, bld->one, "");
+
+ x_stride_lo = x_stride;
+ y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8);
+
+ x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8);
+ y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, "");
+
+ x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo);
+ y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo);
+ offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo);
+
+ x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi);
+ y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi);
+ offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi);
+
+ offset = lp_build_add(bld, offset_hi, offset_lo);
+ }
+ else {
+ LLVMValueRef x_offset;
+ LLVMValueRef y_offset;
+
+ x_offset = lp_build_mul(bld, x, x_stride);
+ y_offset = lp_build_mul(bld, y, y_stride);
+
+ offset = lp_build_add(bld, x_offset, y_offset);
+ }
+
+ return offset;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
index 403d0e4836..8cb8210ca7 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
@@ -40,7 +40,9 @@
struct pipe_texture;
struct pipe_sampler_state;
+struct util_format_description;
struct lp_type;
+struct lp_build_context;
/**
@@ -119,6 +121,24 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_sampler_state *sampler);
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets);
+
+
+LLVMValueRef
+lp_build_sample_offset(struct lp_build_context *bld,
+ const struct util_format_description *format_desc,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr);
+
+
void
lp_build_sample_soa(LLVMBuilderRef builder,
const struct lp_sampler_static_state *static_state,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 8ca1be6f1b..47b68b71e2 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -27,7 +27,7 @@
/**
* @file
- * Texture sampling.
+ * Texture sampling -- SoA.
*
* @author Jose Fonseca <jfonseca@vmware.com>
*/
@@ -35,54 +35,23 @@
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_debug.h"
+#include "util/u_debug_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
+#include "lp_bld_pack.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
-void
-lp_sampler_static_state(struct lp_sampler_static_state *state,
- const struct pipe_texture *texture,
- const struct pipe_sampler_state *sampler)
-{
- memset(state, 0, sizeof *state);
-
- if(!texture)
- return;
-
- if(!sampler)
- return;
-
- state->format = texture->format;
- state->target = texture->target;
- state->pot_width = util_is_pot(texture->width[0]);
- state->pot_height = util_is_pot(texture->height[0]);
- state->pot_depth = util_is_pot(texture->depth[0]);
-
- state->wrap_s = sampler->wrap_s;
- state->wrap_t = sampler->wrap_t;
- state->wrap_r = sampler->wrap_r;
- state->min_img_filter = sampler->min_img_filter;
- state->min_mip_filter = sampler->min_mip_filter;
- state->mag_img_filter = sampler->mag_img_filter;
- if(sampler->compare_mode) {
- state->compare_mode = sampler->compare_mode;
- state->compare_func = sampler->compare_func;
- }
- state->normalized_coords = sampler->normalized_coords;
- state->prefilter = sampler->prefilter;
-}
-
-
-
/**
* Keep all information for sampling code generation in a single place.
*/
@@ -111,66 +80,61 @@ struct lp_build_sample_context
static void
-lp_build_sample_texel(struct lp_build_sample_context *bld,
- LLVMValueRef x,
- LLVMValueRef y,
- LLVMValueRef y_stride,
- LLVMValueRef data_ptr,
- LLVMValueRef *texel)
+lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
{
- struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- LLVMValueRef x_stride;
LLVMValueRef offset;
+ LLVMValueRef packed;
+
+ offset = lp_build_sample_offset(&bld->int_coord_bld,
+ bld->format_desc,
+ x, y, y_stride,
+ data_ptr);
+
+ assert(bld->format_desc->block.width == 1);
+ assert(bld->format_desc->block.height == 1);
+ assert(bld->format_desc->block.bits <= bld->texel_type.width);
+
+ packed = lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset);
+
+ lp_build_unpack_rgba_soa(bld->builder,
+ bld->format_desc,
+ bld->texel_type,
+ packed, texel);
+}
- x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8);
-
- if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- LLVMValueRef x_lo, x_hi;
- LLVMValueRef y_lo, y_hi;
- LLVMValueRef x_stride_lo, x_stride_hi;
- LLVMValueRef y_stride_lo, y_stride_hi;
- LLVMValueRef x_offset_lo, x_offset_hi;
- LLVMValueRef y_offset_lo, y_offset_hi;
- LLVMValueRef offset_lo, offset_hi;
-
- x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, "");
- y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, "");
-
- x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, "");
- y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, "");
-
- x_stride_lo = x_stride;
- y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8);
-
- x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8);
- y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, "");
-
- x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo);
- y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo);
- offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo);
-
- x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi);
- y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi);
- offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi);
- offset = lp_build_add(int_coord_bld, offset_hi, offset_lo);
- }
- else {
- LLVMValueRef x_offset;
- LLVMValueRef y_offset;
+static LLVMValueRef
+lp_build_sample_packed(struct lp_build_sample_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr)
+{
+ LLVMValueRef offset;
- x_offset = lp_build_mul(int_coord_bld, x, x_stride);
- y_offset = lp_build_mul(int_coord_bld, y, y_stride);
+ offset = lp_build_sample_offset(&bld->int_coord_bld,
+ bld->format_desc,
+ x, y, y_stride,
+ data_ptr);
- offset = lp_build_add(int_coord_bld, x_offset, y_offset);
- }
+ assert(bld->format_desc->block.width == 1);
+ assert(bld->format_desc->block.height == 1);
+ assert(bld->format_desc->block.bits <= bld->texel_type.width);
- lp_build_load_rgba_soa(bld->builder,
- bld->format_desc,
- bld->texel_type,
- data_ptr,
- offset,
- texel);
+ return lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset);
}
@@ -208,7 +172,8 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
/* FIXME */
- _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode);
+ _debug_printf("warning: failed to translate texture wrap mode %s\n",
+ debug_dump_tex_wrap(wrap_mode, TRUE));
coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
coord = lp_build_min(int_coord_bld, coord, length_minus_one);
break;
@@ -240,7 +205,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s);
y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t);
- lp_build_sample_texel(bld, x, y, stride, data_ptr, texel);
+ lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel);
}
@@ -274,8 +239,8 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart);
t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart);
- x0 = lp_build_int(&bld->coord_bld, s_ipart);
- y0 = lp_build_int(&bld->coord_bld, t_ipart);
+ x0 = lp_build_itrunc(&bld->coord_bld, s_ipart);
+ y0 = lp_build_itrunc(&bld->coord_bld, t_ipart);
x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s);
y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
@@ -286,10 +251,10 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s);
y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
- lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
- lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
- lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
- lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
+ lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
+ lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
+ lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
+ lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
/* TODO: Don't interpolate missing channels */
for(chan = 0; chan < 4; ++chan) {
@@ -304,6 +269,217 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
static void
+lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
+ struct lp_type dst_type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba)
+{
+ LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff);
+ unsigned chan;
+
+ /* Decode the input vector components */
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned start = chan*8;
+ unsigned stop = start + 8;
+ LLVMValueRef input;
+
+ input = packed;
+
+ if(start)
+ input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), "");
+
+ if(stop < 32)
+ input = LLVMBuildAnd(builder, input, mask, "");
+
+ input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
+
+ rgba[chan] = input;
+ }
+}
+
+
+static void
+lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
+{
+ LLVMBuilderRef builder = bld->builder;
+ struct lp_build_context i32, h16, u8n;
+ LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
+ LLVMValueRef i32_c8, i32_c128, i32_c255;
+ LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
+ LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
+ LLVMValueRef x0, x1;
+ LLVMValueRef y0, y1;
+ LLVMValueRef neighbors[2][2];
+ LLVMValueRef neighbors_lo[2][2];
+ LLVMValueRef neighbors_hi[2][2];
+ LLVMValueRef packed, packed_lo, packed_hi;
+ LLVMValueRef unswizzled[4];
+
+ lp_build_context_init(&i32, builder, lp_type_int(32));
+ lp_build_context_init(&h16, builder, lp_type_ufixed(16));
+ lp_build_context_init(&u8n, builder, lp_type_unorm(8));
+
+ i32_vec_type = lp_build_vec_type(i32.type);
+ h16_vec_type = lp_build_vec_type(h16.type);
+ u8n_vec_type = lp_build_vec_type(u8n.type);
+
+ s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+ t = lp_build_mul_imm(&bld->coord_bld, t, 256);
+
+ s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
+ t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
+
+ i32_c128 = lp_build_int_const_scalar(i32.type, -128);
+ s = LLVMBuildAdd(builder, s, i32_c128, "");
+ t = LLVMBuildAdd(builder, t, i32_c128, "");
+
+ i32_c8 = lp_build_int_const_scalar(i32.type, 8);
+ s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
+ t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
+
+ i32_c255 = lp_build_int_const_scalar(i32.type, 255);
+ s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
+ t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
+
+ x0 = s_ipart;
+ y0 = t_ipart;
+
+ x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
+ y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
+
+ x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ /*
+ * Transform 4 x i32 in
+ *
+ * s_fpart = {s0, s1, s2, s3}
+ *
+ * into 8 x i16
+ *
+ * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
+ *
+ * into two 8 x i16
+ *
+ * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
+ * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
+ *
+ * and likewise for t_fpart. There is no risk of loosing precision here
+ * since the fractional parts only use the lower 8bits.
+ */
+
+ s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
+ t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
+
+ {
+ LLVMTypeRef elem_type = LLVMInt32Type();
+ LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef shuffle_lo;
+ LLVMValueRef shuffle_hi;
+ unsigned i, j;
+
+ for(j = 0; j < h16.type.length; j += 4) {
+ unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
+ LLVMValueRef index;
+
+ index = LLVMConstInt(elem_type, j/2 + subindex, 0);
+ for(i = 0; i < 4; ++i)
+ shuffles_lo[j + i] = index;
+
+ index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
+ for(i = 0; i < 4; ++i)
+ shuffles_hi[j + i] = index;
+ }
+
+ shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
+ shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
+
+ s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
+ t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
+ s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
+ t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
+ }
+
+ /*
+ * Fetch the pixels as 4 x 32bit (rgba order might differ):
+ *
+ * rgba0 rgba1 rgba2 rgba3
+ *
+ * bit cast them into 16 x u8
+ *
+ * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
+ *
+ * unpack them into two 8 x i16:
+ *
+ * r0 g0 b0 a0 r1 g1 b1 a1
+ * r2 g2 b2 a2 r3 g3 b3 a3
+ *
+ * The higher 8 bits of the resulting elements will be zero.
+ */
+
+ neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr);
+ neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr);
+ neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr);
+ neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr);
+
+ neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
+ neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
+ neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
+ neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
+
+ lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
+ lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
+ lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
+ lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
+
+ /*
+ * Linear interpolate with 8.8 fixed point.
+ */
+
+ packed_lo = lp_build_lerp_2d(&h16,
+ s_fpart_lo, t_fpart_lo,
+ neighbors_lo[0][0],
+ neighbors_lo[0][1],
+ neighbors_lo[1][0],
+ neighbors_lo[1][1]);
+
+ packed_hi = lp_build_lerp_2d(&h16,
+ s_fpart_hi, t_fpart_hi,
+ neighbors_hi[0][0],
+ neighbors_hi[0][1],
+ neighbors_hi[1][0],
+ neighbors_hi[1][1]);
+
+ packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
+
+ /*
+ * Convert to SoA and swizzle.
+ */
+
+ packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
+
+ lp_build_rgba8_to_f32_soa(bld->builder,
+ bld->texel_type,
+ packed, unswizzled);
+
+ lp_build_format_swizzle_soa(bld->format_desc,
+ bld->texel_type, unswizzled,
+ texel);
+}
+
+
+static void
lp_build_sample_compare(struct lp_build_sample_context *bld,
LLVMValueRef p,
LLVMValueRef *texel)
@@ -402,7 +578,10 @@ lp_build_sample_soa(LLVMBuilderRef builder,
break;
case PIPE_TEX_FILTER_LINEAR:
case PIPE_TEX_FILTER_ANISO:
- lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+ if(lp_format_is_rgba8(bld.format_desc))
+ lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel);
+ else
+ lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
break;
default:
assert(0);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
index 1f6da80448..b9472127a6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
@@ -40,7 +40,7 @@
#include <llvm-c/Core.h>
-struct lp_type type;
+struct lp_type;
struct lp_build_context;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index adc81569ed..64027de6aa 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -158,14 +158,14 @@ emit_fetch(
const unsigned chan_index )
{
const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
- unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+ unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
LLVMValueRef res;
switch (swizzle) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
+ case TGSI_SWIZZLE_X:
+ case TGSI_SWIZZLE_Y:
+ case TGSI_SWIZZLE_Z:
+ case TGSI_SWIZZLE_W:
switch (reg->SrcRegister.File) {
case TGSI_FILE_CONSTANT: {
@@ -198,14 +198,6 @@ emit_fetch(
}
break;
- case TGSI_EXTSWIZZLE_ZERO:
- res = bld->base.zero;
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- res = bld->base.one;
- break;
-
default:
assert( 0 );
return bld->base.undef;
@@ -394,12 +386,7 @@ emit_kil(
unsigned swizzle;
/* Unswizzle channel */
- swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
-
- /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
- * not to be tested. */
- if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
- continue;
+ swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
/* Check if the component has not been already tested. */
assert(swizzle < NUM_CHANNELS);
@@ -488,7 +475,6 @@ emit_instruction(
#endif
case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
}
@@ -1386,15 +1372,6 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_NOISE1:
- case TGSI_OPCODE_NOISE2:
- case TGSI_OPCODE_NOISE3:
- case TGSI_OPCODE_NOISE4:
- FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = bld->base.zero;
- }
- break;
-
case TGSI_OPCODE_NOP:
break;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c
index 606243d6c5..1320a26721 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c
@@ -160,12 +160,31 @@ lp_build_int_vec_type(struct lp_type type)
struct lp_type
lp_int_type(struct lp_type type)
{
- struct lp_type int_type;
+ struct lp_type res_type;
- memset(&int_type, 0, sizeof int_type);
- int_type.width = type.width;
- int_type.length = type.length;
- return int_type;
+ memset(&res_type, 0, sizeof res_type);
+ res_type.width = type.width;
+ res_type.length = type.length;
+
+ return res_type;
+}
+
+
+/**
+ * Return the type with twice the bit width (hence half the number of elements).
+ */
+struct lp_type
+lp_wider_type(struct lp_type type)
+{
+ struct lp_type res_type;
+
+ memcpy(&res_type, &type, sizeof res_type);
+ res_type.width *= 2;
+ res_type.length /= 2;
+
+ assert(res_type.length);
+
+ return res_type;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h
index ee5ca3483c..2fb233d335 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h
@@ -43,13 +43,18 @@
/**
+ * Native SIMD register width.
+ *
+ * 128 for all architectures we care about.
+ */
+#define LP_NATIVE_VECTOR_WIDTH 128
+
+/**
* Several functions can only cope with vectors of length up to this value.
* You may need to increase that value if you want to represent bigger vectors.
*/
#define LP_MAX_VECTOR_LENGTH 16
-#define LP_MAX_TYPE_WIDTH 64
-
/**
* The LLVM type system can't conveniently express all the things we care about
@@ -134,6 +139,91 @@ struct lp_build_context
};
+static INLINE struct lp_type
+lp_type_float(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.floating = TRUE;
+ res_type.sign = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_int(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.sign = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_uint(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_unorm(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.norm = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_fixed(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.sign = TRUE;
+ res_type.fixed = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_ufixed(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.fixed = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
LLVMTypeRef
lp_build_elem_type(struct lp_type type);
@@ -166,6 +256,10 @@ struct lp_type
lp_int_type(struct lp_type type);
+struct lp_type
+lp_wider_type(struct lp_type type);
+
+
void
lp_build_context_init(struct lp_build_context *bld,
LLVMBuilderRef builder,
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index a4b2bd8c2a..202cb8ef43 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -107,11 +107,16 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
if (llvmpipe->draw)
draw_destroy( llvmpipe->draw );
- for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]);
+ pipe_surface_reference(&llvmpipe->framebuffer.cbufs[i], NULL);
+ }
+ pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL);
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
lp_destroy_tex_tile_cache(llvmpipe->tex_cache[i]);
+ pipe_texture_reference(&llvmpipe->texture[i], NULL);
+ }
for (i = 0; i < Elements(llvmpipe->constants); i++) {
if (llvmpipe->constants[i].buffer) {
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 8d5a0d4f1f..7df340554e 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -88,9 +88,6 @@ struct llvmpipe_context {
/** Mapped vertex buffers */
ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS];
- /** Mapped constant buffers */
- void *mapped_constants[PIPE_SHADER_TYPES];
-
/** Vertex format */
struct vertex_info vertex_info;
struct vertex_info vertex_info_vbuf;
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index 89772e62d3..0aa13a1fc6 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -45,54 +45,6 @@
-static void
-llvmpipe_map_constant_buffers(struct llvmpipe_context *lp)
-{
- struct pipe_screen *screen = lp->pipe.screen;
- uint i, size;
-
- for (i = 0; i < PIPE_SHADER_TYPES; i++) {
- if (lp->constants[i].buffer && lp->constants[i].buffer->size)
- lp->mapped_constants[i] = screen->buffer_map(screen, lp->constants[i].buffer,
- PIPE_BUFFER_USAGE_CPU_READ);
- }
-
- if (lp->constants[PIPE_SHADER_VERTEX].buffer)
- size = lp->constants[PIPE_SHADER_VERTEX].buffer->size;
- else
- size = 0;
-
- lp->jit_context.constants = lp->mapped_constants[PIPE_SHADER_FRAGMENT];
-
- draw_set_mapped_constant_buffer(lp->draw,
- lp->mapped_constants[PIPE_SHADER_VERTEX],
- size);
-}
-
-
-static void
-llvmpipe_unmap_constant_buffers(struct llvmpipe_context *lp)
-{
- struct pipe_screen *screen = lp->pipe.screen;
- uint i;
-
- /* really need to flush all prims since the vert/frag shaders const buffers
- * are going away now.
- */
- draw_flush(lp->draw);
-
- draw_set_mapped_constant_buffer(lp->draw, NULL, 0);
-
- lp->jit_context.constants = NULL;
-
- for (i = 0; i < 2; i++) {
- if (lp->constants[i].buffer && lp->constants[i].buffer->size)
- screen->buffer_unmap(screen, lp->constants[i].buffer);
- lp->mapped_constants[i] = NULL;
- }
-}
-
-
boolean
llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count)
@@ -124,7 +76,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
llvmpipe_update_derived( lp );
llvmpipe_map_transfers(lp);
- llvmpipe_map_constant_buffers(lp);
/*
* Map vertex buffers
@@ -163,7 +114,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
/* Note: leave drawing surfaces mapped */
- llvmpipe_unmap_constant_buffers(lp);
lp->dirty_render_cache = TRUE;
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index b5c1c95bb7..cd8381fe30 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -58,8 +58,10 @@ llvmpipe_flush( struct pipe_context *pipe,
* in the hope that a later clear will wipe them out.
*/
for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++)
- if (llvmpipe->cbuf_cache[i])
+ if (llvmpipe->cbuf_cache[i]) {
+ lp_tile_cache_map_transfers(llvmpipe->cbuf_cache[i]);
lp_flush_tile_cache(llvmpipe->cbuf_cache[i]);
+ }
/* Need this call for hardware buffers before swapbuffers.
*
@@ -71,8 +73,10 @@ llvmpipe_flush( struct pipe_context *pipe,
}
else if (flags & PIPE_FLUSH_RENDER_CACHE) {
for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++)
- if (llvmpipe->cbuf_cache[i])
+ if (llvmpipe->cbuf_cache[i]) {
+ lp_tile_cache_map_transfers(llvmpipe->cbuf_cache[i]);
lp_flush_tile_cache(llvmpipe->cbuf_cache[i]);
+ }
/* FIXME: untile zsbuf! */
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index f7111c1e5c..13535dd638 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -36,8 +36,10 @@
#include <llvm-c/Transforms/Scalar.h>
#include "util/u_memory.h"
+#include "util/u_cpu_detect.h"
#include "lp_screen.h"
#include "lp_bld_intr.h"
+#include "lp_bld_misc.h"
#include "lp_jit.h"
@@ -147,10 +149,16 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
{
char *error = NULL;
-#ifdef LLVM_NATIVE_ARCH
+ util_cpu_detect();
+
+#if 0
+ /* For simulating less capable machines */
+ util_cpu_caps.has_sse3 = 0;
+ util_cpu_caps.has_sse4_1 = 0;
+#endif
+
LLVMLinkInJIT();
LLVMInitializeNativeTarget();
-#endif
screen->module = LLVMModuleCreateWithName("llvmpipe");
@@ -168,8 +176,15 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
LLVMAddTargetData(screen->target, screen->pass);
/* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
* but there are more on SVN. */
+ /* TODO: Add more passes */
LLVMAddConstantPropagationPass(screen->pass);
- LLVMAddInstructionCombiningPass(screen->pass);
+ if(util_cpu_caps.has_sse4_1) {
+ /* FIXME: There is a bug in this pass, whereby the combination of fptosi
+ * and sitofp (necessary for trunc/floor/ceil/round implementation)
+ * somehow becomes invalid code.
+ */
+ LLVMAddInstructionCombiningPass(screen->pass);
+ }
LLVMAddPromoteMemoryToRegisterPass(screen->pass);
LLVMAddGVNPass(screen->pass);
LLVMAddCFGSimplificationPass(screen->pass);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 2d2fc19a65..c43b3da450 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -115,6 +115,7 @@ struct setup_context {
/**
* Execute fragment shader for the four fragments in the quad.
*/
+ALIGN_STACK
static void
shade_quads(struct llvmpipe_context *llvmpipe,
struct quad_header *quads[],
@@ -124,7 +125,7 @@ shade_quads(struct llvmpipe_context *llvmpipe,
struct quad_header *quad = quads[0];
const unsigned x = quad->input.x0;
const unsigned y = quad->input.y0;
- uint8_t *tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y);
+ uint8_t *tile;
uint8_t *color;
void *depth;
uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS];
@@ -150,7 +151,13 @@ shade_quads(struct llvmpipe_context *llvmpipe,
mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0;
/* color buffer */
- color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0);
+ if(llvmpipe->framebuffer.nr_cbufs >= 1 &&
+ llvmpipe->framebuffer.cbufs[0]) {
+ tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y);
+ color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0);
+ }
+ else
+ color = NULL;
/* depth buffer */
if(llvmpipe->zsbuf_map) {
@@ -271,11 +278,13 @@ clip_emit_quad( struct setup_context *setup, struct quad_header *quad )
* until we codegenerate single-quad variants of the fragment pipeline
* we need this hack. */
const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE;
- struct quad_header quads[nr_quads];
- struct quad_header *quad_ptrs[nr_quads];
+ struct quad_header quads[4];
+ struct quad_header *quad_ptrs[4];
int x0 = block_x(quad->input.x0);
unsigned i;
+ assert(nr_quads == 4);
+
for(i = 0; i < nr_quads; ++i) {
int x = x0 + 2*i;
if(x == quad->input.x0)
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 9faed5a0b1..2e9aa9fffe 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -83,6 +83,7 @@
#include "lp_bld_debug.h"
#include "lp_screen.h"
#include "lp_context.h"
+#include "lp_buffer.h"
#include "lp_state.h"
#include "lp_quad.h"
#include "lp_tex_sample.h"
@@ -399,9 +400,9 @@ generate_fragment(struct llvmpipe_context *lp,
#ifdef DEBUG
tgsi_dump(shader->base.tokens, 0);
if(key->depth.enabled) {
+ debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
debug_printf("depth.writemask = %u\n", key->depth.writemask);
- debug_printf("depth.occlusion_count = %u\n", key->depth.occlusion_count);
}
if(key->alpha.enabled) {
debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
@@ -419,6 +420,34 @@ generate_fragment(struct llvmpipe_context *lp,
debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
}
debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
+ for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
+ if(key->sampler[i].format) {
+ debug_printf("sampler[%u] = \n", i);
+ debug_printf(" .format = %s\n",
+ pf_name(key->sampler[i].format));
+ debug_printf(" .target = %s\n",
+ debug_dump_tex_target(key->sampler[i].target, TRUE));
+ debug_printf(" .pot = %u %u %u\n",
+ key->sampler[i].pot_width,
+ key->sampler[i].pot_height,
+ key->sampler[i].pot_depth);
+ debug_printf(" .wrap = %s %s %s\n",
+ debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
+ debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
+ debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
+ debug_printf(" .min_img_filter = %s\n",
+ debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
+ debug_printf(" .min_mip_filter = %s\n",
+ debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
+ debug_printf(" .mag_img_filter = %s\n",
+ debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
+ if(key->sampler[i].compare_mode)
+ debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
+ debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords);
+ debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter);
+ }
+ }
+
#endif
variant = CALLOC_STRUCT(lp_fragment_shader_variant);
@@ -582,6 +611,11 @@ generate_fragment(struct llvmpipe_context *lp,
* Translate the LLVM IR into machine code.
*/
+ if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
+ LLVMDumpValue(variant->function);
+ abort();
+ }
+
LLVMRunFunctionPassManager(screen->pass, variant->function);
#ifdef DEBUG
@@ -589,11 +623,6 @@ generate_fragment(struct llvmpipe_context *lp,
debug_printf("\n");
#endif
- if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
- LLVMDumpValue(variant->function);
- abort();
- }
-
variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function);
#ifdef DEBUG
@@ -672,16 +701,29 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
void
llvmpipe_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
- const struct pipe_constant_buffer *buf)
+ const struct pipe_constant_buffer *constants)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ struct pipe_buffer *buffer = constants ? constants->buffer : NULL;
+ unsigned size = buffer ? buffer->size : 0;
+ const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL;
assert(shader < PIPE_SHADER_TYPES);
assert(index == 0);
+ if(shader == PIPE_SHADER_VERTEX)
+ draw_flush(llvmpipe->draw);
+
/* note: reference counting */
- pipe_buffer_reference(&llvmpipe->constants[shader].buffer,
- buf ? buf->buffer : NULL);
+ pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer);
+
+ if(shader == PIPE_SHADER_FRAGMENT) {
+ llvmpipe->jit_context.constants = data;
+ }
+
+ if(shader == PIPE_SHADER_VERTEX) {
+ draw_set_mapped_constant_buffer(llvmpipe->draw, data, size);
+ }
llvmpipe->dirty |= LP_NEW_CONSTANTS;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c
index 177a26b7b1..c06ce8b75c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c
@@ -53,10 +53,11 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
/* check if changing cbuf */
if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) {
/* flush old */
+ lp_tile_cache_map_transfers(lp->cbuf_cache[i]);
lp_flush_tile_cache(lp->cbuf_cache[i]);
/* assign new */
- lp->framebuffer.cbufs[i] = fb->cbufs[i];
+ pipe_surface_reference(&lp->framebuffer.cbufs[i], fb->cbufs[i]);
/* update cache */
lp_tile_cache_set_surface(lp->cbuf_cache[i], fb->cbufs[i]);
@@ -81,7 +82,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
}
/* assign new */
- lp->framebuffer.zsbuf = fb->zsbuf;
+ pipe_surface_reference(&lp->framebuffer.zsbuf, fb->zsbuf);
/* Tell draw module how deep the Z/depth buffer is */
if (lp->framebuffer.zsbuf) {
diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
index a88e110c66..39d80726e6 100644
--- a/src/gallium/drivers/llvmpipe/lp_test.h
+++ b/src/gallium/drivers/llvmpipe/lp_test.h
@@ -56,6 +56,9 @@
#include "lp_bld_type.h"
+#define LP_TEST_NUM_SAMPLES 32
+
+
void
write_tsv_header(FILE *fp);
@@ -68,17 +71,28 @@ boolean
test_all(unsigned verbose, FILE *fp);
+#if defined(PIPE_CC_MSVC)
+
+unsigned __int64 __rdtsc();
+#pragma intrinsic(__rdtsc)
+#define rdtsc() __rdtsc()
+
+#elif defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
+
static INLINE uint64_t
rdtsc(void)
{
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
uint32_t hi, lo;
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
#else
- return 0;
+
+#define rdtsc() 0
+
#endif
-}
+
float
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 94b661dcba..149fec1d54 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -477,8 +477,8 @@ test_one(unsigned verbose,
char *error = NULL;
blend_test_ptr_t blend_test_ptr;
boolean success;
- const unsigned n = 32;
- int64_t cycles[n];
+ const unsigned n = LP_TEST_NUM_SAMPLES;
+ int64_t cycles[LP_TEST_NUM_SAMPLES];
double cycles_avg = 0.0;
unsigned i, j;
@@ -530,11 +530,11 @@ test_one(unsigned verbose,
success = TRUE;
for(i = 0; i < n && success; ++i) {
if(mode == AoS) {
- uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
int64_t start_counter = 0;
int64_t end_counter = 0;
@@ -595,11 +595,11 @@ test_one(unsigned verbose,
if(mode == SoA) {
const unsigned stride = type.length*type.width/8;
- uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
int64_t start_counter = 0;
int64_t end_counter = 0;
boolean mismatch;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index 9dcf58e5dc..ac2a6d05e3 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -156,8 +156,8 @@ test_one(unsigned verbose,
char *error = NULL;
conv_test_ptr_t conv_test_ptr;
boolean success;
- const unsigned n = 32;
- int64_t cycles[n];
+ const unsigned n = LP_TEST_NUM_SAMPLES;
+ int64_t cycles[LP_TEST_NUM_SAMPLES];
double cycles_avg = 0.0;
unsigned num_srcs;
unsigned num_dsts;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 7d83f899e6..b2403ad521 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -35,10 +35,11 @@
#include <llvm-c/Target.h>
#include <llvm-c/Transforms/Scalar.h>
+#include "util/u_cpu_detect.h"
#include "util/u_format.h"
-#include "lp_bld_flow.h"
#include "lp_bld_format.h"
+#include "lp_test.h"
struct pixel_test_case
@@ -89,40 +90,63 @@ struct pixel_test_case test_cases[] =
};
-typedef void (*load_ptr_t)(const void *, float *);
+void
+write_tsv_header(FILE *fp)
+{
+ fprintf(fp,
+ "result\t"
+ "format\n");
+
+ fflush(fp);
+}
+
+
+static void
+write_tsv_row(FILE *fp,
+ const struct util_format_description *desc,
+ boolean success)
+{
+ fprintf(fp, "%s\t", success ? "pass" : "fail");
+
+ fprintf(fp, "%s\n", desc->name);
+
+ fflush(fp);
+}
+
+
+typedef void (*load_ptr_t)(const uint32_t packed, float *);
static LLVMValueRef
add_load_rgba_test(LLVMModuleRef module,
- enum pipe_format format)
+ const struct util_format_description *desc)
{
LLVMTypeRef args[2];
LLVMValueRef func;
- LLVMValueRef ptr;
+ LLVMValueRef packed;
LLVMValueRef rgba_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef rgba;
- struct lp_build_loop_state loop;
- args[0] = LLVMPointerType(LLVMInt8Type(), 0);
+ args[0] = LLVMInt32Type();
args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
func = LLVMAddFunction(module, "load", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
LLVMSetFunctionCallConv(func, LLVMCCallConv);
- ptr = LLVMGetParam(func, 0);
+ packed = LLVMGetParam(func, 0);
rgba_ptr = LLVMGetParam(func, 1);
block = LLVMAppendBasicBlock(func, "entry");
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop);
+ if(desc->block.bits < 32)
+ packed = LLVMBuildTrunc(builder, packed, LLVMIntType(desc->block.bits), "");
- rgba = lp_build_load_rgba_aos(builder, format, ptr);
- LLVMBuildStore(builder, rgba, rgba_ptr);
+ rgba = lp_build_unpack_rgba_aos(builder, desc, packed);
- lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop);
+ LLVMBuildStore(builder, rgba, rgba_ptr);
LLVMBuildRetVoid(builder);
@@ -131,27 +155,28 @@ add_load_rgba_test(LLVMModuleRef module,
}
-typedef void (*store_ptr_t)(void *, const float *);
+typedef void (*store_ptr_t)(uint32_t *, const float *);
static LLVMValueRef
add_store_rgba_test(LLVMModuleRef module,
- enum pipe_format format)
+ const struct util_format_description *desc)
{
LLVMTypeRef args[2];
LLVMValueRef func;
- LLVMValueRef ptr;
+ LLVMValueRef packed_ptr;
LLVMValueRef rgba_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef rgba;
+ LLVMValueRef packed;
- args[0] = LLVMPointerType(LLVMInt8Type(), 0);
+ args[0] = LLVMPointerType(LLVMInt32Type(), 0);
args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
func = LLVMAddFunction(module, "store", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
LLVMSetFunctionCallConv(func, LLVMCCallConv);
- ptr = LLVMGetParam(func, 0);
+ packed_ptr = LLVMGetParam(func, 0);
rgba_ptr = LLVMGetParam(func, 1);
block = LLVMAppendBasicBlock(func, "entry");
@@ -160,7 +185,12 @@ add_store_rgba_test(LLVMModuleRef module,
rgba = LLVMBuildLoad(builder, rgba_ptr, "");
- lp_build_store_rgba_aos(builder, format, ptr, rgba);
+ packed = lp_build_pack_rgba_aos(builder, desc, rgba);
+
+ if(desc->block.bits < 32)
+ packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
+
+ LLVMBuildStore(builder, packed, packed_ptr);
LLVMBuildRetVoid(builder);
@@ -170,7 +200,7 @@ add_store_rgba_test(LLVMModuleRef module,
static boolean
-test_format(const struct pixel_test_case *test)
+test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test)
{
LLVMModuleRef module = NULL;
LLVMValueRef load = NULL;
@@ -192,8 +222,8 @@ test_format(const struct pixel_test_case *test)
module = LLVMModuleCreateWithName("test");
- load = add_load_rgba_test(module, test->format);
- store = add_store_rgba_test(module, test->format);
+ load = add_load_rgba_test(module, desc);
+ store = add_store_rgba_test(module, desc);
if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
LLVMDumpModule(module);
@@ -229,7 +259,7 @@ test_format(const struct pixel_test_case *test)
memset(unpacked, 0, sizeof unpacked);
packed = 0;
- load_ptr(&test->packed, unpacked);
+ load_ptr(test->packed, unpacked);
store_ptr(&packed, unpacked);
success = TRUE;
@@ -255,23 +285,29 @@ test_format(const struct pixel_test_case *test)
if(pass)
LLVMDisposePassManager(pass);
+ if(fp)
+ write_tsv_row(fp, desc, success);
+
return success;
}
-int main(int argc, char **argv)
+boolean
+test_all(unsigned verbose, FILE *fp)
{
unsigned i;
- int ret;
-
-#ifdef LLVM_NATIVE_ARCH
- LLVMLinkInJIT();
- LLVMInitializeNativeTarget();
-#endif
+ bool success = TRUE;
for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i)
- if(!test_format(&test_cases[i]))
- ret = 1;
+ if(!test_format(verbose, fp, &test_cases[i]))
+ success = FALSE;
- return ret;
+ return success;
+}
+
+
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+ return test_all(verbose, fp);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index f07fa256f1..314544aa9a 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -34,10 +34,25 @@
*/
+#include "util/u_cpu_detect.h"
+
#include "lp_bld_const.h"
+#include "lp_bld_misc.h"
#include "lp_test.h"
+#ifdef PIPE_CC_MSVC
+static INLINE double
+round(double x)
+{
+ if (x >= 0.0)
+ return floor(x + 0.5);
+ else
+ return ceil(x - 0.5);
+}
+#endif
+
+
void
dump_type(FILE *fp,
struct lp_type type)
@@ -365,10 +380,10 @@ int main(int argc, char **argv)
n = atoi(argv[i]);
}
-#ifdef LLVM_NATIVE_ARCH
LLVMLinkInJIT();
LLVMInitializeNativeTarget();
-#endif
+
+ util_cpu_detect();
if(fp) {
/* Warm up the caches */
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 724d437833..a00f2495df 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -66,16 +66,24 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
pf_get_block(lpt->base.format, &lpt->base.block);
for (level = 0; level <= pt->last_level; level++) {
+ unsigned nblocksx, nblocksy;
+
pt->width[level] = width;
pt->height[level] = height;
pt->depth[level] = depth;
pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);
- pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);
- lpt->stride[level] = align(pt->nblocksx[level]*pt->block.size, 16);
+ pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);
+
+ /* Allocate storage for whole quads. This is particularly important
+ * for depth surfaces, which are currently stored in a swizzled format. */
+ nblocksx = pf_get_nblocksx(&pt->block, align(width, 2));
+ nblocksy = pf_get_nblocksy(&pt->block, align(height, 2));
+
+ lpt->stride[level] = align(nblocksx*pt->block.size, 16);
lpt->level_offset[level] = buffer_size;
- buffer_size += (pt->nblocksy[level] *
+ buffer_size += (nblocksy *
((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
lpt->stride[level]);
@@ -353,17 +361,9 @@ llvmpipe_transfer_map( struct pipe_screen *_screen,
if(lpt->dt) {
struct llvmpipe_winsys *winsys = screen->winsys;
- unsigned flags = 0;
-
- if (transfer->usage != PIPE_TRANSFER_READ) {
- flags |= PIPE_BUFFER_USAGE_CPU_WRITE;
- }
-
- if (transfer->usage != PIPE_TRANSFER_WRITE) {
- flags |= PIPE_BUFFER_USAGE_CPU_READ;
- }
- map = winsys->displaytarget_map(winsys, lpt->dt, flags);
+ map = winsys->displaytarget_map(winsys, lpt->dt,
+ pipe_transfer_buffer_flags(transfer));
if (map == NULL)
return NULL;
}
@@ -373,7 +373,7 @@ llvmpipe_transfer_map( struct pipe_screen *_screen,
/* May want to different things here depending on read/write nature
* of the map:
*/
- if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ)
+ if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE))
{
/* Do something to notify sharing contexts of a texture change.
* In llvmpipe, that would mean flushing the texture cache.
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
index 0c06b659a1..ec3e002d62 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
@@ -236,47 +236,41 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc)
if(!pt)
return;
+ assert(tc->transfer_map);
+
/* push the tile to all positions marked as clear */
for (y = 0; y < pt->height; y += TILE_SIZE) {
for (x = 0; x < pt->width; x += TILE_SIZE) {
struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE];
- switch(tile->status) {
- case LP_TILE_STATUS_UNDEFINED:
- break;
-
- case LP_TILE_STATUS_CLEAR: {
- /**
- * Actually clear the tiles which were flagged as being in a clear state.
- */
-
- struct pipe_screen *screen = pt->texture->screen;
- unsigned tw = TILE_SIZE;
- unsigned th = TILE_SIZE;
- void *dst;
-
- if (pipe_clip_tile(x, y, &tw, &th, pt))
- continue;
-
- dst = screen->transfer_map(screen, pt);
- assert(dst);
- if(!dst)
- continue;
-
- util_fill_rect(dst, &pt->block, pt->stride,
- x, y, tw, th,
- tc->clear_val);
-
- screen->transfer_unmap(screen, pt);
+ if(tile->status != LP_TILE_STATUS_UNDEFINED) {
+ unsigned w = TILE_SIZE;
+ unsigned h = TILE_SIZE;
+
+ if (!pipe_clip_tile(x, y, &w, &h, pt)) {
+ switch(tile->status) {
+ case LP_TILE_STATUS_CLEAR:
+ /* Actually clear the tiles which were flagged as being in a
+ * clear state. */
+ util_fill_rect(tc->transfer_map, &pt->block, pt->stride,
+ x, y, w, h,
+ tc->clear_val);
+ break;
+
+ case LP_TILE_STATUS_DEFINED:
+ lp_tile_write_4ub(pt->format,
+ tile->color,
+ tc->transfer_map, pt->stride,
+ x, y, w, h);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+ }
tile->status = LP_TILE_STATUS_UNDEFINED;
- break;
- }
-
- case LP_TILE_STATUS_DEFINED:
- lp_put_tile_rgba_soa(pt, x, y, tile->color);
- tile->status = LP_TILE_STATUS_UNDEFINED;
- break;
}
}
}
@@ -294,6 +288,9 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc,
struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE];
struct pipe_transfer *pt = tc->transfer;
+ assert(tc->surface);
+ assert(tc->transfer);
+
switch(tile->status) {
case LP_TILE_STATUS_CLEAR:
/* don't get tile from framebuffer, just clear it */
@@ -301,11 +298,22 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc,
tile->status = LP_TILE_STATUS_DEFINED;
break;
- case LP_TILE_STATUS_UNDEFINED:
- /* get new tile data from transfer */
- lp_get_tile_rgba_soa(pt, x & ~(TILE_SIZE - 1), y & ~(TILE_SIZE - 1), tile->color);
+ case LP_TILE_STATUS_UNDEFINED: {
+ unsigned w = TILE_SIZE;
+ unsigned h = TILE_SIZE;
+
+ x &= ~(TILE_SIZE - 1);
+ y &= ~(TILE_SIZE - 1);
+
+ if (!pipe_clip_tile(x, y, &w, &h, tc->transfer))
+ lp_tile_read_4ub(pt->format,
+ tile->color,
+ tc->transfer_map, tc->transfer->stride,
+ x, y, w, h);
+
tile->status = LP_TILE_STATUS_DEFINED;
break;
+ }
case LP_TILE_STATUS_DEFINED:
/* nothing to do */
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.c b/src/gallium/drivers/llvmpipe/lp_tile_soa.c
deleted file mode 100644
index 4e4ccb31cc..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.c
+++ /dev/null
@@ -1,931 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * RGBA/float tile get/put functions.
- * Usable both by drivers and state trackers.
- */
-
-
-#include "pipe/p_defines.h"
-#include "pipe/p_inlines.h"
-
-#include "util/u_math.h"
-#include "util/u_memory.h"
-#include "util/u_rect.h"
-#include "util/u_tile.h"
-#include "lp_tile_cache.h"
-#include "lp_tile_soa.h"
-
-
-const unsigned char
-tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {
- { 0, 1, 4, 5, 8, 9, 12, 13},
- { 2, 3, 6, 7, 10, 11, 14, 15}
-};
-
-
-
-/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
-
-static void
-a8r8g8b8_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const unsigned pixel = *src++;
- TILE_PIXEL(p, j, i, 0) = (pixel >> 16) & 0xff;
- TILE_PIXEL(p, j, i, 1) = (pixel >> 8) & 0xff;
- TILE_PIXEL(p, j, i, 2) = (pixel >> 0) & 0xff;
- TILE_PIXEL(p, j, i, 3) = (pixel >> 24) & 0xff;
- }
- }
-}
-
-
-static void
-a8r8g8b8_put_tile_rgba(unsigned *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r, g, b, a;
- r = TILE_PIXEL(p, j, i, 0);
- g = TILE_PIXEL(p, j, i, 1);
- b = TILE_PIXEL(p, j, i, 2);
- a = TILE_PIXEL(p, j, i, 3);
- *dst++ = (a << 24) | (r << 16) | (g << 8) | b;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
-
-static void
-x8r8g8b8_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const unsigned pixel = *src++;
- TILE_PIXEL(p, j, i, 0) = (pixel >> 16) & 0xff;
- TILE_PIXEL(p, j, i, 1) = (pixel >> 8) & 0xff;
- TILE_PIXEL(p, j, i, 2) = (pixel >> 0) & 0xff;
- TILE_PIXEL(p, j, i, 3) = 0xff;
- }
- }
-}
-
-
-static void
-x8r8g8b8_put_tile_rgba(unsigned *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r, g, b;
- r = TILE_PIXEL(p, j, i, 0);
- g = TILE_PIXEL(p, j, i, 1);
- b = TILE_PIXEL(p, j, i, 2);
- *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/
-
-static void
-b8g8r8a8_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const unsigned pixel = *src++;
- TILE_PIXEL(p, j, i, 0) = (pixel >> 8) & 0xff;
- TILE_PIXEL(p, j, i, 1) = (pixel >> 16) & 0xff;
- TILE_PIXEL(p, j, i, 2) = (pixel >> 24) & 0xff;
- TILE_PIXEL(p, j, i, 3) = (pixel >> 0) & 0xff;
- }
- }
-}
-
-
-static void
-b8g8r8a8_put_tile_rgba(unsigned *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r, g, b, a;
- r = TILE_PIXEL(p, j, i, 0);
- g = TILE_PIXEL(p, j, i, 1);
- b = TILE_PIXEL(p, j, i, 2);
- a = TILE_PIXEL(p, j, i, 3);
- *dst++ = (b << 24) | (g << 16) | (r << 8) | a;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/
-
-static void
-a1r5g5b5_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const ushort pixel = *src++;
- TILE_PIXEL(p, j, i, 0) = ((pixel >> 10) & 0x1f) * 255 / 31;
- TILE_PIXEL(p, j, i, 1) = ((pixel >> 5) & 0x1f) * 255 / 31;
- TILE_PIXEL(p, j, i, 2) = ((pixel ) & 0x1f) * 255 / 31;
- TILE_PIXEL(p, j, i, 3) = ((pixel >> 15) ) * 255;
- }
- }
-}
-
-
-static void
-a1r5g5b5_put_tile_rgba(ushort *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r, g, b, a;
- r = TILE_PIXEL(p, j, i, 0);
- g = TILE_PIXEL(p, j, i, 1);
- b = TILE_PIXEL(p, j, i, 2);
- a = TILE_PIXEL(p, j, i, 3);
- r = r >> 3; /* 5 bits */
- g = g >> 3; /* 5 bits */
- b = b >> 3; /* 5 bits */
- a = a >> 7; /* 1 bit */
- *dst++ = (a << 15) | (r << 10) | (g << 5) | b;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/
-
-static void
-a4r4g4b4_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const ushort pixel = *src++;
- TILE_PIXEL(p, j, i, 0) = ((pixel >> 8) & 0xf) * 255 / 15;
- TILE_PIXEL(p, j, i, 1) = ((pixel >> 4) & 0xf) * 255 / 15;
- TILE_PIXEL(p, j, i, 2) = ((pixel ) & 0xf) * 255 / 15;
- TILE_PIXEL(p, j, i, 3) = ((pixel >> 12) ) * 255 / 15;
- }
- }
-}
-
-
-static void
-a4r4g4b4_put_tile_rgba(ushort *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r, g, b, a;
- r = TILE_PIXEL(p, j, i, 0);
- g = TILE_PIXEL(p, j, i, 1);
- b = TILE_PIXEL(p, j, i, 2);
- a = TILE_PIXEL(p, j, i, 3);
- r >>= 4;
- g >>= 4;
- b >>= 4;
- a >>= 4;
- *dst++ = (a << 12) | (r << 16) | (g << 4) | b;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_R5G6B5_UNORM ***/
-
-static void
-r5g6b5_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const ushort pixel = *src++;
- TILE_PIXEL(p, j, i, 0) = ((pixel >> 11) & 0x1f) * 255 / 31;
- TILE_PIXEL(p, j, i, 1) = ((pixel >> 5) & 0x3f) * 255 / 63;
- TILE_PIXEL(p, j, i, 2) = ((pixel ) & 0x1f) * 255 / 31;
- TILE_PIXEL(p, j, i, 3) = 255;
- }
- }
-}
-
-
-static void
-r5g6b5_put_tile_rgba(ushort *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- uint r = (uint) TILE_PIXEL(p, j, i, 0) * 31 / 255;
- uint g = (uint) TILE_PIXEL(p, j, i, 1) * 63 / 255;
- uint b = (uint) TILE_PIXEL(p, j, i, 2) * 31 / 255;
- *dst++ = (r << 11) | (g << 5) | (b);
- }
- }
-}
-
-
-
-/*** PIPE_FORMAT_Z16_UNORM ***/
-
-/**
- * Return each Z value as four floats in [0,1].
- */
-static void
-z16_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- const float scale = 1.0f / 65535.0f;
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) =
- TILE_PIXEL(p, j, i, 3) = *src++ * scale;
- }
- }
-}
-
-
-
-
-/*** PIPE_FORMAT_L8_UNORM ***/
-
-static void
-l8_get_tile_rgba(const ubyte *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++, src++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) = *src;
- TILE_PIXEL(p, j, i, 3) = 255;
- }
- }
-}
-
-
-static void
-l8_put_tile_rgba(ubyte *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r;
- r = TILE_PIXEL(p, j, i, 0);
- *dst++ = (ubyte) r;
- }
- }
-}
-
-
-
-/*** PIPE_FORMAT_A8_UNORM ***/
-
-static void
-a8_get_tile_rgba(const ubyte *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++, src++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) = 0;
- TILE_PIXEL(p, j, i, 3) = *src;
- }
- }
-}
-
-
-static void
-a8_put_tile_rgba(ubyte *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned a;
- a = TILE_PIXEL(p, j, i, 3);
- *dst++ = (ubyte) a;
- }
- }
-}
-
-
-
-/*** PIPE_FORMAT_R16_SNORM ***/
-
-static void
-r16_get_tile_rgba(const short *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++, src++) {
- TILE_PIXEL(p, j, i, 0) = MAX2(src[0] >> 7, 0);
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) = 0;
- TILE_PIXEL(p, j, i, 3) = 255;
- }
- }
-}
-
-
-static void
-r16_put_tile_rgba(short *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++, dst++) {
- dst[0] = TILE_PIXEL(p, j, i, 0) << 7;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/
-
-static void
-r16g16b16a16_get_tile_rgba(const short *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++, src += 4) {
- TILE_PIXEL(p, j, i, 0) = src[0] >> 8;
- TILE_PIXEL(p, j, i, 1) = src[1] >> 8;
- TILE_PIXEL(p, j, i, 2) = src[2] >> 8;
- TILE_PIXEL(p, j, i, 3) = src[3] >> 8;
- }
- }
-}
-
-
-static void
-r16g16b16a16_put_tile_rgba(short *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++, dst += 4) {
- dst[0] = TILE_PIXEL(p, j, i, 0) << 8;
- dst[1] = TILE_PIXEL(p, j, i, 1) << 8;
- dst[2] = TILE_PIXEL(p, j, i, 2) << 8;
- dst[3] = TILE_PIXEL(p, j, i, 3) << 8;
- }
- }
-}
-
-
-
-/*** PIPE_FORMAT_I8_UNORM ***/
-
-static void
-i8_get_tile_rgba(const ubyte *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++, src++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) =
- TILE_PIXEL(p, j, i, 3) = *src;
- }
- }
-}
-
-
-static void
-i8_put_tile_rgba(ubyte *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r;
- r = TILE_PIXEL(p, j, i, 0);
- *dst++ = (ubyte) r;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_A8L8_UNORM ***/
-
-static void
-a8l8_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- ushort ra = *src++;
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) = ra & 0xff;
- TILE_PIXEL(p, j, i, 3) = ra >> 8;
- }
- }
-}
-
-
-static void
-a8l8_put_tile_rgba(ushort *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r, a;
- r = TILE_PIXEL(p, j, i, 0);
- a = TILE_PIXEL(p, j, i, 3);
- *dst++ = (a << 8) | r;
- }
- }
-}
-
-
-
-
-/*** PIPE_FORMAT_Z32_UNORM ***/
-
-/**
- * Return each Z value as four floats in [0,1].
- */
-static void
-z32_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- const double scale = 1.0 / (double) 0xffffffff;
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) =
- TILE_PIXEL(p, j, i, 3) = (float) (*src++ * scale);
- }
- }
-}
-
-
-/*** PIPE_FORMAT_S8Z24_UNORM ***/
-
-/**
- * Return Z component as four float in [0,1]. Stencil part ignored.
- */
-static void
-s8z24_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- const double scale = 1.0 / ((1 << 24) - 1);
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) =
- TILE_PIXEL(p, j, i, 3) = (float) (scale * (*src++ & 0xffffff));
- }
- }
-}
-
-
-/*** PIPE_FORMAT_Z24S8_UNORM ***/
-
-/**
- * Return Z component as four float in [0,1]. Stencil part ignored.
- */
-static void
-z24s8_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- const double scale = 1.0 / ((1 << 24) - 1);
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) =
- TILE_PIXEL(p, j, i, 3) = (float) (scale * (*src++ >> 8));
- }
- }
-}
-
-
-/*** PIPE_FORMAT_Z32_FLOAT ***/
-
-/**
- * Return each Z value as four floats in [0,1].
- */
-static void
-z32f_get_tile_rgba(const float *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) =
- TILE_PIXEL(p, j, i, 3) = *src++;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/
-
-/**
- * Convert YCbCr (or YCrCb) to RGBA.
- */
-static void
-ycbcr_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- uint8_t *p,
- boolean rev)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- /* do two texels at a time */
- for (j = 0; j < (w & ~1); j += 2, src += 2) {
- const ushort t0 = src[0];
- const ushort t1 = src[1];
- const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */
- const ubyte y1 = (t1 >> 8) & 0xff; /* luminance */
- ubyte cb, cr;
- float r, g, b;
-
- if (rev) {
- cb = t1 & 0xff; /* chroma U */
- cr = t0 & 0xff; /* chroma V */
- }
- else {
- cb = t0 & 0xff; /* chroma U */
- cr = t1 & 0xff; /* chroma V */
- }
-
- /* even pixel: y0,cr,cb */
- r = 1.164f * (y0-16) + 1.596f * (cr-128);
- g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
- b = 1.164f * (y0-16) + 2.018f * (cb-128);
- TILE_PIXEL(p, j, i, 0) = r;
- TILE_PIXEL(p, j, i, 1) = g;
- TILE_PIXEL(p, j, i, 2) = b;
- TILE_PIXEL(p, j, i, 3) = 255;
-
- /* odd pixel: use y1,cr,cb */
- r = 1.164f * (y1-16) + 1.596f * (cr-128);
- g = 1.164f * (y1-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
- b = 1.164f * (y1-16) + 2.018f * (cb-128);
- TILE_PIXEL(p, j + 1, i, 0) = r;
- TILE_PIXEL(p, j + 1, i, 1) = g;
- TILE_PIXEL(p, j + 1, i, 2) = b;
- TILE_PIXEL(p, j + 1, i, 3) = 255;
- }
- /* do the last texel */
- if (w & 1) {
- const ushort t0 = src[0];
- const ushort t1 = src[1];
- const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */
- ubyte cb, cr;
- float r, g, b;
-
- if (rev) {
- cb = t1 & 0xff; /* chroma U */
- cr = t0 & 0xff; /* chroma V */
- }
- else {
- cb = t0 & 0xff; /* chroma U */
- cr = t1 & 0xff; /* chroma V */
- }
-
- /* even pixel: y0,cr,cb */
- r = 1.164f * (y0-16) + 1.596f * (cr-128);
- g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
- b = 1.164f * (y0-16) + 2.018f * (cb-128);
- TILE_PIXEL(p, j, i, 0) = r;
- TILE_PIXEL(p, j, i, 1) = g;
- TILE_PIXEL(p, j, i, 2) = b;
- TILE_PIXEL(p, j, i, 3) = 255;
- }
- }
-}
-
-
-static void
-fake_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) =
- TILE_PIXEL(p, j, i, 3) = (i ^ j) & 1 ? 255 : 0;
- }
- }
-}
-
-
-static void
-lp_tile_raw_to_rgba_soa(enum pipe_format format,
- void *src,
- uint w, uint h,
- uint8_t *p)
-{
- switch (format) {
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, p);
- break;
- case PIPE_FORMAT_X8R8G8B8_UNORM:
- x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, p);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, p);
- break;
- case PIPE_FORMAT_A1R5G5B5_UNORM:
- a1r5g5b5_get_tile_rgba((ushort *) src, w, h, p);
- break;
- case PIPE_FORMAT_A4R4G4B4_UNORM:
- a4r4g4b4_get_tile_rgba((ushort *) src, w, h, p);
- break;
- case PIPE_FORMAT_R5G6B5_UNORM:
- r5g6b5_get_tile_rgba((ushort *) src, w, h, p);
- break;
- case PIPE_FORMAT_L8_UNORM:
- l8_get_tile_rgba((ubyte *) src, w, h, p);
- break;
- case PIPE_FORMAT_A8_UNORM:
- a8_get_tile_rgba((ubyte *) src, w, h, p);
- break;
- case PIPE_FORMAT_I8_UNORM:
- i8_get_tile_rgba((ubyte *) src, w, h, p);
- break;
- case PIPE_FORMAT_A8L8_UNORM:
- a8l8_get_tile_rgba((ushort *) src, w, h, p);
- break;
- case PIPE_FORMAT_R16_SNORM:
- r16_get_tile_rgba((short *) src, w, h, p);
- break;
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- r16g16b16a16_get_tile_rgba((short *) src, w, h, p);
- break;
- case PIPE_FORMAT_Z16_UNORM:
- z16_get_tile_rgba((ushort *) src, w, h, p);
- break;
- case PIPE_FORMAT_Z32_UNORM:
- z32_get_tile_rgba((unsigned *) src, w, h, p);
- break;
- case PIPE_FORMAT_S8Z24_UNORM:
- case PIPE_FORMAT_X8Z24_UNORM:
- s8z24_get_tile_rgba((unsigned *) src, w, h, p);
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_Z24X8_UNORM:
- z24s8_get_tile_rgba((unsigned *) src, w, h, p);
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- z32f_get_tile_rgba((float *) src, w, h, p);
- break;
- case PIPE_FORMAT_YCBCR:
- ycbcr_get_tile_rgba((ushort *) src, w, h, p, FALSE);
- break;
- case PIPE_FORMAT_YCBCR_REV:
- ycbcr_get_tile_rgba((ushort *) src, w, h, p, TRUE);
- break;
- default:
- debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format));
- fake_get_tile_rgba(src, w, h, p);
- }
-}
-
-
-void
-lp_get_tile_rgba_soa(struct pipe_transfer *pt,
- uint x, uint y,
- uint8_t *p)
-{
- uint w = TILE_SIZE, h = TILE_SIZE;
- void *packed;
-
- if (pipe_clip_tile(x, y, &w, &h, pt))
- return;
-
- packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size);
-
- if (!packed)
- return;
-
- if(pt->format == PIPE_FORMAT_YCBCR || pt->format == PIPE_FORMAT_YCBCR_REV)
- assert((x & 1) == 0);
-
- pipe_get_tile_raw(pt, x, y, w, h, packed, 0);
-
- lp_tile_raw_to_rgba_soa(pt->format, packed, w, h, p);
-
- FREE(packed);
-}
-
-
-void
-lp_put_tile_rgba_soa(struct pipe_transfer *pt,
- uint x, uint y,
- const uint8_t *p)
-{
- uint w = TILE_SIZE, h = TILE_SIZE;
- void *packed;
-
- if (pipe_clip_tile(x, y, &w, &h, pt))
- return;
-
- packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size);
-
- if (!packed)
- return;
-
- switch (pt->format) {
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p);
- break;
- case PIPE_FORMAT_X8R8G8B8_UNORM:
- x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p);
- break;
- case PIPE_FORMAT_A1R5G5B5_UNORM:
- a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p);
- break;
- case PIPE_FORMAT_R5G6B5_UNORM:
- r5g6b5_put_tile_rgba((ushort *) packed, w, h, p);
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- assert(0);
- break;
- case PIPE_FORMAT_A4R4G4B4_UNORM:
- a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p);
- break;
- case PIPE_FORMAT_L8_UNORM:
- l8_put_tile_rgba((ubyte *) packed, w, h, p);
- break;
- case PIPE_FORMAT_A8_UNORM:
- a8_put_tile_rgba((ubyte *) packed, w, h, p);
- break;
- case PIPE_FORMAT_I8_UNORM:
- i8_put_tile_rgba((ubyte *) packed, w, h, p);
- break;
- case PIPE_FORMAT_A8L8_UNORM:
- a8l8_put_tile_rgba((ushort *) packed, w, h, p);
- break;
- case PIPE_FORMAT_R16_SNORM:
- r16_put_tile_rgba((short *) packed, w, h, p);
- break;
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- r16g16b16a16_put_tile_rgba((short *) packed, w, h, p);
- break;
- case PIPE_FORMAT_Z16_UNORM:
- /*z16_put_tile_rgba((ushort *) packed, w, h, p);*/
- break;
- case PIPE_FORMAT_Z32_UNORM:
- /*z32_put_tile_rgba((unsigned *) packed, w, h, p);*/
- break;
- case PIPE_FORMAT_S8Z24_UNORM:
- case PIPE_FORMAT_X8Z24_UNORM:
- /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p);*/
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_Z24X8_UNORM:
- /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p);*/
- break;
- default:
- debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(pt->format));
- }
-
- pipe_put_tile_raw(pt, x, y, w, h, packed, 0);
-
- FREE(packed);
-}
-
-
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
index 3d8c703b73..040b01865d 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
@@ -64,14 +64,18 @@ tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH];
void
-lp_get_tile_rgba_soa(struct pipe_transfer *pt,
- uint x, uint y,
- uint8_t *p);
+lp_tile_read_4ub(enum pipe_format format,
+ uint8_t *dst,
+ const void *src, unsigned src_stride,
+ unsigned x, unsigned y, unsigned w, unsigned h);
+
void
-lp_put_tile_rgba_soa(struct pipe_transfer *pt,
- uint x, uint y,
- const uint8_t *p);
+lp_tile_write_4ub(enum pipe_format format,
+ const uint8_t *src,
+ void *dst, unsigned dst_stride,
+ unsigned x, unsigned y, unsigned w, unsigned h);
+
#ifdef __cplusplus
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
new file mode 100644
index 0000000000..004c5c979e
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python
+
+'''
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Pixel format accessor functions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+'''
+
+
+import sys
+import os.path
+
+sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '../../auxiliary/util'))
+
+from u_format_access import *
+
+
+def generate_format_read(format, dst_type, dst_native_type, dst_suffix):
+ '''Generate the function to read pixels from a particular format'''
+
+ name = short_name(format)
+
+ src_native_type = native_type(format)
+
+ print 'static void'
+ print 'lp_tile_%s_read_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type)
+ print '{'
+ print ' unsigned x, y;'
+ print ' const uint8_t *src_row = src + y0*src_stride;'
+ print ' for (y = 0; y < h; ++y) {'
+ print ' const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride())
+ print ' for (x = 0; x < w; ++x) {'
+
+ names = ['']*4
+ if format.colorspace == 'rgb':
+ for i in range(4):
+ swizzle = format.out_swizzle[i]
+ if swizzle < 4:
+ names[swizzle] += 'rgba'[i]
+ elif format.colorspace == 'zs':
+ swizzle = format.out_swizzle[0]
+ if swizzle < 4:
+ names[swizzle] = 'z'
+ else:
+ assert False
+ else:
+ assert False
+
+ if format.layout == ARITH:
+ print ' %s pixel = *src_pixel++;' % src_native_type
+ shift = 0;
+ for i in range(4):
+ src_type = format.in_types[i]
+ width = src_type.size
+ if names[i]:
+ value = 'pixel'
+ mask = (1 << width) - 1
+ if shift:
+ value = '(%s >> %u)' % (value, shift)
+ if shift + width < format.block_size():
+ value = '(%s & 0x%x)' % (value, mask)
+ value = conversion_expr(src_type, dst_type, dst_native_type, value)
+ print ' %s %s = %s;' % (dst_native_type, names[i], value)
+ shift += width
+ elif format.layout == ARRAY:
+ for i in range(4):
+ src_type = format.in_types[i]
+ if names[i]:
+ value = '(*src_pixel++)'
+ value = conversion_expr(src_type, dst_type, dst_native_type, value)
+ print ' %s %s = %s;' % (dst_native_type, names[i], value)
+ else:
+ assert False
+
+ for i in range(4):
+ if format.colorspace == 'rgb':
+ swizzle = format.out_swizzle[i]
+ if swizzle < 4:
+ value = names[swizzle]
+ elif swizzle == SWIZZLE_0:
+ value = '0'
+ elif swizzle == SWIZZLE_1:
+ value = '1'
+ else:
+ assert False
+ elif format.colorspace == 'zs':
+ if i < 3:
+ value = 'z'
+ else:
+ value = '1'
+ else:
+ assert False
+ print ' TILE_PIXEL(dst, x, y, %u) = %s; /* %s */' % (i, value, 'rgba'[i])
+
+ print ' }'
+ print ' src_row += src_stride;'
+ print ' }'
+ print '}'
+ print
+
+
+def generate_format_write(format, src_type, src_native_type, src_suffix):
+ '''Generate the function to write pixels to a particular format'''
+
+ name = short_name(format)
+
+ dst_native_type = native_type(format)
+
+ print 'static void'
+ print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type)
+ print '{'
+ print ' unsigned x, y;'
+ print ' uint8_t *dst_row = dst + y0*dst_stride;'
+ print ' for (y = 0; y < h; ++y) {'
+ print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride())
+ print ' for (x = 0; x < w; ++x) {'
+
+ inv_swizzle = [None]*4
+ if format.colorspace == 'rgb':
+ for i in range(4):
+ swizzle = format.out_swizzle[i]
+ if swizzle < 4:
+ inv_swizzle[swizzle] = i
+ elif format.colorspace == 'zs':
+ swizzle = format.out_swizzle[0]
+ if swizzle < 4:
+ inv_swizzle[swizzle] = 0
+ else:
+ assert False
+
+ if format.layout == ARITH:
+ print ' %s pixel = 0;' % dst_native_type
+ shift = 0;
+ for i in range(4):
+ dst_type = format.in_types[i]
+ width = dst_type.size
+ if inv_swizzle[i] is not None:
+ value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i]
+ value = conversion_expr(src_type, dst_type, dst_native_type, value)
+ if shift:
+ value = '(%s << %u)' % (value, shift)
+ print ' pixel |= %s;' % value
+ shift += width
+ print ' *dst_pixel++ = pixel;'
+ elif format.layout == ARRAY:
+ for i in range(4):
+ dst_type = format.in_types[i]
+ if inv_swizzle[i] is not None:
+ value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i]
+ value = conversion_expr(src_type, dst_type, dst_native_type, value)
+ print ' *dst_pixel++ = %s;' % value
+ else:
+ assert False
+
+ print ' }'
+ print ' dst_row += dst_stride;'
+ print ' }'
+ print '}'
+ print
+
+
+def generate_read(formats, dst_type, dst_native_type, dst_suffix):
+ '''Generate the dispatch function to read pixels from any format'''
+
+ for format in formats:
+ if is_format_supported(format):
+ generate_format_read(format, dst_type, dst_native_type, dst_suffix)
+
+ print 'void'
+ print 'lp_tile_read_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type)
+ print '{'
+ print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type
+ print ' switch(format) {'
+ for format in formats:
+ if is_format_supported(format):
+ print ' case %s:' % format.name
+ print ' func = &lp_tile_%s_read_%s;' % (short_name(format), dst_suffix)
+ print ' break;'
+ print ' default:'
+ print ' debug_printf("unsupported format\\n");'
+ print ' return;'
+ print ' }'
+ print ' func(dst, (const uint8_t *)src, src_stride, x, y, w, h);'
+ print '}'
+ print
+
+
+def generate_write(formats, src_type, src_native_type, src_suffix):
+ '''Generate the dispatch function to write pixels to any format'''
+
+ for format in formats:
+ if is_format_supported(format):
+ generate_format_write(format, src_type, src_native_type, src_suffix)
+
+ print 'void'
+ print 'lp_tile_write_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type)
+
+ print '{'
+ print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type
+ print ' switch(format) {'
+ for format in formats:
+ if is_format_supported(format):
+ print ' case %s:' % format.name
+ print ' func = &lp_tile_%s_write_%s;' % (short_name(format), src_suffix)
+ print ' break;'
+ print ' default:'
+ print ' debug_printf("unsupported format\\n");'
+ print ' return;'
+ print ' }'
+ print ' func(src, (uint8_t *)dst, dst_stride, x, y, w, h);'
+ print '}'
+ print
+
+
+def main():
+ formats = []
+ for arg in sys.argv[1:]:
+ formats.extend(parse(arg))
+
+ print '/* This file is autogenerated by lp_tile_soa.py from u_format.csv. Do not edit directly. */'
+ print
+ # This will print the copyright message on the top of this file
+ print __doc__.strip()
+ print
+ print '#include "pipe/p_compiler.h"'
+ print '#include "util/u_format.h"'
+ print '#include "util/u_math.h"'
+ print '#include "lp_tile_soa.h"'
+ print
+ print 'const unsigned char'
+ print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {'
+ print ' { 0, 1, 4, 5, 8, 9, 12, 13},'
+ print ' { 2, 3, 6, 7, 10, 11, 14, 15}'
+ print '};'
+ print
+
+ generate_clamp()
+
+ type = Type(UNSIGNED, True, 8)
+ native_type = 'uint8_t'
+ suffix = '4ub'
+
+ generate_read(formats, type, native_type, suffix)
+ generate_write(formats, type, native_type, suffix)
+
+
+if __name__ == '__main__':
+ main()