summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/llvmpipe
diff options
context:
space:
mode:
authorNicolai Hähnle <nhaehnle@gmail.com>2009-09-21 13:26:50 +0200
committerNicolai Hähnle <nhaehnle@gmail.com>2009-09-21 13:26:50 +0200
commit81c7561d9d3faf70ac22c6a5f3fbea18f53eed92 (patch)
tree5ec097e0e1fe99804104d3a56923c16edf1533aa /src/gallium/drivers/llvmpipe
parentf02f63997ce65530788a6dfcb28f11790a14d938 (diff)
parent3083ba38f4c884b32cd0460607b5064b6b7008d2 (diff)
Merge branch 'master' into r300-compiler
There were additional non-textual conflicts. Conflicts: src/gallium/drivers/r300/r300_tgsi_to_rc.c src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c src/mesa/drivers/dri/r300/compiler/radeon_program.c src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile7
-rw-r--r--src/gallium/drivers/llvmpipe/README23
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.c258
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.h50
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend.h6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_const.c30
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_const.h32
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_conv.c46
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_conv.h14
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_debug.c17
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_debug.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.c18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.h6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_flow.c410
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_flow.h42
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format.h64
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_aos.c30
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_soa.c208
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_logic.c19
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_logic.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample.h135
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c416
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_struct.c21
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_struct.h12
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_swizzle.c21
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_swizzle.h6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_tgsi.h39
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c466
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_type.c28
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_type.h123
-rw-r--r--src/gallium/drivers/llvmpipe/lp_clear.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c39
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h30
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c25
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c238
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_sampler.c10
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test.h20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_blend.c34
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_conv.c76
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_format.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_main.c28
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_cache.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample.h23
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample_c.c (renamed from src/gallium/drivers/llvmpipe/lp_tex_sample.c)133
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c196
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_cache.c5
57 files changed, 2741 insertions, 726 deletions
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 6e63a0c2b7..cd7b6356d2 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -3,6 +3,8 @@ include $(TOP)/configs/current
LIBNAME = llvmpipe
+CFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS
+
C_SOURCES = \
lp_bld_alpha.c \
lp_bld_arit.c \
@@ -15,9 +17,11 @@ C_SOURCES = \
lp_bld_depth.c \
lp_bld_flow.c \
lp_bld_format_aos.c \
+ lp_bld_format_soa.c \
lp_bld_interp.c \
lp_bld_intr.c \
lp_bld_logic.c \
+ lp_bld_sample_soa.c \
lp_bld_swizzle.c \
lp_bld_struct.c \
lp_bld_tgsi_soa.c \
@@ -44,7 +48,8 @@ C_SOURCES = \
lp_state_vs.c \
lp_surface.c \
lp_tex_cache.c \
- lp_tex_sample.c \
+ lp_tex_sample_c.c \
+ lp_tex_sample_llvm.c \
lp_texture.c \
lp_tile_cache.c \
lp_tile_soa.c
diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index 498d21dea6..89d08834a3 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -8,13 +8,16 @@ Done so far is:
- the whole fragment pipeline is code generated in a single function
+ - input interpolation
+
- depth testing
+ - texture sampling (not all state/formats are supported)
+
- fragment shader TGSI translation
- same level of support as the TGSI SSE2 exec machine, with the exception
we don't fallback to TGSI interpretation when an unsupported opcode is
found, but just ignore it
- - texture sampling via an intrinsic call
- done in SoA layout
- input interpolation also code generated
@@ -28,16 +31,17 @@ Done so far is:
any width and length
- not all operations are implemented for these types yet though
-Most mesa/progs/demos/* work. Speed is on par with Keith's softpipe-opt branch,
-which includes hand written fast implementations for common cases.
+Most mesa/progs/demos/* work.
To do (probably by this order):
- code generate stipple and stencil testing
- - code generate texture sampling
+ - translate the remaining bits of texture sampling state
- translate TGSI control flow instructions, and all other remaining opcodes
+
+ - integrate with the draw module for VS code generation
- code generate the triangle setup and rasterization
@@ -93,7 +97,7 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as
make linux-llvm
-but the rest of these instructions assume scons is used.
+but the rest of these instructions assume that scons is used.
Using
@@ -108,6 +112,9 @@ or
export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/lib:$LD_LIBRARY_PATH
+For performance evaluation pass debug=no to scons, and use the corresponding
+lib directory without the "-debug" suffix.
+
Unit testing
============
@@ -119,7 +126,7 @@ build/linux-???-debug/gallium/drivers/llvmpipe:
- lp_test_conv: SIMD vector conversion
- lp_test_format: pixel unpacking/packing
-Some of this tests can output results and benchmarks to a tab-seperated-file
+Some of this tests can output results and benchmarks to a tab-separated-file
for posterior analysis, e.g.:
build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv
@@ -133,10 +140,10 @@ Development Notes
at the top of the lp_bld_*.c functions.
- All lp_bld_*.[ch] are isolated from the rest of the driver, and could/may be
- put in a standalone Gallium state -> LLVM IR translation module.
+ put in a stand-alone Gallium state -> LLVM IR translation module.
- We use LLVM-C bindings for now. They are not documented, but follow the C++
interfaces very closely, and appear to be complete enough for code
generation. See
http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
- for a standalone example.
+ for a stand-alone example.
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 5c29bdac56..f4a9a3b22e 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -3,7 +3,7 @@ Import('*')
env = env.Clone()
env.Tool('llvm')
-if 'LLVM_VERSION' not in env:
+if not env.has_key('LLVM_VERSION'):
print 'warning: LLVM not found: not building llvmpipe'
Return()
@@ -23,8 +23,10 @@ llvmpipe = env.ConvenienceLibrary(
'lp_bld_depth.c',
'lp_bld_flow.c',
'lp_bld_format_aos.c',
+ 'lp_bld_format_soa.c',
'lp_bld_interp.c',
'lp_bld_intr.c',
+ 'lp_bld_sample_soa.c',
'lp_bld_struct.c',
'lp_bld_logic.c',
'lp_bld_swizzle.c',
@@ -52,7 +54,8 @@ llvmpipe = env.ConvenienceLibrary(
'lp_state_vs.c',
'lp_surface.c',
'lp_tex_cache.c',
- 'lp_tex_sample.c',
+ 'lp_tex_sample_c.c',
+ 'lp_tex_sample_llvm.c',
'lp_texture.c',
'lp_tile_cache.c',
'lp_tile_soa.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
index 49c2f911af..2b4bc5c819 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
@@ -45,7 +45,7 @@
void
lp_build_alpha_test(LLVMBuilderRef builder,
const struct pipe_alpha_state *state,
- union lp_type type,
+ struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef alpha,
LLVMValueRef ref)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
index 9dbcdb4daa..634575670d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
@@ -38,14 +38,14 @@
#include <llvm-c/Core.h>
struct pipe_alpha_state;
-union lp_type;
+struct lp_type;
struct lp_build_mask_context;
void
lp_build_alpha_test(LLVMBuilderRef builder,
const struct pipe_alpha_state *state,
- union lp_type type,
+ struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef alpha,
LLVMValueRef ref);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 09a57ff33d..0b115fc9b0 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -65,7 +65,7 @@ lp_build_min_simple(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
const char *intrinsic = NULL;
LLVMValueRef cond;
@@ -113,7 +113,7 @@ lp_build_max_simple(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
const char *intrinsic = NULL;
LLVMValueRef cond;
@@ -159,7 +159,7 @@ LLVMValueRef
lp_build_comp(struct lp_build_context *bld,
LLVMValueRef a)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
if(a == bld->one)
return bld->zero;
@@ -188,7 +188,7 @@ lp_build_add(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMValueRef res;
if(a == bld->zero)
@@ -241,7 +241,7 @@ lp_build_sub(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMValueRef res;
if(b == bld->zero)
@@ -405,7 +405,7 @@ lp_build_mul(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
if(a == bld->zero)
return bld->zero;
@@ -477,7 +477,7 @@ lp_build_div(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
if(a == bld->zero)
return bld->zero;
@@ -502,6 +502,31 @@ lp_build_div(struct lp_build_context *bld,
}
+LLVMValueRef
+lp_build_lerp(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef v0,
+ LLVMValueRef v1)
+{
+ return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0)));
+}
+
+
+LLVMValueRef
+lp_build_lerp_2d(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef v00,
+ LLVMValueRef v01,
+ LLVMValueRef v10,
+ LLVMValueRef v11)
+{
+ LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01);
+ LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11);
+ return lp_build_lerp(bld, y, v0, v1);
+}
+
+
/**
* Generate min(a, b)
* Do checks for special cases.
@@ -565,21 +590,32 @@ LLVMValueRef
lp_build_abs(struct lp_build_context *bld,
LLVMValueRef a)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
if(!type.sign)
return a;
- /* XXX: is this really necessary? */
+ if(type.floating) {
+ /* Mask out the sign bit */
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1);
+ a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
+ a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+ return a;
+ }
+
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(!type.floating && type.width*type.length == 128) {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- if(type.width == 8)
+ if(type.width*type.length == 128) {
+ switch(type.width) {
+ case 8:
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
- if(type.width == 16)
+ case 16:
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a);
- if(type.width == 32)
+ case 32:
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
+ }
}
#endif
@@ -588,10 +624,188 @@ lp_build_abs(struct lp_build_context *bld,
LLVMValueRef
+lp_build_sgn(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMValueRef cond;
+ LLVMValueRef res;
+
+ /* Handle non-zero case */
+ if(!type.sign) {
+ /* if not zero then sign must be positive */
+ res = bld->one;
+ }
+ else if(type.floating) {
+ /* Take the sign bit and add it to 1 constant */
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+ LLVMValueRef one;
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ one = LLVMConstBitCast(bld->one, int_vec_type);
+ res = LLVMBuildOr(bld->builder, sign, one, "");
+ res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+ }
+ else
+ {
+ LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
+ cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
+ res = lp_build_select(bld, cond, bld->one, minus_one);
+ }
+
+ /* Handle zero */
+ cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
+ res = lp_build_select(bld, cond, bld->zero, bld->one);
+
+ return res;
+}
+
+
+enum lp_build_round_sse41_mode
+{
+ LP_BUILD_ROUND_SSE41_NEAREST = 0,
+ LP_BUILD_ROUND_SSE41_FLOOR = 1,
+ LP_BUILD_ROUND_SSE41_CEIL = 2,
+ LP_BUILD_ROUND_SSE41_TRUNCATE = 3
+};
+
+
+static INLINE LLVMValueRef
+lp_build_round_sse41(struct lp_build_context *bld,
+ LLVMValueRef a,
+ enum lp_build_round_sse41_mode mode)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ const char *intrinsic;
+
+ assert(type.floating);
+ assert(type.width*type.length == 128);
+
+ switch(type.width) {
+ case 32:
+ intrinsic = "llvm.x86.sse41.round.ps";
+ break;
+ case 64:
+ intrinsic = "llvm.x86.sse41.round.pd";
+ break;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+
+ return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
+ LLVMConstInt(LLVMInt32Type(), mode, 0));
+}
+
+
+LLVMValueRef
+lp_build_round(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+
+ assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
+#endif
+
+ /* FIXME */
+ assert(0);
+ return bld->undef;
+}
+
+
+LLVMValueRef
+lp_build_floor(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+
+ assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
+#endif
+
+ /* FIXME */
+ assert(0);
+ return bld->undef;
+}
+
+
+LLVMValueRef
+lp_build_ceil(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+
+ assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
+#endif
+
+ /* FIXME */
+ assert(0);
+ return bld->undef;
+}
+
+
+LLVMValueRef
+lp_build_trunc(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+
+ assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
+#endif
+
+ /* FIXME */
+ assert(0);
+ return bld->undef;
+}
+
+
+/**
+ * Convert to integer, through whichever rounding method that's fastest,
+ * typically truncating to zero.
+ */
+LLVMValueRef
+lp_build_int(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+
+ assert(type.floating);
+
+ return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+}
+
+
+LLVMValueRef
+lp_build_ifloor(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ a = lp_build_floor(bld, a);
+ a = lp_build_int(bld, a);
+ return a;
+}
+
+
+LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
@@ -609,7 +823,7 @@ LLVMValueRef
lp_build_rcp(struct lp_build_context *bld,
LLVMValueRef a)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
if(a == bld->zero)
return bld->undef;
@@ -640,7 +854,7 @@ LLVMValueRef
lp_build_rsqrt(struct lp_build_context *bld,
LLVMValueRef a)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
assert(type.floating);
@@ -661,7 +875,7 @@ LLVMValueRef
lp_build_cos(struct lp_build_context *bld,
LLVMValueRef a)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
@@ -681,7 +895,7 @@ LLVMValueRef
lp_build_sin(struct lp_build_context *bld,
LLVMValueRef a)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
char intrinsic[32];
@@ -752,7 +966,7 @@ lp_build_polynomial(struct lp_build_context *bld,
const double *coeffs,
unsigned num_coeffs)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMValueRef res = NULL;
unsigned i;
@@ -800,7 +1014,7 @@ lp_build_exp2_approx(struct lp_build_context *bld,
LLVMValueRef *p_frac_part,
LLVMValueRef *p_exp2)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
LLVMValueRef ipart = NULL;
@@ -893,7 +1107,7 @@ lp_build_log2_approx(struct lp_build_context *bld,
LLVMValueRef *p_floor_log2,
LLVMValueRef *p_log2)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index fc8cb25966..d68a97c4b8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -40,7 +40,7 @@
#include <llvm-c/Core.h>
-union lp_type type;
+struct lp_type type;
struct lp_build_context;
@@ -72,6 +72,26 @@ lp_build_div(struct lp_build_context *bld,
LLVMValueRef b);
LLVMValueRef
+lp_build_lerp(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef v0,
+ LLVMValueRef v1);
+
+/**
+ * Bilinear interpolation.
+ *
+ * Values indices are in v_{yx}.
+ */
+LLVMValueRef
+lp_build_lerp_2d(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef v00,
+ LLVMValueRef v01,
+ LLVMValueRef v10,
+ LLVMValueRef v11);
+
+LLVMValueRef
lp_build_min(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
@@ -86,6 +106,34 @@ lp_build_abs(struct lp_build_context *bld,
LLVMValueRef a);
LLVMValueRef
+lp_build_sgn(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_round(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_floor(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_ceil(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_trunc(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_int(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_ifloor(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
index d19e18846c..da272e549f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
@@ -46,7 +46,7 @@
struct pipe_blend_state;
-union lp_type;
+struct lp_type;
struct lp_build_context;
@@ -74,7 +74,7 @@ lp_build_blend_func(struct lp_build_context *bld,
LLVMValueRef
lp_build_blend_aos(LLVMBuilderRef builder,
const struct pipe_blend_state *blend,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef src,
LLVMValueRef dst,
LLVMValueRef const_,
@@ -84,7 +84,7 @@ lp_build_blend_aos(LLVMBuilderRef builder,
void
lp_build_blend_soa(LLVMBuilderRef builder,
const struct pipe_blend_state *blend,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef src[4],
LLVMValueRef dst[4],
LLVMValueRef const_[4],
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index c11a9398f8..d14f468ba9 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -303,7 +303,7 @@ lp_build_blend_func(struct lp_build_context *bld,
LLVMValueRef
lp_build_blend_aos(LLVMBuilderRef builder,
const struct pipe_blend_state *blend,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef src,
LLVMValueRef dst,
LLVMValueRef const_,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
index b92254a7d6..9511299d55 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
@@ -199,7 +199,7 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,
void
lp_build_blend_soa(LLVMBuilderRef builder,
const struct pipe_blend_state *blend,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef src[4],
LLVMValueRef dst[4],
LLVMValueRef con[4],
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.c b/src/gallium/drivers/llvmpipe/lp_bld_const.c
index 21487365ea..c8eaa8c394 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_const.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_const.c
@@ -42,7 +42,7 @@
unsigned
-lp_mantissa(union lp_type type)
+lp_mantissa(struct lp_type type)
{
assert(type.floating);
@@ -72,7 +72,7 @@ lp_mantissa(union lp_type type)
* Same as lp_const_scale(), but in terms of shifts.
*/
unsigned
-lp_const_shift(union lp_type type)
+lp_const_shift(struct lp_type type)
{
if(type.floating)
return 0;
@@ -86,7 +86,7 @@ lp_const_shift(union lp_type type)
unsigned
-lp_const_offset(union lp_type type)
+lp_const_offset(struct lp_type type)
{
if(type.floating || type.fixed)
return 0;
@@ -104,7 +104,7 @@ lp_const_offset(union lp_type type)
* else for the fixed points types and normalized integers.
*/
double
-lp_const_scale(union lp_type type)
+lp_const_scale(struct lp_type type)
{
unsigned long long llscale;
double dscale;
@@ -122,7 +122,7 @@ lp_const_scale(union lp_type type)
* Minimum value representable by the type.
*/
double
-lp_const_min(union lp_type type)
+lp_const_min(struct lp_type type)
{
unsigned bits;
@@ -158,7 +158,7 @@ lp_const_min(union lp_type type)
* Maximum value representable by the type.
*/
double
-lp_const_max(union lp_type type)
+lp_const_max(struct lp_type type)
{
unsigned bits;
@@ -190,7 +190,7 @@ lp_const_max(union lp_type type)
double
-lp_const_eps(union lp_type type)
+lp_const_eps(struct lp_type type)
{
if (type.floating) {
switch(type.width) {
@@ -211,7 +211,7 @@ lp_const_eps(union lp_type type)
LLVMValueRef
-lp_build_undef(union lp_type type)
+lp_build_undef(struct lp_type type)
{
LLVMTypeRef vec_type = lp_build_vec_type(type);
return LLVMGetUndef(vec_type);
@@ -219,7 +219,7 @@ lp_build_undef(union lp_type type)
LLVMValueRef
-lp_build_zero(union lp_type type)
+lp_build_zero(struct lp_type type)
{
LLVMTypeRef vec_type = lp_build_vec_type(type);
return LLVMConstNull(vec_type);
@@ -227,7 +227,7 @@ lp_build_zero(union lp_type type)
LLVMValueRef
-lp_build_one(union lp_type type)
+lp_build_one(struct lp_type type)
{
LLVMTypeRef elem_type;
LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
@@ -269,7 +269,7 @@ lp_build_one(union lp_type type)
LLVMValueRef
-lp_build_const_scalar(union lp_type type,
+lp_build_const_scalar(struct lp_type type,
double val)
{
LLVMTypeRef elem_type = lp_build_elem_type(type);
@@ -295,7 +295,7 @@ lp_build_const_scalar(union lp_type type,
LLVMValueRef
-lp_build_int_const_scalar(union lp_type type,
+lp_build_int_const_scalar(struct lp_type type,
long long val)
{
LLVMTypeRef elem_type = lp_build_int_elem_type(type);
@@ -312,7 +312,7 @@ lp_build_int_const_scalar(union lp_type type,
LLVMValueRef
-lp_build_const_aos(union lp_type type,
+lp_build_const_aos(struct lp_type type,
double r, double g, double b, double a,
const unsigned char *swizzle)
{
@@ -352,8 +352,8 @@ lp_build_const_aos(union lp_type type,
LLVMValueRef
-lp_build_const_mask_aos(union lp_type type,
- boolean cond[4])
+lp_build_const_mask_aos(struct lp_type type,
+ const boolean cond[4])
{
LLVMTypeRef elem_type = LLVMIntType(type.width);
LLVMValueRef masks[LP_MAX_VECTOR_LENGTH];
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h
index 1934530ea3..ffb302f736 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_const.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h
@@ -42,67 +42,67 @@
#include <pipe/p_compiler.h>
-union lp_type type;
+struct lp_type type;
unsigned
-lp_mantissa(union lp_type type);
+lp_mantissa(struct lp_type type);
unsigned
-lp_const_shift(union lp_type type);
+lp_const_shift(struct lp_type type);
unsigned
-lp_const_offset(union lp_type type);
+lp_const_offset(struct lp_type type);
double
-lp_const_scale(union lp_type type);
+lp_const_scale(struct lp_type type);
double
-lp_const_min(union lp_type type);
+lp_const_min(struct lp_type type);
double
-lp_const_max(union lp_type type);
+lp_const_max(struct lp_type type);
double
-lp_const_eps(union lp_type type);
+lp_const_eps(struct lp_type type);
LLVMValueRef
-lp_build_undef(union lp_type type);
+lp_build_undef(struct lp_type type);
LLVMValueRef
-lp_build_zero(union lp_type type);
+lp_build_zero(struct lp_type type);
LLVMValueRef
-lp_build_one(union lp_type type);
+lp_build_one(struct lp_type type);
LLVMValueRef
-lp_build_const_scalar(union lp_type type,
+lp_build_const_scalar(struct lp_type type,
double val);
LLVMValueRef
-lp_build_int_const_scalar(union lp_type type,
+lp_build_int_const_scalar(struct lp_type type,
long long val);
LLVMValueRef
-lp_build_const_aos(union lp_type type,
+lp_build_const_aos(struct lp_type type,
double r, double g, double b, double a,
const unsigned char *swizzle);
LLVMValueRef
-lp_build_const_mask_aos(union lp_type type,
- boolean cond[4]);
+lp_build_const_mask_aos(struct lp_type type,
+ const boolean cond[4]);
#endif /* !LP_BLD_CONST_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
index c8954c8a34..186cac70f6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
@@ -86,7 +86,7 @@
*/
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
- union lp_type src_type,
+ struct lp_type src_type,
unsigned dst_width,
LLVMValueRef src)
{
@@ -122,7 +122,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
int shift = dst_width - n;
res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");
- /* Fill in the empty lower bits for added precision? */
+ /* TODO: Fill in the empty lower bits for additional precision? */
#if 0
{
LLVMValueRef msb;
@@ -152,7 +152,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
unsigned src_width,
- union lp_type dst_type,
+ struct lp_type dst_type,
LLVMValueRef src)
{
LLVMTypeRef vec_type = lp_build_vec_type(dst_type);
@@ -244,12 +244,12 @@ lp_build_const_pack_shuffle(unsigned n)
* Expand the bit width.
*
* This will only change the number of bits the values are represented, not the
- * values themselved.
+ * values themselves.
*/
static void
lp_build_expand(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
+ struct lp_type src_type,
+ struct lp_type dst_type,
LLVMValueRef src,
LLVMValueRef *dst, unsigned num_dsts)
{
@@ -266,7 +266,7 @@ lp_build_expand(LLVMBuilderRef builder,
dst[0] = src;
while(src_type.width < dst_type.width) {
- union lp_type new_type = src_type;
+ struct lp_type new_type = src_type;
LLVMTypeRef new_vec_type;
new_type.width *= 2;
@@ -314,8 +314,8 @@ lp_build_expand(LLVMBuilderRef builder,
*/
static LLVMValueRef
lp_build_pack2(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
+ struct lp_type src_type,
+ struct lp_type dst_type,
boolean clamped,
LLVMValueRef lo,
LLVMValueRef hi)
@@ -391,11 +391,11 @@ lp_build_pack2(LLVMBuilderRef builder,
* TODO: Handle saturation consistently.
*/
static LLVMValueRef
-lp_build_trunc(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
- boolean clamped,
- const LLVMValueRef *src, unsigned num_srcs)
+lp_build_pack(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ boolean clamped,
+ const LLVMValueRef *src, unsigned num_srcs)
{
LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
unsigned i;
@@ -410,7 +410,7 @@ lp_build_trunc(LLVMBuilderRef builder,
tmp[i] = src[i];
while(src_type.width > dst_type.width) {
- union lp_type new_type = src_type;
+ struct lp_type new_type = src_type;
new_type.width /= 2;
new_type.length *= 2;
@@ -442,12 +442,12 @@ lp_build_trunc(LLVMBuilderRef builder,
*/
void
lp_build_conv(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
+ struct lp_type src_type,
+ struct lp_type dst_type,
const LLVMValueRef *src, unsigned num_srcs,
LLVMValueRef *dst, unsigned num_dsts)
{
- union lp_type tmp_type;
+ struct lp_type tmp_type;
LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
unsigned num_tmps;
unsigned i;
@@ -470,7 +470,7 @@ lp_build_conv(LLVMBuilderRef builder,
* Clamp if necessary
*/
- if(src_type.value != dst_type.value) {
+ if(memcmp(&src_type, &dst_type, sizeof src_type) != 0) {
struct lp_build_context bld;
double src_min = lp_const_min(src_type);
double dst_min = lp_const_min(dst_type);
@@ -565,7 +565,7 @@ lp_build_conv(LLVMBuilderRef builder,
if(tmp_type.width > dst_type.width) {
assert(num_dsts == 1);
- tmp[0] = lp_build_trunc(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
+ tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
tmp_type.width = dst_type.width;
tmp_type.length = dst_type.length;
num_tmps = 1;
@@ -656,8 +656,8 @@ lp_build_conv(LLVMBuilderRef builder,
*/
void
lp_build_conv_mask(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
+ struct lp_type src_type,
+ struct lp_type dst_type,
const LLVMValueRef *src, unsigned num_srcs,
LLVMValueRef *dst, unsigned num_dsts)
{
@@ -689,7 +689,7 @@ lp_build_conv_mask(LLVMBuilderRef builder,
if(src_type.width > dst_type.width) {
assert(num_dsts == 1);
- dst[0] = lp_build_trunc(builder, src_type, dst_type, TRUE, src, num_srcs);
+ dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);
}
else if(src_type.width < dst_type.width) {
assert(num_srcs == 1);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
index 05c1ef2a10..ca378804d2 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
@@ -40,33 +40,33 @@
#include <llvm-c/Core.h>
-union lp_type type;
+struct lp_type type;
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
- union lp_type src_type,
+ struct lp_type src_type,
unsigned dst_width,
LLVMValueRef src);
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
unsigned src_width,
- union lp_type dst_type,
+ struct lp_type dst_type,
LLVMValueRef src);
void
lp_build_conv(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
+ struct lp_type src_type,
+ struct lp_type dst_type,
const LLVMValueRef *srcs, unsigned num_srcs,
LLVMValueRef *dsts, unsigned num_dsts);
void
lp_build_conv_mask(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
+ struct lp_type src_type,
+ struct lp_type dst_type,
const LLVMValueRef *src, unsigned num_srcs,
LLVMValueRef *dst, unsigned num_dsts);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
index 30925b5f41..59d8f492e6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
@@ -30,10 +30,27 @@
#include <udis86.h>
#endif
+#include "util/u_math.h"
#include "util/u_debug.h"
#include "lp_bld_debug.h"
+/**
+ * Check alignment.
+ *
+ * It is important that this check is not implemented as a macro or inlined
+ * function, as the compiler assumptions in respect to alignment of global
+ * and stack variables would often make the check a no op, defeating the
+ * whole purpose of the exercise.
+ */
+boolean
+lp_check_alignment(const void *ptr, unsigned alignment)
+{
+ assert(util_is_pot(alignment));
+ return ((uintptr_t)ptr & (alignment - 1)) == 0;
+}
+
+
void
lp_disassemble(const void* func)
{
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.h b/src/gallium/drivers/llvmpipe/lp_bld_debug.h
index ecdafef76d..583e6132b4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_debug.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.h
@@ -53,6 +53,10 @@ lp_build_name(LLVMValueRef val, const char *format, ...)
}
+boolean
+lp_check_alignment(const void *ptr, unsigned alignment);
+
+
void
lp_disassemble(const void* func);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 2cd6e6b921..21c665c4d4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -71,11 +71,11 @@
/**
* Return a type appropriate for depth/stencil testing.
*/
-union lp_type
+struct lp_type
lp_depth_type(const struct util_format_description *format_desc,
unsigned length)
{
- union lp_type type;
+ struct lp_type type;
unsigned swizzle;
assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
@@ -85,7 +85,7 @@ lp_depth_type(const struct util_format_description *format_desc,
swizzle = format_desc->swizzle[0];
assert(swizzle < 4);
- type.value = 0;
+ memset(&type, 0, sizeof type);
type.width = format_desc->block.bits;
if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
@@ -114,7 +114,7 @@ lp_depth_type(const struct util_format_description *format_desc,
void
lp_build_depth_test(LLVMBuilderRef builder,
const struct pipe_depth_state *state,
- union lp_type type,
+ struct lp_type type,
const struct util_format_description *format_desc,
struct lp_build_mask_context *mask,
LLVMValueRef src,
@@ -179,12 +179,13 @@ lp_build_depth_test(LLVMBuilderRef builder,
padding_right = 0;
for(chan = 0; chan < z_swizzle; ++chan)
padding_right += format_desc->channel[chan].size;
- padding_left = format_desc->block.bits - format_desc->channel[z_swizzle].size;
+ padding_left = format_desc->block.bits -
+ (padding_right + format_desc->channel[z_swizzle].size);
if(padding_left || padding_right) {
- const long long mask_left = ((long long)1 << (format_desc->block.bits - padding_left)) - 1;
- const long long mask_right = ((long long)1 << (padding_right)) - 1;
- z_bitmask = lp_build_int_const_scalar(type, mask_left & mask_right);
+ const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1;
+ const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1;
+ z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right);
}
if(padding_left)
@@ -210,5 +211,6 @@ lp_build_depth_test(LLVMBuilderRef builder,
LLVMBuildStore(builder, dst, dst_ptr);
}
+ /* FIXME */
assert(!state->occlusion_count);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index 5d2e042fcc..79d6981bb5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -41,11 +41,11 @@
struct pipe_depth_state;
struct util_format_description;
-union lp_type;
+struct lp_type;
struct lp_build_mask_context;
-union lp_type
+struct lp_type
lp_depth_type(const struct util_format_description *format_desc,
unsigned length);
@@ -53,7 +53,7 @@ lp_depth_type(const struct util_format_description *format_desc,
void
lp_build_depth_test(LLVMBuilderRef builder,
const struct pipe_depth_state *state,
- union lp_type type,
+ struct lp_type type,
const struct util_format_description *format_desc,
struct lp_build_mask_context *mask,
LLVMValueRef src,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
index 9d99e1a9d9..dcc25fbff8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
@@ -32,59 +32,261 @@
*/
#include "util/u_debug.h"
+#include "util/u_memory.h"
#include "lp_bld_type.h"
#include "lp_bld_flow.h"
+#define LP_BUILD_FLOW_MAX_VARIABLES 32
+#define LP_BUILD_FLOW_MAX_DEPTH 32
+
+
+/**
+ * Enumeration of all possible flow constructs.
+ */
+enum lp_build_flow_construct_kind {
+ lP_BUILD_FLOW_SCOPE,
+ LP_BUILD_FLOW_SKIP,
+};
+
+
+/**
+ * Variable declaration scope.
+ */
+struct lp_build_flow_scope
+{
+ /** Number of variables declared in this scope */
+ unsigned num_variables;
+};
+
+
+/**
+ * Early exit. Useful to skip to the end of a function or block when
+ * the execution mask becomes zero or when there is an error condition.
+ */
+struct lp_build_flow_skip
+{
+ /** Block to skip to */
+ LLVMBasicBlockRef block;
+
+ /** Number of variables declared at the beginning */
+ unsigned num_variables;
+
+ LLVMValueRef *phi;
+};
+
+
+/**
+ * Union of all possible flow constructs' data
+ */
+union lp_build_flow_construct_data
+{
+ struct lp_build_flow_scope scope;
+ struct lp_build_flow_skip skip;
+};
+
+
+/**
+ * Element of the flow construct stack.
+ */
+struct lp_build_flow_construct
+{
+ enum lp_build_flow_construct_kind kind;
+ union lp_build_flow_construct_data data;
+};
+
+
+/**
+ * All necessary data to generate LLVM control flow constructs.
+ *
+ * Besides keeping track of the control flow construct themselves we also
+ * need to keep track of variables in order to generate SSA Phi values.
+ */
+struct lp_build_flow_context
+{
+ LLVMBuilderRef builder;
+
+ /**
+ * Control flow stack.
+ */
+ struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH];
+ unsigned num_constructs;
+
+ /**
+ * Variable stack
+ */
+ LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES];
+ unsigned num_variables;
+};
+
+
+struct lp_build_flow_context *
+lp_build_flow_create(LLVMBuilderRef builder)
+{
+ struct lp_build_flow_context *flow;
+
+ flow = CALLOC_STRUCT(lp_build_flow_context);
+ if(!flow)
+ return NULL;
+
+ flow->builder = builder;
+
+ return flow;
+}
+
+
void
-lp_build_mask_begin(struct lp_build_mask_context *mask,
- LLVMBuilderRef builder,
- union lp_type type,
- LLVMValueRef value)
+lp_build_flow_destroy(struct lp_build_flow_context *flow)
{
- memset(mask, 0, sizeof *mask);
+ assert(flow->num_constructs == 0);
+ assert(flow->num_variables == 0);
+ FREE(flow);
+}
- mask->builder = builder;
- mask->reg_type = LLVMIntType(type.width * type.length);
- mask->value = value;
+
+static union lp_build_flow_construct_data *
+lp_build_flow_push(struct lp_build_flow_context *flow,
+ enum lp_build_flow_construct_kind kind)
+{
+ assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH);
+ if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH)
+ return NULL;
+
+ flow->constructs[flow->num_constructs].kind = kind;
+ return &flow->constructs[flow->num_constructs++].data;
+}
+
+
+static union lp_build_flow_construct_data *
+lp_build_flow_peek(struct lp_build_flow_context *flow,
+ enum lp_build_flow_construct_kind kind)
+{
+ assert(flow->num_constructs);
+ if(!flow->num_constructs)
+ return NULL;
+
+ assert(flow->constructs[flow->num_constructs - 1].kind == kind);
+ if(flow->constructs[flow->num_constructs - 1].kind != kind)
+ return NULL;
+
+ return &flow->constructs[flow->num_constructs - 1].data;
}
+static union lp_build_flow_construct_data *
+lp_build_flow_pop(struct lp_build_flow_context *flow,
+ enum lp_build_flow_construct_kind kind)
+{
+ assert(flow->num_constructs);
+ if(!flow->num_constructs)
+ return NULL;
+
+ assert(flow->constructs[flow->num_constructs - 1].kind == kind);
+ if(flow->constructs[flow->num_constructs - 1].kind != kind)
+ return NULL;
+
+ return &flow->constructs[--flow->num_constructs].data;
+}
+
+
+/**
+ * Begin a variable scope.
+ *
+ *
+ */
void
-lp_build_mask_update(struct lp_build_mask_context *mask,
- LLVMValueRef value)
+lp_build_flow_scope_begin(struct lp_build_flow_context *flow)
{
+ struct lp_build_flow_scope *scope;
- LLVMValueRef cond;
- LLVMBasicBlockRef current_block;
- LLVMBasicBlockRef next_block;
- LLVMBasicBlockRef new_block;
+ scope = &lp_build_flow_push(flow, lP_BUILD_FLOW_SCOPE)->scope;
+ if(!scope)
+ return;
- if(mask->value)
- mask->value = LLVMBuildAnd(mask->builder, mask->value, value, "");
- else
- mask->value = value;
+ scope->num_variables = 0;
+}
- /* FIXME: disabled until we have proper control flow helpers */
-#if 0
- cond = LLVMBuildICmp(mask->builder,
- LLVMIntEQ,
- LLVMBuildBitCast(mask->builder, mask->value, mask->reg_type, ""),
- LLVMConstNull(mask->reg_type),
- "");
- current_block = LLVMGetInsertBlock(mask->builder);
+/**
+ * Declare a variable.
+ *
+ * A variable is a named entity which can have different LLVMValueRef's at
+ * different points of the program. This is relevant for control flow because
+ * when there are mutiple branches to a same location we need to replace
+ * the variable's value with a Phi function as explained in
+ * http://en.wikipedia.org/wiki/Static_single_assignment_form .
+ *
+ * We keep track of variables by keeping around a pointer to where their
+ * current.
+ *
+ * There are a few cautions to observe:
+ *
+ * - Variable's value must not be NULL. If there is no initial value then
+ * LLVMGetUndef() should be used.
+ *
+ * - Variable's value must be kept up-to-date. If the variable is going to be
+ * modified by a function then a pointer should be passed so that its value
+ * is accurate. Failure to do this will cause some of the variables'
+ * transient values to be lost, leading to wrong results.
+ *
+ * - A program should be written from top to bottom, by always appending
+ * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or
+ * modifying existing statements will most likely lead to wrong results.
+ *
+ */
+void
+lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
+ LLVMValueRef *variable)
+{
+ struct lp_build_flow_scope *scope;
+
+ scope = &lp_build_flow_peek(flow, lP_BUILD_FLOW_SCOPE)->scope;
+ if(!scope)
+ return;
- if(!mask->skip_block) {
- LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
- mask->skip_block = LLVMAppendBasicBlock(function, "skip");
+ assert(*variable);
+ if(!*variable)
+ return;
+
+ assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES);
+ if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES)
+ return;
+
+ flow->variables[flow->num_variables++] = variable;
+ ++scope->num_variables;
+}
+
+
+void
+lp_build_flow_scope_end(struct lp_build_flow_context *flow)
+{
+ struct lp_build_flow_scope *scope;
+
+ scope = &lp_build_flow_pop(flow, lP_BUILD_FLOW_SCOPE)->scope;
+ if(!scope)
+ return;
- mask->phi = LLVMBuildPhi(mask->builder, LLVMTypeOf(mask->value), "");
+ assert(flow->num_variables >= scope->num_variables);
+ if(flow->num_variables < scope->num_variables) {
+ flow->num_variables = 0;
+ return;
}
+ flow->num_variables -= scope->num_variables;
+}
+
+
+static LLVMBasicBlockRef
+lp_build_flow_insert_block(struct lp_build_flow_context *flow)
+{
+ LLVMBasicBlockRef current_block;
+ LLVMBasicBlockRef next_block;
+ LLVMBasicBlockRef new_block;
+
+ current_block = LLVMGetInsertBlock(flow->builder);
+
next_block = LLVMGetNextBasicBlock(current_block);
- assert(next_block);
if(next_block) {
new_block = LLVMInsertBasicBlock(next_block, "");
}
@@ -93,30 +295,148 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
new_block = LLVMAppendBasicBlock(function, "");
}
- LLVMAddIncoming(mask->phi, &mask->value, &current_block, 1);
- LLVMBuildCondBr(mask->builder, cond, mask->skip_block, new_block);
+ return new_block;
+}
+
+void
+lp_build_flow_skip_begin(struct lp_build_flow_context *flow)
+{
+ struct lp_build_flow_skip *skip;
+ LLVMBuilderRef builder;
+ unsigned i;
+
+ skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip;
+ if(!skip)
+ return;
+
+ skip->block = lp_build_flow_insert_block(flow);
+ skip->num_variables = flow->num_variables;
+ if(!skip->num_variables) {
+ skip->phi = NULL;
+ return;
+ }
- LLVMPositionBuilderAtEnd(mask->builder, new_block);
-#endif
+ skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi);
+ if(!skip->phi) {
+ skip->num_variables = 0;
+ return;
+ }
+
+ builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, skip->block);
+
+ for(i = 0; i < skip->num_variables; ++i)
+ skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");
+
+ LLVMDisposeBuilder(builder);
}
-LLVMValueRef
-lp_build_mask_end(struct lp_build_mask_context *mask)
+void
+lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
+ LLVMValueRef cond)
+{
+ struct lp_build_flow_skip *skip;
+ LLVMBasicBlockRef current_block;
+ LLVMBasicBlockRef new_block;
+ unsigned i;
+
+ skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip;
+ if(!skip)
+ return;
+
+ current_block = LLVMGetInsertBlock(flow->builder);
+
+ new_block = lp_build_flow_insert_block(flow);
+
+ for(i = 0; i < skip->num_variables; ++i) {
+ assert(*flow->variables[i]);
+ LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
+ }
+
+ LLVMBuildCondBr(flow->builder, cond, skip->block, new_block);
+
+ LLVMPositionBuilderAtEnd(flow->builder, new_block);
+ }
+
+
+void
+lp_build_flow_skip_end(struct lp_build_flow_context *flow)
{
- if(mask->skip_block) {
- LLVMBasicBlockRef current_block = LLVMGetInsertBlock(mask->builder);
+ struct lp_build_flow_skip *skip;
+ LLVMBasicBlockRef current_block;
+ unsigned i;
- LLVMAddIncoming(mask->phi, &mask->value, &current_block, 1);
- LLVMBuildBr(mask->builder, mask->skip_block);
+ skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip;
+ if(!skip)
+ return;
- LLVMPositionBuilderAtEnd(mask->builder, mask->skip_block);
+ current_block = LLVMGetInsertBlock(flow->builder);
- mask->value = mask->phi;
- mask->phi = NULL;
- mask->skip_block = NULL;
+ for(i = 0; i < skip->num_variables; ++i) {
+ assert(*flow->variables[i]);
+ LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
+ *flow->variables[i] = skip->phi[i];
}
+ LLVMBuildBr(flow->builder, skip->block);
+ LLVMPositionBuilderAtEnd(flow->builder, skip->block);
+
+ FREE(skip->phi);
+}
+
+
+static void
+lp_build_mask_check(struct lp_build_mask_context *mask)
+{
+ LLVMBuilderRef builder = mask->flow->builder;
+ LLVMValueRef cond;
+
+ cond = LLVMBuildICmp(builder,
+ LLVMIntEQ,
+ LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""),
+ LLVMConstNull(mask->reg_type),
+ "");
+
+ lp_build_flow_skip_cond_break(mask->flow, cond);
+}
+
+
+void
+lp_build_mask_begin(struct lp_build_mask_context *mask,
+ struct lp_build_flow_context *flow,
+ struct lp_type type,
+ LLVMValueRef value)
+{
+ memset(mask, 0, sizeof *mask);
+
+ mask->flow = flow;
+ mask->reg_type = LLVMIntType(type.width * type.length);
+ mask->value = value;
+
+ lp_build_flow_scope_begin(flow);
+ lp_build_flow_scope_declare(flow, &mask->value);
+ lp_build_flow_skip_begin(flow);
+
+ lp_build_mask_check(mask);
+}
+
+
+void
+lp_build_mask_update(struct lp_build_mask_context *mask,
+ LLVMValueRef value)
+{
+ mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, "");
+
+ lp_build_mask_check(mask);
+}
+
+
+LLVMValueRef
+lp_build_mask_end(struct lp_build_mask_context *mask)
+{
+ lp_build_flow_skip_end(mask->flow);
+ lp_build_flow_scope_end(mask->flow);
return mask->value;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h
index 1b634ff038..e61999ff06 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h
@@ -38,27 +38,53 @@
#include <llvm-c/Core.h>
-union lp_type;
+struct lp_type;
+
+
+struct lp_build_flow_context;
+
+
+struct lp_build_flow_context *
+lp_build_flow_create(LLVMBuilderRef builder);
+
+void
+lp_build_flow_destroy(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_scope_begin(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
+ LLVMValueRef *variable);
+
+void
+lp_build_flow_scope_end(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_skip_begin(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
+ LLVMValueRef cond);
+
+void
+lp_build_flow_skip_end(struct lp_build_flow_context *flow);
struct lp_build_mask_context
{
- LLVMBuilderRef builder;
+ struct lp_build_flow_context *flow;
LLVMTypeRef reg_type;
LLVMValueRef value;
-
- LLVMValueRef phi;
-
- LLVMBasicBlockRef skip_block;
};
void
lp_build_mask_begin(struct lp_build_mask_context *mask,
- LLVMBuilderRef builder,
- union lp_type type,
+ struct lp_build_flow_context *flow,
+ struct lp_type type,
LLVMValueRef value);
/**
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index 01c8a752d1..6d3f692619 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -31,21 +31,15 @@
/**
* @file
- * LLVM IR building helpers interfaces.
- *
- * We use LLVM-C bindings for now. They are not documented, but follow the C++
- * interfaces very closely, and appear to be complete enough for code
- * genration. See
- * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
- * for a standalone example.
+ * Pixel format helpers.
*/
#include <llvm-c/Core.h>
-
-#include "pipe/p_format.h"
+#include "pipe/p_format.h"
-union lp_type;
+struct util_format_description;
+struct lp_type;
/**
@@ -56,9 +50,9 @@ union lp_type;
* @return RGBA in a 4 floats vector.
*/
LLVMValueRef
-lp_build_unpack_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef packed);
+lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef packed);
/**
@@ -67,9 +61,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
* @param rgba 4 float vector with the unpacked components.
*/
LLVMValueRef
-lp_build_pack_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef rgba);
+lp_build_pack_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef rgba);
/**
@@ -81,9 +75,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder,
* @return RGBA in a 4 floats vector.
*/
LLVMValueRef
-lp_build_load_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr);
+lp_build_load_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef ptr);
/**
@@ -92,10 +86,34 @@ lp_build_load_rgba(LLVMBuilderRef builder,
* @param rgba 4 float vector with the unpacked components.
*/
void
-lp_build_store_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr,
- LLVMValueRef rgba);
+lp_build_store_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef ptr,
+ LLVMValueRef rgba);
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets);
+
+
+void
+lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ struct lp_type type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba);
+
+
+void
+lp_build_load_rgba_soa(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ struct lp_type type,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ LLVMValueRef *rgba);
#endif /* !LP_BLD_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index dcbc0076c7..b9b5d84bed 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -32,9 +32,9 @@
LLVMValueRef
-lp_build_unpack_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef packed)
+lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef packed)
{
const struct util_format_description *desc;
LLVMTypeRef type;
@@ -152,9 +152,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
LLVMValueRef
-lp_build_pack_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef rgba)
+lp_build_pack_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef rgba)
{
const struct util_format_description *desc;
LLVMTypeRef type;
@@ -250,9 +250,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder,
LLVMValueRef
-lp_build_load_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr)
+lp_build_load_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef ptr)
{
const struct util_format_description *desc;
LLVMTypeRef type;
@@ -272,15 +272,15 @@ lp_build_load_rgba(LLVMBuilderRef builder,
packed = LLVMBuildLoad(builder, ptr, "");
- return lp_build_unpack_rgba(builder, format, packed);
+ return lp_build_unpack_rgba_aos(builder, format, packed);
}
void
-lp_build_store_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr,
- LLVMValueRef rgba)
+lp_build_store_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef ptr,
+ LLVMValueRef rgba)
{
const struct util_format_description *desc;
LLVMTypeRef type;
@@ -294,7 +294,7 @@ lp_build_store_rgba(LLVMBuilderRef builder,
type = LLVMIntType(desc->block.bits);
- packed = lp_build_pack_rgba(builder, format, rgba);
+ packed = lp_build_pack_rgba_aos(builder, format, rgba);
ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
new file mode 100644
index 0000000000..b5ff434e1a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
@@ -0,0 +1,208 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_format.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_format.h"
+
+
+/**
+ * Gather elements from scatter positions in memory into a single vector.
+ *
+ * @param src_width src element width
+ * @param dst_width result element width (source will be expanded to fit)
+ * @param length length of the offsets,
+ * @param base_ptr base pointer, should be a i8 pointer type.
+ * @param offsets vector with offsets
+ */
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets)
+{
+ LLVMTypeRef src_type = LLVMIntType(src_width);
+ LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
+ LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+ LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
+ LLVMValueRef res;
+ unsigned i;
+
+ res = LLVMGetUndef(dst_vec_type);
+ for(i = 0; i < length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef elem_offset;
+ LLVMValueRef elem_ptr;
+ LLVMValueRef elem;
+
+ elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
+ elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
+ elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
+ elem = LLVMBuildLoad(builder, elem_ptr, "");
+
+ assert(src_width <= dst_width);
+ if(src_width > dst_width)
+ elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
+ if(src_width < dst_width)
+ elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
+
+ res = LLVMBuildInsertElement(builder, res, elem, index, "");
+ }
+
+ return res;
+}
+
+
+static LLVMValueRef
+lp_build_format_swizzle(struct lp_type type,
+ const LLVMValueRef *inputs,
+ enum util_format_swizzle swizzle)
+{
+ switch (swizzle) {
+ case UTIL_FORMAT_SWIZZLE_X:
+ case UTIL_FORMAT_SWIZZLE_Y:
+ case UTIL_FORMAT_SWIZZLE_Z:
+ case UTIL_FORMAT_SWIZZLE_W:
+ return inputs[swizzle];
+ case UTIL_FORMAT_SWIZZLE_0:
+ return lp_build_zero(type);
+ case UTIL_FORMAT_SWIZZLE_1:
+ return lp_build_one(type);
+ case UTIL_FORMAT_SWIZZLE_NONE:
+ return lp_build_undef(type);
+ default:
+ assert(0);
+ return lp_build_undef(type);
+ }
+}
+
+
+void
+lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ struct lp_type type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba)
+{
+ LLVMValueRef inputs[4];
+ unsigned start;
+ unsigned chan;
+
+ /* FIXME: Support more formats */
+ assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(format_desc->block.width == 1);
+ assert(format_desc->block.height == 1);
+ assert(format_desc->block.bits <= 32);
+
+ /* Decode the input vector components */
+ start = 0;
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned width = format_desc->channel[chan].size;
+ unsigned stop = start + width;
+ LLVMValueRef input;
+
+ input = packed;
+
+ switch(format_desc->channel[chan].type) {
+ case UTIL_FORMAT_TYPE_VOID:
+ input = NULL;
+ break;
+
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if(type.floating) {
+ if(start)
+ input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), "");
+ if(stop < format_desc->block.bits) {
+ unsigned mask = ((unsigned long long)1 << width) - 1;
+ input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), "");
+ }
+
+ if(format_desc->channel[chan].normalized)
+ input = lp_build_unsigned_norm_to_float(builder, width, type, input);
+ else
+ input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), "");
+ }
+ else {
+ /* FIXME */
+ assert(0);
+ input = lp_build_undef(type);
+ }
+ break;
+
+ default:
+ /* fall through */
+ input = lp_build_undef(type);
+ break;
+ }
+
+ inputs[chan] = input;
+
+ start = stop;
+ }
+
+ if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[0];
+ LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle);
+ rgba[2] = rgba[1] = rgba[0] = depth;
+ rgba[3] = lp_build_one(type);
+ }
+ else {
+ for (chan = 0; chan < 4; ++chan) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+ rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle);
+ }
+ }
+}
+
+
+void
+lp_build_load_rgba_soa(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ struct lp_type type,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ LLVMValueRef *rgba)
+{
+ LLVMValueRef packed;
+
+ assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(format_desc->block.width == 1);
+ assert(format_desc->block.height == 1);
+ assert(format_desc->block.bits <= 32);
+
+ packed = lp_build_gather(builder,
+ type.length, format_desc->block.bits, type.width,
+ base_ptr, offsets);
+
+ lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index cfe20a0d75..338dbca6d1 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -292,7 +292,7 @@ void
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
const struct tgsi_token *tokens,
LLVMBuilderRef builder,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef a0_ptr,
LLVMValueRef dadx_ptr,
LLVMValueRef dady_ptr,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index 9194f6233a..9c57a10879 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -83,7 +83,7 @@ void
lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
const struct tgsi_token *tokens,
LLVMBuilderRef builder,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef a0_ptr,
LLVMValueRef dadx_ptr,
LLVMValueRef dady_ptr,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
index 8631efd6c3..6b6f820769 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
@@ -45,7 +45,7 @@ lp_build_cmp(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
LLVMValueRef zeros = LLVMConstNull(int_vec_type);
@@ -301,7 +301,7 @@ lp_build_select(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
- union lp_type type = bld->type;
+ struct lp_type type = bld->type;
LLVMValueRef res;
if(a == b)
@@ -313,8 +313,6 @@ lp_build_select(struct lp_build_context *bld,
b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
}
- /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */
-
a = LLVMBuildAnd(bld->builder, a, mask, "");
/* This often gets translated to PANDN, but sometimes the NOT is
@@ -339,9 +337,9 @@ LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
- boolean cond[4])
+ const boolean cond[4])
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
const unsigned n = type.length;
unsigned i, j;
@@ -376,9 +374,9 @@ lp_build_select_aos(struct lp_build_context *bld,
return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
}
+ else {
#if 0
- else if(0) {
- /* FIXME: Unfortunately select of vectors do not work */
+ /* XXX: Unfortunately select of vectors do not work */
/* Use a select */
LLVMTypeRef elem_type = LLVMInt1Type();
LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
@@ -388,10 +386,9 @@ lp_build_select_aos(struct lp_build_context *bld,
cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
- }
-#endif
- else {
+#else
LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
return lp_build_select(bld, mask, a, b);
+#endif
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
index 29b9e1c45b..a4ee7723b5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
@@ -42,7 +42,7 @@
#include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */
-union lp_type type;
+struct lp_type type;
struct lp_build_context;
@@ -66,7 +66,7 @@ LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
- boolean cond[4]);
+ const boolean cond[4]);
#endif /* !LP_BLD_LOGIC_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
new file mode 100644
index 0000000000..403d0e4836
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
@@ -0,0 +1,135 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_SAMPLE_H
+#define LP_BLD_SAMPLE_H
+
+
+#include <llvm-c/Core.h>
+
+struct pipe_texture;
+struct pipe_sampler_state;
+struct lp_type;
+
+
+/**
+ * Sampler static state.
+ *
+ * These are the bits of state from pipe_texture and pipe_sampler_state that
+ * are embedded in the generated code.
+ */
+struct lp_sampler_static_state
+{
+ /* pipe_texture's state */
+ enum pipe_format format;
+ unsigned target:2;
+ unsigned pot_width:1;
+ unsigned pot_height:1;
+ unsigned pot_depth:1;
+
+ /* pipe_sampler_state's state */
+ unsigned wrap_s:3;
+ unsigned wrap_t:3;
+ unsigned wrap_r:3;
+ unsigned min_img_filter:2;
+ unsigned min_mip_filter:2;
+ unsigned mag_img_filter:2;
+ unsigned compare_mode:1;
+ unsigned compare_func:3;
+ unsigned normalized_coords:1;
+ unsigned prefilter:4;
+};
+
+
+/**
+ * Sampler dynamic state.
+ *
+ * These are the bits of state from pipe_texture and pipe_sampler_state that
+ * are computed in runtime.
+ *
+ * There are obtained through callbacks, as we don't want to tie the texture
+ * sampling code generation logic to any particular texture layout or pipe
+ * driver.
+ */
+struct lp_sampler_dynamic_state
+{
+
+ /** Obtain the base texture width. */
+ LLVMValueRef
+ (*width)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+ /** Obtain the base texture height. */
+ LLVMValueRef
+ (*height)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+ LLVMValueRef
+ (*stride)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+ LLVMValueRef
+ (*data_ptr)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+};
+
+
+/**
+ * Derive the sampler static state.
+ */
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+ const struct pipe_texture *texture,
+ const struct pipe_sampler_state *sampler);
+
+
+void
+lp_build_sample_soa(LLVMBuilderRef builder,
+ const struct lp_sampler_static_state *static_state,
+ struct lp_sampler_dynamic_state *dynamic_state,
+ struct lp_type fp_type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel);
+
+
+
+#endif /* LP_BLD_SAMPLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
new file mode 100644
index 0000000000..8ca1be6f1b
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -0,0 +1,416 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_format.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_format.h"
+#include "lp_bld_sample.h"
+
+
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+ const struct pipe_texture *texture,
+ const struct pipe_sampler_state *sampler)
+{
+ memset(state, 0, sizeof *state);
+
+ if(!texture)
+ return;
+
+ if(!sampler)
+ return;
+
+ state->format = texture->format;
+ state->target = texture->target;
+ state->pot_width = util_is_pot(texture->width[0]);
+ state->pot_height = util_is_pot(texture->height[0]);
+ state->pot_depth = util_is_pot(texture->depth[0]);
+
+ state->wrap_s = sampler->wrap_s;
+ state->wrap_t = sampler->wrap_t;
+ state->wrap_r = sampler->wrap_r;
+ state->min_img_filter = sampler->min_img_filter;
+ state->min_mip_filter = sampler->min_mip_filter;
+ state->mag_img_filter = sampler->mag_img_filter;
+ if(sampler->compare_mode) {
+ state->compare_mode = sampler->compare_mode;
+ state->compare_func = sampler->compare_func;
+ }
+ state->normalized_coords = sampler->normalized_coords;
+ state->prefilter = sampler->prefilter;
+}
+
+
+
+/**
+ * Keep all information for sampling code generation in a single place.
+ */
+struct lp_build_sample_context
+{
+ LLVMBuilderRef builder;
+
+ const struct lp_sampler_static_state *static_state;
+
+ struct lp_sampler_dynamic_state *dynamic_state;
+
+ const struct util_format_description *format_desc;
+
+ /** Incoming coordinates type and build context */
+ struct lp_type coord_type;
+ struct lp_build_context coord_bld;
+
+ /** Integer coordinates */
+ struct lp_type int_coord_type;
+ struct lp_build_context int_coord_bld;
+
+ /** Output texels type and build context */
+ struct lp_type texel_type;
+ struct lp_build_context texel_bld;
+};
+
+
+static void
+lp_build_sample_texel(struct lp_build_sample_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
+{
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef x_stride;
+ LLVMValueRef offset;
+
+ x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8);
+
+ if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ LLVMValueRef x_lo, x_hi;
+ LLVMValueRef y_lo, y_hi;
+ LLVMValueRef x_stride_lo, x_stride_hi;
+ LLVMValueRef y_stride_lo, y_stride_hi;
+ LLVMValueRef x_offset_lo, x_offset_hi;
+ LLVMValueRef y_offset_lo, y_offset_hi;
+ LLVMValueRef offset_lo, offset_hi;
+
+ x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, "");
+ y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, "");
+
+ x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, "");
+ y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, "");
+
+ x_stride_lo = x_stride;
+ y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8);
+
+ x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8);
+ y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, "");
+
+ x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo);
+ y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo);
+ offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo);
+
+ x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi);
+ y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi);
+ offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi);
+
+ offset = lp_build_add(int_coord_bld, offset_hi, offset_lo);
+ }
+ else {
+ LLVMValueRef x_offset;
+ LLVMValueRef y_offset;
+
+ x_offset = lp_build_mul(int_coord_bld, x, x_stride);
+ y_offset = lp_build_mul(int_coord_bld, y, y_stride);
+
+ offset = lp_build_add(int_coord_bld, x_offset, y_offset);
+ }
+
+ lp_build_load_rgba_soa(bld->builder,
+ bld->format_desc,
+ bld->texel_type,
+ data_ptr,
+ offset,
+ texel);
+}
+
+
+static LLVMValueRef
+lp_build_sample_wrap(struct lp_build_sample_context *bld,
+ LLVMValueRef coord,
+ LLVMValueRef length,
+ boolean is_pot,
+ unsigned wrap_mode)
+{
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef length_minus_one;
+
+ length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
+
+ switch(wrap_mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ if(is_pot)
+ coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
+ else
+ /* Signed remainder won't give the right results for negative
+ * dividends but unsigned remainder does.*/
+ coord = LLVMBuildURem(bld->builder, coord, length, "");
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP:
+ coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
+ coord = lp_build_min(int_coord_bld, coord, length_minus_one);
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ /* FIXME */
+ _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode);
+ coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
+ coord = lp_build_min(int_coord_bld, coord, length_minus_one);
+ break;
+
+ default:
+ assert(0);
+ }
+
+ return coord;
+}
+
+
+static void
+lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
+{
+ LLVMValueRef x;
+ LLVMValueRef y;
+
+ x = lp_build_ifloor(&bld->coord_bld, s);
+ y = lp_build_ifloor(&bld->coord_bld, t);
+
+ x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ lp_build_sample_texel(bld, x, y, stride, data_ptr, texel);
+}
+
+
+static void
+lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
+{
+ LLVMValueRef half;
+ LLVMValueRef s_ipart;
+ LLVMValueRef t_ipart;
+ LLVMValueRef s_fpart;
+ LLVMValueRef t_fpart;
+ LLVMValueRef x0, x1;
+ LLVMValueRef y0, y1;
+ LLVMValueRef neighbors[2][2][4];
+ unsigned chan;
+
+ half = lp_build_const_scalar(bld->coord_type, 0.5);
+ s = lp_build_sub(&bld->coord_bld, s, half);
+ t = lp_build_sub(&bld->coord_bld, t, half);
+
+ s_ipart = lp_build_floor(&bld->coord_bld, s);
+ t_ipart = lp_build_floor(&bld->coord_bld, t);
+
+ s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart);
+ t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart);
+
+ x0 = lp_build_int(&bld->coord_bld, s_ipart);
+ y0 = lp_build_int(&bld->coord_bld, t_ipart);
+
+ x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
+ y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
+
+ x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
+ lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
+ lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
+ lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
+
+ /* TODO: Don't interpolate missing channels */
+ for(chan = 0; chan < 4; ++chan) {
+ texel[chan] = lp_build_lerp_2d(&bld->texel_bld,
+ s_fpart, t_fpart,
+ neighbors[0][0][chan],
+ neighbors[0][1][chan],
+ neighbors[1][0][chan],
+ neighbors[1][1][chan]);
+ }
+}
+
+
+static void
+lp_build_sample_compare(struct lp_build_sample_context *bld,
+ LLVMValueRef p,
+ LLVMValueRef *texel)
+{
+ struct lp_build_context *texel_bld = &bld->texel_bld;
+ LLVMValueRef res;
+ unsigned chan;
+
+ if(!bld->static_state->compare_mode)
+ return;
+
+ /* TODO: Compare before swizzling, to avoid redundant computations */
+ res = NULL;
+ for(chan = 0; chan < 4; ++chan) {
+ LLVMValueRef cmp;
+ cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
+ cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
+
+ if(res)
+ res = lp_build_add(texel_bld, res, cmp);
+ else
+ res = cmp;
+ }
+
+ assert(res);
+ res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25));
+
+ /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
+ for(chan = 0; chan < 3; ++chan)
+ texel[chan] = res;
+ texel[3] = texel_bld->one;
+}
+
+
+void
+lp_build_sample_soa(LLVMBuilderRef builder,
+ const struct lp_sampler_static_state *static_state,
+ struct lp_sampler_dynamic_state *dynamic_state,
+ struct lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel)
+{
+ struct lp_build_sample_context bld;
+ LLVMValueRef width;
+ LLVMValueRef height;
+ LLVMValueRef stride;
+ LLVMValueRef data_ptr;
+ LLVMValueRef s;
+ LLVMValueRef t;
+ LLVMValueRef p;
+
+ /* Setup our build context */
+ memset(&bld, 0, sizeof bld);
+ bld.builder = builder;
+ bld.static_state = static_state;
+ bld.dynamic_state = dynamic_state;
+ bld.format_desc = util_format_description(static_state->format);
+ bld.coord_type = type;
+ bld.int_coord_type = lp_int_type(type);
+ bld.texel_type = type;
+ lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
+ lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
+ lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
+
+ /* Get the dynamic state */
+ width = dynamic_state->width(dynamic_state, builder, unit);
+ height = dynamic_state->height(dynamic_state, builder, unit);
+ stride = dynamic_state->stride(dynamic_state, builder, unit);
+ data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit);
+
+ s = coords[0];
+ t = coords[1];
+ p = coords[2];
+
+ width = lp_build_broadcast_scalar(&bld.int_coord_bld, width);
+ height = lp_build_broadcast_scalar(&bld.int_coord_bld, height);
+ stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride);
+
+ if(static_state->target == PIPE_TEXTURE_1D)
+ t = bld.coord_bld.zero;
+
+ if(static_state->normalized_coords) {
+ LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type);
+ LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, "");
+ LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, "");
+ s = lp_build_mul(&bld.coord_bld, s, fp_width);
+ t = lp_build_mul(&bld.coord_bld, t, fp_height);
+ }
+
+ switch (static_state->min_img_filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ case PIPE_TEX_FILTER_ANISO:
+ lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+ break;
+ default:
+ assert(0);
+ }
+
+ /* FIXME: respect static_state->min_mip_filter */;
+ /* FIXME: respect static_state->mag_img_filter */;
+ /* FIXME: respect static_state->prefilter */;
+
+ lp_build_sample_compare(&bld, p, texel);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.c b/src/gallium/drivers/llvmpipe/lp_bld_struct.c
index 14d2b10df9..3998ac374f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_struct.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.c
@@ -42,17 +42,30 @@
LLVMValueRef
+lp_build_struct_get_ptr(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ unsigned member,
+ const char *name)
+{
+ LLVMValueRef indices[2];
+ LLVMValueRef member_ptr;
+ indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0);
+ member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
+ lp_build_name(member_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name);
+ return member_ptr;
+}
+
+
+LLVMValueRef
lp_build_struct_get(LLVMBuilderRef builder,
LLVMValueRef ptr,
unsigned member,
const char *name)
{
- LLVMValueRef indices[2];
LLVMValueRef member_ptr;
LLVMValueRef res;
- indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
- indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0);
- member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
+ member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name);
res = LLVMBuildLoad(builder, member_ptr, "");
lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name);
return res;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.h b/src/gallium/drivers/llvmpipe/lp_bld_struct.h
index cbefdc9f81..740392f561 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_struct.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.h
@@ -53,6 +53,18 @@
offsetof(_ctype, _cmember))
+/**
+ * Get value pointer to a structure member.
+ */
+LLVMValueRef
+lp_build_struct_get_ptr(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ unsigned member,
+ const char *name);
+
+/**
+ * Get the value of a structure member.
+ */
LLVMValueRef
lp_build_struct_get(LLVMBuilderRef builder,
LLVMValueRef ptr,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
index ac7eed9379..64e81f7b1f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
@@ -64,7 +64,7 @@ LLVMValueRef
lp_build_broadcast_scalar(struct lp_build_context *bld,
LLVMValueRef scalar)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
LLVMValueRef res;
unsigned i;
@@ -83,7 +83,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
LLVMValueRef a,
unsigned channel)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
const unsigned n = type.length;
unsigned i, j;
@@ -115,7 +115,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
* YY00 YY00 .... YY00
* YYYY YYYY .... YYYY <= output
*/
- union lp_type type4 = type;
+ struct lp_type type4 = type;
const char shifts[4][2] = {
{ 1, 2},
{-1, 2},
@@ -161,7 +161,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
LLVMValueRef
lp_build_swizzle1_aos(struct lp_build_context *bld,
LLVMValueRef a,
- unsigned char swizzle[4])
+ const unsigned char swizzle[4])
{
const unsigned n = bld->type.length;
unsigned i, j;
@@ -192,7 +192,7 @@ LLVMValueRef
lp_build_swizzle2_aos(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
- unsigned char swizzle[4])
+ const unsigned char swizzle[4])
{
const unsigned n = bld->type.length;
unsigned i, j;
@@ -201,11 +201,12 @@ lp_build_swizzle2_aos(struct lp_build_context *bld,
return lp_build_swizzle1_aos(bld, a, swizzle);
if(a == b) {
- swizzle[0] %= 4;
- swizzle[1] %= 4;
- swizzle[2] %= 4;
- swizzle[3] %= 4;
- return lp_build_swizzle1_aos(bld, a, swizzle);
+ unsigned char swizzle1[4];
+ swizzle1[0] = swizzle[0] % 4;
+ swizzle1[1] = swizzle[1] % 4;
+ swizzle1[2] = swizzle[2] % 4;
+ swizzle1[3] = swizzle[3] % 4;
+ return lp_build_swizzle1_aos(bld, a, swizzle1);
}
if(swizzle[0] % 4 == 0 &&
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
index d7dd6a8a60..1f6da80448 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
@@ -40,7 +40,7 @@
#include <llvm-c/Core.h>
-union lp_type type;
+struct lp_type type;
struct lp_build_context;
@@ -73,7 +73,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
LLVMValueRef
lp_build_swizzle1_aos(struct lp_build_context *bld,
LLVMValueRef a,
- unsigned char swizzle[4]);
+ const unsigned char swizzle[4]);
/**
@@ -85,7 +85,7 @@ LLVMValueRef
lp_build_swizzle2_aos(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
- unsigned char swizzle[4]);
+ const unsigned char swizzle[4]);
#endif /* !LP_BLD_SWIZZLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
index 912db24aec..eddb7a83fa 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
@@ -39,31 +39,46 @@
struct tgsi_token;
-union lp_type;
+struct lp_type;
struct lp_build_context;
struct lp_build_mask_context;
-typedef void
-(*lp_emit_fetch_texel_soa_callback)( LLVMBuilderRef builder,
- void *context,
- unsigned unit,
- unsigned num_coords,
- const LLVMValueRef *coords,
- LLVMValueRef lodbias,
- LLVMValueRef *texel);
+/**
+ * Sampler code generation interface.
+ *
+ * Although texture sampling is a requirement for TGSI translation, it is
+ * a very different problem with several different approaches to it. This
+ * structure establishes an interface for texture sampling code generation, so
+ * that we can easily use different texture sampling strategies.
+ */
+struct lp_build_sampler_soa
+{
+ void
+ (*destroy)( struct lp_build_sampler_soa *sampler );
+
+ void
+ (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel);
+};
+
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_token *tokens,
- union lp_type type,
+ struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef consts_ptr,
const LLVMValueRef *pos,
const LLVMValueRef (*inputs)[4],
LLVMValueRef (*outputs)[4],
- lp_emit_fetch_texel_soa_callback emit_fetch_texel,
- void *emit_fetch_texel_context);
+ struct lp_build_sampler_soa *sampler);
#endif /* LP_BLD_TGSI_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index d4d18febec..adc81569ed 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -78,6 +78,11 @@
#define CHAN_Z 2
#define CHAN_W 3
+#define QUAD_TOP_LEFT 0
+#define QUAD_TOP_RIGHT 1
+#define QUAD_BOTTOM_LEFT 2
+#define QUAD_BOTTOM_RIGHT 3
+
struct lp_build_tgsi_soa_context
{
@@ -88,8 +93,7 @@ struct lp_build_tgsi_soa_context
const LLVMValueRef (*inputs)[NUM_CHANNELS];
LLVMValueRef (*outputs)[NUM_CHANNELS];
- lp_emit_fetch_texel_soa_callback emit_fetch_texel;
- void *emit_fetch_texel_context;
+ struct lp_build_sampler_soa *sampler;
LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
@@ -98,6 +102,51 @@ struct lp_build_tgsi_soa_context
};
+static const unsigned char
+swizzle_left[4] = {
+ QUAD_TOP_LEFT, QUAD_TOP_LEFT,
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
+};
+
+static const unsigned char
+swizzle_right[4] = {
+ QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
+ QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
+};
+
+static const unsigned char
+swizzle_top[4] = {
+ QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
+ QUAD_TOP_LEFT, QUAD_TOP_RIGHT
+};
+
+static const unsigned char
+swizzle_bottom[4] = {
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
+};
+
+
+static LLVMValueRef
+emit_ddx(struct lp_build_tgsi_soa_context *bld,
+ LLVMValueRef src)
+{
+ LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
+ LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
+ return lp_build_sub(&bld->base, src_right, src_left);
+}
+
+
+static LLVMValueRef
+emit_ddy(struct lp_build_tgsi_soa_context *bld,
+ LLVMValueRef src)
+{
+ LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
+ LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
+ return lp_build_sub(&bld->base, src_top, src_bottom);
+}
+
+
/**
* Register fetch.
*/
@@ -168,6 +217,7 @@ emit_fetch(
break;
case TGSI_UTIL_SIGN_SET:
+ /* TODO: Use bitwese OR for floating point */
res = lp_build_abs( &bld->base, res );
res = LLVMBuildNeg( bld->base.builder, res, "" );
break;
@@ -185,6 +235,36 @@ emit_fetch(
/**
+ * Register fetch with derivatives.
+ */
+static void
+emit_fetch_deriv(
+ struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst,
+ unsigned index,
+ const unsigned chan_index,
+ LLVMValueRef *res,
+ LLVMValueRef *ddx,
+ LLVMValueRef *ddy)
+{
+ LLVMValueRef src;
+
+ src = emit_fetch(bld, inst, index, chan_index);
+
+ if(res)
+ *res = src;
+
+ /* TODO: use interpolation coeffs for inputs */
+
+ if(ddx)
+ *ddx = emit_ddx(bld, src);
+
+ if(ddy)
+ *ddy = emit_ddy(bld, src);
+}
+
+
+/**
* Register store.
*/
static void
@@ -239,17 +319,18 @@ emit_store(
* High-level instruction translators.
*/
+
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
boolean apply_lodbias,
- boolean projected)
+ boolean projected,
+ LLVMValueRef *texel)
{
const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
LLVMValueRef lodbias;
LLVMValueRef oow;
LLVMValueRef coords[3];
- LLVMValueRef texel[4];
unsigned num_coords;
unsigned i;
@@ -289,12 +370,11 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
coords[i] = lp_build_mul(&bld->base, coords[i], oow);
}
- bld->emit_fetch_texel(bld->base.builder, bld->emit_fetch_texel_context,
- unit, num_coords, coords, lodbias, texel);
-
- FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) {
- emit_store( bld, inst, 0, i, texel[i] );
- }
+ bld->sampler->emit_fetch_texel(bld->sampler,
+ bld->base.builder,
+ bld->base.type,
+ unit, num_coords, coords, lodbias,
+ texel);
}
@@ -347,14 +427,6 @@ emit_kil(
}
-static void
-emit_kilp(
- struct lp_build_tgsi_soa_context *bld )
-{
- /* XXX todo / fix me */
-}
-
-
/**
* Check if inst src/dest regs use indirect addressing into temporary
* register file.
@@ -382,25 +454,35 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst)
static int
emit_instruction(
struct lp_build_tgsi_soa_context *bld,
- struct tgsi_full_instruction *inst )
+ const struct tgsi_full_instruction *inst,
+ const struct tgsi_opcode_info *info)
{
unsigned chan_index;
LLVMValueRef src0, src1, src2;
LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- LLVMValueRef dst0;
+ LLVMValueRef res;
+ LLVMValueRef dst0[NUM_CHANNELS];
/* we can't handle indirect addressing into temp register file yet */
if (indirect_temp_reference(inst))
return FALSE;
+ assert(info->num_dst <= 1);
+ if(info->num_dst) {
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.undef;
+ }
+ }
+
switch (inst->Instruction.Opcode) {
#if 0
case TGSI_OPCODE_ARL:
+ /* FIXME */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
emit_flr(bld, 0, 0);
emit_f2it( bld, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
#endif
@@ -408,19 +490,17 @@ emit_instruction(
case TGSI_OPCODE_MOV:
case TGSI_OPCODE_SWZ:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
}
break;
case TGSI_OPCODE_LIT:
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
- emit_store( bld, inst, 0, CHAN_X, bld->base.one);
+ dst0[CHAN_X] = bld->base.one;
}
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
src0 = emit_fetch( bld, inst, 0, CHAN_X );
- dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
- emit_store( bld, inst, 0, CHAN_Y, dst0);
+ dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
}
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
/* XMM[1] = SrcReg[0].yyyy */
@@ -432,20 +512,19 @@ emit_instruction(
tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
- dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
- emit_store( bld, inst, 0, CHAN_Z, dst0);
+ dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
}
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
- emit_store( bld, inst, 0, CHAN_W, bld->base.one);
+ dst0[CHAN_W] = bld->base.one;
}
break;
case TGSI_OPCODE_RCP:
/* TGSI_OPCODE_RECIP */
src0 = emit_fetch( bld, inst, 0, CHAN_X );
- dst0 = lp_build_rcp(&bld->base, src0);
+ res = lp_build_rcp(&bld->base, src0);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, dst0 );
+ dst0[chan_index] = res;
}
break;
@@ -453,9 +532,9 @@ emit_instruction(
/* TGSI_OPCODE_RECIPSQRT */
src0 = emit_fetch( bld, inst, 0, CHAN_X );
src0 = lp_build_abs(&bld->base, src0);
- dst0 = lp_build_rsqrt(&bld->base, src0);
+ res = lp_build_rsqrt(&bld->base, src0);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, dst0 );
+ dst0[chan_index] = res;
}
break;
@@ -479,16 +558,15 @@ emit_instruction(
lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
- emit_store( bld, inst, 0, CHAN_X, tmp0);
+ dst0[CHAN_X] = tmp0;
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
- emit_store( bld, inst, 0, CHAN_Y, tmp1);
+ dst0[CHAN_Y] = tmp1;
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
- emit_store( bld, inst, 0, CHAN_Z, tmp2);
+ dst0[CHAN_Z] = tmp2;
}
/* dst.w = 1.0 */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
break;
@@ -514,20 +592,18 @@ emit_instruction(
/* dst.x = floor(lg2(abs(src.x))) */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
- emit_store( bld, inst, 0, CHAN_X, tmp0);
+ dst0[CHAN_X] = tmp0;
/* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
- tmp1 = lp_build_div( &bld->base, src0, tmp1);
- emit_store( bld, inst, 0, CHAN_Y, tmp1);
+ dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
}
/* dst.z = lg2(abs(src.x)) */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
- emit_store( bld, inst, 0, CHAN_Z, tmp2);
+ dst0[CHAN_Z] = tmp2;
}
/* dst.w = 1.0 */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
break;
@@ -535,8 +611,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0 = lp_build_mul(&bld->base, src0, src1);
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
}
break;
@@ -544,8 +619,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0 = lp_build_add(&bld->base, src0, src1);
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
}
break;
@@ -563,7 +637,7 @@ emit_instruction(
tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
@@ -585,28 +659,24 @@ emit_instruction(
tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_DST:
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_X, tmp0);
+ dst0[CHAN_X] = bld->base.one;
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
- tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
- emit_store( bld, inst, 0, CHAN_Y, tmp0);
+ dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
- tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
- emit_store( bld, inst, 0, CHAN_Z, tmp0);
+ dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
- tmp0 = emit_fetch( bld, inst, 1, CHAN_W );
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
}
break;
@@ -614,8 +684,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0 = lp_build_min( &bld->base, src0, src1 );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
}
break;
@@ -623,8 +692,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0 = lp_build_max( &bld->base, src0, src1 );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
}
break;
@@ -634,8 +702,7 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
@@ -645,8 +712,7 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
@@ -658,7 +724,7 @@ emit_instruction(
tmp2 = emit_fetch( bld, inst, 2, chan_index );
tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
@@ -666,8 +732,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
tmp1 = emit_fetch( bld, inst, 1, chan_index );
- tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
}
break;
@@ -678,13 +743,19 @@ emit_instruction(
src2 = emit_fetch( bld, inst, 2, chan_index );
tmp0 = lp_build_sub( &bld->base, src1, src2 );
tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
- dst0 = lp_build_add( &bld->base, tmp0, src2 );
- emit_store( bld, inst, 0, chan_index, dst0 );
+ dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
}
break;
case TGSI_OPCODE_CND:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ src2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
+ }
break;
case TGSI_OPCODE_DP2A:
@@ -698,45 +769,49 @@ emit_instruction(
tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
+ dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
}
break;
-#if 0
case TGSI_OPCODE_FRC:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_frc( bld, 0, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ tmp0 = lp_build_floor(&bld->base, src0);
+ tmp0 = lp_build_sub(&bld->base, tmp0, src0);
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_CLAMP:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ src2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp0 = lp_build_max(&bld->base, tmp0, src1);
+ tmp0 = lp_build_min(&bld->base, tmp0, src2);
+ dst0[chan_index] = tmp0;
+ }
break;
case TGSI_OPCODE_FLR:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_flr( bld, 0, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
}
break;
case TGSI_OPCODE_ROUND:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_rnd( bld, 0, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_round(&bld->base, tmp0);
}
break;
-#endif
case TGSI_OPCODE_EX2: {
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp0 = lp_build_exp2( &bld->base, tmp0);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
}
@@ -745,16 +820,16 @@ emit_instruction(
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp0 = lp_build_log2( &bld->base, tmp0);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_POW:
src0 = emit_fetch( bld, inst, 0, CHAN_X );
src1 = emit_fetch( bld, inst, 1, CHAN_X );
- dst0 = lp_build_pow( &bld->base, src0, src1 );
+ res = lp_build_pow( &bld->base, src0, src1 );
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, dst0 );
+ dst0[chan_index] = res;
}
break;
@@ -775,7 +850,7 @@ emit_instruction(
tmp5 = tmp3;
tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
- emit_store( bld, inst, 0, CHAN_X, tmp2);
+ dst0[CHAN_X] = tmp2;
}
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
@@ -786,31 +861,30 @@ emit_instruction(
tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
- emit_store( bld, inst, 0, CHAN_Y, tmp3);
+ dst0[CHAN_Y] = tmp3;
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
- emit_store( bld, inst, 0, CHAN_Z, tmp5);
+ dst0[CHAN_Z] = tmp5;
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
break;
case TGSI_OPCODE_ABS:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
}
break;
case TGSI_OPCODE_RCC:
+ /* deprecated? */
+ assert(0);
return 0;
- break;
case TGSI_OPCODE_DPH:
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
@@ -827,7 +901,7 @@ emit_instruction(
tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
@@ -835,25 +909,27 @@ emit_instruction(
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp0 = lp_build_cos( &bld->base, tmp0 );
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_DDX:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
+ }
break;
case TGSI_OPCODE_DDY:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
+ }
break;
-#if 0
case TGSI_OPCODE_KILP:
/* predicated kill */
- emit_kilp( bld );
- return 0; /* XXX fix me */
+ /* FIXME */
+ return 0;
break;
-#endif
case TGSI_OPCODE_KIL:
/* conditional kill */
@@ -885,13 +961,14 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
case TGSI_OPCODE_SFL:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.zero;
+ }
break;
case TGSI_OPCODE_SGT:
@@ -899,8 +976,7 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
@@ -908,7 +984,7 @@ emit_instruction(
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp0 = lp_build_sin( &bld->base, tmp0 );
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
@@ -917,8 +993,7 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
@@ -927,85 +1002,99 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
case TGSI_OPCODE_STR:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.one;
+ }
break;
case TGSI_OPCODE_TEX:
- emit_tex( bld, inst, FALSE, FALSE );
+ emit_tex( bld, inst, FALSE, FALSE, dst0 );
break;
case TGSI_OPCODE_TXD:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_UP2H:
+ /* deprecated */
+ assert (0);
return 0;
break;
case TGSI_OPCODE_UP2US:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_UP4B:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_UP4UB:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_X2D:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_ARA:
+ /* deprecated */
+ assert(0);
return 0;
break;
#if 0
case TGSI_OPCODE_ARR:
+ /* FIXME */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
emit_rnd( bld, 0, 0 );
emit_f2it( bld, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
#endif
case TGSI_OPCODE_BRA:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_CAL:
+ /* FIXME */
return 0;
break;
-#if 0
case TGSI_OPCODE_RET:
- emit_ret( bld );
+ /* FIXME */
+ return 0;
break;
-#endif
case TGSI_OPCODE_END:
break;
-#if 0
case TGSI_OPCODE_SSG:
/* TGSI_OPCODE_SGN */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_sgn( bld, 0, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
}
break;
-#endif
case TGSI_OPCODE_CMP:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
@@ -1013,34 +1102,29 @@ emit_instruction(
src1 = emit_fetch( bld, inst, 1, chan_index );
src2 = emit_fetch( bld, inst, 2, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
- dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
}
break;
case TGSI_OPCODE_SCS:
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
- tmp0 = lp_build_cos( &bld->base, tmp0 );
- emit_store( bld, inst, 0, CHAN_X, tmp0);
+ dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
- tmp0 = lp_build_sin( &bld->base, tmp0 );
- emit_store( bld, inst, 0, CHAN_Y, tmp0);
+ dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
- tmp0 = bld->base.zero;
- emit_store( bld, inst, 0, CHAN_Z, tmp0);
+ dst0[CHAN_Z] = bld->base.zero;
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
break;
case TGSI_OPCODE_TXB:
- emit_tex( bld, inst, TRUE, FALSE );
+ emit_tex( bld, inst, TRUE, FALSE, dst0 );
break;
case TGSI_OPCODE_NRM:
@@ -1099,38 +1183,35 @@ emit_instruction(
/* dst.x = xmm1 * src.x */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
- tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
- emit_store(bld, inst, 0, CHAN_X, tmp4);
+ dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
}
/* dst.y = xmm1 * src.y */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
- tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
- emit_store(bld, inst, 0, CHAN_Y, tmp5);
+ dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
}
/* dst.z = xmm1 * src.z */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
- tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
- emit_store(bld, inst, 0, CHAN_Z, tmp6);
+ dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
}
/* dst.w = xmm1 * src.w */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
- tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
- emit_store(bld, inst, 0, CHAN_W, tmp7);
+ dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
}
}
- /* dst0.w = 1.0 */
+ /* dst.w = 1.0 */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
- tmp0 = bld->base.one;
- emit_store(bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
}
break;
case TGSI_OPCODE_DIV:
+ /* deprecated */
+ assert( 0 );
return 0;
break;
@@ -1143,118 +1224,157 @@ emit_instruction(
tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
+ dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
}
break;
case TGSI_OPCODE_TXL:
- emit_tex( bld, inst, TRUE, FALSE );
+ emit_tex( bld, inst, TRUE, FALSE, dst0 );
break;
case TGSI_OPCODE_TXP:
- emit_tex( bld, inst, FALSE, TRUE );
+ emit_tex( bld, inst, FALSE, TRUE, dst0 );
break;
case TGSI_OPCODE_BRK:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_IF:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_BGNFOR:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_REP:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_ELSE:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_ENDIF:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_ENDFOR:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_ENDREP:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_PUSHA:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_POPA:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_CEIL:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
+ }
break;
case TGSI_OPCODE_I2F:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_NOT:
+ /* deprecated? */
+ assert(0);
return 0;
break;
-#if 0
case TGSI_OPCODE_TRUNC:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_f2it( bld, 0 );
- emit_i2f( bld, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
}
break;
-#endif
case TGSI_OPCODE_SHL:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_SHR:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_AND:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_OR:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_MOD:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_XOR:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_SAD:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_TXF:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_TXQ:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_CONT:
+ /* deprecated? */
+ assert(0);
return 0;
break;
@@ -1266,10 +1386,28 @@ emit_instruction(
return 0;
break;
+ case TGSI_OPCODE_NOISE1:
+ case TGSI_OPCODE_NOISE2:
+ case TGSI_OPCODE_NOISE3:
+ case TGSI_OPCODE_NOISE4:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.zero;
+ }
+ break;
+
+ case TGSI_OPCODE_NOP:
+ break;
+
default:
return 0;
}
+ if(info->num_dst) {
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
+ }
+ }
+
return 1;
}
@@ -1277,14 +1415,13 @@ emit_instruction(
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_token *tokens,
- union lp_type type,
+ struct lp_type type,
struct lp_build_mask_context *mask,
LLVMValueRef consts_ptr,
const LLVMValueRef *pos,
const LLVMValueRef (*inputs)[NUM_CHANNELS],
LLVMValueRef (*outputs)[NUM_CHANNELS],
- lp_emit_fetch_texel_soa_callback emit_fetch_texel,
- void *emit_fetch_texel_context)
+ struct lp_build_sampler_soa *sampler)
{
struct lp_build_tgsi_soa_context bld;
struct tgsi_parse_context parse;
@@ -1299,8 +1436,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
bld.inputs = inputs;
bld.outputs = outputs;
bld.consts_ptr = consts_ptr;
- bld.emit_fetch_texel = emit_fetch_texel;
- bld.emit_fetch_texel_context = emit_fetch_texel_context;
+ bld.sampler = sampler;
tgsi_parse_init( &parse, tokens );
@@ -1309,16 +1445,18 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
- /* Input already interpolated */
+ /* Inputs already interpolated */
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
+ {
unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
- _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
- info ? info->mnemonic : "<invalid>");
- }
+ if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
+ _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
+ info ? info->mnemonic : "<invalid>");
+ }
+
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c
index 8e0026fd97..606243d6c5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c
@@ -33,7 +33,7 @@
LLVMTypeRef
-lp_build_elem_type(union lp_type type)
+lp_build_elem_type(struct lp_type type)
{
if (type.floating) {
switch(type.width) {
@@ -55,7 +55,7 @@ lp_build_elem_type(union lp_type type)
LLVMTypeRef
-lp_build_vec_type(union lp_type type)
+lp_build_vec_type(struct lp_type type)
{
LLVMTypeRef elem_type = lp_build_elem_type(type);
return LLVMVectorType(elem_type, type.length);
@@ -69,7 +69,7 @@ lp_build_vec_type(union lp_type type)
* type and check for identity.
*/
boolean
-lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type)
+lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type)
{
LLVMTypeKind elem_kind;
@@ -107,7 +107,7 @@ lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type)
boolean
-lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type)
+lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type)
{
LLVMTypeRef elem_type;
@@ -128,7 +128,7 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type)
boolean
-lp_check_value(union lp_type type, LLVMValueRef val)
+lp_check_value(struct lp_type type, LLVMValueRef val)
{
LLVMTypeRef vec_type;
@@ -143,24 +143,36 @@ lp_check_value(union lp_type type, LLVMValueRef val)
LLVMTypeRef
-lp_build_int_elem_type(union lp_type type)
+lp_build_int_elem_type(struct lp_type type)
{
return LLVMIntType(type.width);
}
LLVMTypeRef
-lp_build_int_vec_type(union lp_type type)
+lp_build_int_vec_type(struct lp_type type)
{
LLVMTypeRef elem_type = lp_build_int_elem_type(type);
return LLVMVectorType(elem_type, type.length);
}
+struct lp_type
+lp_int_type(struct lp_type type)
+{
+ struct lp_type int_type;
+
+ memset(&int_type, 0, sizeof int_type);
+ int_type.width = type.width;
+ int_type.length = type.length;
+ return int_type;
+}
+
+
void
lp_build_context_init(struct lp_build_context *bld,
LLVMBuilderRef builder,
- union lp_type type)
+ struct lp_type type)
{
bld->builder = builder;
bld->type = type;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h
index 3ce566be64..ee5ca3483c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h
@@ -56,58 +56,55 @@
* on the types used for intermediate computations, such as signed vs unsigned,
* normalized values, or fixed point.
*/
-union lp_type {
- struct {
- /**
- * Floating-point. Cannot be used with fixed. Integer numbers are
- * represented by this zero.
- */
- unsigned floating:1;
-
- /**
- * Fixed-point. Cannot be used with floating. Integer numbers are
- * represented by this zero.
- */
- unsigned fixed:1;
-
- /**
- * Whether it can represent negative values or not.
- *
- * If this is not set for floating point, it means that all values are
- * assumed to be positive.
- */
- unsigned sign:1;
-
- /**
- * Whether values are normalized to fit [0, 1] interval, or [-1, 1]
- * interval for signed types.
- *
- * For integer types it means the representable integer range should be
- * interpreted as the interval above.
- *
- * For floating and fixed point formats it means the values should be
- * clamped to the interval above.
- */
- unsigned norm:1;
-
- /**
- * Element width.
- *
- * For fixed point values, the fixed point is assumed to be at half the
- * width.
- */
- unsigned width:14;
-
- /**
- * Vector length.
- *
- * width*length should be a power of two greater or equal to eight.
- *
- * @sa LP_MAX_VECTOR_LENGTH
- */
- unsigned length:14;
- };
- uint32_t value;
+struct lp_type {
+ /**
+ * Floating-point. Cannot be used with fixed. Integer numbers are
+ * represented by this zero.
+ */
+ unsigned floating:1;
+
+ /**
+ * Fixed-point. Cannot be used with floating. Integer numbers are
+ * represented by this zero.
+ */
+ unsigned fixed:1;
+
+ /**
+ * Whether it can represent negative values or not.
+ *
+ * If this is not set for floating point, it means that all values are
+ * assumed to be positive.
+ */
+ unsigned sign:1;
+
+ /**
+ * Whether values are normalized to fit [0, 1] interval, or [-1, 1]
+ * interval for signed types.
+ *
+ * For integer types it means the representable integer range should be
+ * interpreted as the interval above.
+ *
+ * For floating and fixed point formats it means the values should be
+ * clamped to the interval above.
+ */
+ unsigned norm:1;
+
+ /**
+ * Element width.
+ *
+ * For fixed point values, the fixed point is assumed to be at half the
+ * width.
+ */
+ unsigned width:14;
+
+ /**
+ * Vector length.
+ *
+ * width*length should be a power of two greater or equal to eight.
+ *
+ * @sa LP_MAX_VECTOR_LENGTH
+ */
+ unsigned length:14;
};
@@ -124,7 +121,7 @@ struct lp_build_context
* This not only describes the input/output LLVM types, but also whether
* to normalize/clamp the results.
*/
- union lp_type type;
+ struct lp_type type;
/** Same as lp_build_undef(type) */
LLVMValueRef undef;
@@ -138,37 +135,41 @@ struct lp_build_context
LLVMTypeRef
-lp_build_elem_type(union lp_type type);
+lp_build_elem_type(struct lp_type type);
LLVMTypeRef
-lp_build_vec_type(union lp_type type);
+lp_build_vec_type(struct lp_type type);
boolean
-lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type);
+lp_check_elem_type(struct lp_type type, LLVMTypeRef elem_type);
boolean
-lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type);
+lp_check_vec_type(struct lp_type type, LLVMTypeRef vec_type);
boolean
-lp_check_value(union lp_type type, LLVMValueRef val);
+lp_check_value(struct lp_type type, LLVMValueRef val);
LLVMTypeRef
-lp_build_int_elem_type(union lp_type type);
+lp_build_int_elem_type(struct lp_type type);
LLVMTypeRef
-lp_build_int_vec_type(union lp_type type);
+lp_build_int_vec_type(struct lp_type type);
+
+
+struct lp_type
+lp_int_type(struct lp_type type);
void
lp_build_context_init(struct lp_build_context *bld,
LLVMBuilderRef builder,
- union lp_type type);
+ struct lp_type type);
#endif /* !LP_BLD_TYPE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c
index 580cca5b46..bdcff94b9b 100644
--- a/src/gallium/drivers/llvmpipe/lp_clear.c
+++ b/src/gallium/drivers/llvmpipe/lp_clear.c
@@ -67,6 +67,7 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
util_pack_color(rgba, ps->format, &cv);
lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv);
}
+ llvmpipe->dirty_render_cache = TRUE;
}
if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 233d1df0e1..a4b2bd8c2a 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -141,8 +141,6 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe,
return PIPE_REFERENCED_FOR_WRITE;
}
- /* FIXME: we also need to do the same for the texture cache */
-
return PIPE_UNREFERENCED;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index d288460a1b..b4a22ff4a9 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -44,15 +44,47 @@
static void
lp_jit_init_globals(struct llvmpipe_screen *screen)
{
- /* struct lp_jit_context */
+ LLVMTypeRef texture_type;
+
+ /* struct lp_jit_texture */
{
LLVMTypeRef elem_types[4];
+
+ elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type();
+ elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
+ elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
+ elem_types[LP_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0);
+
+ texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_WIDTH);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_HEIGHT);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_STRIDE);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_DATA);
+ LP_CHECK_STRUCT_SIZE(struct lp_jit_texture,
+ screen->target, texture_type);
+
+ LLVMAddTypeName(screen->module, "texture", texture_type);
+ }
+
+ /* struct lp_jit_context */
+ {
+ LLVMTypeRef elem_types[5];
LLVMTypeRef context_type;
elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */
elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */
elem_types[2] = LLVMFloatType(); /* alpha_ref_value */
elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */
+ elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
@@ -64,6 +96,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
screen->target, context_type, 2);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
screen->target, context_type, 3);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
+ screen->target, context_type,
+ LP_JIT_CONTEXT_TEXTURES_INDEX);
LP_CHECK_STRUCT_SIZE(struct lp_jit_context,
screen->target, context_type);
@@ -117,7 +152,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module);
if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) {
- fprintf(stderr, "%s\n", error);
+ _debug_printf("%s\n", error);
LLVMDisposeMessage(error);
abort();
}
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index a7fb60f9f5..58f716ede2 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -38,11 +38,31 @@
#include "lp_bld_struct.h"
+#include "pipe/p_state.h"
+
struct tgsi_sampler;
struct llvmpipe_screen;
+struct lp_jit_texture
+{
+ uint32_t width;
+ uint32_t height;
+ uint32_t stride;
+ const void *data;
+};
+
+
+enum {
+ LP_JIT_TEXTURE_WIDTH = 0,
+ LP_JIT_TEXTURE_HEIGHT,
+ LP_JIT_TEXTURE_STRIDE,
+ LP_JIT_TEXTURE_DATA
+};
+
+
+
/**
* This structure is passed directly to the generated fragment shader.
*
@@ -60,11 +80,12 @@ struct lp_jit_context
struct tgsi_sampler **samplers;
- /* TODO: alpha reference value */
float alpha_ref_value;
- /* TODO: blend constant color */
+ /* FIXME: store (also?) in floats */
uint8_t *blend_color;
+
+ struct lp_jit_texture textures[PIPE_MAX_SAMPLERS];
};
@@ -80,6 +101,11 @@ struct lp_jit_context
#define lp_jit_context_blend_color(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, 3, "blend_color")
+#define LP_JIT_CONTEXT_TEXTURES_INDEX 4
+
+#define lp_jit_context_textures(_builder, _ptr) \
+ lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures")
+
typedef void
(*lp_jit_frag_func)(struct lp_jit_context *context,
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 125035771e..ff7ef8658a 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -27,8 +27,6 @@
#include "util/u_memory.h"
-#include "util/u_simple_screen.h"
-#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
@@ -67,8 +65,6 @@ llvmpipe_get_param(struct pipe_screen *screen, int param)
return 1;
case PIPE_CAP_GLSL:
return 1;
- case PIPE_CAP_S3TC:
- return 0;
case PIPE_CAP_ANISOTROPIC_FILTER:
return 0;
case PIPE_CAP_POINT_SPRITE:
@@ -196,8 +192,7 @@ static void
llvmpipe_destroy_screen( struct pipe_screen *_screen )
{
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
-
- struct pipe_winsys *winsys = _screen->winsys;
+ struct llvmpipe_winsys *winsys = screen->winsys;
lp_jit_screen_cleanup(screen);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index d145f6d6bb..2d2fc19a65 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -44,6 +44,7 @@
#include "pipe/p_thread.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "lp_bld_debug.h"
#include "lp_tile_cache.h"
#include "lp_tile_soa.h"
@@ -162,12 +163,12 @@ shade_quads(struct llvmpipe_context *llvmpipe,
else
depth = NULL;
- /* TODO: blend color */
+ /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */
+ assert(lp_check_alignment(mask, 16));
- assert((((uintptr_t)mask) & 0xf) == 0);
- assert((((uintptr_t)depth) & 0xf) == 0);
- assert((((uintptr_t)color) & 0xf) == 0);
- assert((((uintptr_t)llvmpipe->jit_context.blend_color) & 0xf) == 0);
+ assert(lp_check_alignment(depth, 16));
+ assert(lp_check_alignment(color, 16));
+ assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16));
/* run shader */
fs->current->jit_function( &llvmpipe->jit_context,
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index fb10329887..7b26ce61a3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -36,6 +36,7 @@
#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h"
#include "lp_jit.h"
+#include "lp_bld_sample.h" /* for struct lp_sampler_static_state */
#define LP_NEW_VIEWPORT 0x1
@@ -57,16 +58,20 @@
struct tgsi_sampler;
struct vertex_info;
-
+struct pipe_context;
+struct llvmpipe_context;
struct lp_fragment_shader;
struct lp_fragment_shader_variant_key
{
+ enum pipe_format zsbuf_format;
struct pipe_depth_state depth;
struct pipe_alpha_state alpha;
struct pipe_blend_state blend;
+
+ struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS];
};
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 6fbb057937..30fb41ea65 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -93,6 +93,23 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
vinfo->num_attribs = 0;
for (i = 0; i < lpfs->info.num_inputs; i++) {
int src;
+ enum interp_mode interp;
+
+ switch (lpfs->info.input_interpolate[i]) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ interp = INTERP_CONSTANT;
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ interp = INTERP_LINEAR;
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ interp = INTERP_PERSPECTIVE;
+ break;
+ default:
+ assert(0);
+ interp = INTERP_LINEAR;
+ }
+
switch (lpfs->info.input_semantic_name[i]) {
case TGSI_SEMANTIC_POSITION:
src = draw_find_vs_output(llvmpipe->draw,
@@ -108,7 +125,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
case TGSI_SEMANTIC_FOG:
src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0);
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
break;
case TGSI_SEMANTIC_GENERIC:
@@ -116,7 +133,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
/* this includes texcoords and varying vars */
src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC,
lpfs->info.input_semantic_index[i]);
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
break;
default:
@@ -250,7 +267,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
if (llvmpipe->dirty & (LP_NEW_FS |
LP_NEW_BLEND |
- LP_NEW_DEPTH_STENCIL_ALPHA))
+ LP_NEW_DEPTH_STENCIL_ALPHA |
+ LP_NEW_SAMPLER |
+ LP_NEW_TEXTURE))
llvmpipe_update_fs( llvmpipe );
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 94170bd716..9faed5a0b1 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -85,6 +85,7 @@
#include "lp_context.h"
#include "lp_state.h"
#include "lp_quad.h"
+#include "lp_tex_sample.h"
static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
@@ -130,21 +131,20 @@ generate_pos0(LLVMBuilderRef builder,
* Generate the depth test.
*/
static void
-generate_depth(struct llvmpipe_context *lp,
- LLVMBuilderRef builder,
- const struct pipe_depth_state *state,
- union lp_type src_type,
+generate_depth(LLVMBuilderRef builder,
+ const struct lp_fragment_shader_variant_key *key,
+ struct lp_type src_type,
struct lp_build_mask_context *mask,
LLVMValueRef src,
LLVMValueRef dst_ptr)
{
const struct util_format_description *format_desc;
- union lp_type dst_type;
+ struct lp_type dst_type;
- if(!lp->framebuffer.zsbuf)
+ if(!key->depth.enabled)
return;
- format_desc = util_format_description(lp->framebuffer.zsbuf->format);
+ format_desc = util_format_description(key->zsbuf_format);
assert(format_desc);
/* Pick the depth type. */
@@ -164,7 +164,7 @@ generate_depth(struct llvmpipe_context *lp,
#endif
lp_build_depth_test(builder,
- state,
+ &key->depth,
dst_type,
format_desc,
mask,
@@ -173,107 +173,6 @@ generate_depth(struct llvmpipe_context *lp,
}
-struct build_fetch_texel_context
-{
- LLVMValueRef context_ptr;
-
- LLVMValueRef samplers_ptr;
-
- /** Coords/texels store */
- LLVMValueRef store_ptr;
-};
-
-
-void PIPE_CDECL
-lp_fetch_texel_soa( struct tgsi_sampler **samplers,
- uint32_t unit,
- float *store )
-{
- struct tgsi_sampler *sampler = samplers[unit];
-
-#if 0
- uint j;
-
- debug_printf("%s sampler: %p (%p) store: %p\n",
- __FUNCTION__,
- sampler, *sampler,
- store );
-
- debug_printf("lodbias %f\n", store[12]);
-
- for (j = 0; j < 4; j++)
- debug_printf("sample %d texcoord %f %f\n",
- j,
- store[0+j],
- store[4+j]);
-#endif
-
- {
- float rgba[NUM_CHANNELS][QUAD_SIZE];
- sampler->get_samples(sampler,
- &store[0],
- &store[4],
- &store[8],
- 0.0f, /*store[12], lodbias */
- rgba);
- memcpy(store, rgba, sizeof rgba);
- }
-
-#if 0
- for (j = 0; j < 4; j++)
- debug_printf("sample %d result %f %f %f %f\n",
- j,
- store[0+j],
- store[4+j],
- store[8+j],
- store[12+j]);
-#endif
-}
-
-
-static void
-emit_fetch_texel( LLVMBuilderRef builder,
- void *context,
- unsigned unit,
- unsigned num_coords,
- const LLVMValueRef *coords,
- LLVMValueRef lodbias,
- LLVMValueRef *texel)
-{
- struct build_fetch_texel_context *bld = context;
- LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
- LLVMValueRef args[3];
- unsigned i;
-
- if(!bld->samplers_ptr)
- bld->samplers_ptr = lp_jit_context_samplers(builder, bld->context_ptr);
-
- if(!bld->store_ptr)
- bld->store_ptr = LLVMBuildArrayAlloca(builder,
- vec_type,
- LLVMConstInt(LLVMInt32Type(), 4, 0),
- "texel_store");
-
- for (i = 0; i < num_coords; i++) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef coord_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, "");
- LLVMBuildStore(builder, coords[i], coord_ptr);
- }
-
- args[0] = bld->samplers_ptr;
- args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
- args[2] = bld->store_ptr;
-
- lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
-
- for (i = 0; i < NUM_CHANNELS; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef texel_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, "");
- texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
- }
-}
-
-
/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
*/
@@ -282,11 +181,11 @@ generate_fs(struct llvmpipe_context *lp,
struct lp_fragment_shader *shader,
const struct lp_fragment_shader_variant_key *key,
LLVMBuilderRef builder,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef context_ptr,
unsigned i,
const struct lp_build_interp_soa_context *interp,
- struct build_fetch_texel_context *sampler,
+ struct lp_build_sampler_soa *sampler,
LLVMValueRef *pmask,
LLVMValueRef *color,
LLVMValueRef depth_ptr)
@@ -298,6 +197,7 @@ generate_fs(struct llvmpipe_context *lp,
LLVMValueRef consts_ptr;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
LLVMValueRef z = interp->pos[2];
+ struct lp_build_flow_context *flow;
struct lp_build_mask_context mask;
boolean early_depth_test;
unsigned attrib;
@@ -309,25 +209,35 @@ generate_fs(struct llvmpipe_context *lp,
consts_ptr = lp_jit_context_constants(builder, context_ptr);
- lp_build_mask_begin(&mask, builder, type, *pmask);
+ flow = lp_build_flow_create(builder);
+
+ memset(outputs, 0, sizeof outputs);
+
+ lp_build_flow_scope_begin(flow);
+
+ /* Declare the color and z variables */
+ for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+ color[chan] = LLVMGetUndef(vec_type);
+ lp_build_flow_scope_declare(flow, &color[chan]);
+ }
+ lp_build_flow_scope_declare(flow, &z);
+
+ lp_build_mask_begin(&mask, flow, type, *pmask);
early_depth_test =
- lp->depth_stencil->depth.enabled &&
- lp->framebuffer.zsbuf &&
- !lp->depth_stencil->alpha.enabled &&
- !lp->fs->info.uses_kill &&
- !lp->fs->info.writes_z;
+ key->depth.enabled &&
+ !key->alpha.enabled &&
+ !shader->info.uses_kill &&
+ !shader->info.writes_z;
if(early_depth_test)
- generate_depth(lp, builder, &key->depth,
+ generate_depth(builder, key,
type, &mask,
z, depth_ptr);
- memset(outputs, 0, sizeof outputs);
-
lp_build_tgsi_soa(builder, tokens, type, &mask,
consts_ptr, interp->pos, interp->inputs,
- outputs, emit_fetch_texel, sampler);
+ outputs, sampler);
for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
@@ -368,12 +278,16 @@ generate_fs(struct llvmpipe_context *lp,
}
if(!early_depth_test)
- generate_depth(lp, builder, &key->depth,
+ generate_depth(builder, key,
type, &mask,
z, depth_ptr);
lp_build_mask_end(&mask);
+ lp_build_flow_scope_end(flow);
+
+ lp_build_flow_destroy(flow);
+
*pmask = mask.value;
}
@@ -385,13 +299,15 @@ generate_fs(struct llvmpipe_context *lp,
static void
generate_blend(const struct pipe_blend_state *blend,
LLVMBuilderRef builder,
- union lp_type type,
+ struct lp_type type,
LLVMValueRef context_ptr,
LLVMValueRef mask,
LLVMValueRef *src,
LLVMValueRef dst_ptr)
{
struct lp_build_context bld;
+ struct lp_build_flow_context *flow;
+ struct lp_build_mask_context mask_ctx;
LLVMTypeRef vec_type;
LLVMTypeRef int_vec_type;
LLVMValueRef const_ptr;
@@ -400,11 +316,14 @@ generate_blend(const struct pipe_blend_state *blend,
LLVMValueRef res[4];
unsigned chan;
+ lp_build_context_init(&bld, builder, type);
+
+ flow = lp_build_flow_create(builder);
+ lp_build_mask_begin(&mask_ctx, flow, type, mask);
+
vec_type = lp_build_vec_type(type);
int_vec_type = lp_build_int_vec_type(type);
- lp_build_context_init(&bld, builder, type);
-
const_ptr = lp_jit_context_blend_color(builder, context_ptr);
const_ptr = LLVMBuildBitCast(builder, const_ptr,
LLVMPointerType(vec_type, 0), "");
@@ -422,11 +341,16 @@ generate_blend(const struct pipe_blend_state *blend,
lp_build_blend_soa(builder, blend, type, src, dst, con, res);
for(chan = 0; chan < 4; ++chan) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
- lp_build_name(res[chan], "res.%c", "rgba"[chan]);
- res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
- LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
+ if(blend->colormask & (1 << chan)) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+ lp_build_name(res[chan], "res.%c", "rgba"[chan]);
+ res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
+ LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
+ }
}
+
+ lp_build_mask_end(&mask_ctx);
+ lp_build_flow_destroy(flow);
}
@@ -440,8 +364,8 @@ generate_fragment(struct llvmpipe_context *lp,
{
struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
struct lp_fragment_shader_variant *variant;
- union lp_type fs_type;
- union lp_type blend_type;
+ struct lp_type fs_type;
+ struct lp_type blend_type;
LLVMTypeRef fs_elem_type;
LLVMTypeRef fs_vec_type;
LLVMTypeRef fs_int_vec_type;
@@ -462,7 +386,7 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMBuilderRef builder;
LLVMValueRef x0;
LLVMValueRef y0;
- struct build_fetch_texel_context sampler;
+ struct lp_build_sampler_soa *sampler;
struct lp_build_interp_soa_context interp;
LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
@@ -507,7 +431,7 @@ generate_fragment(struct llvmpipe_context *lp,
/* TODO: actually pick these based on the fs and color buffer
* characteristics. */
- fs_type.value = 0;
+ memset(&fs_type, 0, sizeof fs_type);
fs_type.floating = TRUE; /* floating point values */
fs_type.sign = TRUE; /* values are signed */
fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
@@ -515,7 +439,7 @@ generate_fragment(struct llvmpipe_context *lp,
fs_type.length = 4; /* 4 element per vector */
num_fs = 4;
- blend_type.value = 0;
+ memset(&blend_type, 0, sizeof blend_type);
blend_type.floating = FALSE; /* values are integers */
blend_type.sign = FALSE; /* values are unsigned */
blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */
@@ -586,8 +510,13 @@ generate_fragment(struct llvmpipe_context *lp,
a0_ptr, dadx_ptr, dady_ptr,
x0, y0, 2, 0);
- memset(&sampler, 0, sizeof sampler);
- sampler.context_ptr = context_ptr;
+#if 0
+ /* C texture sampling */
+ sampler = lp_c_sampler_soa_create(context_ptr);
+#else
+ /* code generated texture sampling */
+ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
+#endif
for(i = 0; i < num_fs; ++i) {
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
@@ -606,7 +535,7 @@ generate_fragment(struct llvmpipe_context *lp,
context_ptr,
i,
&interp,
- &sampler,
+ sampler,
&fs_mask[i],
out_color,
depth_ptr_i);
@@ -615,6 +544,8 @@ generate_fragment(struct llvmpipe_context *lp,
fs_out_color[chan][i] = out_color[chan];
}
+ sampler->destroy(sampler);
+
/*
* Convert the fs's output color and mask to fit to the blending type.
*/
@@ -765,18 +696,45 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
*/
static void
make_variant_key(struct llvmpipe_context *lp,
+ struct lp_fragment_shader *shader,
struct lp_fragment_shader_variant_key *key)
{
+ unsigned i;
+
memset(key, 0, sizeof *key);
- memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
+ if(lp->framebuffer.zsbuf &&
+ lp->depth_stencil->depth.enabled) {
+ key->zsbuf_format = lp->framebuffer.zsbuf->format;
+ memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
+ }
key->alpha.enabled = lp->depth_stencil->alpha.enabled;
if(key->alpha.enabled)
key->alpha.func = lp->depth_stencil->alpha.func;
/* alpha.ref_value is passed in jit_context */
- memcpy(&key->blend, lp->blend, sizeof key->blend);
+ if(lp->framebuffer.cbufs[0]) {
+ const struct util_format_description *format_desc;
+ unsigned chan;
+
+ memcpy(&key->blend, lp->blend, sizeof key->blend);
+
+ format_desc = util_format_description(lp->framebuffer.cbufs[0]->format);
+ assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB ||
+ format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB);
+
+ /* mask out color channels not present in the color buffer */
+ for(chan = 0; chan < 4; ++chan) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+ if(swizzle > 4)
+ key->blend.colormask &= ~(1 << chan);
+ }
+ }
+
+ for(i = 0; i < PIPE_MAX_SAMPLERS; ++i)
+ if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
+ lp_sampler_static_state(&key->sampler[i], lp->texture[i], lp->sampler[i]);
}
@@ -787,7 +745,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
struct lp_fragment_shader_variant_key key;
struct lp_fragment_shader_variant *variant;
- make_variant_key(lp, &key);
+ make_variant_key(lp, shader, &key);
variant = shader->variants;
while(variant) {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index 4fef541b1e..c69d90c723 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -98,6 +98,16 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe,
pipe_texture_reference(&llvmpipe->texture[i], tex);
lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex);
+
+ if(tex) {
+ struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex);
+ struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i];
+ jit_tex->width = tex->width[0];
+ jit_tex->height = tex->height[0];
+ jit_tex->stride = lp_tex->stride[0];
+ if(!lp_tex->dt)
+ jit_tex->data = lp_tex->data;
+ }
}
llvmpipe->num_textures = num;
diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
index 69aaae26e0..a88e110c66 100644
--- a/src/gallium/drivers/llvmpipe/lp_test.h
+++ b/src/gallium/drivers/llvmpipe/lp_test.h
@@ -86,43 +86,43 @@ random_float(void);
void
-dump_type(FILE *fp, union lp_type type);
+dump_type(FILE *fp, struct lp_type type);
double
-read_elem(union lp_type type, const void *src, unsigned index);
+read_elem(struct lp_type type, const void *src, unsigned index);
void
-write_elem(union lp_type type, void *dst, unsigned index, double src);
+write_elem(struct lp_type type, void *dst, unsigned index, double src);
void
-random_elem(union lp_type type, void *dst, unsigned index);
+random_elem(struct lp_type type, void *dst, unsigned index);
void
-read_vec(union lp_type type, const void *src, double *dst);
+read_vec(struct lp_type type, const void *src, double *dst);
void
-write_vec(union lp_type type, void *dst, const double *src);
+write_vec(struct lp_type type, void *dst, const double *src);
void
-random_vec(union lp_type type, void *dst);
+random_vec(struct lp_type type, void *dst);
boolean
-compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps);
+compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps);
boolean
-compare_vec(union lp_type type, const void *res, const void *ref);
+compare_vec(struct lp_type type, const void *res, const void *ref);
void
-dump_vec(FILE *fp, union lp_type type, const void *src);
+dump_vec(FILE *fp, struct lp_type type, const void *src);
#endif /* !LP_TEST_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 8dfad468e3..94b661dcba 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -80,7 +80,7 @@ static void
write_tsv_row(FILE *fp,
const struct pipe_blend_state *blend,
enum vector_mode mode,
- union lp_type type,
+ struct lp_type type,
double cycles,
boolean success)
{
@@ -125,7 +125,7 @@ static void
dump_blend_type(FILE *fp,
const struct pipe_blend_state *blend,
enum vector_mode mode,
- union lp_type type)
+ struct lp_type type)
{
fprintf(fp, "%s", mode ? "soa" : "aos");
@@ -153,7 +153,7 @@ static LLVMValueRef
add_blend_test(LLVMModuleRef module,
const struct pipe_blend_state *blend,
enum vector_mode mode,
- union lp_type type)
+ struct lp_type type)
{
LLVMTypeRef ret_type;
LLVMTypeRef vec_type;
@@ -467,7 +467,7 @@ test_one(unsigned verbose,
FILE *fp,
const struct pipe_blend_state *blend,
enum vector_mode mode,
- union lp_type type)
+ struct lp_type type)
{
LLVMModuleRef module = NULL;
LLVMValueRef func = NULL;
@@ -765,10 +765,10 @@ blend_funcs[] = {
};
-const union lp_type blend_types[] = {
+const struct lp_type blend_types[] = {
/* float, fixed, sign, norm, width, len */
- {{ TRUE, FALSE, FALSE, TRUE, 32, 4 }}, /* f32 x 4 */
- {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, /* u8n x 16 */
+ { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */
+ { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */
};
@@ -788,7 +788,7 @@ test_all(unsigned verbose, FILE *fp)
const unsigned *alpha_dst_factor;
struct pipe_blend_state blend;
enum vector_mode mode;
- const union lp_type *type;
+ const struct lp_type *type;
bool success = TRUE;
for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
@@ -841,27 +841,27 @@ test_some(unsigned verbose, FILE *fp, unsigned long n)
const unsigned *alpha_dst_factor;
struct pipe_blend_state blend;
enum vector_mode mode;
- const union lp_type *type;
+ const struct lp_type *type;
unsigned long i;
bool success = TRUE;
for(i = 0; i < n; ++i) {
- rgb_func = &blend_funcs[random() % num_funcs];
- alpha_func = &blend_funcs[random() % num_funcs];
- rgb_src_factor = &blend_factors[random() % num_factors];
- alpha_src_factor = &blend_factors[random() % num_factors];
+ rgb_func = &blend_funcs[rand() % num_funcs];
+ alpha_func = &blend_funcs[rand() % num_funcs];
+ rgb_src_factor = &blend_factors[rand() % num_factors];
+ alpha_src_factor = &blend_factors[rand() % num_factors];
do {
- rgb_dst_factor = &blend_factors[random() % num_factors];
+ rgb_dst_factor = &blend_factors[rand() % num_factors];
} while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
do {
- alpha_dst_factor = &blend_factors[random() % num_factors];
+ alpha_dst_factor = &blend_factors[rand() % num_factors];
} while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
- mode = random() & 1;
+ mode = rand() & 1;
- type = &blend_types[random() % num_types];
+ type = &blend_types[rand() % num_types];
memset(&blend, 0, sizeof blend);
blend.blend_enable = 1;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index e6489834af..9dcf58e5dc 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -59,8 +59,8 @@ write_tsv_header(FILE *fp)
static void
write_tsv_row(FILE *fp,
- union lp_type src_type,
- union lp_type dst_type,
+ struct lp_type src_type,
+ struct lp_type dst_type,
double cycles,
boolean success)
{
@@ -80,8 +80,8 @@ write_tsv_row(FILE *fp,
static void
dump_conv_types(FILE *fp,
- union lp_type src_type,
- union lp_type dst_type)
+ struct lp_type src_type,
+ struct lp_type dst_type)
{
fprintf(fp, "src_type=");
dump_type(fp, src_type);
@@ -96,8 +96,8 @@ dump_conv_types(FILE *fp,
static LLVMValueRef
add_conv_test(LLVMModuleRef module,
- union lp_type src_type, unsigned num_srcs,
- union lp_type dst_type, unsigned num_dsts)
+ struct lp_type src_type, unsigned num_srcs,
+ struct lp_type dst_type, unsigned num_dsts)
{
LLVMTypeRef args[2];
LLVMValueRef func;
@@ -145,8 +145,8 @@ add_conv_test(LLVMModuleRef module,
static boolean
test_one(unsigned verbose,
FILE *fp,
- union lp_type src_type,
- union lp_type dst_type)
+ struct lp_type src_type,
+ struct lp_type dst_type)
{
LLVMModuleRef module = NULL;
LLVMValueRef func = NULL;
@@ -343,35 +343,35 @@ test_one(unsigned verbose,
}
-const union lp_type conv_types[] = {
+const struct lp_type conv_types[] = {
/* float, fixed, sign, norm, width, len */
- {{ TRUE, FALSE, TRUE, TRUE, 32, 4 }},
- {{ TRUE, FALSE, TRUE, FALSE, 32, 4 }},
- {{ TRUE, FALSE, FALSE, TRUE, 32, 4 }},
- {{ TRUE, FALSE, FALSE, FALSE, 32, 4 }},
+ { TRUE, FALSE, TRUE, TRUE, 32, 4 },
+ { TRUE, FALSE, TRUE, FALSE, 32, 4 },
+ { TRUE, FALSE, FALSE, TRUE, 32, 4 },
+ { TRUE, FALSE, FALSE, FALSE, 32, 4 },
/* TODO: test fixed formats too */
- {{ FALSE, FALSE, TRUE, TRUE, 16, 8 }},
- {{ FALSE, FALSE, TRUE, FALSE, 16, 8 }},
- {{ FALSE, FALSE, FALSE, TRUE, 16, 8 }},
- {{ FALSE, FALSE, FALSE, FALSE, 16, 8 }},
-
- {{ FALSE, FALSE, TRUE, TRUE, 32, 4 }},
- {{ FALSE, FALSE, TRUE, FALSE, 32, 4 }},
- {{ FALSE, FALSE, FALSE, TRUE, 32, 4 }},
- {{ FALSE, FALSE, FALSE, FALSE, 32, 4 }},
-
- {{ FALSE, FALSE, TRUE, TRUE, 16, 8 }},
- {{ FALSE, FALSE, TRUE, FALSE, 16, 8 }},
- {{ FALSE, FALSE, FALSE, TRUE, 16, 8 }},
- {{ FALSE, FALSE, FALSE, FALSE, 16, 8 }},
-
- {{ FALSE, FALSE, TRUE, TRUE, 8, 16 }},
- {{ FALSE, FALSE, TRUE, FALSE, 8, 16 }},
- {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }},
- {{ FALSE, FALSE, FALSE, FALSE, 8, 16 }},
+ { FALSE, FALSE, TRUE, TRUE, 16, 8 },
+ { FALSE, FALSE, TRUE, FALSE, 16, 8 },
+ { FALSE, FALSE, FALSE, TRUE, 16, 8 },
+ { FALSE, FALSE, FALSE, FALSE, 16, 8 },
+
+ { FALSE, FALSE, TRUE, TRUE, 32, 4 },
+ { FALSE, FALSE, TRUE, FALSE, 32, 4 },
+ { FALSE, FALSE, FALSE, TRUE, 32, 4 },
+ { FALSE, FALSE, FALSE, FALSE, 32, 4 },
+
+ { FALSE, FALSE, TRUE, TRUE, 16, 8 },
+ { FALSE, FALSE, TRUE, FALSE, 16, 8 },
+ { FALSE, FALSE, FALSE, TRUE, 16, 8 },
+ { FALSE, FALSE, FALSE, FALSE, 16, 8 },
+
+ { FALSE, FALSE, TRUE, TRUE, 8, 16 },
+ { FALSE, FALSE, TRUE, FALSE, 8, 16 },
+ { FALSE, FALSE, FALSE, TRUE, 8, 16 },
+ { FALSE, FALSE, FALSE, FALSE, 8, 16 },
};
@@ -381,8 +381,8 @@ const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]);
boolean
test_all(unsigned verbose, FILE *fp)
{
- const union lp_type *src_type;
- const union lp_type *dst_type;
+ const struct lp_type *src_type;
+ const struct lp_type *dst_type;
bool success = TRUE;
for(src_type = conv_types; src_type < &conv_types[num_types]; ++src_type) {
@@ -407,16 +407,16 @@ test_all(unsigned verbose, FILE *fp)
boolean
test_some(unsigned verbose, FILE *fp, unsigned long n)
{
- const union lp_type *src_type;
- const union lp_type *dst_type;
+ const struct lp_type *src_type;
+ const struct lp_type *dst_type;
unsigned long i;
bool success = TRUE;
for(i = 0; i < n; ++i) {
- src_type = &conv_types[random() % num_types];
+ src_type = &conv_types[rand() % num_types];
do {
- dst_type = &conv_types[random() % num_types];
+ dst_type = &conv_types[rand() % num_types];
} while (src_type == dst_type || src_type->norm != dst_type->norm);
if(!test_one(verbose, fp, *src_type, *dst_type))
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 1d192355ee..d8455e5649 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -119,7 +119,7 @@ add_load_rgba_test(LLVMModuleRef module,
lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop);
- rgba = lp_build_load_rgba(builder, format, ptr);
+ rgba = lp_build_load_rgba_aos(builder, format, ptr);
LLVMBuildStore(builder, rgba, rgba_ptr);
lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop);
@@ -160,7 +160,7 @@ add_store_rgba_test(LLVMModuleRef module,
rgba = LLVMBuildLoad(builder, rgba_ptr, "");
- lp_build_store_rgba(builder, format, ptr, rgba);
+ lp_build_store_rgba_aos(builder, format, ptr, rgba);
LLVMBuildRetVoid(builder);
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index 49213fb4f0..4592dc0b2d 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -40,7 +40,7 @@
void
dump_type(FILE *fp,
- union lp_type type)
+ struct lp_type type)
{
fprintf(fp, "%s%s%u%sx%u",
type.sign ? (type.floating || type.fixed ? "" : "s") : "u",
@@ -52,7 +52,7 @@ dump_type(FILE *fp,
double
-read_elem(union lp_type type, const void *src, unsigned index)
+read_elem(struct lp_type type, const void *src, unsigned index)
{
double scale = lp_const_scale(type);
double value;
@@ -115,7 +115,7 @@ read_elem(union lp_type type, const void *src, unsigned index)
void
-write_elem(union lp_type type, void *dst, unsigned index, double value)
+write_elem(struct lp_type type, void *dst, unsigned index, double value)
{
assert(index < type.length);
if(!type.sign && value < 0.0)
@@ -184,11 +184,11 @@ write_elem(union lp_type type, void *dst, unsigned index, double value)
void
-random_elem(union lp_type type, void *dst, unsigned index)
+random_elem(struct lp_type type, void *dst, unsigned index)
{
double value;
assert(index < type.length);
- value = (double)random()/(double)RAND_MAX;
+ value = (double)rand()/(double)RAND_MAX;
if(!type.norm) {
unsigned long long mask;
if (type.floating)
@@ -199,17 +199,17 @@ random_elem(union lp_type type, void *dst, unsigned index)
mask = ((unsigned long long)1 << (type.width - 1)) - 1;
else
mask = ((unsigned long long)1 << type.width) - 1;
- value += (double)(mask & random());
+ value += (double)(mask & rand());
}
if(!type.sign)
- if(random() & 1)
+ if(rand() & 1)
value = -value;
write_elem(type, dst, index, value);
}
void
-read_vec(union lp_type type, const void *src, double *dst)
+read_vec(struct lp_type type, const void *src, double *dst)
{
unsigned i;
for (i = 0; i < type.length; ++i)
@@ -218,7 +218,7 @@ read_vec(union lp_type type, const void *src, double *dst)
void
-write_vec(union lp_type type, void *dst, const double *src)
+write_vec(struct lp_type type, void *dst, const double *src)
{
unsigned i;
for (i = 0; i < type.length; ++i)
@@ -229,12 +229,12 @@ write_vec(union lp_type type, void *dst, const double *src)
float
random_float(void)
{
- return (float)((double)random()/(double)RAND_MAX);
+ return (float)((double)rand()/(double)RAND_MAX);
}
void
-random_vec(union lp_type type, void *dst)
+random_vec(struct lp_type type, void *dst)
{
unsigned i;
for (i = 0; i < type.length; ++i)
@@ -243,7 +243,7 @@ random_vec(union lp_type type, void *dst)
boolean
-compare_vec_with_eps(union lp_type type, const void *res, const void *ref, double eps)
+compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps)
{
unsigned i;
for (i = 0; i < type.length; ++i) {
@@ -259,7 +259,7 @@ compare_vec_with_eps(union lp_type type, const void *res, const void *ref, doubl
boolean
-compare_vec(union lp_type type, const void *res, const void *ref)
+compare_vec(struct lp_type type, const void *res, const void *ref)
{
double eps = lp_const_eps(type);
return compare_vec_with_eps(type, res, ref, eps);
@@ -267,7 +267,7 @@ compare_vec(union lp_type type, const void *res, const void *ref)
void
-dump_vec(FILE *fp, union lp_type type, const void *src)
+dump_vec(FILE *fp, struct lp_type type, const void *src)
{
unsigned i;
for (i = 0; i < type.length; ++i) {
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
index 23a94b5b0d..773e848242 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
@@ -154,7 +154,7 @@ lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc)
if (lpt->timestamp != tc->timestamp) {
/* texture was modified, invalidate all cached tiles */
uint i;
- _debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp);
+ debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp);
for (i = 0; i < NUM_ENTRIES; i++) {
tc->entries[i].addr.bits.invalid = 1;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
index 628ec3f1ef..9ad1bde956 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -29,10 +29,13 @@
#define LP_TEX_SAMPLE_H
+#include <llvm-c/Core.h>
+
#include "tgsi/tgsi_exec.h"
struct llvmpipe_tex_tile_cache;
+struct lp_sampler_static_state;
/**
@@ -75,4 +78,24 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler,
float rgba[NUM_CHANNELS][QUAD_SIZE]);
+/**
+ * Texture sampling code generator that just calls lp_get_samples C function
+ * for the actual sampling computation.
+ *
+ * @param context_ptr LLVM value with the pointer to the struct lp_jit_context.
+ */
+struct lp_build_sampler_soa *
+lp_c_sampler_soa_create(LLVMValueRef context_ptr);
+
+
+/**
+ * Pure-LLVM texture sampling code generator.
+ *
+ * @param context_ptr LLVM value with the pointer to the struct lp_jit_context.
+ */
+struct lp_build_sampler_soa *
+lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key,
+ LLVMValueRef context_ptr);
+
+
#endif /* LP_TEX_SAMPLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
index 94eb6dad5a..a1365a045f 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -1578,3 +1578,136 @@ out:
tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba );
}
+
+void PIPE_CDECL
+lp_fetch_texel_soa( struct tgsi_sampler **samplers,
+ uint32_t unit,
+ float *store )
+{
+ struct tgsi_sampler *sampler = samplers[unit];
+
+#if 0
+ uint j;
+
+ debug_printf("%s sampler: %p (%p) store: %p\n",
+ __FUNCTION__,
+ sampler, *sampler,
+ store );
+
+ debug_printf("lodbias %f\n", store[12]);
+
+ for (j = 0; j < 4; j++)
+ debug_printf("sample %d texcoord %f %f\n",
+ j,
+ store[0+j],
+ store[4+j]);
+#endif
+
+ {
+ float rgba[NUM_CHANNELS][QUAD_SIZE];
+ sampler->get_samples(sampler,
+ &store[0],
+ &store[4],
+ &store[8],
+ 0.0f, /*store[12], lodbias */
+ rgba);
+ memcpy(store, rgba, sizeof rgba);
+ }
+
+#if 0
+ for (j = 0; j < 4; j++)
+ debug_printf("sample %d result %f %f %f %f\n",
+ j,
+ store[0+j],
+ store[4+j],
+ store[8+j],
+ store[12+j]);
+#endif
+}
+
+
+#include "lp_bld_type.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_tgsi.h"
+
+
+struct lp_c_sampler_soa
+{
+ struct lp_build_sampler_soa base;
+
+ LLVMValueRef context_ptr;
+
+ LLVMValueRef samplers_ptr;
+
+ /** Coords/texels store */
+ LLVMValueRef store_ptr;
+};
+
+
+static void
+lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+ FREE(sampler);
+}
+
+
+static void
+lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel)
+{
+ struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler;
+ LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
+ LLVMValueRef args[3];
+ unsigned i;
+
+ if(!sampler->samplers_ptr)
+ sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr);
+
+ if(!sampler->store_ptr)
+ sampler->store_ptr = LLVMBuildArrayAlloca(builder,
+ vec_type,
+ LLVMConstInt(LLVMInt32Type(), 4, 0),
+ "texel_store");
+
+ for (i = 0; i < num_coords; i++) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
+ LLVMBuildStore(builder, coords[i], coord_ptr);
+ }
+
+ args[0] = sampler->samplers_ptr;
+ args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+ args[2] = sampler->store_ptr;
+
+ lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
+
+ for (i = 0; i < NUM_CHANNELS; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
+ texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
+ }
+}
+
+
+struct lp_build_sampler_soa *
+lp_c_sampler_soa_create(LLVMValueRef context_ptr)
+{
+ struct lp_c_sampler_soa *sampler;
+
+ sampler = CALLOC_STRUCT(lp_c_sampler_soa);
+ if(!sampler)
+ return NULL;
+
+ sampler->base.destroy = lp_c_sampler_soa_destroy;
+ sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel;
+ sampler->context_ptr = context_ptr;
+
+ return &sampler->base;
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
new file mode 100644
index 0000000000..d2a6ae21f5
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
@@ -0,0 +1,196 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Texture sampling code generation
+ *
+ * This file is nothing more than ugly glue between three largely independent
+ * entities:
+ * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa)
+ * - texture sampling code generation (i.e., lp_build_sample_soa)
+ * - LLVM pipe driver
+ *
+ * All interesting code is in the functions mentioned above. There is really
+ * nothing to see here.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_type.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_sample.h"
+#include "lp_bld_tgsi.h"
+#include "lp_state.h"
+#include "lp_tex_sample.h"
+
+
+/**
+ * This provides the bridge between the sampler state store in lp_jit_context
+ * and lp_jit_texture and the sampler code generator. It provides the
+ * texture layout information required by the texture sampler code generator
+ * in terms of the state stored in lp_jit_context and lp_jit_texture in runtime.
+ */
+struct llvmpipe_sampler_dynamic_state
+{
+ struct lp_sampler_dynamic_state base;
+
+ const struct lp_sampler_static_state *static_state;
+
+ LLVMValueRef context_ptr;
+};
+
+
+/**
+ * This is the bridge between our sampler and the TGSI translator.
+ */
+struct lp_llvm_sampler_soa
+{
+ struct lp_build_sampler_soa base;
+
+ struct llvmpipe_sampler_dynamic_state dynamic_state;
+};
+
+
+/**
+ * Fetch the specified member of the lp_jit_texture structure.
+ *
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+lp_llvm_texture_member(struct lp_sampler_dynamic_state *base,
+ LLVMBuilderRef builder,
+ unsigned unit,
+ unsigned member_index,
+ const char *member_name)
+{
+ struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base;
+ LLVMValueRef indices[4];
+ LLVMValueRef ptr;
+ LLVMValueRef res;
+
+ assert(unit < PIPE_MAX_SAMPLERS);
+
+ /* context[0] */
+ indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ /* context[0].textures */
+ indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CONTEXT_TEXTURES_INDEX, 0);
+ /* context[0].textures[unit] */
+ indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+ /* context[0].textures[unit].member */
+ indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0);
+
+ ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), "");
+
+ res = LLVMBuildLoad(builder, ptr, "");
+
+ lp_build_name(res, "context.texture%u.%s", unit, member_name);
+
+ return res;
+}
+
+
+/**
+ * Helper macro to instantiate the functions that generate the code to fetch
+ * the members of lp_jit_texture to fulfill the sampler code generator requests.
+ *
+ * This complexity is the price we have to pay to keep the texture sampler code
+ * generator a reusable module without dependencies to llvmpipe internals.
+ */
+#define LP_LLVM_TEXTURE_MEMBER(_name, _index) \
+ static LLVMValueRef \
+ lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \
+ LLVMBuilderRef builder, \
+ unsigned unit) \
+ { \
+ return lp_llvm_texture_member(base, builder, unit, _index, #_name ); \
+ }
+
+
+LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH)
+LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT)
+LP_LLVM_TEXTURE_MEMBER(stride, LP_JIT_TEXTURE_STRIDE)
+LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA)
+
+
+static void
+lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+ FREE(sampler);
+}
+
+
+static void
+lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel)
+{
+ struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base;
+
+ assert(unit < PIPE_MAX_SAMPLERS);
+
+ lp_build_sample_soa(builder,
+ &sampler->dynamic_state.static_state[unit],
+ &sampler->dynamic_state.base,
+ type,
+ unit,
+ num_coords,
+ coords,
+ lodbias,
+ texel);
+}
+
+
+struct lp_build_sampler_soa *
+lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
+ LLVMValueRef context_ptr)
+{
+ struct lp_llvm_sampler_soa *sampler;
+
+ sampler = CALLOC_STRUCT(lp_llvm_sampler_soa);
+ if(!sampler)
+ return NULL;
+
+ sampler->base.destroy = lp_llvm_sampler_soa_destroy;
+ sampler->base.emit_fetch_texel = lp_llvm_sampler_soa_emit_fetch_texel;
+ sampler->dynamic_state.base.width = lp_llvm_texture_width;
+ sampler->dynamic_state.base.height = lp_llvm_texture_height;
+ sampler->dynamic_state.base.stride = lp_llvm_texture_stride;
+ sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr;
+ sampler->dynamic_state.static_state = static_state;
+ sampler->dynamic_state.context_ptr = context_ptr;
+
+ return &sampler->base;
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
index 143afec3d3..2e576e6039 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
@@ -225,11 +225,14 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc)
tc->clear_val);
screen->transfer_unmap(screen, pt);
+
+ tile->status = LP_TILE_STATUS_UNDEFINED;
break;
}
case LP_TILE_STATUS_DEFINED:
lp_put_tile_rgba_soa(pt, x, y, tile->color);
+ tile->status = LP_TILE_STATUS_UNDEFINED;
break;
}
}
@@ -257,7 +260,7 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc,
case LP_TILE_STATUS_UNDEFINED:
/* get new tile data from transfer */
- lp_get_tile_rgba_soa(pt, x, y, tile->color);
+ lp_get_tile_rgba_soa(pt, x & ~(TILE_SIZE - 1), y & ~(TILE_SIZE - 1), tile->color);
tile->status = LP_TILE_STATUS_DEFINED;
break;