summaryrefslogtreecommitdiff
path: root/src/gallium/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r--src/gallium/drivers/i915simple/i915_prim_vbuf.c94
-rw-r--r--src/gallium/drivers/i915simple/intel_winsys.h11
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile5
-rw-r--r--src/gallium/drivers/llvmpipe/README23
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.c226
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.h48
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_conv.c18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_debug.c17
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_debug.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.c10
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_flow.c410
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_flow.h38
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format.h62
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_aos.c30
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_soa.c208
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_logic.c13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_logic.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample.h135
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c411
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_struct.c21
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_struct.h12
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_swizzle.c15
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_swizzle.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_tgsi.h35
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c464
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_type.c11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_type.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_clear.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c37
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h30
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c11
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h7
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c222
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_sampler.c10
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_format.c4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_cache.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample.h23
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample_c.c (renamed from src/gallium/drivers/llvmpipe/lp_tex_sample.c)133
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c196
-rw-r--r--src/gallium/drivers/nv50/nv50_miptree.c1
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c6
-rw-r--r--src/gallium/drivers/nv50/nv50_surface.c6
-rw-r--r--src/gallium/drivers/r300/r300_context.h11
-rw-r--r--src/gallium/drivers/r300/r300_emit.c4
-rw-r--r--src/gallium/drivers/r300/r300_render.c68
-rw-r--r--src/gallium/drivers/r300/r300_screen.c7
-rw-r--r--src/gallium/drivers/r300/r300_surface.c6
-rw-r--r--src/gallium/drivers/r300/r300_texture.c39
-rw-r--r--src/gallium/drivers/r300/r300_texture.h4
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c1
-rw-r--r--src/gallium/drivers/softpipe/sp_clear.c6
-rw-r--r--src/gallium/drivers/softpipe/sp_clear.h1
-rw-r--r--src/gallium/drivers/softpipe/sp_context.c27
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.c33
-rw-r--r--src/gallium/drivers/softpipe/sp_texture.h3
59 files changed, 2664 insertions, 584 deletions
diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
index 508f4560e4..b3a7774fd6 100644
--- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c
@@ -44,6 +44,7 @@
#include "pipe/p_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_fifo.h"
#include "i915_context.h"
#include "i915_reg.h"
@@ -76,8 +77,13 @@ struct i915_vbuf_render {
size_t vbo_size;
size_t vbo_offset;
void *vbo_ptr;
- size_t vbo_alloc_size;
size_t vbo_max_used;
+
+ /* stuff for the pool */
+ struct util_fifo *pool_fifo;
+ unsigned pool_used;
+ unsigned pool_buffer_size;
+ boolean pool_not_used;
};
@@ -106,33 +112,72 @@ i915_vbuf_render_get_vertex_info(struct vbuf_render *render)
}
static boolean
+i915_vbuf_render_reserve(struct i915_vbuf_render *i915_render, size_t size)
+{
+ struct i915_context *i915 = i915_render->i915;
+
+ if (i915_render->vbo_size < size + i915_render->vbo_offset)
+ return FALSE;
+
+ if (i915->vbo_flushed)
+ return FALSE;
+
+ return TRUE;
+}
+
+static void
+i915_vbuf_render_new_buf(struct i915_vbuf_render *i915_render, size_t size)
+{
+ struct i915_context *i915 = i915_render->i915;
+ struct intel_winsys *iws = i915->iws;
+
+ if (i915_render->vbo) {
+ if (i915_render->pool_not_used)
+ iws->buffer_destroy(iws, i915_render->vbo);
+ else
+ u_fifo_add(i915_render->pool_fifo, i915_render->vbo);
+ i915_render->vbo = NULL;
+ }
+
+ i915->vbo_flushed = 0;
+
+ i915_render->vbo_size = MAX2(size, i915_render->pool_buffer_size);
+ i915_render->vbo_offset = 0;
+
+ if (i915_render->vbo_size != i915_render->pool_buffer_size) {
+ i915_render->pool_not_used = TRUE;
+ i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64,
+ INTEL_NEW_VERTEX);
+ } else {
+ i915_render->pool_not_used = FALSE;
+
+ if (i915_render->pool_used >= 2) {
+ FLUSH_BATCH(NULL);
+ i915->vbo_flushed = 0;
+ i915_render->pool_used = 0;
+ }
+ u_fifo_pop(i915_render->pool_fifo, (void**)&i915_render->vbo);
+ }
+}
+
+static boolean
i915_vbuf_render_allocate_vertices(struct vbuf_render *render,
ushort vertex_size,
ushort nr_vertices)
{
struct i915_vbuf_render *i915_render = i915_vbuf_render(render);
struct i915_context *i915 = i915_render->i915;
- struct intel_winsys *iws = i915->iws;
size_t size = (size_t)vertex_size * (size_t)nr_vertices;
/* FIXME: handle failure */
assert(!i915->vbo);
- if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) {
- } else {
- i915->vbo_flushed = 0;
- if (i915_render->vbo) {
- iws->buffer_destroy(iws, i915_render->vbo);
- i915_render->vbo = NULL;
- }
- }
+ if (!i915_vbuf_render_reserve(i915_render, size)) {
- if (!i915_render->vbo) {
- i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size);
- i915_render->vbo_offset = 0;
- i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64,
- INTEL_NEW_VERTEX);
+ if (i915->vbo_flushed)
+ i915_render->pool_used = 0;
+ i915_vbuf_render_new_buf(i915_render, size);
}
i915_render->vertex_size = vertex_size;
@@ -504,6 +549,7 @@ i915_vbuf_render_create(struct i915_context *i915)
{
struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render);
struct intel_winsys *iws = i915->iws;
+ int i;
i915_render->i915 = i915;
@@ -524,14 +570,24 @@ i915_vbuf_render_create(struct i915_context *i915)
i915_render->base.release_vertices = i915_vbuf_render_release_vertices;
i915_render->base.destroy = i915_vbuf_render_destroy;
- i915_render->vbo_alloc_size = 128 * 4096;
- i915_render->vbo_size = i915_render->vbo_alloc_size;
+
+ i915_render->vbo = NULL;
+ i915_render->vbo_size = 0;
i915_render->vbo_offset = 0;
- i915_render->vbo = iws->buffer_create(iws, i915_render->vbo_size, 64,
- INTEL_NEW_VERTEX);
+
+ i915_render->pool_used = FALSE;
+ i915_render->pool_buffer_size = 128 * 4096;
+ i915_render->pool_fifo = u_fifo_create(6);
+ for (i = 0; i < 6; i++)
+ u_fifo_add(i915_render->pool_fifo,
+ iws->buffer_create(iws, i915_render->pool_buffer_size, 64,
+ INTEL_NEW_VERTEX));
+
+#if 0
/* TODO JB: is this realy needed? */
i915_render->vbo_ptr = iws->buffer_map(iws, i915_render->vbo, TRUE);
iws->buffer_unmap(iws, i915_render->vbo);
+#endif
return &i915_render->base;
}
diff --git a/src/gallium/drivers/i915simple/intel_winsys.h b/src/gallium/drivers/i915simple/intel_winsys.h
index f949f52a9c..42c5e7470e 100644
--- a/src/gallium/drivers/i915simple/intel_winsys.h
+++ b/src/gallium/drivers/i915simple/intel_winsys.h
@@ -150,6 +150,17 @@ struct intel_winsys {
void (*buffer_unmap)(struct intel_winsys *iws,
struct intel_buffer *buffer);
+ /**
+ * Write to a buffer.
+ *
+ * Arguments follows pwrite(2)
+ */
+ int (*buffer_write)(struct intel_winsys *iws,
+ struct intel_buffer *dst,
+ const void *src,
+ size_t size,
+ size_t offset);
+
void (*buffer_destroy)(struct intel_winsys *iws,
struct intel_buffer *buffer);
/*@}*/
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 6e63a0c2b7..06c586e6bb 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -15,9 +15,11 @@ C_SOURCES = \
lp_bld_depth.c \
lp_bld_flow.c \
lp_bld_format_aos.c \
+ lp_bld_format_soa.c \
lp_bld_interp.c \
lp_bld_intr.c \
lp_bld_logic.c \
+ lp_bld_sample_soa.c \
lp_bld_swizzle.c \
lp_bld_struct.c \
lp_bld_tgsi_soa.c \
@@ -44,7 +46,8 @@ C_SOURCES = \
lp_state_vs.c \
lp_surface.c \
lp_tex_cache.c \
- lp_tex_sample.c \
+ lp_tex_sample_c.c \
+ lp_tex_sample_llvm.c \
lp_texture.c \
lp_tile_cache.c \
lp_tile_soa.c
diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index 498d21dea6..89d08834a3 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -8,13 +8,16 @@ Done so far is:
- the whole fragment pipeline is code generated in a single function
+ - input interpolation
+
- depth testing
+ - texture sampling (not all state/formats are supported)
+
- fragment shader TGSI translation
- same level of support as the TGSI SSE2 exec machine, with the exception
we don't fallback to TGSI interpretation when an unsupported opcode is
found, but just ignore it
- - texture sampling via an intrinsic call
- done in SoA layout
- input interpolation also code generated
@@ -28,16 +31,17 @@ Done so far is:
any width and length
- not all operations are implemented for these types yet though
-Most mesa/progs/demos/* work. Speed is on par with Keith's softpipe-opt branch,
-which includes hand written fast implementations for common cases.
+Most mesa/progs/demos/* work.
To do (probably by this order):
- code generate stipple and stencil testing
- - code generate texture sampling
+ - translate the remaining bits of texture sampling state
- translate TGSI control flow instructions, and all other remaining opcodes
+
+ - integrate with the draw module for VS code generation
- code generate the triangle setup and rasterization
@@ -93,7 +97,7 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as
make linux-llvm
-but the rest of these instructions assume scons is used.
+but the rest of these instructions assume that scons is used.
Using
@@ -108,6 +112,9 @@ or
export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/lib:$LD_LIBRARY_PATH
+For performance evaluation pass debug=no to scons, and use the corresponding
+lib directory without the "-debug" suffix.
+
Unit testing
============
@@ -119,7 +126,7 @@ build/linux-???-debug/gallium/drivers/llvmpipe:
- lp_test_conv: SIMD vector conversion
- lp_test_format: pixel unpacking/packing
-Some of this tests can output results and benchmarks to a tab-seperated-file
+Some of this tests can output results and benchmarks to a tab-separated-file
for posterior analysis, e.g.:
build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv
@@ -133,10 +140,10 @@ Development Notes
at the top of the lp_bld_*.c functions.
- All lp_bld_*.[ch] are isolated from the rest of the driver, and could/may be
- put in a standalone Gallium state -> LLVM IR translation module.
+ put in a stand-alone Gallium state -> LLVM IR translation module.
- We use LLVM-C bindings for now. They are not documented, but follow the C++
interfaces very closely, and appear to be complete enough for code
generation. See
http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
- for a standalone example.
+ for a stand-alone example.
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 5c29bdac56..dea4b703c4 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -3,7 +3,7 @@ Import('*')
env = env.Clone()
env.Tool('llvm')
-if 'LLVM_VERSION' not in env:
+if env.has_key('LLVM_VERSION') is False:
print 'warning: LLVM not found: not building llvmpipe'
Return()
@@ -23,8 +23,10 @@ llvmpipe = env.ConvenienceLibrary(
'lp_bld_depth.c',
'lp_bld_flow.c',
'lp_bld_format_aos.c',
+ 'lp_bld_format_soa.c',
'lp_bld_interp.c',
'lp_bld_intr.c',
+ 'lp_bld_sample_soa.c',
'lp_bld_struct.c',
'lp_bld_logic.c',
'lp_bld_swizzle.c',
@@ -52,7 +54,8 @@ llvmpipe = env.ConvenienceLibrary(
'lp_state_vs.c',
'lp_surface.c',
'lp_tex_cache.c',
- 'lp_tex_sample.c',
+ 'lp_tex_sample_c.c',
+ 'lp_tex_sample_llvm.c',
'lp_texture.c',
'lp_tile_cache.c',
'lp_tile_soa.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 09a57ff33d..ce3e5f91c0 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -502,6 +502,31 @@ lp_build_div(struct lp_build_context *bld,
}
+LLVMValueRef
+lp_build_lerp(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef v0,
+ LLVMValueRef v1)
+{
+ return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0)));
+}
+
+
+LLVMValueRef
+lp_build_lerp_2d(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef v00,
+ LLVMValueRef v01,
+ LLVMValueRef v10,
+ LLVMValueRef v11)
+{
+ LLVMValueRef v0 = lp_build_lerp(bld, x, v00, v01);
+ LLVMValueRef v1 = lp_build_lerp(bld, x, v10, v11);
+ return lp_build_lerp(bld, y, v0, v1);
+}
+
+
/**
* Generate min(a, b)
* Do checks for special cases.
@@ -566,20 +591,31 @@ lp_build_abs(struct lp_build_context *bld,
LLVMValueRef a)
{
const union lp_type type = bld->type;
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
if(!type.sign)
return a;
- /* XXX: is this really necessary? */
+ if(type.floating) {
+ /* Mask out the sign bit */
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1);
+ a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ a = LLVMBuildAnd(bld->builder, a, mask, "");
+ a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+ return a;
+ }
+
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(!type.floating && type.width*type.length == 128) {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- if(type.width == 8)
+ if(type.width*type.length == 128) {
+ switch(type.width) {
+ case 8:
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
- if(type.width == 16)
+ case 16:
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a);
- if(type.width == 32)
+ case 32:
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
+ }
}
#endif
@@ -588,6 +624,184 @@ lp_build_abs(struct lp_build_context *bld,
LLVMValueRef
+lp_build_sgn(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const union lp_type type = bld->type;
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMValueRef cond;
+ LLVMValueRef res;
+
+ /* Handle non-zero case */
+ if(!type.sign) {
+ /* if not zero then sign must be positive */
+ res = bld->one;
+ }
+ else if(type.floating) {
+ /* Take the sign bit and add it to 1 constant */
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+ LLVMValueRef one;
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ one = LLVMConstBitCast(bld->one, int_vec_type);
+ res = LLVMBuildOr(bld->builder, sign, one, "");
+ res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+ }
+ else
+ {
+ LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
+ cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
+ res = lp_build_select(bld, cond, bld->one, minus_one);
+ }
+
+ /* Handle zero */
+ cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
+ res = lp_build_select(bld, cond, bld->zero, bld->one);
+
+ return res;
+}
+
+
+enum lp_build_round_sse41_mode
+{
+ LP_BUILD_ROUND_SSE41_NEAREST = 0,
+ LP_BUILD_ROUND_SSE41_FLOOR = 1,
+ LP_BUILD_ROUND_SSE41_CEIL = 2,
+ LP_BUILD_ROUND_SSE41_TRUNCATE = 3
+};
+
+
+static INLINE LLVMValueRef
+lp_build_round_sse41(struct lp_build_context *bld,
+ LLVMValueRef a,
+ enum lp_build_round_sse41_mode mode)
+{
+ const union lp_type type = bld->type;
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ const char *intrinsic;
+
+ assert(type.floating);
+ assert(type.width*type.length == 128);
+
+ switch(type.width) {
+ case 32:
+ intrinsic = "llvm.x86.sse41.round.ps";
+ break;
+ case 64:
+ intrinsic = "llvm.x86.sse41.round.pd";
+ break;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+
+ return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
+ LLVMConstInt(LLVMInt32Type(), mode, 0));
+}
+
+
+LLVMValueRef
+lp_build_round(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const union lp_type type = bld->type;
+
+ assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
+#endif
+
+ /* FIXME */
+ assert(0);
+ return bld->undef;
+}
+
+
+LLVMValueRef
+lp_build_floor(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const union lp_type type = bld->type;
+
+ assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
+#endif
+
+ /* FIXME */
+ assert(0);
+ return bld->undef;
+}
+
+
+LLVMValueRef
+lp_build_ceil(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const union lp_type type = bld->type;
+
+ assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
+#endif
+
+ /* FIXME */
+ assert(0);
+ return bld->undef;
+}
+
+
+LLVMValueRef
+lp_build_trunc(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const union lp_type type = bld->type;
+
+ assert(type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
+#endif
+
+ /* FIXME */
+ assert(0);
+ return bld->undef;
+}
+
+
+/**
+ * Convert to integer, through whichever rounding method that's fastest,
+ * typically truncating to zero.
+ */
+LLVMValueRef
+lp_build_int(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const union lp_type type = bld->type;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+
+ assert(type.floating);
+
+ return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+}
+
+
+LLVMValueRef
+lp_build_ifloor(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ a = lp_build_floor(bld, a);
+ a = lp_build_int(bld, a);
+ return a;
+}
+
+
+LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a)
{
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index fc8cb25966..5e083b847f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -72,6 +72,26 @@ lp_build_div(struct lp_build_context *bld,
LLVMValueRef b);
LLVMValueRef
+lp_build_lerp(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef v0,
+ LLVMValueRef v1);
+
+/**
+ * Bilinear interpolation.
+ *
+ * Values indices are in v_{yx}.
+ */
+LLVMValueRef
+lp_build_lerp_2d(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef v00,
+ LLVMValueRef v01,
+ LLVMValueRef v10,
+ LLVMValueRef v11);
+
+LLVMValueRef
lp_build_min(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
@@ -86,6 +106,34 @@ lp_build_abs(struct lp_build_context *bld,
LLVMValueRef a);
LLVMValueRef
+lp_build_sgn(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_round(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_floor(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_ceil(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_trunc(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_int(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_ifloor(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
index c8954c8a34..c5a71d2c72 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
@@ -122,7 +122,7 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
int shift = dst_width - n;
res = LLVMBuildShl(builder, res, lp_build_int_const_scalar(src_type, shift), "");
- /* Fill in the empty lower bits for added precision? */
+ /* TODO: Fill in the empty lower bits for additional precision? */
#if 0
{
LLVMValueRef msb;
@@ -244,7 +244,7 @@ lp_build_const_pack_shuffle(unsigned n)
* Expand the bit width.
*
* This will only change the number of bits the values are represented, not the
- * values themselved.
+ * values themselves.
*/
static void
lp_build_expand(LLVMBuilderRef builder,
@@ -391,11 +391,11 @@ lp_build_pack2(LLVMBuilderRef builder,
* TODO: Handle saturation consistently.
*/
static LLVMValueRef
-lp_build_trunc(LLVMBuilderRef builder,
- union lp_type src_type,
- union lp_type dst_type,
- boolean clamped,
- const LLVMValueRef *src, unsigned num_srcs)
+lp_build_pack(LLVMBuilderRef builder,
+ union lp_type src_type,
+ union lp_type dst_type,
+ boolean clamped,
+ const LLVMValueRef *src, unsigned num_srcs)
{
LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
unsigned i;
@@ -565,7 +565,7 @@ lp_build_conv(LLVMBuilderRef builder,
if(tmp_type.width > dst_type.width) {
assert(num_dsts == 1);
- tmp[0] = lp_build_trunc(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
+ tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
tmp_type.width = dst_type.width;
tmp_type.length = dst_type.length;
num_tmps = 1;
@@ -689,7 +689,7 @@ lp_build_conv_mask(LLVMBuilderRef builder,
if(src_type.width > dst_type.width) {
assert(num_dsts == 1);
- dst[0] = lp_build_trunc(builder, src_type, dst_type, TRUE, src, num_srcs);
+ dst[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);
}
else if(src_type.width < dst_type.width) {
assert(num_srcs == 1);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
index 30925b5f41..59d8f492e6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
@@ -30,10 +30,27 @@
#include <udis86.h>
#endif
+#include "util/u_math.h"
#include "util/u_debug.h"
#include "lp_bld_debug.h"
+/**
+ * Check alignment.
+ *
+ * It is important that this check is not implemented as a macro or inlined
+ * function, as the compiler assumptions in respect to alignment of global
+ * and stack variables would often make the check a no op, defeating the
+ * whole purpose of the exercise.
+ */
+boolean
+lp_check_alignment(const void *ptr, unsigned alignment)
+{
+ assert(util_is_pot(alignment));
+ return ((uintptr_t)ptr & (alignment - 1)) == 0;
+}
+
+
void
lp_disassemble(const void* func)
{
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.h b/src/gallium/drivers/llvmpipe/lp_bld_debug.h
index ecdafef76d..583e6132b4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_debug.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.h
@@ -53,6 +53,10 @@ lp_build_name(LLVMValueRef val, const char *format, ...)
}
+boolean
+lp_check_alignment(const void *ptr, unsigned alignment);
+
+
void
lp_disassemble(const void* func);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 2cd6e6b921..e5fe81193f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -179,12 +179,13 @@ lp_build_depth_test(LLVMBuilderRef builder,
padding_right = 0;
for(chan = 0; chan < z_swizzle; ++chan)
padding_right += format_desc->channel[chan].size;
- padding_left = format_desc->block.bits - format_desc->channel[z_swizzle].size;
+ padding_left = format_desc->block.bits -
+ (padding_right + format_desc->channel[z_swizzle].size);
if(padding_left || padding_right) {
- const long long mask_left = ((long long)1 << (format_desc->block.bits - padding_left)) - 1;
- const long long mask_right = ((long long)1 << (padding_right)) - 1;
- z_bitmask = lp_build_int_const_scalar(type, mask_left & mask_right);
+ const unsigned long long mask_left = ((unsigned long long)1 << (format_desc->block.bits - padding_left)) - 1;
+ const unsigned long long mask_right = ((unsigned long long)1 << (padding_right)) - 1;
+ z_bitmask = lp_build_int_const_scalar(type, mask_left ^ mask_right);
}
if(padding_left)
@@ -210,5 +211,6 @@ lp_build_depth_test(LLVMBuilderRef builder,
LLVMBuildStore(builder, dst, dst_ptr);
}
+ /* FIXME */
assert(!state->occlusion_count);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
index 9d99e1a9d9..69ed014ff3 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
@@ -32,59 +32,261 @@
*/
#include "util/u_debug.h"
+#include "util/u_memory.h"
#include "lp_bld_type.h"
#include "lp_bld_flow.h"
+#define LP_BUILD_FLOW_MAX_VARIABLES 32
+#define LP_BUILD_FLOW_MAX_DEPTH 32
+
+
+/**
+ * Enumeration of all possible flow constructs.
+ */
+enum lp_build_flow_construct_kind {
+ lP_BUILD_FLOW_SCOPE,
+ LP_BUILD_FLOW_SKIP,
+};
+
+
+/**
+ * Variable declaration scope.
+ */
+struct lp_build_flow_scope
+{
+ /** Number of variables declared in this scope */
+ unsigned num_variables;
+};
+
+
+/**
+ * Early exit. Useful to skip to the end of a function or block when
+ * the execution mask becomes zero or when there is an error condition.
+ */
+struct lp_build_flow_skip
+{
+ /** Block to skip to */
+ LLVMBasicBlockRef block;
+
+ /** Number of variables declared at the beginning */
+ unsigned num_variables;
+
+ LLVMValueRef *phi;
+};
+
+
+/**
+ * Union of all possible flow constructs' data
+ */
+union lp_build_flow_construct_data
+{
+ struct lp_build_flow_scope scope;
+ struct lp_build_flow_skip skip;
+};
+
+
+/**
+ * Element of the flow construct stack.
+ */
+struct lp_build_flow_construct
+{
+ enum lp_build_flow_construct_kind kind;
+ union lp_build_flow_construct_data data;
+};
+
+
+/**
+ * All necessary data to generate LLVM control flow constructs.
+ *
+ * Besides keeping track of the control flow construct themselves we also
+ * need to keep track of variables in order to generate SSA Phi values.
+ */
+struct lp_build_flow_context
+{
+ LLVMBuilderRef builder;
+
+ /**
+ * Control flow stack.
+ */
+ struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH];
+ unsigned num_constructs;
+
+ /**
+ * Variable stack
+ */
+ LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES];
+ unsigned num_variables;
+};
+
+
+struct lp_build_flow_context *
+lp_build_flow_create(LLVMBuilderRef builder)
+{
+ struct lp_build_flow_context *flow;
+
+ flow = CALLOC_STRUCT(lp_build_flow_context);
+ if(!flow)
+ return NULL;
+
+ flow->builder = builder;
+
+ return flow;
+}
+
+
void
-lp_build_mask_begin(struct lp_build_mask_context *mask,
- LLVMBuilderRef builder,
- union lp_type type,
- LLVMValueRef value)
+lp_build_flow_destroy(struct lp_build_flow_context *flow)
{
- memset(mask, 0, sizeof *mask);
+ assert(flow->num_constructs == 0);
+ assert(flow->num_variables == 0);
+ FREE(flow);
+}
- mask->builder = builder;
- mask->reg_type = LLVMIntType(type.width * type.length);
- mask->value = value;
+
+static union lp_build_flow_construct_data *
+lp_build_flow_push(struct lp_build_flow_context *flow,
+ enum lp_build_flow_construct_kind kind)
+{
+ assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH);
+ if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH)
+ return NULL;
+
+ flow->constructs[flow->num_constructs].kind = kind;
+ return &flow->constructs[flow->num_constructs++].data;
+}
+
+
+static union lp_build_flow_construct_data *
+lp_build_flow_peek(struct lp_build_flow_context *flow,
+ enum lp_build_flow_construct_kind kind)
+{
+ assert(flow->num_constructs);
+ if(!flow->num_constructs)
+ return NULL;
+
+ assert(flow->constructs[flow->num_constructs - 1].kind == kind);
+ if(flow->constructs[flow->num_constructs - 1].kind != kind)
+ return NULL;
+
+ return &flow->constructs[flow->num_constructs - 1].data;
}
+static union lp_build_flow_construct_data *
+lp_build_flow_pop(struct lp_build_flow_context *flow,
+ enum lp_build_flow_construct_kind kind)
+{
+ assert(flow->num_constructs);
+ if(!flow->num_constructs)
+ return NULL;
+
+ assert(flow->constructs[flow->num_constructs - 1].kind == kind);
+ if(flow->constructs[flow->num_constructs - 1].kind != kind)
+ return NULL;
+
+ return &flow->constructs[--flow->num_constructs].data;
+}
+
+
+/**
+ * Begin a variable scope.
+ *
+ *
+ */
void
-lp_build_mask_update(struct lp_build_mask_context *mask,
- LLVMValueRef value)
+lp_build_flow_scope_begin(struct lp_build_flow_context *flow)
{
+ struct lp_build_flow_scope *scope;
- LLVMValueRef cond;
- LLVMBasicBlockRef current_block;
- LLVMBasicBlockRef next_block;
- LLVMBasicBlockRef new_block;
+ scope = &lp_build_flow_push(flow, lP_BUILD_FLOW_SCOPE)->scope;
+ if(!scope)
+ return;
- if(mask->value)
- mask->value = LLVMBuildAnd(mask->builder, mask->value, value, "");
- else
- mask->value = value;
+ scope->num_variables = 0;
+}
- /* FIXME: disabled until we have proper control flow helpers */
-#if 0
- cond = LLVMBuildICmp(mask->builder,
- LLVMIntEQ,
- LLVMBuildBitCast(mask->builder, mask->value, mask->reg_type, ""),
- LLVMConstNull(mask->reg_type),
- "");
- current_block = LLVMGetInsertBlock(mask->builder);
+/**
+ * Declare a variable.
+ *
+ * A variable is a named entity which can have different LLVMValueRef's at
+ * different points of the program. This is relevant for control flow because
+ * when there are mutiple branches to a same location we need to replace
+ * the variable's value with a Phi function as explained in
+ * http://en.wikipedia.org/wiki/Static_single_assignment_form .
+ *
+ * We keep track of variables by keeping around a pointer to where their
+ * current.
+ *
+ * There are a few cautions to observe:
+ *
+ * - Variable's value must not be NULL. If there is no initial value then
+ * LLVMGetUndef() should be used.
+ *
+ * - Variable's value must be kept up-to-date. If the variable is going to be
+ * modified by a function then a pointer should be passed so that its value
+ * is accurate. Failure to do this will cause some of the variables'
+ * transient values to be lost, leading to wrong results.
+ *
+ * - A program should be written from top to bottom, by always appending
+ * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or
+ * modifying existing statements will most likely lead to wrong results.
+ *
+ */
+void
+lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
+ LLVMValueRef *variable)
+{
+ struct lp_build_flow_scope *scope;
+
+ scope = &lp_build_flow_peek(flow, lP_BUILD_FLOW_SCOPE)->scope;
+ if(!scope)
+ return;
- if(!mask->skip_block) {
- LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
- mask->skip_block = LLVMAppendBasicBlock(function, "skip");
+ assert(*variable);
+ if(!*variable)
+ return;
+
+ assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES);
+ if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES)
+ return;
+
+ flow->variables[flow->num_variables++] = variable;
+ ++scope->num_variables;
+}
+
+
+void
+lp_build_flow_scope_end(struct lp_build_flow_context *flow)
+{
+ struct lp_build_flow_scope *scope;
+
+ scope = &lp_build_flow_pop(flow, lP_BUILD_FLOW_SCOPE)->scope;
+ if(!scope)
+ return;
- mask->phi = LLVMBuildPhi(mask->builder, LLVMTypeOf(mask->value), "");
+ assert(flow->num_variables >= scope->num_variables);
+ if(flow->num_variables < scope->num_variables) {
+ flow->num_variables = 0;
+ return;
}
+ flow->num_variables -= scope->num_variables;
+}
+
+
+static LLVMBasicBlockRef
+lp_build_flow_insert_block(struct lp_build_flow_context *flow)
+{
+ LLVMBasicBlockRef current_block;
+ LLVMBasicBlockRef next_block;
+ LLVMBasicBlockRef new_block;
+
+ current_block = LLVMGetInsertBlock(flow->builder);
+
next_block = LLVMGetNextBasicBlock(current_block);
- assert(next_block);
if(next_block) {
new_block = LLVMInsertBasicBlock(next_block, "");
}
@@ -93,30 +295,148 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
new_block = LLVMAppendBasicBlock(function, "");
}
- LLVMAddIncoming(mask->phi, &mask->value, &current_block, 1);
- LLVMBuildCondBr(mask->builder, cond, mask->skip_block, new_block);
+ return new_block;
+}
+
+void
+lp_build_flow_skip_begin(struct lp_build_flow_context *flow)
+{
+ struct lp_build_flow_skip *skip;
+ LLVMBuilderRef builder;
+ unsigned i;
+
+ skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip;
+ if(!skip)
+ return;
+
+ skip->block = lp_build_flow_insert_block(flow);
+ skip->num_variables = flow->num_variables;
+ if(!skip->num_variables) {
+ skip->phi = NULL;
+ return;
+ }
- LLVMPositionBuilderAtEnd(mask->builder, new_block);
-#endif
+ skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi);
+ if(!skip->phi) {
+ skip->num_variables = 0;
+ return;
+ }
+
+ builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, skip->block);
+
+ for(i = 0; i < skip->num_variables; ++i)
+ skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");
+
+ LLVMDisposeBuilder(builder);
}
-LLVMValueRef
-lp_build_mask_end(struct lp_build_mask_context *mask)
+void
+lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
+ LLVMValueRef cond)
{
- if(mask->skip_block) {
- LLVMBasicBlockRef current_block = LLVMGetInsertBlock(mask->builder);
+ struct lp_build_flow_skip *skip;
+ LLVMBasicBlockRef current_block;
+ LLVMBasicBlockRef new_block;
+ unsigned i;
- LLVMAddIncoming(mask->phi, &mask->value, &current_block, 1);
- LLVMBuildBr(mask->builder, mask->skip_block);
+ skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip;
+ if(!skip)
+ return;
- LLVMPositionBuilderAtEnd(mask->builder, mask->skip_block);
+ current_block = LLVMGetInsertBlock(flow->builder);
- mask->value = mask->phi;
- mask->phi = NULL;
- mask->skip_block = NULL;
+ new_block = lp_build_flow_insert_block(flow);
+
+ for(i = 0; i < skip->num_variables; ++i) {
+ assert(*flow->variables[i]);
+ LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
}
+ LLVMBuildCondBr(flow->builder, cond, skip->block, new_block);
+
+ LLVMPositionBuilderAtEnd(flow->builder, new_block);
+ }
+
+
+void
+lp_build_flow_skip_end(struct lp_build_flow_context *flow)
+{
+ struct lp_build_flow_skip *skip;
+ LLVMBasicBlockRef current_block;
+ unsigned i;
+
+ skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip;
+ if(!skip)
+ return;
+
+ current_block = LLVMGetInsertBlock(flow->builder);
+
+ for(i = 0; i < skip->num_variables; ++i) {
+ assert(*flow->variables[i]);
+ LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
+ *flow->variables[i] = skip->phi[i];
+ }
+
+ LLVMBuildBr(flow->builder, skip->block);
+ LLVMPositionBuilderAtEnd(flow->builder, skip->block);
+
+ FREE(skip->phi);
+}
+
+
+static void
+lp_build_mask_check(struct lp_build_mask_context *mask)
+{
+ LLVMBuilderRef builder = mask->flow->builder;
+ LLVMValueRef cond;
+
+ cond = LLVMBuildICmp(builder,
+ LLVMIntEQ,
+ LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""),
+ LLVMConstNull(mask->reg_type),
+ "");
+
+ lp_build_flow_skip_cond_break(mask->flow, cond);
+}
+
+
+void
+lp_build_mask_begin(struct lp_build_mask_context *mask,
+ struct lp_build_flow_context *flow,
+ union lp_type type,
+ LLVMValueRef value)
+{
+ memset(mask, 0, sizeof *mask);
+
+ mask->flow = flow;
+ mask->reg_type = LLVMIntType(type.width * type.length);
+ mask->value = value;
+
+ lp_build_flow_scope_begin(flow);
+ lp_build_flow_scope_declare(flow, &mask->value);
+ lp_build_flow_skip_begin(flow);
+
+ lp_build_mask_check(mask);
+}
+
+
+void
+lp_build_mask_update(struct lp_build_mask_context *mask,
+ LLVMValueRef value)
+{
+ mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, "");
+
+ lp_build_mask_check(mask);
+}
+
+
+LLVMValueRef
+lp_build_mask_end(struct lp_build_mask_context *mask)
+{
+ lp_build_flow_skip_end(mask->flow);
+ lp_build_flow_scope_end(mask->flow);
return mask->value;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.h b/src/gallium/drivers/llvmpipe/lp_bld_flow.h
index 1b634ff038..9d76e3064d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.h
@@ -41,23 +41,49 @@
union lp_type;
+struct lp_build_flow_context;
+
+
+struct lp_build_flow_context *
+lp_build_flow_create(LLVMBuilderRef builder);
+
+void
+lp_build_flow_destroy(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_scope_begin(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
+ LLVMValueRef *variable);
+
+void
+lp_build_flow_scope_end(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_skip_begin(struct lp_build_flow_context *flow);
+
+void
+lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
+ LLVMValueRef cond);
+
+void
+lp_build_flow_skip_end(struct lp_build_flow_context *flow);
+
+
struct lp_build_mask_context
{
- LLVMBuilderRef builder;
+ struct lp_build_flow_context *flow;
LLVMTypeRef reg_type;
LLVMValueRef value;
-
- LLVMValueRef phi;
-
- LLVMBasicBlockRef skip_block;
};
void
lp_build_mask_begin(struct lp_build_mask_context *mask,
- LLVMBuilderRef builder,
+ struct lp_build_flow_context *flow,
union lp_type type,
LLVMValueRef value);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index 01c8a752d1..5ee0656093 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -31,20 +31,14 @@
/**
* @file
- * LLVM IR building helpers interfaces.
- *
- * We use LLVM-C bindings for now. They are not documented, but follow the C++
- * interfaces very closely, and appear to be complete enough for code
- * genration. See
- * http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
- * for a standalone example.
+ * Pixel format helpers.
*/
#include <llvm-c/Core.h>
-
-#include "pipe/p_format.h"
+#include "pipe/p_format.h"
+struct util_format_description;
union lp_type;
@@ -56,9 +50,9 @@ union lp_type;
* @return RGBA in a 4 floats vector.
*/
LLVMValueRef
-lp_build_unpack_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef packed);
+lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef packed);
/**
@@ -67,9 +61,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
* @param rgba 4 float vector with the unpacked components.
*/
LLVMValueRef
-lp_build_pack_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef rgba);
+lp_build_pack_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef rgba);
/**
@@ -81,9 +75,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder,
* @return RGBA in a 4 floats vector.
*/
LLVMValueRef
-lp_build_load_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr);
+lp_build_load_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef ptr);
/**
@@ -92,10 +86,34 @@ lp_build_load_rgba(LLVMBuilderRef builder,
* @param rgba 4 float vector with the unpacked components.
*/
void
-lp_build_store_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr,
- LLVMValueRef rgba);
+lp_build_store_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef ptr,
+ LLVMValueRef rgba);
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets);
+
+
+void
+lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ union lp_type type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba);
+
+
+void
+lp_build_load_rgba_soa(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ union lp_type type,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ LLVMValueRef *rgba);
#endif /* !LP_BLD_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index dcbc0076c7..b9b5d84bed 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -32,9 +32,9 @@
LLVMValueRef
-lp_build_unpack_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef packed)
+lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef packed)
{
const struct util_format_description *desc;
LLVMTypeRef type;
@@ -152,9 +152,9 @@ lp_build_unpack_rgba(LLVMBuilderRef builder,
LLVMValueRef
-lp_build_pack_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef rgba)
+lp_build_pack_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef rgba)
{
const struct util_format_description *desc;
LLVMTypeRef type;
@@ -250,9 +250,9 @@ lp_build_pack_rgba(LLVMBuilderRef builder,
LLVMValueRef
-lp_build_load_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr)
+lp_build_load_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef ptr)
{
const struct util_format_description *desc;
LLVMTypeRef type;
@@ -272,15 +272,15 @@ lp_build_load_rgba(LLVMBuilderRef builder,
packed = LLVMBuildLoad(builder, ptr, "");
- return lp_build_unpack_rgba(builder, format, packed);
+ return lp_build_unpack_rgba_aos(builder, format, packed);
}
void
-lp_build_store_rgba(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr,
- LLVMValueRef rgba)
+lp_build_store_rgba_aos(LLVMBuilderRef builder,
+ enum pipe_format format,
+ LLVMValueRef ptr,
+ LLVMValueRef rgba)
{
const struct util_format_description *desc;
LLVMTypeRef type;
@@ -294,7 +294,7 @@ lp_build_store_rgba(LLVMBuilderRef builder,
type = LLVMIntType(desc->block.bits);
- packed = lp_build_pack_rgba(builder, format, rgba);
+ packed = lp_build_pack_rgba_aos(builder, format, rgba);
ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
new file mode 100644
index 0000000000..569e8d10a3
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
@@ -0,0 +1,208 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_format.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_format.h"
+
+
+/**
+ * Gather elements from scatter positions in memory into a single vector.
+ *
+ * @param src_width src element width
+ * @param dst_width result element width (source will be expanded to fit)
+ * @param length length of the offsets,
+ * @param base_ptr base pointer, should be a i8 pointer type.
+ * @param offsets vector with offsets
+ */
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets)
+{
+ LLVMTypeRef src_type = LLVMIntType(src_width);
+ LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
+ LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+ LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
+ LLVMValueRef res;
+ unsigned i;
+
+ res = LLVMGetUndef(dst_vec_type);
+ for(i = 0; i < length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef elem_offset;
+ LLVMValueRef elem_ptr;
+ LLVMValueRef elem;
+
+ elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
+ elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
+ elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
+ elem = LLVMBuildLoad(builder, elem_ptr, "");
+
+ assert(src_width <= dst_width);
+ if(src_width > dst_width)
+ elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
+ if(src_width < dst_width)
+ elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
+
+ res = LLVMBuildInsertElement(builder, res, elem, index, "");
+ }
+
+ return res;
+}
+
+
+static LLVMValueRef
+lp_build_format_swizzle(union lp_type type,
+ const LLVMValueRef *inputs,
+ enum util_format_swizzle swizzle)
+{
+ switch (swizzle) {
+ case UTIL_FORMAT_SWIZZLE_X:
+ case UTIL_FORMAT_SWIZZLE_Y:
+ case UTIL_FORMAT_SWIZZLE_Z:
+ case UTIL_FORMAT_SWIZZLE_W:
+ return inputs[swizzle];
+ case UTIL_FORMAT_SWIZZLE_0:
+ return lp_build_zero(type);
+ case UTIL_FORMAT_SWIZZLE_1:
+ return lp_build_one(type);
+ case UTIL_FORMAT_SWIZZLE_NONE:
+ return lp_build_undef(type);
+ default:
+ assert(0);
+ return lp_build_undef(type);
+ }
+}
+
+
+void
+lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ union lp_type type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba)
+{
+ LLVMValueRef inputs[4];
+ unsigned start;
+ unsigned chan;
+
+ /* FIXME: Support more formats */
+ assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(format_desc->block.width == 1);
+ assert(format_desc->block.height == 1);
+ assert(format_desc->block.bits <= 32);
+
+ /* Decode the input vector components */
+ start = 0;
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned width = format_desc->channel[chan].size;
+ unsigned stop = start + width;
+ LLVMValueRef input;
+
+ input = packed;
+
+ switch(format_desc->channel[chan].type) {
+ case UTIL_FORMAT_TYPE_VOID:
+ input = NULL;
+ break;
+
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if(type.floating) {
+ if(start)
+ input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(type, start), "");
+ if(stop < format_desc->block.bits) {
+ unsigned mask = ((unsigned long long)1 << width) - 1;
+ input = LLVMBuildAnd(builder, input, lp_build_int_const_scalar(type, mask), "");
+ }
+
+ if(format_desc->channel[chan].normalized)
+ input = lp_build_unsigned_norm_to_float(builder, width, type, input);
+ else
+ input = LLVMBuildFPToSI(builder, input, lp_build_vec_type(type), "");
+ }
+ else {
+ /* FIXME */
+ assert(0);
+ input = lp_build_undef(type);
+ }
+ break;
+
+ default:
+ /* fall through */
+ input = lp_build_undef(type);
+ break;
+ }
+
+ inputs[chan] = input;
+
+ start = stop;
+ }
+
+ if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[0];
+ LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle);
+ rgba[2] = rgba[1] = rgba[0] = depth;
+ rgba[3] = lp_build_one(type);
+ }
+ else {
+ for (chan = 0; chan < 4; ++chan) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+ rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle);
+ }
+ }
+}
+
+
+void
+lp_build_load_rgba_soa(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ union lp_type type,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ LLVMValueRef *rgba)
+{
+ LLVMValueRef packed;
+
+ assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(format_desc->block.width == 1);
+ assert(format_desc->block.height == 1);
+ assert(format_desc->block.bits <= 32);
+
+ packed = lp_build_gather(builder,
+ type.length, format_desc->block.bits, type.width,
+ base_ptr, offsets);
+
+ lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
index 8631efd6c3..995a69c0f4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
@@ -313,8 +313,6 @@ lp_build_select(struct lp_build_context *bld,
b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
}
- /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */
-
a = LLVMBuildAnd(bld->builder, a, mask, "");
/* This often gets translated to PANDN, but sometimes the NOT is
@@ -339,7 +337,7 @@ LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
- boolean cond[4])
+ const boolean cond[4])
{
const union lp_type type = bld->type;
const unsigned n = type.length;
@@ -376,9 +374,9 @@ lp_build_select_aos(struct lp_build_context *bld,
return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
}
+ else {
#if 0
- else if(0) {
- /* FIXME: Unfortunately select of vectors do not work */
+ /* XXX: Unfortunately select of vectors do not work */
/* Use a select */
LLVMTypeRef elem_type = LLVMInt1Type();
LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
@@ -388,10 +386,9 @@ lp_build_select_aos(struct lp_build_context *bld,
cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
- }
-#endif
- else {
+#else
LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
return lp_build_select(bld, mask, a, b);
+#endif
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
index 29b9e1c45b..9099e0fb5b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
@@ -66,7 +66,7 @@ LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
- boolean cond[4]);
+ const boolean cond[4]);
#endif /* !LP_BLD_LOGIC_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
new file mode 100644
index 0000000000..6f565af76d
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
@@ -0,0 +1,135 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_SAMPLE_H
+#define LP_BLD_SAMPLE_H
+
+
+#include <llvm-c/Core.h>
+
+struct pipe_texture;
+struct pipe_sampler_state;
+union lp_type;
+
+
+/**
+ * Sampler static state.
+ *
+ * These are the bits of state from pipe_texture and pipe_sampler_state that
+ * are embedded in the generated code.
+ */
+struct lp_sampler_static_state
+{
+ /* pipe_texture's state */
+ enum pipe_format format;
+ unsigned target:2;
+ unsigned pot_width:1;
+ unsigned pot_height:1;
+ unsigned pot_depth:1;
+
+ /* pipe_sampler_state's state */
+ unsigned wrap_s:3;
+ unsigned wrap_t:3;
+ unsigned wrap_r:3;
+ unsigned min_img_filter:2;
+ unsigned min_mip_filter:2;
+ unsigned mag_img_filter:2;
+ unsigned compare_mode:1;
+ unsigned compare_func:3;
+ unsigned normalized_coords:1;
+ unsigned prefilter:4;
+};
+
+
+/**
+ * Sampler dynamic state.
+ *
+ * These are the bits of state from pipe_texture and pipe_sampler_state that
+ * are computed in runtime.
+ *
+ * There are obtained through callbacks, as we don't want to tie the texture
+ * sampling code generation logic to any particular texture layout or pipe
+ * driver.
+ */
+struct lp_sampler_dynamic_state
+{
+
+ /** Obtain the base texture width. */
+ LLVMValueRef
+ (*width)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+ /** Obtain the base texture height. */
+ LLVMValueRef
+ (*height)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+ LLVMValueRef
+ (*stride)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+ LLVMValueRef
+ (*data_ptr)( struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder,
+ unsigned unit);
+
+};
+
+
+/**
+ * Derive the sampler static state.
+ */
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+ const struct pipe_texture *texture,
+ const struct pipe_sampler_state *sampler);
+
+
+void
+lp_build_sample_soa(LLVMBuilderRef builder,
+ const struct lp_sampler_static_state *static_state,
+ struct lp_sampler_dynamic_state *dynamic_state,
+ union lp_type fp_type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel);
+
+
+
+#endif /* LP_BLD_SAMPLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
new file mode 100644
index 0000000000..3ca25b0e76
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -0,0 +1,411 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_format.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_format.h"
+#include "lp_bld_sample.h"
+
+
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+ const struct pipe_texture *texture,
+ const struct pipe_sampler_state *sampler)
+{
+ memset(state, 0, sizeof *state);
+
+ if(!texture)
+ return;
+
+ if(!sampler)
+ return;
+
+ state->format = texture->format;
+ state->target = texture->target;
+ state->pot_width = util_is_pot(texture->width[0]);
+ state->pot_height = util_is_pot(texture->height[0]);
+ state->pot_depth = util_is_pot(texture->depth[0]);
+
+ state->wrap_s = sampler->wrap_s;
+ state->wrap_t = sampler->wrap_t;
+ state->wrap_r = sampler->wrap_r;
+ state->min_img_filter = sampler->min_img_filter;
+ state->min_mip_filter = sampler->min_mip_filter;
+ state->mag_img_filter = sampler->mag_img_filter;
+ if(sampler->compare_mode) {
+ state->compare_mode = sampler->compare_mode;
+ state->compare_func = sampler->compare_func;
+ }
+ state->normalized_coords = sampler->normalized_coords;
+ state->prefilter = sampler->prefilter;
+}
+
+
+
+/**
+ * Keep all information for sampling code generation in a single place.
+ */
+struct lp_build_sample_context
+{
+ LLVMBuilderRef builder;
+
+ const struct lp_sampler_static_state *static_state;
+
+ struct lp_sampler_dynamic_state *dynamic_state;
+
+ const struct util_format_description *format_desc;
+
+ /** Incoming coordinates type and build context */
+ union lp_type coord_type;
+ struct lp_build_context coord_bld;
+
+ /** Integer coordinates */
+ union lp_type int_coord_type;
+ struct lp_build_context int_coord_bld;
+
+ /** Output texels type and build context */
+ union lp_type texel_type;
+ struct lp_build_context texel_bld;
+};
+
+
+static void
+lp_build_sample_texel(struct lp_build_sample_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
+{
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef x_stride;
+ LLVMValueRef offset;
+
+ x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8);
+
+ if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ LLVMValueRef x_lo, x_hi;
+ LLVMValueRef y_lo, y_hi;
+ LLVMValueRef x_stride_lo, x_stride_hi;
+ LLVMValueRef y_stride_lo, y_stride_hi;
+ LLVMValueRef x_offset_lo, x_offset_hi;
+ LLVMValueRef y_offset_lo, y_offset_hi;
+ LLVMValueRef offset_lo, offset_hi;
+
+ x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, "");
+ y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, "");
+
+ x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, "");
+ y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, "");
+
+ x_stride_lo = x_stride;
+ y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8);
+
+ x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8);
+ y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, "");
+
+ x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo);
+ y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo);
+ offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo);
+
+ x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi);
+ y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi);
+ offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi);
+
+ offset = lp_build_add(int_coord_bld, offset_hi, offset_lo);
+ }
+ else {
+ LLVMValueRef x_offset;
+ LLVMValueRef y_offset;
+
+ x_offset = lp_build_mul(int_coord_bld, x, x_stride);
+ y_offset = lp_build_mul(int_coord_bld, y, y_stride);
+
+ offset = lp_build_add(int_coord_bld, x_offset, y_offset);
+ }
+
+ lp_build_load_rgba_soa(bld->builder,
+ bld->format_desc,
+ bld->texel_type,
+ data_ptr,
+ offset,
+ texel);
+}
+
+
+static LLVMValueRef
+lp_build_sample_wrap(struct lp_build_sample_context *bld,
+ LLVMValueRef coord,
+ LLVMValueRef length,
+ boolean is_pot,
+ unsigned wrap_mode)
+{
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef length_minus_one;
+
+ length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
+
+ switch(wrap_mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ if(is_pot)
+ coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
+ else
+ /* Signed remainder won't give the right results for negative
+ * dividends but unsigned remainder does.*/
+ coord = LLVMBuildURem(bld->builder, coord, length, "");
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP:
+ coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
+ coord = lp_build_min(int_coord_bld, coord, length_minus_one);
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ /* FIXME */
+ default:
+ assert(0);
+ }
+
+ return coord;
+}
+
+
+static void
+lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
+{
+ LLVMValueRef x;
+ LLVMValueRef y;
+
+ x = lp_build_ifloor(&bld->coord_bld, s);
+ y = lp_build_ifloor(&bld->coord_bld, t);
+
+ x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ lp_build_sample_texel(bld, x, y, stride, data_ptr, texel);
+}
+
+
+static void
+lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
+{
+ LLVMValueRef half;
+ LLVMValueRef s_ipart;
+ LLVMValueRef t_ipart;
+ LLVMValueRef s_fpart;
+ LLVMValueRef t_fpart;
+ LLVMValueRef x0, x1;
+ LLVMValueRef y0, y1;
+ LLVMValueRef neighbors[2][2][4];
+ unsigned chan;
+
+ half = lp_build_const_scalar(bld->coord_type, 0.5);
+ s = lp_build_sub(&bld->coord_bld, s, half);
+ t = lp_build_sub(&bld->coord_bld, t, half);
+
+ s_ipart = lp_build_floor(&bld->coord_bld, s);
+ t_ipart = lp_build_floor(&bld->coord_bld, t);
+
+ s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart);
+ t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart);
+
+ x0 = lp_build_int(&bld->coord_bld, s_ipart);
+ y0 = lp_build_int(&bld->coord_bld, t_ipart);
+
+ x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
+ y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
+
+ x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
+ lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
+ lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
+ lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
+
+ /* TODO: Don't interpolate missing channels */
+ for(chan = 0; chan < 4; ++chan) {
+ texel[chan] = lp_build_lerp_2d(&bld->texel_bld,
+ s_fpart, t_fpart,
+ neighbors[0][0][chan],
+ neighbors[0][1][chan],
+ neighbors[1][0][chan],
+ neighbors[1][1][chan]);
+ }
+}
+
+
+static void
+lp_build_sample_compare(struct lp_build_sample_context *bld,
+ LLVMValueRef p,
+ LLVMValueRef *texel)
+{
+ struct lp_build_context *texel_bld = &bld->texel_bld;
+ LLVMValueRef res;
+ unsigned chan;
+
+ if(!bld->static_state->compare_mode)
+ return;
+
+ /* TODO: Compare before swizzling, to avoid redundant computations */
+ res = NULL;
+ for(chan = 0; chan < 4; ++chan) {
+ LLVMValueRef cmp;
+ cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
+ cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
+
+ if(res)
+ res = lp_build_add(texel_bld, res, cmp);
+ else
+ res = cmp;
+ }
+
+ assert(res);
+ res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25));
+
+ /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
+ for(chan = 0; chan < 3; ++chan)
+ texel[chan] = res;
+ texel[3] = texel_bld->one;
+}
+
+
+void
+lp_build_sample_soa(LLVMBuilderRef builder,
+ const struct lp_sampler_static_state *static_state,
+ struct lp_sampler_dynamic_state *dynamic_state,
+ union lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel)
+{
+ struct lp_build_sample_context bld;
+ LLVMValueRef width;
+ LLVMValueRef height;
+ LLVMValueRef stride;
+ LLVMValueRef data_ptr;
+ LLVMValueRef s;
+ LLVMValueRef t;
+ LLVMValueRef p;
+
+ /* Setup our build context */
+ memset(&bld, 0, sizeof bld);
+ bld.builder = builder;
+ bld.static_state = static_state;
+ bld.dynamic_state = dynamic_state;
+ bld.format_desc = util_format_description(static_state->format);
+ bld.coord_type = type;
+ bld.int_coord_type = lp_int_type(type);
+ bld.texel_type = type;
+ lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
+ lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
+ lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
+
+ /* Get the dynamic state */
+ width = dynamic_state->width(dynamic_state, builder, unit);
+ height = dynamic_state->height(dynamic_state, builder, unit);
+ stride = dynamic_state->stride(dynamic_state, builder, unit);
+ data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit);
+
+ s = coords[0];
+ t = coords[1];
+ p = coords[2];
+
+ width = lp_build_broadcast_scalar(&bld.int_coord_bld, width);
+ height = lp_build_broadcast_scalar(&bld.int_coord_bld, height);
+ stride = lp_build_broadcast_scalar(&bld.int_coord_bld, stride);
+
+ if(static_state->target == PIPE_TEXTURE_1D)
+ t = bld.coord_bld.zero;
+
+ if(static_state->normalized_coords) {
+ LLVMTypeRef coord_vec_type = lp_build_vec_type(bld.coord_type);
+ LLVMValueRef fp_width = LLVMBuildSIToFP(builder, width, coord_vec_type, "");
+ LLVMValueRef fp_height = LLVMBuildSIToFP(builder, height, coord_vec_type, "");
+ s = lp_build_mul(&bld.coord_bld, s, fp_width);
+ t = lp_build_mul(&bld.coord_bld, t, fp_height);
+ }
+
+ switch (static_state->min_img_filter) {
+ case PIPE_TEX_FILTER_NEAREST:
+ lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ case PIPE_TEX_FILTER_ANISO:
+ lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+ break;
+ default:
+ assert(0);
+ }
+
+ /* FIXME: respect static_state->min_mip_filter */;
+ /* FIXME: respect static_state->mag_img_filter */;
+ /* FIXME: respect static_state->prefilter */;
+
+ lp_build_sample_compare(&bld, p, texel);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.c b/src/gallium/drivers/llvmpipe/lp_bld_struct.c
index 14d2b10df9..3998ac374f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_struct.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.c
@@ -42,17 +42,30 @@
LLVMValueRef
+lp_build_struct_get_ptr(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ unsigned member,
+ const char *name)
+{
+ LLVMValueRef indices[2];
+ LLVMValueRef member_ptr;
+ indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0);
+ member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
+ lp_build_name(member_ptr, "%s.%s_ptr", LLVMGetValueName(ptr), name);
+ return member_ptr;
+}
+
+
+LLVMValueRef
lp_build_struct_get(LLVMBuilderRef builder,
LLVMValueRef ptr,
unsigned member,
const char *name)
{
- LLVMValueRef indices[2];
LLVMValueRef member_ptr;
LLVMValueRef res;
- indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
- indices[1] = LLVMConstInt(LLVMInt32Type(), member, 0);
- member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
+ member_ptr = lp_build_struct_get_ptr(builder, ptr, member, name);
res = LLVMBuildLoad(builder, member_ptr, "");
lp_build_name(res, "%s.%s", LLVMGetValueName(ptr), name);
return res;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_struct.h b/src/gallium/drivers/llvmpipe/lp_bld_struct.h
index cbefdc9f81..740392f561 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_struct.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_struct.h
@@ -53,6 +53,18 @@
offsetof(_ctype, _cmember))
+/**
+ * Get value pointer to a structure member.
+ */
+LLVMValueRef
+lp_build_struct_get_ptr(LLVMBuilderRef builder,
+ LLVMValueRef ptr,
+ unsigned member,
+ const char *name);
+
+/**
+ * Get the value of a structure member.
+ */
LLVMValueRef
lp_build_struct_get(LLVMBuilderRef builder,
LLVMValueRef ptr,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
index ac7eed9379..f35638be44 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
@@ -161,7 +161,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
LLVMValueRef
lp_build_swizzle1_aos(struct lp_build_context *bld,
LLVMValueRef a,
- unsigned char swizzle[4])
+ const unsigned char swizzle[4])
{
const unsigned n = bld->type.length;
unsigned i, j;
@@ -192,7 +192,7 @@ LLVMValueRef
lp_build_swizzle2_aos(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
- unsigned char swizzle[4])
+ const unsigned char swizzle[4])
{
const unsigned n = bld->type.length;
unsigned i, j;
@@ -201,11 +201,12 @@ lp_build_swizzle2_aos(struct lp_build_context *bld,
return lp_build_swizzle1_aos(bld, a, swizzle);
if(a == b) {
- swizzle[0] %= 4;
- swizzle[1] %= 4;
- swizzle[2] %= 4;
- swizzle[3] %= 4;
- return lp_build_swizzle1_aos(bld, a, swizzle);
+ unsigned char swizzle1[4];
+ swizzle1[0] = swizzle[0] % 4;
+ swizzle1[1] = swizzle[1] % 4;
+ swizzle1[2] = swizzle[2] % 4;
+ swizzle1[3] = swizzle[3] % 4;
+ return lp_build_swizzle1_aos(bld, a, swizzle1);
}
if(swizzle[0] % 4 == 0 &&
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
index d7dd6a8a60..cb0b6707ec 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
@@ -73,7 +73,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
LLVMValueRef
lp_build_swizzle1_aos(struct lp_build_context *bld,
LLVMValueRef a,
- unsigned char swizzle[4]);
+ const unsigned char swizzle[4]);
/**
@@ -85,7 +85,7 @@ LLVMValueRef
lp_build_swizzle2_aos(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
- unsigned char swizzle[4]);
+ const unsigned char swizzle[4]);
#endif /* !LP_BLD_SWIZZLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
index 912db24aec..10c251c416 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi.h
@@ -44,14 +44,30 @@ struct lp_build_context;
struct lp_build_mask_context;
-typedef void
-(*lp_emit_fetch_texel_soa_callback)( LLVMBuilderRef builder,
- void *context,
- unsigned unit,
- unsigned num_coords,
- const LLVMValueRef *coords,
- LLVMValueRef lodbias,
- LLVMValueRef *texel);
+/**
+ * Sampler code generation interface.
+ *
+ * Although texture sampling is a requirement for TGSI translation, it is
+ * a very different problem with several different approaches to it. This
+ * structure establishes an interface for texture sampling code generation, so
+ * that we can easily use different texture sampling strategies.
+ */
+struct lp_build_sampler_soa
+{
+ void
+ (*destroy)( struct lp_build_sampler_soa *sampler );
+
+ void
+ (*emit_fetch_texel)( struct lp_build_sampler_soa *sampler,
+ LLVMBuilderRef builder,
+ union lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel);
+};
+
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
@@ -62,8 +78,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
const LLVMValueRef *pos,
const LLVMValueRef (*inputs)[4],
LLVMValueRef (*outputs)[4],
- lp_emit_fetch_texel_soa_callback emit_fetch_texel,
- void *emit_fetch_texel_context);
+ struct lp_build_sampler_soa *sampler);
#endif /* LP_BLD_TGSI_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index d4d18febec..b106ce2317 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -78,6 +78,11 @@
#define CHAN_Z 2
#define CHAN_W 3
+#define QUAD_TOP_LEFT 0
+#define QUAD_TOP_RIGHT 1
+#define QUAD_BOTTOM_LEFT 2
+#define QUAD_BOTTOM_RIGHT 3
+
struct lp_build_tgsi_soa_context
{
@@ -88,8 +93,7 @@ struct lp_build_tgsi_soa_context
const LLVMValueRef (*inputs)[NUM_CHANNELS];
LLVMValueRef (*outputs)[NUM_CHANNELS];
- lp_emit_fetch_texel_soa_callback emit_fetch_texel;
- void *emit_fetch_texel_context;
+ struct lp_build_sampler_soa *sampler;
LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS];
LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS];
@@ -98,6 +102,51 @@ struct lp_build_tgsi_soa_context
};
+static const unsigned char
+swizzle_left[4] = {
+ QUAD_TOP_LEFT, QUAD_TOP_LEFT,
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT
+};
+
+static const unsigned char
+swizzle_right[4] = {
+ QUAD_TOP_RIGHT, QUAD_TOP_RIGHT,
+ QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT
+};
+
+static const unsigned char
+swizzle_top[4] = {
+ QUAD_TOP_LEFT, QUAD_TOP_RIGHT,
+ QUAD_TOP_LEFT, QUAD_TOP_RIGHT
+};
+
+static const unsigned char
+swizzle_bottom[4] = {
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT,
+ QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT
+};
+
+
+static LLVMValueRef
+emit_ddx(struct lp_build_tgsi_soa_context *bld,
+ LLVMValueRef src)
+{
+ LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
+ LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
+ return lp_build_sub(&bld->base, src_right, src_left);
+}
+
+
+static LLVMValueRef
+emit_ddy(struct lp_build_tgsi_soa_context *bld,
+ LLVMValueRef src)
+{
+ LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
+ LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
+ return lp_build_sub(&bld->base, src_top, src_bottom);
+}
+
+
/**
* Register fetch.
*/
@@ -168,6 +217,7 @@ emit_fetch(
break;
case TGSI_UTIL_SIGN_SET:
+ /* TODO: Use bitwese OR for floating point */
res = lp_build_abs( &bld->base, res );
res = LLVMBuildNeg( bld->base.builder, res, "" );
break;
@@ -185,6 +235,36 @@ emit_fetch(
/**
+ * Register fetch with derivatives.
+ */
+static void
+emit_fetch_deriv(
+ struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst,
+ unsigned index,
+ const unsigned chan_index,
+ LLVMValueRef *res,
+ LLVMValueRef *ddx,
+ LLVMValueRef *ddy)
+{
+ LLVMValueRef src;
+
+ src = emit_fetch(bld, inst, index, chan_index);
+
+ if(res)
+ *res = src;
+
+ /* TODO: use interpolation coeffs for inputs */
+
+ if(ddx)
+ *ddx = emit_ddx(bld, src);
+
+ if(ddy)
+ *ddy = emit_ddy(bld, src);
+}
+
+
+/**
* Register store.
*/
static void
@@ -239,17 +319,18 @@ emit_store(
* High-level instruction translators.
*/
+
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
boolean apply_lodbias,
- boolean projected)
+ boolean projected,
+ LLVMValueRef *texel)
{
const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
LLVMValueRef lodbias;
LLVMValueRef oow;
LLVMValueRef coords[3];
- LLVMValueRef texel[4];
unsigned num_coords;
unsigned i;
@@ -289,12 +370,11 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
coords[i] = lp_build_mul(&bld->base, coords[i], oow);
}
- bld->emit_fetch_texel(bld->base.builder, bld->emit_fetch_texel_context,
- unit, num_coords, coords, lodbias, texel);
-
- FOR_EACH_DST0_ENABLED_CHANNEL( inst, i ) {
- emit_store( bld, inst, 0, i, texel[i] );
- }
+ bld->sampler->emit_fetch_texel(bld->sampler,
+ bld->base.builder,
+ bld->base.type,
+ unit, num_coords, coords, lodbias,
+ texel);
}
@@ -347,14 +427,6 @@ emit_kil(
}
-static void
-emit_kilp(
- struct lp_build_tgsi_soa_context *bld )
-{
- /* XXX todo / fix me */
-}
-
-
/**
* Check if inst src/dest regs use indirect addressing into temporary
* register file.
@@ -382,25 +454,35 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst)
static int
emit_instruction(
struct lp_build_tgsi_soa_context *bld,
- struct tgsi_full_instruction *inst )
+ const struct tgsi_full_instruction *inst,
+ const struct tgsi_opcode_info *info)
{
unsigned chan_index;
LLVMValueRef src0, src1, src2;
LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- LLVMValueRef dst0;
+ LLVMValueRef res;
+ LLVMValueRef dst0[NUM_CHANNELS];
/* we can't handle indirect addressing into temp register file yet */
if (indirect_temp_reference(inst))
return FALSE;
+ assert(info->num_dst <= 1);
+ if(info->num_dst) {
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.undef;
+ }
+ }
+
switch (inst->Instruction.Opcode) {
#if 0
case TGSI_OPCODE_ARL:
+ /* FIXME */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
emit_flr(bld, 0, 0);
emit_f2it( bld, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
#endif
@@ -408,19 +490,17 @@ emit_instruction(
case TGSI_OPCODE_MOV:
case TGSI_OPCODE_SWZ:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
}
break;
case TGSI_OPCODE_LIT:
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) {
- emit_store( bld, inst, 0, CHAN_X, bld->base.one);
+ dst0[CHAN_X] = bld->base.one;
}
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) {
src0 = emit_fetch( bld, inst, 0, CHAN_X );
- dst0 = lp_build_max( &bld->base, src0, bld->base.zero);
- emit_store( bld, inst, 0, CHAN_Y, dst0);
+ dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero);
}
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
/* XMM[1] = SrcReg[0].yyyy */
@@ -432,20 +512,19 @@ emit_instruction(
tmp1 = lp_build_pow( &bld->base, tmp1, tmp2);
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero);
- dst0 = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
- emit_store( bld, inst, 0, CHAN_Z, dst0);
+ dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero);
}
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) {
- emit_store( bld, inst, 0, CHAN_W, bld->base.one);
+ dst0[CHAN_W] = bld->base.one;
}
break;
case TGSI_OPCODE_RCP:
/* TGSI_OPCODE_RECIP */
src0 = emit_fetch( bld, inst, 0, CHAN_X );
- dst0 = lp_build_rcp(&bld->base, src0);
+ res = lp_build_rcp(&bld->base, src0);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, dst0 );
+ dst0[chan_index] = res;
}
break;
@@ -453,9 +532,9 @@ emit_instruction(
/* TGSI_OPCODE_RECIPSQRT */
src0 = emit_fetch( bld, inst, 0, CHAN_X );
src0 = lp_build_abs(&bld->base, src0);
- dst0 = lp_build_rsqrt(&bld->base, src0);
+ res = lp_build_rsqrt(&bld->base, src0);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, dst0 );
+ dst0[chan_index] = res;
}
break;
@@ -479,16 +558,15 @@ emit_instruction(
lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2);
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
- emit_store( bld, inst, 0, CHAN_X, tmp0);
+ dst0[CHAN_X] = tmp0;
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ))
- emit_store( bld, inst, 0, CHAN_Y, tmp1);
+ dst0[CHAN_Y] = tmp1;
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
- emit_store( bld, inst, 0, CHAN_Z, tmp2);
+ dst0[CHAN_Z] = tmp2;
}
/* dst.w = 1.0 */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
break;
@@ -514,20 +592,18 @@ emit_instruction(
/* dst.x = floor(lg2(abs(src.x))) */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ))
- emit_store( bld, inst, 0, CHAN_X, tmp0);
+ dst0[CHAN_X] = tmp0;
/* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) {
- tmp1 = lp_build_div( &bld->base, src0, tmp1);
- emit_store( bld, inst, 0, CHAN_Y, tmp1);
+ dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1);
}
/* dst.z = lg2(abs(src.x)) */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ))
- emit_store( bld, inst, 0, CHAN_Z, tmp2);
+ dst0[CHAN_Z] = tmp2;
}
/* dst.w = 1.0 */
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
break;
@@ -535,8 +611,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0 = lp_build_mul(&bld->base, src0, src1);
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_mul(&bld->base, src0, src1);
}
break;
@@ -544,8 +619,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0 = lp_build_add(&bld->base, src0, src1);
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_add(&bld->base, src0, src1);
}
break;
@@ -563,7 +637,7 @@ emit_instruction(
tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
@@ -585,28 +659,24 @@ emit_instruction(
tmp1 = lp_build_mul( &bld->base, tmp1, tmp2);
tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_DST:
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_X, tmp0);
+ dst0[CHAN_X] = bld->base.one;
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
tmp0 = emit_fetch( bld, inst, 0, CHAN_Y );
tmp1 = emit_fetch( bld, inst, 1, CHAN_Y );
- tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
- emit_store( bld, inst, 0, CHAN_Y, tmp0);
+ dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1);
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
- tmp0 = emit_fetch( bld, inst, 0, CHAN_Z );
- emit_store( bld, inst, 0, CHAN_Z, tmp0);
+ dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z );
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
- tmp0 = emit_fetch( bld, inst, 1, CHAN_W );
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W );
}
break;
@@ -614,8 +684,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0 = lp_build_min( &bld->base, src0, src1 );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_min( &bld->base, src0, src1 );
}
break;
@@ -623,8 +692,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
- dst0 = lp_build_max( &bld->base, src0, src1 );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_max( &bld->base, src0, src1 );
}
break;
@@ -634,8 +702,7 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
@@ -645,8 +712,7 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
@@ -658,7 +724,7 @@ emit_instruction(
tmp2 = emit_fetch( bld, inst, 2, chan_index );
tmp0 = lp_build_mul( &bld->base, tmp0, tmp1);
tmp0 = lp_build_add( &bld->base, tmp0, tmp2);
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
@@ -666,8 +732,7 @@ emit_instruction(
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
tmp1 = emit_fetch( bld, inst, 1, chan_index );
- tmp0 = lp_build_sub( &bld->base, tmp0, tmp1);
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1);
}
break;
@@ -678,13 +743,19 @@ emit_instruction(
src2 = emit_fetch( bld, inst, 2, chan_index );
tmp0 = lp_build_sub( &bld->base, src1, src2 );
tmp0 = lp_build_mul( &bld->base, src0, tmp0 );
- dst0 = lp_build_add( &bld->base, tmp0, src2 );
- emit_store( bld, inst, 0, chan_index, dst0 );
+ dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 );
}
break;
case TGSI_OPCODE_CND:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ src2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp1 = lp_build_const_scalar(bld->base.type, 0.5);
+ tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 );
+ }
break;
case TGSI_OPCODE_DP2A:
@@ -698,45 +769,49 @@ emit_instruction(
tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */
tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
+ dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
}
break;
-#if 0
case TGSI_OPCODE_FRC:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_frc( bld, 0, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ src0 = emit_fetch( bld, inst, 0, chan_index );
+ tmp0 = lp_build_floor(&bld->base, src0);
+ tmp0 = lp_build_sub(&bld->base, tmp0, src0);
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_CLAMP:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ src1 = emit_fetch( bld, inst, 1, chan_index );
+ src2 = emit_fetch( bld, inst, 2, chan_index );
+ tmp0 = lp_build_max(&bld->base, tmp0, src1);
+ tmp0 = lp_build_min(&bld->base, tmp0, src2);
+ dst0[chan_index] = tmp0;
+ }
break;
case TGSI_OPCODE_FLR:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_flr( bld, 0, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_floor(&bld->base, tmp0);
}
break;
case TGSI_OPCODE_ROUND:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_rnd( bld, 0, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_round(&bld->base, tmp0);
}
break;
-#endif
case TGSI_OPCODE_EX2: {
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp0 = lp_build_exp2( &bld->base, tmp0);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
}
@@ -745,16 +820,16 @@ emit_instruction(
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp0 = lp_build_log2( &bld->base, tmp0);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_POW:
src0 = emit_fetch( bld, inst, 0, CHAN_X );
src1 = emit_fetch( bld, inst, 1, CHAN_X );
- dst0 = lp_build_pow( &bld->base, src0, src1 );
+ res = lp_build_pow( &bld->base, src0, src1 );
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, dst0 );
+ dst0[chan_index] = res;
}
break;
@@ -775,7 +850,7 @@ emit_instruction(
tmp5 = tmp3;
tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
tmp2 = lp_build_sub( &bld->base, tmp2, tmp5);
- emit_store( bld, inst, 0, CHAN_X, tmp2);
+ dst0[CHAN_X] = tmp2;
}
if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) {
@@ -786,31 +861,30 @@ emit_instruction(
tmp3 = lp_build_mul( &bld->base, tmp3, tmp2);
tmp1 = lp_build_mul( &bld->base, tmp1, tmp5);
tmp3 = lp_build_sub( &bld->base, tmp3, tmp1);
- emit_store( bld, inst, 0, CHAN_Y, tmp3);
+ dst0[CHAN_Y] = tmp3;
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
tmp5 = lp_build_mul( &bld->base, tmp5, tmp4);
tmp0 = lp_build_mul( &bld->base, tmp0, tmp2);
tmp5 = lp_build_sub( &bld->base, tmp5, tmp0);
- emit_store( bld, inst, 0, CHAN_Z, tmp5);
+ dst0[CHAN_Z] = tmp5;
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
break;
case TGSI_OPCODE_ABS:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- tmp0 = lp_build_abs( &bld->base, tmp0 ) ;
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_abs( &bld->base, tmp0 );
}
break;
case TGSI_OPCODE_RCC:
+ /* deprecated? */
+ assert(0);
return 0;
- break;
case TGSI_OPCODE_DPH:
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
@@ -827,7 +901,7 @@ emit_instruction(
tmp1 = emit_fetch( bld, inst, 1, CHAN_W );
tmp0 = lp_build_add( &bld->base, tmp0, tmp1);
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
@@ -835,25 +909,27 @@ emit_instruction(
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp0 = lp_build_cos( &bld->base, tmp0 );
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
case TGSI_OPCODE_DDX:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL);
+ }
break;
case TGSI_OPCODE_DDY:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]);
+ }
break;
-#if 0
case TGSI_OPCODE_KILP:
/* predicated kill */
- emit_kilp( bld );
- return 0; /* XXX fix me */
+ /* FIXME */
+ return 0;
break;
-#endif
case TGSI_OPCODE_KIL:
/* conditional kill */
@@ -885,13 +961,14 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
case TGSI_OPCODE_SFL:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.zero;
+ }
break;
case TGSI_OPCODE_SGT:
@@ -899,8 +976,7 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
@@ -908,7 +984,7 @@ emit_instruction(
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
tmp0 = lp_build_sin( &bld->base, tmp0 );
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
@@ -917,8 +993,7 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
@@ -927,85 +1002,99 @@ emit_instruction(
src0 = emit_fetch( bld, inst, 0, chan_index );
src1 = emit_fetch( bld, inst, 1, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 );
- dst0 = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero );
}
break;
case TGSI_OPCODE_STR:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.one;
+ }
break;
case TGSI_OPCODE_TEX:
- emit_tex( bld, inst, FALSE, FALSE );
+ emit_tex( bld, inst, FALSE, FALSE, dst0 );
break;
case TGSI_OPCODE_TXD:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_UP2H:
+ /* deprecated */
+ assert (0);
return 0;
break;
case TGSI_OPCODE_UP2US:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_UP4B:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_UP4UB:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_X2D:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_ARA:
+ /* deprecated */
+ assert(0);
return 0;
break;
#if 0
case TGSI_OPCODE_ARR:
+ /* FIXME */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
emit_rnd( bld, 0, 0 );
emit_f2it( bld, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = tmp0;
}
break;
#endif
case TGSI_OPCODE_BRA:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_CAL:
+ /* FIXME */
return 0;
break;
-#if 0
case TGSI_OPCODE_RET:
- emit_ret( bld );
+ /* FIXME */
+ return 0;
break;
-#endif
case TGSI_OPCODE_END:
break;
-#if 0
case TGSI_OPCODE_SSG:
/* TGSI_OPCODE_SGN */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_sgn( bld, 0, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 );
}
break;
-#endif
case TGSI_OPCODE_CMP:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
@@ -1013,34 +1102,29 @@ emit_instruction(
src1 = emit_fetch( bld, inst, 1, chan_index );
src2 = emit_fetch( bld, inst, 2, chan_index );
tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero );
- dst0 = lp_build_select( &bld->base, tmp0, src1, src2);
- emit_store( bld, inst, 0, chan_index, dst0);
+ dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2);
}
break;
case TGSI_OPCODE_SCS:
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) {
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
- tmp0 = lp_build_cos( &bld->base, tmp0 );
- emit_store( bld, inst, 0, CHAN_X, tmp0);
+ dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 );
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) {
tmp0 = emit_fetch( bld, inst, 0, CHAN_X );
- tmp0 = lp_build_sin( &bld->base, tmp0 );
- emit_store( bld, inst, 0, CHAN_Y, tmp0);
+ dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 );
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) {
- tmp0 = bld->base.zero;
- emit_store( bld, inst, 0, CHAN_Z, tmp0);
+ dst0[CHAN_Z] = bld->base.zero;
}
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) {
- tmp0 = bld->base.one;
- emit_store( bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
break;
case TGSI_OPCODE_TXB:
- emit_tex( bld, inst, TRUE, FALSE );
+ emit_tex( bld, inst, TRUE, FALSE, dst0 );
break;
case TGSI_OPCODE_NRM:
@@ -1099,38 +1183,35 @@ emit_instruction(
/* dst.x = xmm1 * src.x */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) {
- tmp4 = lp_build_mul( &bld->base, tmp4, tmp1);
- emit_store(bld, inst, 0, CHAN_X, tmp4);
+ dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1);
}
/* dst.y = xmm1 * src.y */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) {
- tmp5 = lp_build_mul( &bld->base, tmp5, tmp1);
- emit_store(bld, inst, 0, CHAN_Y, tmp5);
+ dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1);
}
/* dst.z = xmm1 * src.z */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) {
- tmp6 = lp_build_mul( &bld->base, tmp6, tmp1);
- emit_store(bld, inst, 0, CHAN_Z, tmp6);
+ dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1);
}
/* dst.w = xmm1 * src.w */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) {
- tmp7 = lp_build_mul( &bld->base, tmp7, tmp1);
- emit_store(bld, inst, 0, CHAN_W, tmp7);
+ dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1);
}
}
- /* dst0.w = 1.0 */
+ /* dst.w = 1.0 */
if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) {
- tmp0 = bld->base.one;
- emit_store(bld, inst, 0, CHAN_W, tmp0);
+ dst0[CHAN_W] = bld->base.one;
}
}
break;
case TGSI_OPCODE_DIV:
+ /* deprecated */
+ assert( 0 );
return 0;
break;
@@ -1143,118 +1224,157 @@ emit_instruction(
tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */
tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store( bld, inst, 0, chan_index, tmp0); /* dest[ch] = xmm0 */
+ dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */
}
break;
case TGSI_OPCODE_TXL:
- emit_tex( bld, inst, TRUE, FALSE );
+ emit_tex( bld, inst, TRUE, FALSE, dst0 );
break;
case TGSI_OPCODE_TXP:
- emit_tex( bld, inst, FALSE, TRUE );
+ emit_tex( bld, inst, FALSE, TRUE, dst0 );
break;
case TGSI_OPCODE_BRK:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_IF:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_BGNFOR:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_REP:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_ELSE:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_ENDIF:
+ /* FIXME */
return 0;
break;
case TGSI_OPCODE_ENDFOR:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_ENDREP:
+ /* deprecated */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_PUSHA:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_POPA:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_CEIL:
- return 0;
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ tmp0 = emit_fetch( bld, inst, 0, chan_index );
+ dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
+ }
break;
case TGSI_OPCODE_I2F:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_NOT:
+ /* deprecated? */
+ assert(0);
return 0;
break;
-#if 0
case TGSI_OPCODE_TRUNC:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
tmp0 = emit_fetch( bld, inst, 0, chan_index );
- emit_f2it( bld, 0 );
- emit_i2f( bld, 0 );
- emit_store( bld, inst, 0, chan_index, tmp0);
+ dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
}
break;
-#endif
case TGSI_OPCODE_SHL:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_SHR:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_AND:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_OR:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_MOD:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_XOR:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_SAD:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_TXF:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_TXQ:
+ /* deprecated? */
+ assert(0);
return 0;
break;
case TGSI_OPCODE_CONT:
+ /* deprecated? */
+ assert(0);
return 0;
break;
@@ -1266,10 +1386,28 @@ emit_instruction(
return 0;
break;
+ case TGSI_OPCODE_NOISE1:
+ case TGSI_OPCODE_NOISE2:
+ case TGSI_OPCODE_NOISE3:
+ case TGSI_OPCODE_NOISE4:
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ dst0[chan_index] = bld->base.zero;
+ }
+ break;
+
+ case TGSI_OPCODE_NOP:
+ break;
+
default:
return 0;
}
+ if(info->num_dst) {
+ FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+ emit_store( bld, inst, 0, chan_index, dst0[chan_index]);
+ }
+ }
+
return 1;
}
@@ -1283,8 +1421,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
const LLVMValueRef *pos,
const LLVMValueRef (*inputs)[NUM_CHANNELS],
LLVMValueRef (*outputs)[NUM_CHANNELS],
- lp_emit_fetch_texel_soa_callback emit_fetch_texel,
- void *emit_fetch_texel_context)
+ struct lp_build_sampler_soa *sampler)
{
struct lp_build_tgsi_soa_context bld;
struct tgsi_parse_context parse;
@@ -1299,8 +1436,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
bld.inputs = inputs;
bld.outputs = outputs;
bld.consts_ptr = consts_ptr;
- bld.emit_fetch_texel = emit_fetch_texel;
- bld.emit_fetch_texel_context = emit_fetch_texel_context;
+ bld.sampler = sampler;
tgsi_parse_init( &parse, tokens );
@@ -1309,16 +1445,18 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
switch( parse.FullToken.Token.Type ) {
case TGSI_TOKEN_TYPE_DECLARATION:
- /* Input already interpolated */
+ /* Inputs already interpolated */
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- if (!emit_instruction( &bld, &parse.FullToken.FullInstruction )) {
+ {
unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
- _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
- info ? info->mnemonic : "<invalid>");
- }
+ if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info ))
+ _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
+ info ? info->mnemonic : "<invalid>");
+ }
+
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c
index 8e0026fd97..577644b7ab 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c
@@ -157,6 +157,17 @@ lp_build_int_vec_type(union lp_type type)
}
+union lp_type
+lp_int_type(union lp_type type)
+{
+ union lp_type int_type;
+ int_type.value = 0;
+ int_type.width = type.width;
+ int_type.length = type.length;
+ return int_type;
+}
+
+
void
lp_build_context_init(struct lp_build_context *bld,
LLVMBuilderRef builder,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h
index 3ce566be64..9933e0b45c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h
@@ -165,6 +165,10 @@ LLVMTypeRef
lp_build_int_vec_type(union lp_type type);
+union lp_type
+lp_int_type(union lp_type type);
+
+
void
lp_build_context_init(struct lp_build_context *bld,
LLVMBuilderRef builder,
diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c
index 580cca5b46..bdcff94b9b 100644
--- a/src/gallium/drivers/llvmpipe/lp_clear.c
+++ b/src/gallium/drivers/llvmpipe/lp_clear.c
@@ -67,6 +67,7 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
util_pack_color(rgba, ps->format, &cv);
lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv);
}
+ llvmpipe->dirty_render_cache = TRUE;
}
if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 233d1df0e1..a4b2bd8c2a 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -141,8 +141,6 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe,
return PIPE_REFERENCED_FOR_WRITE;
}
- /* FIXME: we also need to do the same for the texture cache */
-
return PIPE_UNREFERENCED;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index d288460a1b..9465f763d5 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -44,15 +44,47 @@
static void
lp_jit_init_globals(struct llvmpipe_screen *screen)
{
- /* struct lp_jit_context */
+ LLVMTypeRef texture_type;
+
+ /* struct lp_jit_texture */
{
LLVMTypeRef elem_types[4];
+
+ elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type();
+ elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
+ elem_types[LP_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
+ elem_types[LP_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0);
+
+ texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_WIDTH);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_HEIGHT);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, stride,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_STRIDE);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_DATA);
+ LP_CHECK_STRUCT_SIZE(struct lp_jit_texture,
+ screen->target, texture_type);
+
+ LLVMAddTypeName(screen->module, "texture", texture_type);
+ }
+
+ /* struct lp_jit_context */
+ {
+ LLVMTypeRef elem_types[5];
LLVMTypeRef context_type;
elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */
elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */
elem_types[2] = LLVMFloatType(); /* alpha_ref_value */
elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0); /* blend_color */
+ elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
@@ -64,6 +96,9 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
screen->target, context_type, 2);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
screen->target, context_type, 3);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
+ screen->target, context_type,
+ LP_JIT_CONTEXT_TEXTURES_INDEX);
LP_CHECK_STRUCT_SIZE(struct lp_jit_context,
screen->target, context_type);
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index a7fb60f9f5..58f716ede2 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -38,11 +38,31 @@
#include "lp_bld_struct.h"
+#include "pipe/p_state.h"
+
struct tgsi_sampler;
struct llvmpipe_screen;
+struct lp_jit_texture
+{
+ uint32_t width;
+ uint32_t height;
+ uint32_t stride;
+ const void *data;
+};
+
+
+enum {
+ LP_JIT_TEXTURE_WIDTH = 0,
+ LP_JIT_TEXTURE_HEIGHT,
+ LP_JIT_TEXTURE_STRIDE,
+ LP_JIT_TEXTURE_DATA
+};
+
+
+
/**
* This structure is passed directly to the generated fragment shader.
*
@@ -60,11 +80,12 @@ struct lp_jit_context
struct tgsi_sampler **samplers;
- /* TODO: alpha reference value */
float alpha_ref_value;
- /* TODO: blend constant color */
+ /* FIXME: store (also?) in floats */
uint8_t *blend_color;
+
+ struct lp_jit_texture textures[PIPE_MAX_SAMPLERS];
};
@@ -80,6 +101,11 @@ struct lp_jit_context
#define lp_jit_context_blend_color(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, 3, "blend_color")
+#define LP_JIT_CONTEXT_TEXTURES_INDEX 4
+
+#define lp_jit_context_textures(_builder, _ptr) \
+ lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures")
+
typedef void
(*lp_jit_frag_func)(struct lp_jit_context *context,
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 125035771e..0ce1a37bd4 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -27,8 +27,6 @@
#include "util/u_memory.h"
-#include "util/u_simple_screen.h"
-#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
@@ -196,8 +194,7 @@ static void
llvmpipe_destroy_screen( struct pipe_screen *_screen )
{
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
-
- struct pipe_winsys *winsys = _screen->winsys;
+ struct llvmpipe_winsys *winsys = screen->winsys;
lp_jit_screen_cleanup(screen);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index d145f6d6bb..2d2fc19a65 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -44,6 +44,7 @@
#include "pipe/p_thread.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "lp_bld_debug.h"
#include "lp_tile_cache.h"
#include "lp_tile_soa.h"
@@ -162,12 +163,12 @@ shade_quads(struct llvmpipe_context *llvmpipe,
else
depth = NULL;
- /* TODO: blend color */
+ /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */
+ assert(lp_check_alignment(mask, 16));
- assert((((uintptr_t)mask) & 0xf) == 0);
- assert((((uintptr_t)depth) & 0xf) == 0);
- assert((((uintptr_t)color) & 0xf) == 0);
- assert((((uintptr_t)llvmpipe->jit_context.blend_color) & 0xf) == 0);
+ assert(lp_check_alignment(depth, 16));
+ assert(lp_check_alignment(color, 16));
+ assert(lp_check_alignment(llvmpipe->jit_context.blend_color, 16));
/* run shader */
fs->current->jit_function( &llvmpipe->jit_context,
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index fb10329887..7b26ce61a3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -36,6 +36,7 @@
#include "pipe/p_state.h"
#include "tgsi/tgsi_scan.h"
#include "lp_jit.h"
+#include "lp_bld_sample.h" /* for struct lp_sampler_static_state */
#define LP_NEW_VIEWPORT 0x1
@@ -57,16 +58,20 @@
struct tgsi_sampler;
struct vertex_info;
-
+struct pipe_context;
+struct llvmpipe_context;
struct lp_fragment_shader;
struct lp_fragment_shader_variant_key
{
+ enum pipe_format zsbuf_format;
struct pipe_depth_state depth;
struct pipe_alpha_state alpha;
struct pipe_blend_state blend;
+
+ struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS];
};
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 6fbb057937..e87976b9f3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -250,7 +250,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
if (llvmpipe->dirty & (LP_NEW_FS |
LP_NEW_BLEND |
- LP_NEW_DEPTH_STENCIL_ALPHA))
+ LP_NEW_DEPTH_STENCIL_ALPHA |
+ LP_NEW_SAMPLER |
+ LP_NEW_TEXTURE))
llvmpipe_update_fs( llvmpipe );
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 94170bd716..618cf1ffb8 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -85,6 +85,7 @@
#include "lp_context.h"
#include "lp_state.h"
#include "lp_quad.h"
+#include "lp_tex_sample.h"
static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
@@ -130,9 +131,8 @@ generate_pos0(LLVMBuilderRef builder,
* Generate the depth test.
*/
static void
-generate_depth(struct llvmpipe_context *lp,
- LLVMBuilderRef builder,
- const struct pipe_depth_state *state,
+generate_depth(LLVMBuilderRef builder,
+ const struct lp_fragment_shader_variant_key *key,
union lp_type src_type,
struct lp_build_mask_context *mask,
LLVMValueRef src,
@@ -141,10 +141,10 @@ generate_depth(struct llvmpipe_context *lp,
const struct util_format_description *format_desc;
union lp_type dst_type;
- if(!lp->framebuffer.zsbuf)
+ if(!key->depth.enabled)
return;
- format_desc = util_format_description(lp->framebuffer.zsbuf->format);
+ format_desc = util_format_description(key->zsbuf_format);
assert(format_desc);
/* Pick the depth type. */
@@ -164,7 +164,7 @@ generate_depth(struct llvmpipe_context *lp,
#endif
lp_build_depth_test(builder,
- state,
+ &key->depth,
dst_type,
format_desc,
mask,
@@ -173,107 +173,6 @@ generate_depth(struct llvmpipe_context *lp,
}
-struct build_fetch_texel_context
-{
- LLVMValueRef context_ptr;
-
- LLVMValueRef samplers_ptr;
-
- /** Coords/texels store */
- LLVMValueRef store_ptr;
-};
-
-
-void PIPE_CDECL
-lp_fetch_texel_soa( struct tgsi_sampler **samplers,
- uint32_t unit,
- float *store )
-{
- struct tgsi_sampler *sampler = samplers[unit];
-
-#if 0
- uint j;
-
- debug_printf("%s sampler: %p (%p) store: %p\n",
- __FUNCTION__,
- sampler, *sampler,
- store );
-
- debug_printf("lodbias %f\n", store[12]);
-
- for (j = 0; j < 4; j++)
- debug_printf("sample %d texcoord %f %f\n",
- j,
- store[0+j],
- store[4+j]);
-#endif
-
- {
- float rgba[NUM_CHANNELS][QUAD_SIZE];
- sampler->get_samples(sampler,
- &store[0],
- &store[4],
- &store[8],
- 0.0f, /*store[12], lodbias */
- rgba);
- memcpy(store, rgba, sizeof rgba);
- }
-
-#if 0
- for (j = 0; j < 4; j++)
- debug_printf("sample %d result %f %f %f %f\n",
- j,
- store[0+j],
- store[4+j],
- store[8+j],
- store[12+j]);
-#endif
-}
-
-
-static void
-emit_fetch_texel( LLVMBuilderRef builder,
- void *context,
- unsigned unit,
- unsigned num_coords,
- const LLVMValueRef *coords,
- LLVMValueRef lodbias,
- LLVMValueRef *texel)
-{
- struct build_fetch_texel_context *bld = context;
- LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
- LLVMValueRef args[3];
- unsigned i;
-
- if(!bld->samplers_ptr)
- bld->samplers_ptr = lp_jit_context_samplers(builder, bld->context_ptr);
-
- if(!bld->store_ptr)
- bld->store_ptr = LLVMBuildArrayAlloca(builder,
- vec_type,
- LLVMConstInt(LLVMInt32Type(), 4, 0),
- "texel_store");
-
- for (i = 0; i < num_coords; i++) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef coord_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, "");
- LLVMBuildStore(builder, coords[i], coord_ptr);
- }
-
- args[0] = bld->samplers_ptr;
- args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
- args[2] = bld->store_ptr;
-
- lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
-
- for (i = 0; i < NUM_CHANNELS; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef texel_ptr = LLVMBuildGEP(builder, bld->store_ptr, &index, 1, "");
- texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
- }
-}
-
-
/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
*/
@@ -286,7 +185,7 @@ generate_fs(struct llvmpipe_context *lp,
LLVMValueRef context_ptr,
unsigned i,
const struct lp_build_interp_soa_context *interp,
- struct build_fetch_texel_context *sampler,
+ struct lp_build_sampler_soa *sampler,
LLVMValueRef *pmask,
LLVMValueRef *color,
LLVMValueRef depth_ptr)
@@ -298,6 +197,7 @@ generate_fs(struct llvmpipe_context *lp,
LLVMValueRef consts_ptr;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
LLVMValueRef z = interp->pos[2];
+ struct lp_build_flow_context *flow;
struct lp_build_mask_context mask;
boolean early_depth_test;
unsigned attrib;
@@ -309,25 +209,35 @@ generate_fs(struct llvmpipe_context *lp,
consts_ptr = lp_jit_context_constants(builder, context_ptr);
- lp_build_mask_begin(&mask, builder, type, *pmask);
+ flow = lp_build_flow_create(builder);
+
+ memset(outputs, 0, sizeof outputs);
+
+ lp_build_flow_scope_begin(flow);
+
+ /* Declare the color and z variables */
+ for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+ color[chan] = LLVMGetUndef(vec_type);
+ lp_build_flow_scope_declare(flow, &color[chan]);
+ }
+ lp_build_flow_scope_declare(flow, &z);
+
+ lp_build_mask_begin(&mask, flow, type, *pmask);
early_depth_test =
- lp->depth_stencil->depth.enabled &&
- lp->framebuffer.zsbuf &&
- !lp->depth_stencil->alpha.enabled &&
- !lp->fs->info.uses_kill &&
- !lp->fs->info.writes_z;
+ key->depth.enabled &&
+ !key->alpha.enabled &&
+ !shader->info.uses_kill &&
+ !shader->info.writes_z;
if(early_depth_test)
- generate_depth(lp, builder, &key->depth,
+ generate_depth(builder, key,
type, &mask,
z, depth_ptr);
- memset(outputs, 0, sizeof outputs);
-
lp_build_tgsi_soa(builder, tokens, type, &mask,
consts_ptr, interp->pos, interp->inputs,
- outputs, emit_fetch_texel, sampler);
+ outputs, sampler);
for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
@@ -368,12 +278,16 @@ generate_fs(struct llvmpipe_context *lp,
}
if(!early_depth_test)
- generate_depth(lp, builder, &key->depth,
+ generate_depth(builder, key,
type, &mask,
z, depth_ptr);
lp_build_mask_end(&mask);
+ lp_build_flow_scope_end(flow);
+
+ lp_build_flow_destroy(flow);
+
*pmask = mask.value;
}
@@ -392,6 +306,8 @@ generate_blend(const struct pipe_blend_state *blend,
LLVMValueRef dst_ptr)
{
struct lp_build_context bld;
+ struct lp_build_flow_context *flow;
+ struct lp_build_mask_context mask_ctx;
LLVMTypeRef vec_type;
LLVMTypeRef int_vec_type;
LLVMValueRef const_ptr;
@@ -400,11 +316,14 @@ generate_blend(const struct pipe_blend_state *blend,
LLVMValueRef res[4];
unsigned chan;
+ lp_build_context_init(&bld, builder, type);
+
+ flow = lp_build_flow_create(builder);
+ lp_build_mask_begin(&mask_ctx, flow, type, mask);
+
vec_type = lp_build_vec_type(type);
int_vec_type = lp_build_int_vec_type(type);
- lp_build_context_init(&bld, builder, type);
-
const_ptr = lp_jit_context_blend_color(builder, context_ptr);
const_ptr = LLVMBuildBitCast(builder, const_ptr,
LLVMPointerType(vec_type, 0), "");
@@ -422,11 +341,16 @@ generate_blend(const struct pipe_blend_state *blend,
lp_build_blend_soa(builder, blend, type, src, dst, con, res);
for(chan = 0; chan < 4; ++chan) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
- lp_build_name(res[chan], "res.%c", "rgba"[chan]);
- res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
- LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
+ if(blend->colormask & (1 << chan)) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+ lp_build_name(res[chan], "res.%c", "rgba"[chan]);
+ res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
+ LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
+ }
}
+
+ lp_build_mask_end(&mask_ctx);
+ lp_build_flow_destroy(flow);
}
@@ -462,7 +386,7 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMBuilderRef builder;
LLVMValueRef x0;
LLVMValueRef y0;
- struct build_fetch_texel_context sampler;
+ struct lp_build_sampler_soa *sampler;
struct lp_build_interp_soa_context interp;
LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
@@ -586,8 +510,13 @@ generate_fragment(struct llvmpipe_context *lp,
a0_ptr, dadx_ptr, dady_ptr,
x0, y0, 2, 0);
- memset(&sampler, 0, sizeof sampler);
- sampler.context_ptr = context_ptr;
+#if 0
+ /* C texture sampling */
+ sampler = lp_c_sampler_soa_create(context_ptr);
+#else
+ /* code generated texture sampling */
+ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
+#endif
for(i = 0; i < num_fs; ++i) {
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
@@ -606,7 +535,7 @@ generate_fragment(struct llvmpipe_context *lp,
context_ptr,
i,
&interp,
- &sampler,
+ sampler,
&fs_mask[i],
out_color,
depth_ptr_i);
@@ -615,6 +544,8 @@ generate_fragment(struct llvmpipe_context *lp,
fs_out_color[chan][i] = out_color[chan];
}
+ sampler->destroy(sampler);
+
/*
* Convert the fs's output color and mask to fit to the blending type.
*/
@@ -765,18 +696,45 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
*/
static void
make_variant_key(struct llvmpipe_context *lp,
+ struct lp_fragment_shader *shader,
struct lp_fragment_shader_variant_key *key)
{
+ unsigned i;
+
memset(key, 0, sizeof *key);
- memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
+ if(lp->framebuffer.zsbuf &&
+ lp->depth_stencil->depth.enabled) {
+ key->zsbuf_format = lp->framebuffer.zsbuf->format;
+ memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
+ }
key->alpha.enabled = lp->depth_stencil->alpha.enabled;
if(key->alpha.enabled)
key->alpha.func = lp->depth_stencil->alpha.func;
/* alpha.ref_value is passed in jit_context */
- memcpy(&key->blend, lp->blend, sizeof key->blend);
+ if(lp->framebuffer.cbufs[0]) {
+ const struct util_format_description *format_desc;
+ unsigned chan;
+
+ memcpy(&key->blend, lp->blend, sizeof key->blend);
+
+ format_desc = util_format_description(lp->framebuffer.cbufs[0]->format);
+ assert(format_desc->layout == UTIL_FORMAT_COLORSPACE_RGB ||
+ format_desc->layout == UTIL_FORMAT_COLORSPACE_SRGB);
+
+ /* mask out color channels not present in the color buffer */
+ for(chan = 0; chan < 4; ++chan) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+ if(swizzle > 4)
+ key->blend.colormask &= ~(1 << chan);
+ }
+ }
+
+ for(i = 0; i < PIPE_MAX_SAMPLERS; ++i)
+ if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
+ lp_sampler_static_state(&key->sampler[i], lp->texture[i], lp->sampler[i]);
}
@@ -787,7 +745,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
struct lp_fragment_shader_variant_key key;
struct lp_fragment_shader_variant *variant;
- make_variant_key(lp, &key);
+ make_variant_key(lp, shader, &key);
variant = shader->variants;
while(variant) {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index 4fef541b1e..c69d90c723 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -98,6 +98,16 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe,
pipe_texture_reference(&llvmpipe->texture[i], tex);
lp_tex_tile_cache_set_texture(llvmpipe->tex_cache[i], tex);
+
+ if(tex) {
+ struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex);
+ struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i];
+ jit_tex->width = tex->width[0];
+ jit_tex->height = tex->height[0];
+ jit_tex->stride = lp_tex->stride[0];
+ if(!lp_tex->dt)
+ jit_tex->data = lp_tex->data;
+ }
}
llvmpipe->num_textures = num;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 1d192355ee..d8455e5649 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -119,7 +119,7 @@ add_load_rgba_test(LLVMModuleRef module,
lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop);
- rgba = lp_build_load_rgba(builder, format, ptr);
+ rgba = lp_build_load_rgba_aos(builder, format, ptr);
LLVMBuildStore(builder, rgba, rgba_ptr);
lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop);
@@ -160,7 +160,7 @@ add_store_rgba_test(LLVMModuleRef module,
rgba = LLVMBuildLoad(builder, rgba_ptr, "");
- lp_build_store_rgba(builder, format, ptr, rgba);
+ lp_build_store_rgba_aos(builder, format, ptr, rgba);
LLVMBuildRetVoid(builder);
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
index 23a94b5b0d..773e848242 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
@@ -154,7 +154,7 @@ lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc)
if (lpt->timestamp != tc->timestamp) {
/* texture was modified, invalidate all cached tiles */
uint i;
- _debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp);
+ debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp);
for (i = 0; i < NUM_ENTRIES; i++) {
tc->entries[i].addr.bits.invalid = 1;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
index 628ec3f1ef..9ad1bde956 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -29,10 +29,13 @@
#define LP_TEX_SAMPLE_H
+#include <llvm-c/Core.h>
+
#include "tgsi/tgsi_exec.h"
struct llvmpipe_tex_tile_cache;
+struct lp_sampler_static_state;
/**
@@ -75,4 +78,24 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler,
float rgba[NUM_CHANNELS][QUAD_SIZE]);
+/**
+ * Texture sampling code generator that just calls lp_get_samples C function
+ * for the actual sampling computation.
+ *
+ * @param context_ptr LLVM value with the pointer to the struct lp_jit_context.
+ */
+struct lp_build_sampler_soa *
+lp_c_sampler_soa_create(LLVMValueRef context_ptr);
+
+
+/**
+ * Pure-LLVM texture sampling code generator.
+ *
+ * @param context_ptr LLVM value with the pointer to the struct lp_jit_context.
+ */
+struct lp_build_sampler_soa *
+lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key,
+ LLVMValueRef context_ptr);
+
+
#endif /* LP_TEX_SAMPLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
index 94eb6dad5a..9a876f404d 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -1578,3 +1578,136 @@ out:
tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba );
}
+
+void PIPE_CDECL
+lp_fetch_texel_soa( struct tgsi_sampler **samplers,
+ uint32_t unit,
+ float *store )
+{
+ struct tgsi_sampler *sampler = samplers[unit];
+
+#if 0
+ uint j;
+
+ debug_printf("%s sampler: %p (%p) store: %p\n",
+ __FUNCTION__,
+ sampler, *sampler,
+ store );
+
+ debug_printf("lodbias %f\n", store[12]);
+
+ for (j = 0; j < 4; j++)
+ debug_printf("sample %d texcoord %f %f\n",
+ j,
+ store[0+j],
+ store[4+j]);
+#endif
+
+ {
+ float rgba[NUM_CHANNELS][QUAD_SIZE];
+ sampler->get_samples(sampler,
+ &store[0],
+ &store[4],
+ &store[8],
+ 0.0f, /*store[12], lodbias */
+ rgba);
+ memcpy(store, rgba, sizeof rgba);
+ }
+
+#if 0
+ for (j = 0; j < 4; j++)
+ debug_printf("sample %d result %f %f %f %f\n",
+ j,
+ store[0+j],
+ store[4+j],
+ store[8+j],
+ store[12+j]);
+#endif
+}
+
+
+#include "lp_bld_type.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_tgsi.h"
+
+
+struct lp_c_sampler_soa
+{
+ struct lp_build_sampler_soa base;
+
+ LLVMValueRef context_ptr;
+
+ LLVMValueRef samplers_ptr;
+
+ /** Coords/texels store */
+ LLVMValueRef store_ptr;
+};
+
+
+static void
+lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+ FREE(sampler);
+}
+
+
+static void
+lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler,
+ LLVMBuilderRef builder,
+ union lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel)
+{
+ struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler;
+ LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
+ LLVMValueRef args[3];
+ unsigned i;
+
+ if(!sampler->samplers_ptr)
+ sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr);
+
+ if(!sampler->store_ptr)
+ sampler->store_ptr = LLVMBuildArrayAlloca(builder,
+ vec_type,
+ LLVMConstInt(LLVMInt32Type(), 4, 0),
+ "texel_store");
+
+ for (i = 0; i < num_coords; i++) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
+ LLVMBuildStore(builder, coords[i], coord_ptr);
+ }
+
+ args[0] = sampler->samplers_ptr;
+ args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+ args[2] = sampler->store_ptr;
+
+ lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
+
+ for (i = 0; i < NUM_CHANNELS; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
+ texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
+ }
+}
+
+
+struct lp_build_sampler_soa *
+lp_c_sampler_soa_create(LLVMValueRef context_ptr)
+{
+ struct lp_c_sampler_soa *sampler;
+
+ sampler = CALLOC_STRUCT(lp_c_sampler_soa);
+ if(!sampler)
+ return NULL;
+
+ sampler->base.destroy = lp_c_sampler_soa_destroy;
+ sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel;
+ sampler->context_ptr = context_ptr;
+
+ return &sampler->base;
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
new file mode 100644
index 0000000000..7d31705d01
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_llvm.c
@@ -0,0 +1,196 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Texture sampling code generation
+ *
+ * This file is nothing more than ugly glue between three largely independent
+ * entities:
+ * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa)
+ * - texture sampling code generation (i.e., lp_build_sample_soa)
+ * - LLVM pipe driver
+ *
+ * All interesting code is in the functions mentioned above. There is really
+ * nothing to see here.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_type.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_sample.h"
+#include "lp_bld_tgsi.h"
+#include "lp_state.h"
+#include "lp_tex_sample.h"
+
+
+/**
+ * This provides the bridge between the sampler state store in lp_jit_context
+ * and lp_jit_texture and the sampler code generator. It provides the
+ * texture layout information required by the texture sampler code generator
+ * in terms of the state stored in lp_jit_context and lp_jit_texture in runtime.
+ */
+struct llvmpipe_sampler_dynamic_state
+{
+ struct lp_sampler_dynamic_state base;
+
+ const struct lp_sampler_static_state *static_state;
+
+ LLVMValueRef context_ptr;
+};
+
+
+/**
+ * This is the bridge between our sampler and the TGSI translator.
+ */
+struct lp_llvm_sampler_soa
+{
+ struct lp_build_sampler_soa base;
+
+ struct llvmpipe_sampler_dynamic_state dynamic_state;
+};
+
+
+/**
+ * Fetch the specified member of the lp_jit_texture structure.
+ *
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+lp_llvm_texture_member(struct lp_sampler_dynamic_state *base,
+ LLVMBuilderRef builder,
+ unsigned unit,
+ unsigned member_index,
+ const char *member_name)
+{
+ struct llvmpipe_sampler_dynamic_state *state = (struct llvmpipe_sampler_dynamic_state *)base;
+ LLVMValueRef indices[4];
+ LLVMValueRef ptr;
+ LLVMValueRef res;
+
+ assert(unit < PIPE_MAX_SAMPLERS);
+
+ /* context[0] */
+ indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ /* context[0].textures */
+ indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CONTEXT_TEXTURES_INDEX, 0);
+ /* context[0].textures[unit] */
+ indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+ /* context[0].textures[unit].member */
+ indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0);
+
+ ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), "");
+
+ res = LLVMBuildLoad(builder, ptr, "");
+
+ lp_build_name(res, "context.texture%u.%s", unit, member_name);
+
+ return res;
+}
+
+
+/**
+ * Helper macro to instantiate the functions that generate the code to fetch
+ * the members of lp_jit_texture to fulfill the sampler code generator requests.
+ *
+ * This complexity is the price we have to pay to keep the texture sampler code
+ * generator a reusable module without dependencies to llvmpipe internals.
+ */
+#define LP_LLVM_TEXTURE_MEMBER(_name, _index) \
+ static LLVMValueRef \
+ lp_llvm_texture_##_name( struct lp_sampler_dynamic_state *base, \
+ LLVMBuilderRef builder, \
+ unsigned unit) \
+ { \
+ return lp_llvm_texture_member(base, builder, unit, _index, #_name ); \
+ }
+
+
+LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH)
+LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT)
+LP_LLVM_TEXTURE_MEMBER(stride, LP_JIT_TEXTURE_STRIDE)
+LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA)
+
+
+static void
+lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+ FREE(sampler);
+}
+
+
+static void
+lp_llvm_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *base,
+ LLVMBuilderRef builder,
+ union lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ LLVMValueRef lodbias,
+ LLVMValueRef *texel)
+{
+ struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base;
+
+ assert(unit < PIPE_MAX_SAMPLERS);
+
+ lp_build_sample_soa(builder,
+ &sampler->dynamic_state.static_state[unit],
+ &sampler->dynamic_state.base,
+ type,
+ unit,
+ num_coords,
+ coords,
+ lodbias,
+ texel);
+}
+
+
+struct lp_build_sampler_soa *
+lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
+ LLVMValueRef context_ptr)
+{
+ struct lp_llvm_sampler_soa *sampler;
+
+ sampler = CALLOC_STRUCT(lp_llvm_sampler_soa);
+ if(!sampler)
+ return NULL;
+
+ sampler->base.destroy = lp_llvm_sampler_soa_destroy;
+ sampler->base.emit_fetch_texel = lp_llvm_sampler_soa_emit_fetch_texel;
+ sampler->dynamic_state.base.width = lp_llvm_texture_width;
+ sampler->dynamic_state.base.height = lp_llvm_texture_height;
+ sampler->dynamic_state.base.stride = lp_llvm_texture_stride;
+ sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr;
+ sampler->dynamic_state.static_state = static_state;
+ sampler->dynamic_state.context_ptr = context_ptr;
+
+ return &sampler->base;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 03b9243b82..93479a0314 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -148,6 +148,7 @@ nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
mt->image_nr = 1;
mt->level[0].pitch = *stride;
mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
+ mt->level[0].tile_mode = bo->tile_mode;
nouveau_bo_ref(bo, &mt->base.bo);
return &mt->base.base;
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 344c2cf6dd..d294356f75 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -66,7 +66,8 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM);
break;
}
- so_data(so, bo->tile_mode << 4);
+ so_data(so, nv50_miptree(pt)->
+ level[fb->cbufs[i]->level].tile_mode << 4);
so_data(so, 0x00000000);
so_method(so, tesla, 0x1224, 1);
@@ -110,7 +111,8 @@ nv50_state_validate_fb(struct nv50_context *nv50)
so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM);
break;
}
- so_data(so, bo->tile_mode << 4);
+ so_data(so, nv50_miptree(pt)->
+ level[fb->zsbuf->level].tile_mode << 4);
so_data(so, 0x00000000);
so_method(so, tesla, 0x1538, 1);
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index b266324f58..6bf6f773b0 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -60,13 +60,13 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
format = nv50_format(ps->format);
if (format < 0)
return 1;
-
+
if (!bo->tile_flags) {
BEGIN_RING(chan, eng2d, mthd, 2);
OUT_RING (chan, format);
OUT_RING (chan, 1);
BEGIN_RING(chan, eng2d, mthd + 0x14, 5);
- OUT_RING (chan, mt->level[0].pitch);
+ OUT_RING (chan, mt->level[ps->level].pitch);
OUT_RING (chan, ps->width);
OUT_RING (chan, ps->height);
OUT_RELOCh(chan, bo, ps->offset, flags);
@@ -75,7 +75,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
BEGIN_RING(chan, eng2d, mthd, 5);
OUT_RING (chan, format);
OUT_RING (chan, 0);
- OUT_RING (chan, bo->tile_mode << 4);
+ OUT_RING (chan, mt->level[ps->level].tile_mode << 4);
OUT_RING (chan, 1);
OUT_RING (chan, 0);
BEGIN_RING(chan, eng2d, mthd + 0x18, 4);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 6c5914baa3..52b1c9a6b2 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -184,8 +184,15 @@ struct r300_texture {
/* Offsets into the buffer. */
unsigned offset[PIPE_MAX_TEXTURE_LEVELS];
- /* Stride (pitch?) of this texture in bytes */
- unsigned stride;
+ /**
+ * If non-zero, override the natural texture layout with
+ * a custom stride (in bytes).
+ *
+ * \note Mipmapping fails for textures with a non-natural layout!
+ *
+ * \sa r300_texture_get_stride
+ */
+ unsigned stride_override;
/* Total size of this texture, in bytes. */
unsigned size;
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 1bc35c2486..a1b36ba2ed 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -283,7 +283,7 @@ void r300_emit_fb_state(struct r300_context* r300,
for (i = 0; i < fb->nr_cbufs; i++) {
tex = (struct r300_texture*)fb->cbufs[i]->texture;
assert(tex && tex->buffer && "cbuf is marked, but NULL!");
- pixpitch = tex->stride / tex->tex.block.size;
+ pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size;
OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
@@ -300,7 +300,7 @@ void r300_emit_fb_state(struct r300_context* r300,
if (fb->zsbuf) {
tex = (struct r300_texture*)fb->zsbuf->texture;
assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
- pixpitch = tex->stride / tex->tex.block.size;
+ pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size;
OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index d05a736dd9..737396d8d9 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -31,6 +31,7 @@
#include "r300_state_derived.h"
/* r300_render: Vertex and index buffer primitive emission. */
+#define R300_MAX_VBO_SIZE (1024 * 1024)
struct r300_render {
/* Parent class */
@@ -46,7 +47,10 @@ struct r300_render {
/* VBO */
struct pipe_buffer* vbo;
- size_t vbo_alloc_size;
+ size_t vbo_size;
+ size_t vbo_offset;
+ size_t vbo_max_used;
+ void * vbo_ptr;
};
static INLINE struct r300_render*
@@ -75,19 +79,18 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render,
struct pipe_screen* screen = r300->context.screen;
size_t size = (size_t)vertex_size * (size_t)count;
- if (r300render->vbo && (size > r300render->vbo_alloc_size)) {
- pipe_buffer_reference(&r300render->vbo, NULL);
- }
-
- if (!r300render->vbo) {
+ if (size + r300render->vbo_offset > r300render->vbo_size)
+ {
r300render->vbo = pipe_buffer_create(screen,
64,
PIPE_BUFFER_USAGE_VERTEX,
- size);
+ R300_MAX_VBO_SIZE);
+ r300render->vbo_size = R300_MAX_VBO_SIZE;
}
- r300render->vbo_alloc_size = MAX2(size, r300render->vbo_alloc_size);
r300render->vertex_size = vertex_size;
+ r300->vbo = r300render->vbo;
+ r300->vbo_offset = r300render->vbo_offset;
return (r300render->vbo) ? TRUE : FALSE;
}
@@ -97,8 +100,10 @@ static void* r300_render_map_vertices(struct vbuf_render* render)
struct r300_render* r300render = r300_render(render);
struct pipe_screen* screen = r300render->r300->context.screen;
- return (unsigned char*)pipe_buffer_map(screen, r300render->vbo,
- PIPE_BUFFER_USAGE_CPU_WRITE);
+ r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo,
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+
+ return (r300render->vbo_ptr + r300render->vbo_offset);
}
static void r300_render_unmap_vertices(struct vbuf_render* render,
@@ -107,15 +112,24 @@ static void r300_render_unmap_vertices(struct vbuf_render* render,
{
struct r300_render* r300render = r300_render(render);
struct pipe_screen* screen = r300render->r300->context.screen;
+ CS_LOCALS(r300render->r300);
+ BEGIN_CS(2);
+ OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max);
+ END_CS;
+ r300render->vbo_max_used = MAX2(r300render->vbo_max_used,
+ r300render->vertex_size * (max + 1));
pipe_buffer_unmap(screen, r300render->vbo);
}
static void r300_render_release_vertices(struct vbuf_render* render)
{
struct r300_render* r300render = r300_render(render);
+ struct r300_context* r300 = r300render->r300;
- pipe_buffer_reference(&r300render->vbo, NULL);
+ r300render->vbo_offset += r300render->vbo_max_used;
+ r300render->vbo_max_used = 0;
+ r300->vbo = NULL;
}
static boolean r300_render_set_primitive(struct vbuf_render* render,
@@ -163,14 +177,12 @@ static boolean r300_render_set_primitive(struct vbuf_render* render,
return TRUE;
}
-static void prepare_render(struct r300_render* render, unsigned count)
+static void r300_prepare_render(struct r300_render* render, unsigned count)
{
struct r300_context* r300 = render->r300;
CS_LOCALS(r300);
- r300->vbo = render->vbo;
-
r300_emit_dirty_state(r300);
}
@@ -183,7 +195,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
CS_LOCALS(r300);
- prepare_render(r300render, count);
+ r300_prepare_render(r300render, count);
DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count);
@@ -208,7 +220,7 @@ static void r300_render_draw(struct vbuf_render* render,
CS_LOCALS(r300);
- prepare_render(r300render, count);
+ r300_prepare_render(r300render, count);
/* Send our indices into an index buffer. */
index_buffer = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX,
@@ -217,25 +229,7 @@ static void r300_render_draw(struct vbuf_render* render,
return;
}
-/*
- index_map = pipe_buffer_map(screen, index_buffer,
- PIPE_BUFFER_USAGE_CPU_WRITE);
- memcpy(index_map, indices, count);
- pipe_buffer_unmap(screen, index_buffer);
-
- debug_printf("r300: Doing indexbuf render, count %d\n", count);
-
- BEGIN_CS(8);
- OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
- OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
- r300render->hwprim);
- OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
- OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2));
- OUT_CS_INDEX_RELOC(index_buffer, 0, count, RADEON_GEM_DOMAIN_GTT, 0, 0);
- END_CS; */
-
- BEGIN_CS(4 + (count+1)/2);
- OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count);
+ BEGIN_CS(2 + (count+1)/2);
OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2);
OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
r300render->hwprim);
@@ -273,6 +267,10 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300)
r300render->base.release_vertices = r300_render_release_vertices;
r300render->base.destroy = r300_render_destroy;
+ r300render->vbo = NULL;
+ r300render->vbo_size = 0;
+ r300render->vbo_offset = 0;
+
return &r300render->base;
}
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 15740f6125..593178c50b 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -323,13 +323,14 @@ r300_get_tex_transfer(struct pipe_screen *screen,
if (trans) {
pipe_texture_reference(&trans->transfer.texture, texture);
trans->transfer.format = texture->format;
+ trans->transfer.x = x;
+ trans->transfer.y = y;
trans->transfer.width = w;
trans->transfer.height = h;
trans->transfer.block = texture->block;
trans->transfer.nblocksx = texture->nblocksx[level];
trans->transfer.nblocksy = texture->nblocksy[level];
- trans->transfer.stride = align(pf_get_stride(&trans->transfer.block,
- texture->width[level]), 32);
+ trans->transfer.stride = r300_texture_get_stride(tex, level);
trans->transfer.usage = usage;
trans->offset = offset;
}
@@ -356,7 +357,7 @@ static void* r300_transfer_map(struct pipe_screen* screen,
if (transfer->usage != PIPE_TRANSFER_READ) {
flags |= PIPE_BUFFER_USAGE_CPU_WRITE;
}
-
+
map = pipe_buffer_map(screen, tex->buffer, flags);
if (!map) {
diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c
index 96e6e4a77d..cc6288cb51 100644
--- a/src/gallium/drivers/r300/r300_surface.c
+++ b/src/gallium/drivers/r300/r300_surface.c
@@ -29,7 +29,7 @@ static void r300_surface_setup(struct r300_context* r300,
unsigned w, unsigned h)
{
struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
- unsigned pixpitch = dest->stride / dest->tex.block.size;
+ unsigned pixpitch = r300_texture_get_stride(dest, 0) / dest->tex.block.size;
CS_LOCALS(r300);
r300_emit_blend_state(r300, &blend_clear_state);
@@ -100,7 +100,7 @@ static void r300_surface_fill(struct pipe_context* pipe,
struct r300_context* r300 = r300_context(pipe);
struct r300_capabilities* caps = r300_screen(pipe->screen)->caps;
struct r300_texture* tex = (struct r300_texture*)dest->texture;
- unsigned pixpitch = tex->stride / tex->tex.block.size;
+ unsigned pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size;
boolean invalid = FALSE;
CS_LOCALS(r300);
@@ -233,7 +233,7 @@ static void r300_surface_copy(struct pipe_context* pipe,
struct r300_capabilities* caps = r300_screen(pipe->screen)->caps;
struct r300_texture* srctex = (struct r300_texture*)src->texture;
struct r300_texture* desttex = (struct r300_texture*)dest->texture;
- unsigned pixpitch = srctex->stride / srctex->tex.block.size;
+ unsigned pixpitch = r300_texture_get_stride(srctex, 0) / srctex->tex.block.size;
boolean invalid = FALSE;
float fsrcx = srcx, fsrcy = srcy, fdestx = destx, fdesty = desty;
CS_LOCALS(r300);
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 590052509c..6e8c368320 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -25,7 +25,6 @@
static void r300_setup_texture_state(struct r300_texture* tex,
unsigned width,
unsigned height,
- unsigned pitch,
unsigned levels)
{
struct r300_texture_state* state = &tex->state;
@@ -38,7 +37,7 @@ static void r300_setup_texture_state(struct r300_texture* tex,
/* XXX */
state->format1 = r300_translate_texformat(tex->tex.format);
- state->format2 = pitch - 1;
+ state->format2 = r300_texture_get_stride(tex, 0);
/* Assume (somewhat foolishly) that oversized textures will
* not be permitted by the state tracker. */
@@ -50,13 +49,30 @@ static void r300_setup_texture_state(struct r300_texture* tex,
}
debug_printf("r300: Set texture state (%dx%d, pitch %d, %d levels)\n",
- width, height, pitch, levels);
+ width, height, levels);
+}
+
+/**
+ * Return the stride, in bytes, of the texture images of the given texture
+ * at the given level.
+ */
+unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level)
+{
+ if (tex->stride_override)
+ return tex->stride_override;
+
+ if (level > tex->tex.last_level) {
+ debug_printf("%s: level (%u) > last_level (%u)\n", level, tex->tex.last_level);
+ return 0;
+ }
+
+ return align(pf_get_stride(&tex->tex.block, tex->tex.width[level]), 32);
}
static void r300_setup_miptree(struct r300_texture* tex)
{
struct pipe_texture* base = &tex->tex;
- int stride, size, offset;
+ int stride, size;
int i;
for (i = 0; i <= base->last_level; i++) {
@@ -74,7 +90,7 @@ static void r300_setup_miptree(struct r300_texture* tex)
* XXX
* POT, uncompressed, unmippmapped textures can be aligned to 32,
* instead of 64. */
- stride = align(pf_get_stride(&base->block, base->width[i]), 32);
+ stride = r300_texture_get_stride(tex, i);
size = stride * base->nblocksy[i] * base->depth[i];
tex->offset[i] = align(tex->size, 32);
@@ -84,10 +100,6 @@ static void r300_setup_miptree(struct r300_texture* tex)
"(%dx%dx%d px, pitch %d bytes)\n",
i, base->width[i], base->height[i], base->depth[i],
stride);
- /* Save stride of first level to the texture. */
- if (i == 0) {
- tex->stride = stride;
- }
}
}
@@ -109,7 +121,7 @@ static struct pipe_texture*
r300_setup_miptree(tex);
r300_setup_texture_state(tex, template->width[0], template->height[0],
- template->width[0], template->last_level);
+ template->last_level);
tex->buffer = screen->buffer_create(screen, 1024,
PIPE_BUFFER_USAGE_PIXEL,
@@ -189,11 +201,10 @@ static struct pipe_texture*
pipe_reference_init(&tex->tex.reference, 1);
tex->tex.screen = screen;
- tex->stride = *stride;
+ tex->stride_override = *stride;
/* XXX */
- r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0],
- tex->stride, 0);
+ r300_setup_texture_state(tex, tex->tex.width[0], tex->tex.height[0], 0);
pipe_buffer_reference(&tex->buffer, buffer);
@@ -221,7 +232,7 @@ boolean r300_get_texture_buffer(struct pipe_texture* texture,
pipe_buffer_reference(buffer, tex->buffer);
if (stride) {
- *stride = tex->stride;
+ *stride = r300_texture_get_stride(tex, 0);
}
return TRUE;
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 3b56f0307c..3109af5bac 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -30,8 +30,12 @@
#include "r300_context.h"
#include "r300_reg.h"
+struct r300_texture;
+
void r300_init_screen_texture_functions(struct pipe_screen* screen);
+unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level);
+
/* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */
static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
{
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index d68a104106..0913ca1bd5 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -158,7 +158,6 @@ static unsigned translate_saturate(unsigned saturate)
switch(saturate) {
case TGSI_SAT_NONE: return SATURATE_OFF;
case TGSI_SAT_ZERO_ONE: return SATURATE_ZERO_ONE;
- case TGSI_SAT_MINUS_PLUS_ONE: return SATURATE_PLUS_MINUS_ONE;
}
fprintf(stderr, "Unknown saturate mode: %i\n", saturate);
diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c
index fa59277438..8fac8e6e05 100644
--- a/src/gallium/drivers/softpipe/sp_clear.c
+++ b/src/gallium/drivers/softpipe/sp_clear.c
@@ -36,8 +36,6 @@
#include "util/u_pack_color.h"
#include "sp_clear.h"
#include "sp_context.h"
-#include "sp_surface.h"
-#include "sp_state.h"
#include "sp_tile_cache.h"
@@ -85,5 +83,7 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
/* non-cached surface */
pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv);
#endif
- }
+ }
+
+ softpipe->dirty_render_cache = TRUE;
}
diff --git a/src/gallium/drivers/softpipe/sp_clear.h b/src/gallium/drivers/softpipe/sp_clear.h
index 2e450672f5..9be3b86fe9 100644
--- a/src/gallium/drivers/softpipe/sp_clear.h
+++ b/src/gallium/drivers/softpipe/sp_clear.h
@@ -32,7 +32,6 @@
#ifndef SP_CLEAR_H
#define SP_CLEAR_H
-#include "pipe/p_state.h"
struct pipe_context;
extern void
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 86df320ea8..c699c433e9 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -83,7 +83,8 @@ softpipe_unmap_transfers(struct softpipe_context *sp)
}
-static void softpipe_destroy( struct pipe_context *pipe )
+static void
+softpipe_destroy( struct pipe_context *pipe )
{
struct softpipe_context *softpipe = softpipe_context( pipe );
uint i;
@@ -121,6 +122,15 @@ static void softpipe_destroy( struct pipe_context *pipe )
FREE( softpipe );
}
+
+/**
+ * if (the texture is being used as a framebuffer surface)
+ * return PIPE_REFERENCED_FOR_WRITE
+ * else if (the texture is a bound texture source)
+ * return PIPE_REFERENCED_FOR_READ XXX not done yet
+ * else
+ * return PIPE_UNREFERENCED
+ */
static unsigned int
softpipe_is_texture_referenced( struct pipe_context *pipe,
struct pipe_texture *texture,
@@ -129,15 +139,17 @@ softpipe_is_texture_referenced( struct pipe_context *pipe,
struct softpipe_context *softpipe = softpipe_context( pipe );
unsigned i;
- if(softpipe->dirty_render_cache) {
+ if (softpipe->dirty_render_cache) {
for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
- if(softpipe->framebuffer.cbufs[i] &&
- softpipe->framebuffer.cbufs[i]->texture == texture)
+ if (softpipe->framebuffer.cbufs[i] &&
+ softpipe->framebuffer.cbufs[i]->texture == texture) {
return PIPE_REFERENCED_FOR_WRITE;
+ }
}
- if(softpipe->framebuffer.zsbuf &&
- softpipe->framebuffer.zsbuf->texture == texture)
+ if (softpipe->framebuffer.zsbuf &&
+ softpipe->framebuffer.zsbuf->texture == texture) {
return PIPE_REFERENCED_FOR_WRITE;
+ }
}
/* FIXME: we also need to do the same for the texture cache */
@@ -145,6 +157,7 @@ softpipe_is_texture_referenced( struct pipe_context *pipe,
return PIPE_UNREFERENCED;
}
+
static unsigned int
softpipe_is_buffer_referenced( struct pipe_context *pipe,
struct pipe_buffer *buf)
@@ -152,6 +165,7 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe,
return PIPE_UNREFERENCED;
}
+
struct pipe_context *
softpipe_create( struct pipe_screen *screen )
{
@@ -222,7 +236,6 @@ softpipe_create( struct pipe_screen *screen )
softpipe->pipe.is_buffer_referenced = softpipe_is_buffer_referenced;
softpipe_init_query_funcs( softpipe );
- softpipe_init_texture_funcs( softpipe );
/*
* Alloc caches for accessing drawing surfaces and textures.
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index 70f0932431..548e40c4a8 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -30,26 +30,21 @@
* Michel Dänzer <michel@tungstengraphics.com>
*/
-#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_inlines.h"
-#include "pipe/internal/p_winsys_screen.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "sp_context.h"
#include "sp_state.h"
#include "sp_texture.h"
-#include "sp_tile_cache.h"
#include "sp_screen.h"
#include "sp_winsys.h"
-/* Simple, maximally packed layout.
- */
-
-
-/* Conventional allocation path for non-display textures:
+/**
+ * Conventional allocation path for non-display textures:
+ * Use a simple, maximally packed layout.
*/
static boolean
softpipe_texture_layout(struct pipe_screen *screen,
@@ -89,6 +84,10 @@ softpipe_texture_layout(struct pipe_screen *screen,
return spt->buffer != NULL;
}
+
+/**
+ * Texture layout for simple color buffers.
+ */
static boolean
softpipe_displaytarget_layout(struct pipe_screen *screen,
struct softpipe_texture * spt)
@@ -112,9 +111,6 @@ softpipe_displaytarget_layout(struct pipe_screen *screen,
}
-
-
-
static struct pipe_texture *
softpipe_texture_create(struct pipe_screen *screen,
const struct pipe_texture *templat)
@@ -342,14 +338,13 @@ softpipe_transfer_map( struct pipe_screen *screen,
/* May want to different things here depending on read/write nature
* of the map:
*/
- if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ)
- {
+ if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) {
/* Do something to notify sharing contexts of a texture change.
* In softpipe, that would mean flushing the texture cache.
*/
softpipe_screen(screen)->timestamp++;
}
-
+
xfer_map = map + softpipe_transfer(transfer)->offset +
transfer->y / transfer->block.height * transfer->stride +
transfer->x / transfer->block.width * transfer->block.size;
@@ -360,7 +355,7 @@ softpipe_transfer_map( struct pipe_screen *screen,
static void
softpipe_transfer_unmap(struct pipe_screen *screen,
- struct pipe_transfer *transfer)
+ struct pipe_transfer *transfer)
{
struct softpipe_texture *spt;
@@ -377,12 +372,6 @@ softpipe_transfer_unmap(struct pipe_screen *screen,
void
-softpipe_init_texture_funcs(struct softpipe_context *sp)
-{
-}
-
-
-void
softpipe_init_screen_texture_funcs(struct pipe_screen *screen)
{
screen->texture_create = softpipe_texture_create;
@@ -404,7 +393,7 @@ softpipe_get_texture_buffer( struct pipe_texture *texture,
struct pipe_buffer **buf,
unsigned *stride )
{
- struct softpipe_texture *tex = (struct softpipe_texture *)texture;
+ struct softpipe_texture *tex = (struct softpipe_texture *) texture;
if (!tex)
return FALSE;
diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h
index 893aa7d11d..b1b6130b22 100644
--- a/src/gallium/drivers/softpipe/sp_texture.h
+++ b/src/gallium/drivers/softpipe/sp_texture.h
@@ -74,9 +74,6 @@ softpipe_transfer(struct pipe_transfer *pt)
extern void
-softpipe_init_texture_funcs( struct softpipe_context *softpipe );
-
-extern void
softpipe_init_screen_texture_funcs(struct pipe_screen *screen);