summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2010-07-26 17:47:59 -0700
committerEric Anholt <eric@anholt.net>2010-07-26 17:53:27 -0700
commitafe125e0a18ac3886c45c7e6b02b122fb2d327b5 (patch)
tree78621707e71154c0b388b0baacffc26432b7e992 /src/gallium/auxiliary
parentd64343f1ae84979bd154475badf11af8a9bfc2eb (diff)
parent5403ca79b225605c79f49866a6497c97da53be3b (diff)
Merge remote branch 'origin/master' into glsl2
This pulls in multiple i965 driver fixes which will help ensure better testing coverage during development, and also gets past the conflicts of the src/mesa/shader -> src/mesa/program move. Conflicts: src/mesa/Makefile src/mesa/main/shaderapi.c src/mesa/main/shaderobj.h
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/Makefile7
-rw-r--r--src/gallium/auxiliary/SConscript13
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c7
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c57
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h24
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c229
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.h145
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm_sample.c215
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm_translate.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.c52
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c10
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_clip.c11
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c15
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_point.c1
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h11
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c72
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h6
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c44
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_emit.c9
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c17
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c103
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_post_vs.c35
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_varray.c19
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c124
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c5
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.h6
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c120
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c87
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_const.h8
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_conv.c47
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format.h30
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_aos.c432
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_soa.c91
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c399
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_gather.c148
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_gather.h61
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.c30
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.h1
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c49
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.c120
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_pack.h8
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_quad.c8
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c95
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.h16
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c219
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.c211
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.h20
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c209
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_type.h48
-rw-r--r--src/gallium/auxiliary/os/os_thread.h87
-rw-r--r--src/gallium/auxiliary/target-helpers/inline_debug_helper.h44
-rw-r--r--src/gallium/auxiliary/target-helpers/inline_sw_helper.h63
-rw-r--r--src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h34
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c6
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c13
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.h6
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c4
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c341
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h93
-rw-r--r--src/gallium/auxiliary/util/u_caps.c17
-rw-r--r--src/gallium/auxiliary/util/u_cpu_detect.c12
-rw-r--r--src/gallium/auxiliary/util/u_debug.c4
-rw-r--r--src/gallium/auxiliary/util/u_double_list.h17
-rw-r--r--src/gallium/auxiliary/util/u_format.c2
-rw-r--r--src/gallium/auxiliary/util/u_format.h13
-rwxr-xr-xsrc/gallium/auxiliary/util/u_format_table.py5
-rw-r--r--src/gallium/auxiliary/util/u_math.h14
-rw-r--r--src/gallium/auxiliary/util/u_mempool.c169
-rw-r--r--src/gallium/auxiliary/util/u_mempool.h87
-rw-r--r--src/gallium/auxiliary/util/u_network.c2
71 files changed, 3801 insertions, 938 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 7c8db19f5c..dcebab7c0f 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -124,6 +124,7 @@ C_SOURCES = \
util/u_linear.c \
util/u_network.c \
util/u_math.c \
+ util/u_mempool.c \
util/u_mm.c \
util/u_rect.c \
util/u_ringbuffer.c \
@@ -154,6 +155,8 @@ GALLIVM_SOURCES = \
gallivm/lp_bld_flow.c \
gallivm/lp_bld_format_aos.c \
gallivm/lp_bld_format_soa.c \
+ gallivm/lp_bld_format_yuv.c \
+ gallivm/lp_bld_gather.c \
gallivm/lp_bld_init.c \
gallivm/lp_bld_intr.c \
gallivm/lp_bld_logic.c \
@@ -167,8 +170,10 @@ GALLIVM_SOURCES = \
gallivm/lp_bld_tgsi_soa.c \
gallivm/lp_bld_type.c \
draw/draw_llvm.c \
+ draw/draw_vs_llvm.c \
draw/draw_pt_fetch_shade_pipeline_llvm.c \
- draw/draw_llvm_translate.c
+ draw/draw_llvm_translate.c \
+ draw/draw_llvm_sample.c
GALLIVM_CPP_SOURCES = \
gallivm/lp_bld_misc.cpp
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 6242ab0c59..72a16617db 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -32,8 +32,8 @@ env.CodeGenerate(
env.CodeGenerate(
target = 'util/u_format_table.c',
- script = 'util/u_format_table.py',
- source = ['util/u_format.csv'],
+ script = '#src/gallium/auxiliary/util/u_format_table.py',
+ source = ['#src/gallium/auxiliary/util/u_format.csv'],
command = 'python $SCRIPT $SOURCE > $TARGET'
)
@@ -45,7 +45,7 @@ env.CodeGenerate(
)
env.Depends('util/u_format_table.c', [
- 'util/u_format_parse.py',
+ '#src/gallium/auxiliary/util/u_format_parse.py',
'util/u_format_pack.py',
])
@@ -172,6 +172,7 @@ source = [
'util/u_keymap.c',
'util/u_network.c',
'util/u_math.c',
+ 'util/u_mempool.c',
'util/u_mm.c',
'util/u_rect.c',
'util/u_resource.c',
@@ -203,6 +204,8 @@ if env['llvm']:
'gallivm/lp_bld_flow.c',
'gallivm/lp_bld_format_aos.c',
'gallivm/lp_bld_format_soa.c',
+ 'gallivm/lp_bld_format_yuv.c',
+ 'gallivm/lp_bld_gather.c',
'gallivm/lp_bld_intr.c',
'gallivm/lp_bld_logic.c',
'gallivm/lp_bld_init.c',
@@ -218,7 +221,9 @@ if env['llvm']:
'gallivm/lp_bld_type.c',
'draw/draw_llvm.c',
'draw/draw_pt_fetch_shade_pipeline_llvm.c',
- 'draw/draw_llvm_translate.c'
+ 'draw/draw_llvm_translate.c',
+ 'draw/draw_vs_llvm.c',
+ 'draw/draw_llvm_sample.c'
]
gallium = env.ConvenienceLibrary(
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 20a8612dca..58b022d531 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -289,6 +289,9 @@ void cso_release_all( struct cso_context *ctx )
ctx->pipe->bind_fs_state( ctx->pipe, NULL );
ctx->pipe->bind_vs_state( ctx->pipe, NULL );
ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
+ ctx->pipe->set_fragment_sampler_views(ctx->pipe, 0, NULL);
+ if (ctx->pipe->set_vertex_sampler_views)
+ ctx->pipe->set_vertex_sampler_views(ctx->pipe, 0, NULL);
}
for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
@@ -1029,6 +1032,7 @@ static INLINE void
clip_state_cpy(struct pipe_clip_state *dst,
const struct pipe_clip_state *src)
{
+ dst->depth_clamp = src->depth_clamp;
dst->nr = src->nr;
if (src->nr) {
memcpy(dst->ucp, src->ucp, src->nr * sizeof(src->ucp[0]));
@@ -1039,6 +1043,9 @@ static INLINE int
clip_state_cmp(const struct pipe_clip_state *a,
const struct pipe_clip_state *b)
{
+ if (a->depth_clamp != b->depth_clamp) {
+ return 1;
+ }
if (a->nr != b->nr) {
return 1;
}
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index dab95e5051..c127f74188 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -40,6 +40,7 @@
#if HAVE_LLVM
#include "gallivm/lp_bld_init.h"
+#include "draw_llvm.h"
#endif
struct draw_context *draw_create( struct pipe_context *pipe )
@@ -52,6 +53,7 @@ struct draw_context *draw_create( struct pipe_context *pipe )
lp_build_init();
assert(lp_build_engine);
draw->engine = lp_build_engine;
+ draw->llvm = draw_llvm_create(draw);
#endif
if (!draw_init(draw))
@@ -132,6 +134,9 @@ void draw_destroy( struct draw_context *draw )
draw_pt_destroy( draw );
draw_vs_destroy( draw );
draw_gs_destroy( draw );
+#ifdef HAVE_LLVM
+ draw_llvm_destroy( draw->llvm );
+#endif
FREE( draw );
}
@@ -211,6 +216,7 @@ void draw_set_clip_state( struct draw_context *draw,
assert(clip->nr <= PIPE_MAX_CLIP_PLANES);
memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0]));
draw->nr_planes = 6 + clip->nr;
+ draw->depth_clamp = clip->depth_clamp;
}
@@ -601,3 +607,54 @@ draw_set_so_state(struct draw_context *draw,
state,
sizeof(struct pipe_stream_output_state));
}
+
+void
+draw_set_sampler_views(struct draw_context *draw,
+ struct pipe_sampler_view **views,
+ unsigned num)
+{
+ unsigned i;
+
+ debug_assert(num <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ for (i = 0; i < num; ++i)
+ draw->sampler_views[i] = views[i];
+ for (i = num; i < PIPE_MAX_VERTEX_SAMPLERS; ++i)
+ draw->sampler_views[i] = NULL;
+
+ draw->num_sampler_views = num;
+}
+
+void
+draw_set_samplers(struct draw_context *draw,
+ struct pipe_sampler_state **samplers,
+ unsigned num)
+{
+ unsigned i;
+
+ debug_assert(num <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ for (i = 0; i < num; ++i)
+ draw->samplers[i] = samplers[i];
+ for (i = num; i < PIPE_MAX_VERTEX_SAMPLERS; ++i)
+ draw->samplers[i] = NULL;
+
+ draw->num_samplers = num;
+}
+
+void
+draw_set_mapped_texture(struct draw_context *draw,
+ unsigned sampler_idx,
+ uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t last_level,
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
+ const void *data[DRAW_MAX_TEXTURE_LEVELS])
+{
+#ifdef HAVE_LLVM
+ draw_llvm_set_mapped_texture(draw,
+ sampler_idx,
+ width, height, depth, last_level,
+ row_stride, img_stride, data);
+#endif
+}
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index c0122f2aca..116716af6f 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -47,11 +47,14 @@ struct draw_vertex_shader;
struct draw_geometry_shader;
struct tgsi_sampler;
+#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */
struct draw_context *draw_create( struct pipe_context *pipe );
void draw_destroy( struct draw_context *draw );
+void draw_flush(struct draw_context *draw);
+
void draw_set_viewport_state( struct draw_context *draw,
const struct pipe_viewport_state *viewport );
@@ -101,6 +104,23 @@ draw_texture_samplers(struct draw_context *draw,
uint num_samplers,
struct tgsi_sampler **samplers);
+void
+draw_set_sampler_views(struct draw_context *draw,
+ struct pipe_sampler_view **views,
+ unsigned num);
+void
+draw_set_samplers(struct draw_context *draw,
+ struct pipe_sampler_state **samplers,
+ unsigned num);
+
+void
+draw_set_mapped_texture(struct draw_context *draw,
+ unsigned sampler_idx,
+ uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t last_level,
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
+ const void *data[DRAW_MAX_TEXTURE_LEVELS]);
/*
@@ -173,7 +193,7 @@ draw_set_so_state(struct draw_context *draw,
/***********************************************************************
- * draw_prim.c
+ * draw_pt.c
*/
void draw_arrays(struct draw_context *draw, unsigned prim,
@@ -187,8 +207,6 @@ draw_arrays_instanced(struct draw_context *draw,
unsigned startInstance,
unsigned instanceCount);
-void draw_flush(struct draw_context *draw);
-
/*******************************************************************************
* Driver backend interface
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 9117c1303d..19f96c37ab 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1,3 +1,30 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#include "draw_llvm.h"
#include "draw_context.h"
@@ -15,14 +42,13 @@
#include "tgsi/tgsi_dump.h"
#include "util/u_cpu_detect.h"
-#include "util/u_string.h"
#include "util/u_pointer.h"
+#include "util/u_string.h"
#include <llvm-c/Transforms/Scalar.h>
#define DEBUG_STORE 0
-
/* generates the draw jit function */
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
@@ -36,12 +62,19 @@ init_globals(struct draw_llvm *llvm)
/* struct draw_jit_texture */
{
- LLVMTypeRef elem_types[4];
+ LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type();
elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
- elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
- elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0);
+ elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
+ elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
+ elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
+ LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
+ elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
+ LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
+ elem_types[DRAW_JIT_TEXTURE_DATA] =
+ LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
+ DRAW_MAX_TEXTURE_LEVELS);
texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
@@ -51,9 +84,18 @@ init_globals(struct draw_llvm *llvm)
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
llvm->target, texture_type,
DRAW_JIT_TEXTURE_HEIGHT);
- LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride,
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_DEPTH);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_LAST_LEVEL);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
llvm->target, texture_type,
- DRAW_JIT_TEXTURE_STRIDE);
+ DRAW_JIT_TEXTURE_ROW_STRIDE);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_IMG_STRIDE);
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
llvm->target, texture_type,
DRAW_JIT_TEXTURE_DATA);
@@ -71,7 +113,8 @@ init_globals(struct draw_llvm *llvm)
elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
- elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
+ elem_types[2] = LLVMArrayType(texture_type,
+ PIPE_MAX_VERTEX_SAMPLERS); /* textures */
context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
@@ -81,7 +124,7 @@ init_globals(struct draw_llvm *llvm)
llvm->target, context_type, 1);
LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
llvm->target, context_type,
- DRAW_JIT_CONTEXT_TEXTURES_INDEX);
+ DRAW_JIT_CTX_TEXTURES);
LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
llvm->target, context_type);
@@ -195,9 +238,22 @@ draw_llvm_create(struct draw_context *draw)
/* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
* but there are more on SVN. */
/* TODO: Add more passes */
+
LLVMAddCFGSimplificationPass(llvm->pass);
- LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
- LLVMAddConstantPropagationPass(llvm->pass);
+
+ if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) {
+ /* For LLVM >= 2.7 and 32-bit build, use this order of passes to
+ * avoid generating bad code.
+ * Test with piglit glsl-vs-sqrt-zero test.
+ */
+ LLVMAddConstantPropagationPass(llvm->pass);
+ LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
+ }
+ else {
+ LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
+ LLVMAddConstantPropagationPass(llvm->pass);
+ }
+
if(util_cpu_caps.has_sse4_1) {
/* FIXME: There is a bug in this pass, whereby the combination of fptosi
* and sitofp (necessary for trunc/floor/ceil/round implementation)
@@ -219,6 +275,9 @@ draw_llvm_create(struct draw_context *draw)
LLVMDumpModule(llvm->module);
}
+ llvm->nr_variants = 0;
+ make_empty_list(&llvm->vs_variants_list);
+
return llvm;
}
@@ -231,9 +290,13 @@ draw_llvm_destroy(struct draw_llvm *llvm)
}
struct draw_llvm_variant *
-draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs)
+draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs)
{
struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant));
+ struct llvm_vertex_shader *shader =
+ llvm_vertex_shader(llvm->draw->vs.vertex_shader);
+
+ variant->llvm = llvm;
draw_llvm_make_variant_key(llvm, &variant->key);
@@ -242,6 +305,12 @@ draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs)
draw_llvm_generate(llvm, variant);
draw_llvm_generate_elts(llvm, variant);
+ variant->shader = shader;
+ variant->list_item_global.base = variant;
+ variant->list_item_local.base = variant;
+ /*variant->no = */shader->variants_created++;
+ variant->list_item_global.base = variant;
+
return variant;
}
@@ -250,11 +319,13 @@ generate_vs(struct draw_llvm *llvm,
LLVMBuilderRef builder,
LLVMValueRef (*outputs)[NUM_CHANNELS],
const LLVMValueRef (*inputs)[NUM_CHANNELS],
- LLVMValueRef context_ptr)
+ LLVMValueRef context_ptr,
+ struct lp_build_sampler_soa *draw_sampler)
{
const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
struct lp_type vs_type;
LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr);
+ struct lp_build_sampler_soa *sampler = 0;
memset(&vs_type, 0, sizeof vs_type);
vs_type.floating = TRUE; /* floating point values */
@@ -270,6 +341,10 @@ generate_vs(struct draw_llvm *llvm,
tgsi_dump(tokens, 0);
}
+ if (llvm->draw->num_sampler_views &&
+ llvm->draw->num_samplers)
+ sampler = draw_sampler;
+
lp_build_tgsi_soa(builder,
tokens,
vs_type,
@@ -278,7 +353,7 @@ generate_vs(struct draw_llvm *llvm,
NULL /*pos*/,
inputs,
outputs,
- NULL/*sampler*/,
+ sampler,
&llvm->draw->vs.vertex_shader->info);
}
@@ -306,7 +381,8 @@ generate_fetch(LLVMBuilderRef builder,
LLVMValueRef *res,
struct pipe_vertex_element *velem,
LLVMValueRef vbuf,
- LLVMValueRef index)
+ LLVMValueRef index,
+ LLVMValueRef instance_id)
{
LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0);
LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
@@ -317,8 +393,15 @@ generate_fetch(LLVMBuilderRef builder,
LLVMValueRef cond;
LLVMValueRef stride;
- cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
+ if (velem->instance_divisor) {
+ /* array index = instance_id / instance_divisor */
+ index = LLVMBuildUDiv(builder, instance_id,
+ LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0),
+ "instance_divisor");
+ }
+ /* limit index to min(inex, vb_max_index) */
+ cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
stride = LLVMBuildMul(builder, vb_stride, index, "");
@@ -586,13 +669,14 @@ convert_to_aos(LLVMBuilderRef builder,
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
{
- LLVMTypeRef arg_types[7];
+ LLVMTypeRef arg_types[8];
LLVMTypeRef func_type;
LLVMValueRef context_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef start, end, count, stride, step, io_itr;
LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+ LLVMValueRef instance_id;
struct draw_context *draw = llvm->draw;
unsigned i, j;
struct lp_build_context bld;
@@ -601,6 +685,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
const int max_vertices = 4;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
void *code;
+ struct lp_build_sampler_soa *sampler = 0;
arg_types[0] = llvm->context_ptr_type; /* context */
arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
@@ -609,6 +694,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
arg_types[4] = LLVMInt32Type(); /* count */
arg_types[5] = LLVMInt32Type(); /* stride */
arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
+ arg_types[7] = LLVMInt32Type(); /* instance_id */
func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
@@ -625,6 +711,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
count = LLVMGetParam(variant->function, 4);
stride = LLVMGetParam(variant->function, 5);
vb_ptr = LLVMGetParam(variant->function, 6);
+ instance_id = LLVMGetParam(variant->function, 7);
lp_build_name(context_ptr, "context");
lp_build_name(io_ptr, "io");
@@ -633,6 +720,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
lp_build_name(count, "count");
lp_build_name(stride, "stride");
lp_build_name(vb_ptr, "vb");
+ lp_build_name(instance_id, "instance_id");
/*
* Function body
@@ -648,6 +736,10 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
+ /* code generated texture sampling */
+ sampler = draw_llvm_sampler_soa_create(variant->key.sampler,
+ context_ptr);
+
#if DEBUG_STORE
lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
start, end, step);
@@ -678,7 +770,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
&vb_index, 1, "");
generate_fetch(builder, vbuffers_ptr,
- &aos_attribs[j][i], velem, vb, true_index);
+ &aos_attribs[j][i], velem, vb, true_index,
+ instance_id);
}
}
convert_to_soa(builder, aos_attribs, inputs,
@@ -689,7 +782,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
builder,
outputs,
ptr_aos,
- context_ptr);
+ context_ptr,
+ sampler);
convert_to_aos(builder, io, outputs,
draw->vs.vertex_shader->info.num_outputs,
@@ -697,6 +791,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
}
lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop);
+ sampler->destroy(sampler);
+
LLVMBuildRetVoid(builder);
LLVMDisposeBuilder(builder);
@@ -730,13 +826,14 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
static void
draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
{
- LLVMTypeRef arg_types[7];
+ LLVMTypeRef arg_types[8];
LLVMTypeRef func_type;
LLVMValueRef context_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+ LLVMValueRef instance_id;
struct draw_context *draw = llvm->draw;
unsigned i, j;
struct lp_build_context bld;
@@ -747,6 +844,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
LLVMValueRef fetch_max;
void *code;
+ struct lp_build_sampler_soa *sampler = 0;
arg_types[0] = llvm->context_ptr_type; /* context */
arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
@@ -755,14 +853,17 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
arg_types[4] = LLVMInt32Type(); /* fetch_count */
arg_types[5] = LLVMInt32Type(); /* stride */
arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
+ arg_types[7] = LLVMInt32Type(); /* instance_id */
func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
- variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
+ variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts",
+ func_type);
LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
for(i = 0; i < Elements(arg_types); ++i)
if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
- LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute);
+ LLVMAddAttribute(LLVMGetParam(variant->function_elts, i),
+ LLVMNoAliasAttribute);
context_ptr = LLVMGetParam(variant->function_elts, 0);
io_ptr = LLVMGetParam(variant->function_elts, 1);
@@ -771,6 +872,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
fetch_count = LLVMGetParam(variant->function_elts, 4);
stride = LLVMGetParam(variant->function_elts, 5);
vb_ptr = LLVMGetParam(variant->function_elts, 6);
+ instance_id = LLVMGetParam(variant->function_elts, 7);
lp_build_name(context_ptr, "context");
lp_build_name(io_ptr, "io");
@@ -779,6 +881,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
lp_build_name(fetch_count, "fetch_count");
lp_build_name(stride, "stride");
lp_build_name(vb_ptr, "vb");
+ lp_build_name(instance_id, "instance_id");
/*
* Function body
@@ -793,6 +896,10 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
+ /* code generated texture sampling */
+ sampler = draw_llvm_sampler_soa_create(variant->key.sampler,
+ context_ptr);
+
fetch_max = LLVMBuildSub(builder, fetch_count,
LLVMConstInt(LLVMInt32Type(), 1, 0),
"fetch_max");
@@ -833,7 +940,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
&vb_index, 1, "");
generate_fetch(builder, vbuffers_ptr,
- &aos_attribs[j][i], velem, vb, true_index);
+ &aos_attribs[j][i], velem, vb, true_index,
+ instance_id);
}
}
convert_to_soa(builder, aos_attribs, inputs,
@@ -844,7 +952,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
builder,
outputs,
ptr_aos,
- context_ptr);
+ context_ptr,
+ sampler);
convert_to_aos(builder, io, outputs,
draw->vs.vertex_shader->info.num_outputs,
@@ -852,6 +961,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
}
lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);
+ sampler->destroy(sampler);
+
LLVMBuildRetVoid(builder);
LLVMDisposeBuilder(builder);
@@ -885,6 +996,8 @@ void
draw_llvm_make_variant_key(struct draw_llvm *llvm,
struct draw_llvm_variant_key *key)
{
+ unsigned i;
+
memset(key, 0, sizeof(struct draw_llvm_variant_key));
key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
@@ -896,4 +1009,72 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm,
memcpy(&key->vs,
&llvm->draw->vs.vertex_shader->state,
sizeof(struct pipe_shader_state));
+
+ /* if the driver implemented the sampling hooks then
+ * setup our sampling state */
+ if (llvm->draw->num_sampler_views && llvm->draw->num_samplers) {
+ for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) {
+ struct draw_vertex_shader *shader = llvm->draw->vs.vertex_shader;
+ if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
+ lp_sampler_static_state(&key->sampler[i],
+ llvm->draw->sampler_views[i],
+ llvm->draw->samplers[i]);
+ }
+ }
+}
+
+void
+draw_llvm_set_mapped_texture(struct draw_context *draw,
+ unsigned sampler_idx,
+ uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t last_level,
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
+ const void *data[DRAW_MAX_TEXTURE_LEVELS])
+{
+ unsigned j;
+ struct draw_jit_texture *jit_tex;
+
+ assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS);
+
+
+ jit_tex = &draw->llvm->jit_context.textures[sampler_idx];
+
+ jit_tex->width = width;
+ jit_tex->height = height;
+ jit_tex->depth = depth;
+ jit_tex->last_level = last_level;
+
+ for (j = 0; j <= last_level; j++) {
+ jit_tex->data[j] = data[j];
+ jit_tex->row_stride[j] = row_stride[j];
+ jit_tex->img_stride[j] = img_stride[j];
+ }
+}
+
+void
+draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
+{
+ struct draw_llvm *llvm = variant->llvm;
+ struct draw_context *draw = llvm->draw;
+
+ if (variant->function_elts) {
+ if (variant->function_elts)
+ LLVMFreeMachineCodeForFunction(draw->engine,
+ variant->function_elts);
+ LLVMDeleteFunction(variant->function_elts);
+ }
+
+ if (variant->function) {
+ if (variant->function)
+ LLVMFreeMachineCodeForFunction(draw->engine,
+ variant->function);
+ LLVMDeleteFunction(variant->function);
+ }
+
+ remove_from_list(&variant->list_item_local);
+ variant->shader->variants_cached--;
+ remove_from_list(&variant->list_item_global);
+ llvm->nr_variants--;
+ FREE(variant);
}
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index 58fee7f9d6..4addb47d2d 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -1,28 +1,71 @@
-#ifndef HAVE_LLVM_H
-#define HAVE_LLVM_H
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DRAW_LLVM_H
+#define DRAW_LLVM_H
#include "draw/draw_private.h"
+#include "draw/draw_vs.h"
+#include "gallivm/lp_bld_sample.h"
+
#include "pipe/p_context.h"
+#include "util/u_simple_list.h"
#include <llvm-c/Core.h>
#include <llvm-c/Analysis.h>
#include <llvm-c/Target.h>
#include <llvm-c/ExecutionEngine.h>
+#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */
+
+struct draw_llvm;
+struct llvm_vertex_shader;
+
struct draw_jit_texture
{
uint32_t width;
uint32_t height;
- uint32_t stride;
- const void *data;
+ uint32_t depth;
+ uint32_t last_level;
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS];
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS];
+ const void *data[DRAW_MAX_TEXTURE_LEVELS];
};
enum {
DRAW_JIT_TEXTURE_WIDTH = 0,
DRAW_JIT_TEXTURE_HEIGHT,
- DRAW_JIT_TEXTURE_STRIDE,
- DRAW_JIT_TEXTURE_DATA
+ DRAW_JIT_TEXTURE_DEPTH,
+ DRAW_JIT_TEXTURE_LAST_LEVEL,
+ DRAW_JIT_TEXTURE_ROW_STRIDE,
+ DRAW_JIT_TEXTURE_IMG_STRIDE,
+ DRAW_JIT_TEXTURE_DATA,
+ DRAW_JIT_TEXTURE_NUM_FIELDS /* number of fields above */
};
enum {
@@ -48,7 +91,7 @@ struct draw_jit_context
const float *gs_constants;
- struct draw_jit_texture textures[PIPE_MAX_SAMPLERS];
+ struct draw_jit_texture textures[PIPE_MAX_VERTEX_SAMPLERS];
};
@@ -58,10 +101,10 @@ struct draw_jit_context
#define draw_jit_context_gs_constants(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, 1, "gs_constants")
-#define DRAW_JIT_CONTEXT_TEXTURES_INDEX 2
+#define DRAW_JIT_CTX_TEXTURES 2
#define draw_jit_context_textures(_builder, _ptr) \
- lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CONTEXT_TEXTURES_INDEX, "textures")
+ lp_build_struct_get_ptr(_builder, _ptr, DRAW_JIT_CTX_TEXTURES, "textures")
@@ -92,7 +135,8 @@ typedef void
unsigned start,
unsigned count,
unsigned stride,
- struct pipe_vertex_buffer *vertex_buffers);
+ struct pipe_vertex_buffer *vertex_buffers,
+ unsigned instance_id);
typedef void
@@ -102,13 +146,54 @@ typedef void
const unsigned *fetch_elts,
unsigned fetch_count,
unsigned stride,
- struct pipe_vertex_buffer *vertex_buffers);
+ struct pipe_vertex_buffer *vertex_buffers,
+ unsigned instance_id);
+
+struct draw_llvm_variant_key
+{
+ struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+ unsigned nr_vertex_elements;
+ struct pipe_shader_state vs;
+ struct lp_sampler_static_state sampler[PIPE_MAX_VERTEX_SAMPLERS];
+};
+
+struct draw_llvm_variant_list_item
+{
+ struct draw_llvm_variant *base;
+ struct draw_llvm_variant_list_item *next, *prev;
+};
+
+struct draw_llvm_variant
+{
+ struct draw_llvm_variant_key key;
+ LLVMValueRef function;
+ LLVMValueRef function_elts;
+ draw_jit_vert_func jit_func;
+ draw_jit_vert_func_elts jit_func_elts;
+
+ struct llvm_vertex_shader *shader;
+
+ struct draw_llvm *llvm;
+ struct draw_llvm_variant_list_item list_item_global;
+ struct draw_llvm_variant_list_item list_item_local;
+};
+
+struct llvm_vertex_shader {
+ struct draw_vertex_shader base;
+
+ struct draw_llvm_variant_list_item variants;
+ unsigned variants_created;
+ unsigned variants_cached;
+};
struct draw_llvm {
struct draw_context *draw;
struct draw_jit_context jit_context;
+ struct draw_llvm_variant_list_item vs_variants_list;
+ int nr_variants;
+
LLVMModuleRef module;
LLVMExecutionEngineRef engine;
LLVMModuleProviderRef provider;
@@ -121,23 +206,12 @@ struct draw_llvm {
LLVMTypeRef vb_ptr_type;
};
-struct draw_llvm_variant_key
+static INLINE struct llvm_vertex_shader *
+llvm_vertex_shader(struct draw_vertex_shader *vs)
{
- struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
- unsigned nr_vertex_elements;
- struct pipe_shader_state vs;
-};
+ return (struct llvm_vertex_shader *)vs;
+}
-struct draw_llvm_variant
-{
- struct draw_llvm_variant_key key;
- LLVMValueRef function;
- LLVMValueRef function_elts;
- draw_jit_vert_func jit_func;
- draw_jit_vert_func_elts jit_func_elts;
-
- struct draw_llvm_variant *next;
-};
struct draw_llvm *
draw_llvm_create(struct draw_context *draw);
@@ -146,7 +220,10 @@ void
draw_llvm_destroy(struct draw_llvm *llvm);
struct draw_llvm_variant *
-draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs);
+draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs);
+
+void
+draw_llvm_destroy_variant(struct draw_llvm_variant *variant);
void
draw_llvm_make_variant_key(struct draw_llvm *llvm,
@@ -156,4 +233,18 @@ LLVMValueRef
draw_llvm_translate_from(LLVMBuilderRef builder,
LLVMValueRef vbuffer,
enum pipe_format from_format);
+
+struct lp_build_sampler_soa *
+draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
+ LLVMValueRef context_ptr);
+
+void
+draw_llvm_set_mapped_texture(struct draw_context *draw,
+ unsigned sampler_idx,
+ uint32_t width, uint32_t height, uint32_t depth,
+ uint32_t last_level,
+ uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
+ const void *data[DRAW_MAX_TEXTURE_LEVELS]);
+
#endif
diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c b/src/gallium/auxiliary/draw/draw_llvm_sample.c
new file mode 100644
index 0000000000..e9811010db
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c
@@ -0,0 +1,215 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Texture sampling code generation
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_sample.h"
+#include "gallivm/lp_bld_tgsi.h"
+
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_pointer.h"
+#include "util/u_string.h"
+
+#include "draw_llvm.h"
+
+
+/**
+ * This provides the bridge between the sampler state store in
+ * lp_jit_context and lp_jit_texture and the sampler code
+ * generator. It provides the texture layout information required by
+ * the texture sampler code generator in terms of the state stored in
+ * lp_jit_context and lp_jit_texture in runtime.
+ */
+struct draw_llvm_sampler_dynamic_state
+{
+ struct lp_sampler_dynamic_state base;
+
+ const struct lp_sampler_static_state *static_state;
+
+ LLVMValueRef context_ptr;
+};
+
+
+/**
+ * This is the bridge between our sampler and the TGSI translator.
+ */
+struct draw_llvm_sampler_soa
+{
+ struct lp_build_sampler_soa base;
+
+ struct draw_llvm_sampler_dynamic_state dynamic_state;
+};
+
+
+/**
+ * Fetch the specified member of the lp_jit_texture structure.
+ * \param emit_load if TRUE, emit the LLVM load instruction to actually
+ * fetch the field's value. Otherwise, just emit the
+ * GEP code to address the field.
+ *
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+draw_llvm_texture_member(const struct lp_sampler_dynamic_state *base,
+ LLVMBuilderRef builder,
+ unsigned unit,
+ unsigned member_index,
+ const char *member_name,
+ boolean emit_load)
+{
+ struct draw_llvm_sampler_dynamic_state *state =
+ (struct draw_llvm_sampler_dynamic_state *)base;
+ LLVMValueRef indices[4];
+ LLVMValueRef ptr;
+ LLVMValueRef res;
+
+ debug_assert(unit < PIPE_MAX_VERTEX_SAMPLERS);
+
+ /* context[0] */
+ indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ /* context[0].textures */
+ indices[1] = LLVMConstInt(LLVMInt32Type(), DRAW_JIT_CTX_TEXTURES, 0);
+ /* context[0].textures[unit] */
+ indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+ /* context[0].textures[unit].member */
+ indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0);
+
+ ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), "");
+
+ if (emit_load)
+ res = LLVMBuildLoad(builder, ptr, "");
+ else
+ res = ptr;
+
+ lp_build_name(res, "context.texture%u.%s", unit, member_name);
+
+ return res;
+}
+
+
+/**
+ * Helper macro to instantiate the functions that generate the code to
+ * fetch the members of lp_jit_texture to fulfill the sampler code
+ * generator requests.
+ *
+ * This complexity is the price we have to pay to keep the texture
+ * sampler code generator a reusable module without dependencies to
+ * llvmpipe internals.
+ */
+#define DRAW_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \
+ static LLVMValueRef \
+ draw_llvm_texture_##_name( const struct lp_sampler_dynamic_state *base, \
+ LLVMBuilderRef builder, \
+ unsigned unit) \
+ { \
+ return draw_llvm_texture_member(base, builder, unit, _index, #_name, _emit_load ); \
+ }
+
+
+DRAW_LLVM_TEXTURE_MEMBER(width, DRAW_JIT_TEXTURE_WIDTH, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(height, DRAW_JIT_TEXTURE_HEIGHT, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(depth, DRAW_JIT_TEXTURE_DEPTH, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(last_level, DRAW_JIT_TEXTURE_LAST_LEVEL, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(row_stride, DRAW_JIT_TEXTURE_ROW_STRIDE, FALSE)
+DRAW_LLVM_TEXTURE_MEMBER(img_stride, DRAW_JIT_TEXTURE_IMG_STRIDE, FALSE)
+DRAW_LLVM_TEXTURE_MEMBER(data_ptr, DRAW_JIT_TEXTURE_DATA, FALSE)
+
+
+static void
+draw_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+ FREE(sampler);
+}
+
+
+/**
+ * Fetch filtered values from texture.
+ * The 'texel' parameter returns four vectors corresponding to R, G, B, A.
+ */
+static void
+draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
+ LLVMValueRef *texel)
+{
+ struct draw_llvm_sampler_soa *sampler = (struct draw_llvm_sampler_soa *)base;
+
+ assert(unit < PIPE_MAX_VERTEX_SAMPLERS);
+
+ lp_build_sample_soa(builder,
+ &sampler->dynamic_state.static_state[unit],
+ &sampler->dynamic_state.base,
+ type,
+ unit,
+ num_coords, coords,
+ ddx, ddy,
+ lod_bias, explicit_lod,
+ texel);
+}
+
+
+struct lp_build_sampler_soa *
+draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
+ LLVMValueRef context_ptr)
+{
+ struct draw_llvm_sampler_soa *sampler;
+
+ sampler = CALLOC_STRUCT(draw_llvm_sampler_soa);
+ if(!sampler)
+ return NULL;
+
+ sampler->base.destroy = draw_llvm_sampler_soa_destroy;
+ sampler->base.emit_fetch_texel = draw_llvm_sampler_soa_emit_fetch_texel;
+ sampler->dynamic_state.base.width = draw_llvm_texture_width;
+ sampler->dynamic_state.base.height = draw_llvm_texture_height;
+ sampler->dynamic_state.base.depth = draw_llvm_texture_depth;
+ sampler->dynamic_state.base.last_level = draw_llvm_texture_last_level;
+ sampler->dynamic_state.base.row_stride = draw_llvm_texture_row_stride;
+ sampler->dynamic_state.base.img_stride = draw_llvm_texture_img_stride;
+ sampler->dynamic_state.base.data_ptr = draw_llvm_texture_data_ptr;
+ sampler->dynamic_state.static_state = static_state;
+ sampler->dynamic_state.context_ptr = context_ptr;
+
+ return &sampler->base;
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c
index d7da7ed357..6ebe1f7de4 100644
--- a/src/gallium/auxiliary/draw/draw_llvm_translate.c
+++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c
@@ -7,6 +7,7 @@
#include "gallivm/lp_bld_struct.h"
#include "gallivm/lp_bld_format.h"
#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_type.h"
#include "util/u_memory.h"
#include "util/u_format.h"
@@ -466,6 +467,7 @@ draw_llvm_translate_from(LLVMBuilderRef builder,
const struct util_format_description *format_desc;
LLVMValueRef zero;
int i;
+ struct lp_type type = lp_float32_vec4_type();
/*
* The above can only cope with straight arrays: no bitfields,
@@ -493,5 +495,5 @@ draw_llvm_translate_from(LLVMBuilderRef builder,
format_desc = util_format_description(from_format);
zero = LLVMConstNull(LLVMInt32Type());
- return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer, zero, zero);
+ return lp_build_fetch_rgba_aos(builder, format_desc, type, vbuffer, zero, zero, zero);
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 83556f10a8..8cd75ecf9a 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -177,15 +177,15 @@ static void do_triangle( struct draw_context *draw,
( DRAW_PIPE_RESET_STIPPLE | \
DRAW_PIPE_EDGE_FLAG_0 | \
DRAW_PIPE_EDGE_FLAG_1 ), \
- verts + stride * elts[i0], \
- verts + stride * elts[i1], \
- verts + stride * elts[i2]); \
+ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK)); \
do_triangle( draw, \
( DRAW_PIPE_EDGE_FLAG_1 | \
DRAW_PIPE_EDGE_FLAG_2 ), \
- verts + stride * elts[i0], \
- verts + stride * elts[i2], \
- verts + stride * elts[i3])
+ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK))
/* emit last quad vertex as last vertex in triangles */
#define QUAD_LAST_PV(i0,i1,i2,i3) \
@@ -193,15 +193,15 @@ static void do_triangle( struct draw_context *draw,
( DRAW_PIPE_RESET_STIPPLE | \
DRAW_PIPE_EDGE_FLAG_0 | \
DRAW_PIPE_EDGE_FLAG_2 ), \
- verts + stride * elts[i0], \
- verts + stride * elts[i1], \
- verts + stride * elts[i3]); \
+ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK)); \
do_triangle( draw, \
( DRAW_PIPE_EDGE_FLAG_0 | \
DRAW_PIPE_EDGE_FLAG_1 ), \
- verts + stride * elts[i1], \
- verts + stride * elts[i2], \
- verts + stride * elts[i3])
+ verts + stride * (elts[i1] & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (elts[i2] & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (elts[i3] & ~DRAW_PIPE_FLAG_MASK))
#define TRIANGLE(flags,i0,i1,i2) \
do_triangle( draw, \
@@ -218,7 +218,7 @@ static void do_triangle( struct draw_context *draw,
#define POINT(i0) \
do_point( draw, \
- verts + stride * elts[i0] )
+ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK) )
#define FUNC pipe_run
#define ARGS \
@@ -260,7 +260,7 @@ void draw_pipeline_run( struct draw_context *draw,
const struct draw_prim_info *prim_info)
{
unsigned i, start;
-
+
draw->pipeline.verts = (char *)vert_info->verts;
draw->pipeline.vertex_stride = vert_info->stride;
draw->pipeline.vertex_count = vert_info->count;
@@ -296,14 +296,14 @@ void draw_pipeline_run( struct draw_context *draw,
DRAW_PIPE_EDGE_FLAG_0 | \
DRAW_PIPE_EDGE_FLAG_1 ), \
verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i1), \
- verts + stride * (i2)); \
+ verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK)); \
do_triangle( draw, \
( DRAW_PIPE_EDGE_FLAG_1 | \
DRAW_PIPE_EDGE_FLAG_2 ), \
verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i2), \
- verts + stride * (i3))
+ verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK))
/* emit last quad vertex as last vertex in triangles */
#define QUAD_LAST_PV(i0,i1,i2,i3) \
@@ -312,31 +312,31 @@ void draw_pipeline_run( struct draw_context *draw,
DRAW_PIPE_EDGE_FLAG_0 | \
DRAW_PIPE_EDGE_FLAG_2 ), \
verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i1), \
- verts + stride * (i3)); \
+ verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK)); \
do_triangle( draw, \
( DRAW_PIPE_EDGE_FLAG_0 | \
DRAW_PIPE_EDGE_FLAG_1 ), \
verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i2), \
- verts + stride * (i3))
+ verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * ((i3) & ~DRAW_PIPE_FLAG_MASK))
#define TRIANGLE(flags,i0,i1,i2) \
do_triangle( draw, \
flags, /* flags */ \
verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i1), \
- verts + stride * (i2))
+ verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * ((i2) & ~DRAW_PIPE_FLAG_MASK))
#define LINE(flags,i0,i1) \
do_line( draw, \
flags, \
verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i1))
+ verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK))
#define POINT(i0) \
do_point( draw, \
- verts + stride * i0 )
+ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK) )
#define FUNC pipe_run_linear
#define ARGS \
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index debd17fd74..c0135f5bb7 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -425,7 +425,8 @@ aaline_create_texture(struct aaline_stage *aaline)
/* Fill in mipmap images.
* Basically each level is solid opaque, except for the outermost
- * texels which are zero. Special case the 1x1 and 2x2 levels.
+ * texels which are zero. Special case the 1x1 and 2x2 levels
+ * (though, those levels shouldn't be used - see the max_lod setting).
*/
for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) {
struct pipe_transfer *transfer;
@@ -497,7 +498,8 @@ aaline_create_sampler(struct aaline_stage *aaline)
sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
sampler.normalized_coords = 1;
sampler.min_lod = 0.0f;
- sampler.max_lod = MAX_TEXTURE_LEVEL;
+ /* avoid using the 1x1 and 2x2 mipmap levels */
+ sampler.max_lod = MAX_TEXTURE_LEVEL - 2;
aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler);
if (aaline->sampler_cso == NULL)
@@ -669,8 +671,8 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
assert(draw->rasterizer->line_smooth);
- if (draw->rasterizer->line_width <= 3.0)
- aaline->half_line_width = 1.5f;
+ if (draw->rasterizer->line_width <= 2.2)
+ aaline->half_line_width = 1.1f;
else
aaline->half_line_width = 0.5f * draw->rasterizer->line_width;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index 122b1c7968..1cf6ee7a7f 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -262,6 +262,7 @@ do_clip_tri( struct draw_stage *stage,
clipmask &= ~(1<<plane_idx);
+ assert(n < MAX_CLIPPED_VERTICES);
inlist[n] = inlist[0]; /* prevent rotation of vertices */
for (i = 1; i <= n; i++) {
@@ -270,11 +271,17 @@ do_clip_tri( struct draw_stage *stage,
float dp = dot4( vert->clip, plane );
if (!IS_NEGATIVE(dp_prev)) {
+ assert(outcount < MAX_CLIPPED_VERTICES);
outlist[outcount++] = vert_prev;
}
if (DIFFERENT_SIGNS(dp, dp_prev)) {
- struct vertex_header *new_vert = clipper->stage.tmp[tmpnr++];
+ struct vertex_header *new_vert;
+
+ assert(tmpnr < MAX_CLIPPED_VERTICES+1);
+ new_vert = clipper->stage.tmp[tmpnr++];
+
+ assert(outcount < MAX_CLIPPED_VERTICES);
outlist[outcount++] = new_vert;
if (IS_NEGATIVE(dp)) {
@@ -317,12 +324,14 @@ do_clip_tri( struct draw_stage *stage,
if (clipper->flat) {
if (stage->draw->rasterizer->flatshade_first) {
if (inlist[0] != header->v[0]) {
+ assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
copy_colors(stage, inlist[0], header->v[0]);
}
}
else {
if (inlist[0] != header->v[2]) {
+ assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
copy_colors(stage, inlist[0], header->v[2]);
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index fff960c7eb..ed9a53e154 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -363,8 +363,12 @@ generate_pstip_fs(struct pstip_stage *pstip)
assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS);
pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs);
-
+
FREE((void *)pstip_fs.tokens);
+
+ if (!pstip->fs->pstip_fs)
+ return FALSE;
+
return TRUE;
}
@@ -603,13 +607,16 @@ pstip_destroy(struct draw_stage *stage)
}
+/** Create a new polygon stipple drawing stage object */
static struct pstip_stage *
-draw_pstip_stage(struct draw_context *draw)
+draw_pstip_stage(struct draw_context *draw, struct pipe_context *pipe)
{
struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage);
if (pstip == NULL)
goto fail;
+ pstip->pipe = pipe;
+
pstip->stage.draw = draw;
pstip->stage.name = "pstip";
pstip->stage.next = NULL;
@@ -765,14 +772,12 @@ draw_install_pstipple_stage(struct draw_context *draw,
/*
* Create / install pgon stipple drawing / prim stage
*/
- pstip = draw_pstip_stage( draw );
+ pstip = draw_pstip_stage( draw, pipe );
if (pstip == NULL)
goto fail;
draw->pipeline.pstipple = &pstip->stage;
- pstip->pipe = pipe;
-
/* create special texture, sampler state */
if (!pstip_create_texture(pstip))
goto fail;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index 3e6e538995..ee2945c7c9 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -226,6 +226,7 @@ static void widepoint_first_point( struct draw_stage *stage,
if (rast->gl_rasterization_rules) {
wide->xbias = 0.125;
+ wide->ybias = -0.125;
}
/* Disable triangle culling, stippling, unfilled mode etc. */
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 4584033bc2..058aeedc17 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -48,6 +48,7 @@
#ifdef HAVE_LLVM
#include <llvm-c/ExecutionEngine.h>
+struct draw_llvm;
#endif
@@ -81,6 +82,9 @@ struct vertex_header {
#define UNDEFINED_VERTEX_ID 0xffff
+/* maximum number of shader variants we can cache */
+#define DRAW_MAX_SHADER_VARIANTS 1024
+
/**
* Private context for the drawing module.
*/
@@ -245,6 +249,7 @@ struct draw_context
*/
float plane[12][4];
unsigned nr_planes;
+ boolean depth_clamp;
/* If a prim stage introduces new vertex attributes, they'll be stored here
*/
@@ -259,9 +264,15 @@ struct draw_context
unsigned instance_id;
#ifdef HAVE_LLVM
+ struct draw_llvm *llvm;
LLVMExecutionEngineRef engine;
#endif
+ struct pipe_sampler_view *sampler_views[PIPE_MAX_VERTEX_SAMPLERS];
+ unsigned num_sampler_views;
+ const struct pipe_sampler_state *samplers[PIPE_MAX_VERTEX_SAMPLERS];
+ unsigned num_samplers;
+
void *driver_private;
};
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 02c97fec81..92d4113b4c 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -34,9 +34,11 @@
#include "draw/draw_gs.h"
#include "draw/draw_private.h"
#include "draw/draw_pt.h"
+#include "draw/draw_vs.h"
#include "tgsi/tgsi_dump.h"
#include "util/u_math.h"
#include "util/u_prim.h"
+#include "util/u_format.h"
DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE)
@@ -69,7 +71,6 @@ draw_pt_arrays(struct draw_context *draw,
struct draw_pt_front_end *frontend = NULL;
struct draw_pt_middle_end *middle = NULL;
unsigned opt = 0;
- unsigned out_prim = prim;
/* Sanitize primitive length:
*/
@@ -80,18 +81,19 @@ draw_pt_arrays(struct draw_context *draw,
if (count < first)
return TRUE;
}
- if (draw->gs.geometry_shader) {
- out_prim = draw->gs.geometry_shader->output_primitive;
- }
if (!draw->force_passthrough) {
+ unsigned gs_out_prim = (draw->gs.geometry_shader ?
+ draw->gs.geometry_shader->output_primitive :
+ prim);
+
if (!draw->render) {
opt |= PT_PIPELINE;
}
if (draw_need_pipeline(draw,
draw->rasterizer,
- out_prim)) {
+ gs_out_prim)) {
opt |= PT_PIPELINE;
}
@@ -102,7 +104,7 @@ draw_pt_arrays(struct draw_context *draw,
opt |= PT_SHADE;
}
- if (draw->pt.middle.llvm && !draw->gs.geometry_shader) {
+ if (draw->pt.middle.llvm) {
middle = draw->pt.middle.llvm;
} else {
if (opt == 0)
@@ -122,7 +124,7 @@ draw_pt_arrays(struct draw_context *draw,
frontend = draw->pt.front.varray;
}
- frontend->prepare( frontend, prim, out_prim, middle, opt );
+ frontend->prepare( frontend, prim, middle, opt );
frontend->run(frontend,
draw_pt_elt_func(draw),
@@ -265,31 +267,38 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
case PIPE_FORMAT_R32_FLOAT:
{
float *v = (float *) ptr;
- debug_printf("%f @ %p\n", v[0], (void *) v);
+ debug_printf("R %f @ %p\n", v[0], (void *) v);
}
break;
case PIPE_FORMAT_R32G32_FLOAT:
{
float *v = (float *) ptr;
- debug_printf("%f %f @ %p\n", v[0], v[1], (void *) v);
+ debug_printf("RG %f %f @ %p\n", v[0], v[1], (void *) v);
}
break;
case PIPE_FORMAT_R32G32B32_FLOAT:
{
float *v = (float *) ptr;
- debug_printf("%f %f %f @ %p\n", v[0], v[1], v[2], (void *) v);
+ debug_printf("RGB %f %f %f @ %p\n", v[0], v[1], v[2], (void *) v);
}
break;
case PIPE_FORMAT_R32G32B32A32_FLOAT:
{
float *v = (float *) ptr;
- debug_printf("%f %f %f %f @ %p\n", v[0], v[1], v[2], v[3],
+ debug_printf("RGBA %f %f %f %f @ %p\n", v[0], v[1], v[2], v[3],
(void *) v);
}
break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ {
+ ubyte *u = (ubyte *) ptr;
+ debug_printf("BGRA %d %d %d %d @ %p\n", u[0], u[1], u[2], u[3],
+ (void *) u);
+ }
+ break;
default:
- debug_printf("other format (fix me)\n");
- ;
+ debug_printf("other format %s (fix me)\n",
+ util_format_name(draw->pt.vertex_element[j].src_format));
}
}
}
@@ -297,11 +306,8 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
/**
- * Draw vertex arrays
- * This is the main entrypoint into the drawing module.
- * \param prim one of PIPE_PRIM_x
- * \param start index of first vertex to draw
- * \param count number of vertices to draw
+ * Non-instanced drawing.
+ * \sa draw_arrays_instanced
*/
void
draw_arrays(struct draw_context *draw, unsigned prim,
@@ -310,6 +316,20 @@ draw_arrays(struct draw_context *draw, unsigned prim,
draw_arrays_instanced(draw, prim, start, count, 0, 1);
}
+
+/**
+ * Draw vertex arrays.
+ * This is the main entrypoint into the drawing module.
+ * If drawing an indexed primitive, the draw_set_mapped_element_buffer_range()
+ * function should have already been called to specify the element/index buffer
+ * information.
+ *
+ * \param prim one of PIPE_PRIM_x
+ * \param start index of first vertex to draw
+ * \param count number of vertices to draw
+ * \param startInstance number for the first primitive instance (usually 0).
+ * \param instanceCount number of instances to draw (1=non-instanced)
+ */
void
draw_arrays_instanced(struct draw_context *draw,
unsigned mode,
@@ -329,26 +349,30 @@ draw_arrays_instanced(struct draw_context *draw,
if (0)
draw_print_arrays(draw, mode, start, MIN2(count, 20));
-#if 0
- {
- int i;
+ if (0) {
+ unsigned int i;
debug_printf("draw_arrays(mode=%u start=%u count=%u):\n",
mode, start, count);
tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
debug_printf("Elements:\n");
for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
- debug_printf(" format=%s\n",
+ debug_printf(" %u: src_offset=%u inst_div=%u vbuf=%u format=%s\n",
+ i,
+ draw->pt.vertex_element[i].src_offset,
+ draw->pt.vertex_element[i].instance_divisor,
+ draw->pt.vertex_element[i].vertex_buffer_index,
util_format_name(draw->pt.vertex_element[i].src_format));
}
debug_printf("Buffers:\n");
for (i = 0; i < draw->pt.nr_vertex_buffers; i++) {
- debug_printf(" stride=%u offset=%u ptr=%p\n",
+ debug_printf(" %u: stride=%u maxindex=%u offset=%u ptr=%p\n",
+ i,
draw->pt.vertex_buffer[i].stride,
+ draw->pt.vertex_buffer[i].max_index,
draw->pt.vertex_buffer[i].buffer_offset,
draw->pt.user.vbuffer[i]);
}
}
-#endif
for (instance = 0; instance < instanceCount; instance++) {
draw->instance_id = instance + startInstance;
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index b6741ca83c..44356fba4c 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -62,8 +62,7 @@ struct draw_vertex_info;
*/
struct draw_pt_front_end {
void (*prepare)( struct draw_pt_front_end *,
- unsigned input_prim,
- unsigned output_prim,
+ unsigned prim,
struct draw_pt_middle_end *,
unsigned opt );
@@ -87,8 +86,7 @@ struct draw_pt_front_end {
*/
struct draw_pt_middle_end {
void (*prepare)( struct draw_pt_middle_end *,
- unsigned input_prim,
- unsigned output_prim,
+ unsigned prim,
unsigned opt,
unsigned *max_vertices );
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index bf799db352..ae12ee24bd 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -68,31 +68,12 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
fetch->vertex_size = vertex_size;
- /* Always emit/leave space for a vertex header.
- *
- * It's worth considering whether the vertex headers should contain
- * a pointer to the 'data', rather than having it inline.
- * Something to look at after we've fully switched over to the pt
- * paths.
+ /* Leave the clipmask/edgeflags/pad/vertex_id untouched
*/
- {
- /* Need to set header->vertex_id = 0xffff somehow.
- */
- key.element[nr].type = TRANSLATE_ELEMENT_NORMAL;
- key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT;
- key.element[nr].input_buffer = draw->pt.nr_vertex_buffers;
- key.element[nr].input_offset = 0;
- key.element[nr].instance_divisor = 0;
- key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT;
- key.element[nr].output_offset = dst_offset;
- dst_offset += 1 * sizeof(float);
- nr++;
-
-
- /* Just leave the clip[] array untouched.
- */
- dst_offset += 4 * sizeof(float);
- }
+ dst_offset += 1 * sizeof(float);
+ /* Just leave the clip[] array untouched.
+ */
+ dst_offset += 4 * sizeof(float);
if (instance_id_index != ~0) {
num_extra_inputs++;
@@ -131,26 +112,11 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch,
key.nr_elements = nr;
key.output_stride = vertex_size;
-
if (!fetch->translate ||
translate_key_compare(&fetch->translate->key, &key) != 0)
{
translate_key_sanitize(&key);
fetch->translate = translate_cache_find(fetch->cache, &key);
-
- {
- static struct vertex_header vh = { 0,
- 1,
- 0,
- UNDEFINED_VERTEX_ID,
- { .0f, .0f, .0f, .0f } };
-
- fetch->translate->set_buffer(fetch->translate,
- draw->pt.nr_vertex_buffers,
- &vh,
- 0,
- ~0);
- }
}
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index c629d55563..5c8af17c8e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -36,6 +36,7 @@
#include "draw/draw_vbuf.h"
#include "draw/draw_vertex.h"
#include "draw/draw_pt.h"
+#include "draw/draw_gs.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
@@ -90,7 +91,6 @@ struct fetch_emit_middle_end {
static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
unsigned prim,
- unsigned out_prim,
unsigned opt,
unsigned *max_vertices )
{
@@ -101,9 +101,14 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
boolean ok;
struct translate_key key;
+ unsigned gs_out_prim = (draw->gs.geometry_shader ?
+ draw->gs.geometry_shader->output_primitive :
+ prim);
+
+
ok = draw->render->set_primitive( draw->render,
- out_prim );
+ gs_out_prim );
if (!ok) {
assert(0);
return;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index 5483a25f1d..b8270280b6 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -68,8 +68,7 @@ struct fetch_shade_emit {
static void fse_prepare( struct draw_pt_middle_end *middle,
- unsigned in_prim,
- unsigned out_prim,
+ unsigned prim,
unsigned opt,
unsigned *max_vertices )
{
@@ -80,9 +79,12 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
unsigned i;
unsigned nr_vbs = 0;
+ /* Can't support geometry shader on this path.
+ */
+ assert(!draw->gs.geometry_shader);
if (!draw->render->set_primitive( draw->render,
- out_prim )) {
+ prim )) {
assert(0);
return;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 43b08a030c..121dfc414a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -48,13 +48,11 @@ struct fetch_pipeline_middle_end {
unsigned vertex_data_offset;
unsigned vertex_size;
unsigned input_prim;
- unsigned output_prim;
unsigned opt;
};
static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
- unsigned in_prim,
- unsigned out_prim,
+ unsigned prim,
unsigned opt,
unsigned *max_vertices )
{
@@ -64,6 +62,10 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
unsigned i;
unsigned instance_id_index = ~0;
+ unsigned gs_out_prim = (draw->gs.geometry_shader ?
+ draw->gs.geometry_shader->output_primitive :
+ prim);
+
/* Add one to num_outputs because the pipeline occasionally tags on
* an additional texcoord, eg for AA lines.
*/
@@ -79,8 +81,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
}
}
- fpme->input_prim = in_prim;
- fpme->output_prim = out_prim;
+ fpme->input_prim = prim;
fpme->opt = opt;
/* Always leave room for the vertex header whether we need it or
@@ -102,13 +103,13 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
(boolean)draw->bypass_clipping,
(boolean)draw->identity_viewport,
(boolean)draw->rasterizer->gl_rasterization_rules,
- (draw->vs.edgeflag_output ? true : false) );
+ (draw->vs.edgeflag_output ? TRUE : FALSE) );
draw_pt_so_emit_prepare( fpme->so_emit );
if (!(opt & PT_PIPELINE)) {
draw_pt_emit_prepare( fpme->emit,
- out_prim,
+ gs_out_prim,
max_vertices );
*max_vertices = MAX2( *max_vertices,
@@ -183,7 +184,7 @@ static void draw_vertex_shader_run(struct draw_vertex_shader *vshader,
output_verts->count = input_verts->count;
output_verts->verts =
(struct vertex_header *)MALLOC(output_verts->vertex_size *
- output_verts->count);
+ align(output_verts->count, 4));
vshader->run_linear(vshader,
(const float (*)[4])input_verts->verts->data,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 7d2de58e73..bc074df8c2 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -49,48 +49,50 @@ struct llvm_middle_end {
unsigned vertex_data_offset;
unsigned vertex_size;
unsigned input_prim;
- unsigned output_prim;
unsigned opt;
struct draw_llvm *llvm;
- struct draw_llvm_variant *variants;
struct draw_llvm_variant *current_variant;
- int nr_variants;
};
static void
llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
unsigned in_prim,
- unsigned out_prim,
unsigned opt,
unsigned *max_vertices )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_context *draw = fpme->draw;
- struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+ struct llvm_vertex_shader *shader =
+ llvm_vertex_shader(draw->vs.vertex_shader);
struct draw_llvm_variant_key key;
struct draw_llvm_variant *variant = NULL;
+ struct draw_llvm_variant_list_item *li;
unsigned i;
unsigned instance_id_index = ~0;
+
+ unsigned out_prim = (draw->gs.geometry_shader ?
+ draw->gs.geometry_shader->output_primitive :
+ in_prim);
+
/* Add one to num_outputs because the pipeline occasionally tags on
* an additional texcoord, eg for AA lines.
*/
- unsigned nr = MAX2( vs->info.num_inputs,
- vs->info.num_outputs + 1 );
+ unsigned nr = MAX2( shader->base.info.num_inputs,
+ shader->base.info.num_outputs + 1 );
/* Scan for instanceID system value.
*/
- for (i = 0; i < vs->info.num_inputs; i++) {
- if (vs->info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) {
+ for (i = 0; i < shader->base.info.num_inputs; i++) {
+ if (shader->base.info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) {
instance_id_index = i;
break;
}
}
fpme->input_prim = in_prim;
- fpme->output_prim = out_prim;
fpme->opt = opt;
/* Always leave room for the vertex header whether we need it or
@@ -107,7 +109,7 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
(boolean)draw->bypass_clipping,
(boolean)(draw->identity_viewport),
(boolean)draw->rasterizer->gl_rasterization_rules,
- (draw->vs.edgeflag_output ? true : false) );
+ (draw->vs.edgeflag_output ? TRUE : FALSE) );
draw_pt_so_emit_prepare( fpme->so_emit );
@@ -128,20 +130,41 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
draw_llvm_make_variant_key(fpme->llvm, &key);
- variant = fpme->variants;
- while(variant) {
- if(memcmp(&variant->key, &key, sizeof key) == 0)
+ li = first_elem(&shader->variants);
+ while(!at_end(&shader->variants, li)) {
+ if(memcmp(&li->base->key, &key, sizeof key) == 0) {
+ variant = li->base;
break;
+ }
+ li = next_elem(li);
+ }
- variant = variant->next;
+ if (variant) {
+ move_to_head(&fpme->llvm->vs_variants_list, &variant->list_item_global);
}
+ else {
+ unsigned i;
+ if (fpme->llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) {
+ /*
+ * XXX: should we flush here ?
+ */
+ for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) {
+ struct draw_llvm_variant_list_item *item =
+ last_elem(&fpme->llvm->vs_variants_list);
+ draw_llvm_destroy_variant(item->base);
+ }
+ }
+
+ variant = draw_llvm_create_variant(fpme->llvm, nr);
- if (!variant) {
- variant = draw_llvm_prepare(fpme->llvm, nr);
- variant->next = fpme->variants;
- fpme->variants = variant;
- ++fpme->nr_variants;
+ if (variant) {
+ insert_at_head(&shader->variants, &variant->list_item_local);
+ insert_at_head(&fpme->llvm->vs_variants_list, &variant->list_item_global);
+ fpme->llvm->nr_variants++;
+ shader->variants_cached++;
+ }
}
+
fpme->current_variant = variant;
/*XXX we only support one constant buffer */
@@ -210,7 +233,8 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle,
fetch_info->start,
fetch_info->count,
fpme->vertex_size,
- draw->pt.vertex_buffer );
+ draw->pt.vertex_buffer,
+ draw->instance_id);
else
fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context,
llvm_vert_info.verts,
@@ -218,7 +242,8 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle,
fetch_info->elts,
fetch_info->count,
fpme->vertex_size,
- draw->pt.vertex_buffer);
+ draw->pt.vertex_buffer,
+ draw->instance_id);
/* Finished with fetch and vs:
*/
@@ -356,31 +381,7 @@ static void llvm_middle_end_finish( struct draw_pt_middle_end *middle )
static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
- struct draw_context *draw = fpme->draw;
- struct draw_llvm_variant *variant = NULL;
-
- variant = fpme->variants;
- while(variant) {
- struct draw_llvm_variant *next = variant->next;
- if (variant->function_elts) {
- if (variant->function_elts)
- LLVMFreeMachineCodeForFunction(draw->engine,
- variant->function_elts);
- LLVMDeleteFunction(variant->function_elts);
- }
-
- if (variant->function) {
- if (variant->function)
- LLVMFreeMachineCodeForFunction(draw->engine,
- variant->function);
- LLVMDeleteFunction(variant->function);
- }
-
- FREE(variant);
-
- variant = next;
- }
if (fpme->fetch)
draw_pt_fetch_destroy( fpme->fetch );
@@ -393,14 +394,12 @@ static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle )
if (fpme->post_vs)
draw_pt_post_vs_destroy( fpme->post_vs );
- if (fpme->llvm)
- draw_llvm_destroy( fpme->llvm );
-
FREE(middle);
}
-struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_context *draw )
+struct draw_pt_middle_end *
+draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw)
{
struct llvm_middle_end *fpme = 0;
@@ -436,13 +435,11 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_cont
if (!fpme->so_emit)
goto fail;
- fpme->llvm = draw_llvm_create(draw);
+ fpme->llvm = draw->llvm;
if (!fpme->llvm)
goto fail;
- fpme->variants = NULL;
fpme->current_variant = NULL;
- fpme->nr_variants = 0;
return &fpme->base;
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index 112be50f9a..308f927b77 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -38,7 +38,14 @@ struct pt_post_vs {
struct draw_vertex_info *info );
};
-
+static INLINE void
+initialize_vertex_header(struct vertex_header *header)
+{
+ header->clipmask = 0;
+ header->edgeflag = 1;
+ header->pad = 0;
+ header->vertex_id = UNDEFINED_VERTEX_ID;
+}
static INLINE float
dot4(const float *a, const float *b)
@@ -49,10 +56,9 @@ dot4(const float *a, const float *b)
a[3]*b[3]);
}
-
-
static INLINE unsigned
-compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr)
+compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr,
+ boolean clip_depth)
{
unsigned mask = 0x0;
unsigned i;
@@ -69,8 +75,10 @@ compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr)
if ( clip[0] + clip[3] < 0) mask |= (1<<1);
if (-clip[1] + clip[3] < 0) mask |= (1<<2);
if ( clip[1] + clip[3] < 0) mask |= (1<<3);
- if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */
- if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */
+ if (clip_depth) {
+ if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */
+ if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */
+ }
/* Followed by any remaining ones:
*/
@@ -103,6 +111,7 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
for (j = 0; j < info->count; j++) {
float *position = out->data[pos];
+ initialize_vertex_header(out);
#if 0
debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n",
j, out, position, position[0], position[1], position[2], position[3]);
@@ -114,9 +123,11 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
out->clip[3] = position[3];
out->vertex_id = 0xffff;
+ /* Disable depth clipping if depth clamping is enabled. */
out->clipmask = compute_clipmask_gl(out->clip,
pvs->draw->plane,
- pvs->draw->nr_planes);
+ pvs->draw->nr_planes,
+ !pvs->draw->depth_clamp);
clipped += out->clipmask;
if (out->clipmask == 0)
@@ -192,6 +203,7 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs,
for (j = 0; j < info->count; j++) {
float *position = out->data[pos];
+ initialize_vertex_header(out);
/* Viewport mapping only, no cliptest/rhw divide
*/
position[0] = position[0] * scale[0] + trans[0];
@@ -211,7 +223,16 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs,
static boolean post_vs_none( struct pt_post_vs *pvs,
struct draw_vertex_info *info )
{
+ struct vertex_header *out = info->verts;
+ unsigned j;
+
if (0) debug_printf("%s\n", __FUNCTION__);
+ /* just initialize the vertex_id in all headers */
+ for (j = 0; j < info->count; j++) {
+ initialize_vertex_header(out);
+
+ out = (struct vertex_header *)((char *)out + info->stride);
+ }
return FALSE;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
index 5ea833032f..cd7bb7bf25 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ b/src/gallium/auxiliary/draw/draw_pt_varray.c
@@ -120,24 +120,27 @@ static void varray_fan_segment(struct varray_frontend *varray,
#define FUNC varray_run
#include "draw_pt_varray_tmp_linear.h"
-static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = {
+static unsigned decompose_prim[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY + 1] = {
PIPE_PRIM_POINTS,
PIPE_PRIM_LINES,
PIPE_PRIM_LINE_STRIP, /* decomposed LINELOOP */
PIPE_PRIM_LINE_STRIP,
PIPE_PRIM_TRIANGLES,
PIPE_PRIM_TRIANGLE_STRIP,
- PIPE_PRIM_TRIANGLE_FAN,
+ PIPE_PRIM_TRIANGLE_FAN,
PIPE_PRIM_QUADS,
PIPE_PRIM_QUAD_STRIP,
- PIPE_PRIM_POLYGON
+ PIPE_PRIM_POLYGON,
+ PIPE_PRIM_LINES_ADJACENCY,
+ PIPE_PRIM_LINE_STRIP_ADJACENCY,
+ PIPE_PRIM_TRIANGLES_ADJACENCY,
+ PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY
};
static void varray_prepare(struct draw_pt_front_end *frontend,
unsigned in_prim,
- unsigned out_prim,
struct draw_pt_middle_end *middle,
unsigned opt)
{
@@ -146,11 +149,13 @@ static void varray_prepare(struct draw_pt_front_end *frontend,
varray->base.run = varray_run;
varray->input_prim = in_prim;
- varray->output_prim = decompose_prim[out_prim];
+ assert(in_prim < Elements(decompose_prim));
+ varray->output_prim = decompose_prim[in_prim];
varray->middle = middle;
- middle->prepare(middle, varray->input_prim,
- varray->output_prim, opt, &varray->driver_fetch_max );
+ middle->prepare(middle,
+ varray->output_prim,
+ opt, &varray->driver_fetch_max );
/* check that the max is even */
assert((varray->driver_fetch_max & 1) == 0);
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index 914c87a9dc..8ef94c3163 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -41,6 +41,7 @@
#define FETCH_MAX 256
#define DRAW_MAX (16*1024)
+
struct vcache_frontend {
struct draw_pt_front_end base;
struct draw_context *draw;
@@ -64,13 +65,13 @@ struct vcache_frontend {
unsigned opt;
};
+
static INLINE void
vcache_flush( struct vcache_frontend *vcache )
{
if (vcache->middle_prim != vcache->output_prim) {
vcache->middle_prim = vcache->output_prim;
vcache->middle->prepare( vcache->middle,
- vcache->input_prim,
vcache->middle_prim,
vcache->opt,
&vcache->fetch_max );
@@ -89,12 +90,12 @@ vcache_flush( struct vcache_frontend *vcache )
vcache->draw_count = 0;
}
+
static INLINE void
vcache_check_flush( struct vcache_frontend *vcache )
{
- if ( vcache->draw_count + 6 >= DRAW_MAX ||
- vcache->fetch_count + 4 >= FETCH_MAX )
- {
+ if (vcache->draw_count + 6 >= DRAW_MAX ||
+ vcache->fetch_count + 4 >= FETCH_MAX) {
vcache_flush( vcache );
}
}
@@ -146,6 +147,7 @@ vcache_triangle_flags( struct vcache_frontend *vcache,
vcache_check_flush(vcache);
}
+
static INLINE void
vcache_line( struct vcache_frontend *vcache,
unsigned i0,
@@ -177,6 +179,7 @@ vcache_point( struct vcache_frontend *vcache,
vcache_check_flush(vcache);
}
+
static INLINE void
vcache_quad( struct vcache_frontend *vcache,
unsigned i0,
@@ -196,6 +199,7 @@ vcache_quad( struct vcache_frontend *vcache,
}
}
+
static INLINE void
vcache_ef_quad( struct vcache_frontend *vcache,
unsigned i0,
@@ -231,6 +235,7 @@ vcache_ef_quad( struct vcache_frontend *vcache,
}
}
+
/* At least for now, we're back to using a template include file for
* this. The two paths aren't too different though - it may be
* possible to reunify them.
@@ -256,23 +261,23 @@ rebase_uint_elts( const unsigned *src,
ushort *dest )
{
unsigned i;
-
for (i = 0; i < count; i++)
dest[i] = (ushort)(src[i] + delta);
}
+
static INLINE void
rebase_ushort_elts( const ushort *src,
unsigned count,
int delta,
- ushort *dest )
+ ushort *dest )
{
unsigned i;
-
for (i = 0; i < count; i++)
dest[i] = (ushort)(src[i] + delta);
}
+
static INLINE void
rebase_ubyte_elts( const ubyte *src,
unsigned count,
@@ -280,42 +285,39 @@ rebase_ubyte_elts( const ubyte *src,
ushort *dest )
{
unsigned i;
-
for (i = 0; i < count; i++)
dest[i] = (ushort)(src[i] + delta);
}
-
static INLINE void
translate_uint_elts( const unsigned *src,
unsigned count,
ushort *dest )
{
unsigned i;
-
for (i = 0; i < count; i++)
dest[i] = (ushort)(src[i]);
}
+
static INLINE void
translate_ushort_elts( const ushort *src,
unsigned count,
ushort *dest )
{
unsigned i;
-
for (i = 0; i < count; i++)
dest[i] = (ushort)(src[i]);
}
+
static INLINE void
translate_ubyte_elts( const ubyte *src,
unsigned count,
ushort *dest )
{
unsigned i;
-
for (i = 0; i < count; i++)
dest[i] = (ushort)(src[i]);
}
@@ -336,6 +338,7 @@ format_from_get_elt( pt_elt_func get_elt )
}
#endif
+
static INLINE void
vcache_check_run( struct draw_pt_front_end *frontend,
pt_elt_func get_elt,
@@ -345,18 +348,46 @@ vcache_check_run( struct draw_pt_front_end *frontend,
{
struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
struct draw_context *draw = vcache->draw;
- unsigned min_index = draw->pt.user.min_index;
- unsigned max_index = draw->pt.user.max_index;
- unsigned index_size = draw->pt.user.eltSize;
- unsigned fetch_count = max_index + 1 - min_index;
+ const unsigned min_index = draw->pt.user.min_index;
+ const unsigned max_index = draw->pt.user.max_index;
+ const unsigned index_size = draw->pt.user.eltSize;
+ unsigned fetch_count;
const ushort *transformed_elts;
ushort *storage = NULL;
boolean ok = FALSE;
+ /* debug: verify indexes are in range [min_index, max_index] */
+ if (0) {
+ unsigned i;
+ for (i = 0; i < draw_count; i++) {
+ if (index_size == 1) {
+ assert( ((const ubyte *) elts)[i] >= min_index);
+ assert( ((const ubyte *) elts)[i] <= max_index);
+ }
+ else if (index_size == 2) {
+ assert( ((const ushort *) elts)[i] >= min_index);
+ assert( ((const ushort *) elts)[i] <= max_index);
+ }
+ else {
+ assert(index_size == 4);
+ assert( ((const uint *) elts)[i] >= min_index);
+ assert( ((const uint *) elts)[i] <= max_index);
+ }
+ }
+ }
+
+ /* Note: max_index is frequently 0xffffffff so we have to be sure
+ * that any arithmetic involving max_index doesn't overflow!
+ */
+ if (max_index >= (unsigned) DRAW_PIPE_MAX_VERTICES)
+ goto fail;
- if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
- vcache->fetch_max,
- draw_count);
+ fetch_count = max_index + 1 - min_index;
+
+ if (0)
+ debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
+ vcache->fetch_max,
+ draw_count);
if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES ||
fetch_count >= UNDEFINED_VERTEX_ID ||
@@ -368,23 +399,19 @@ vcache_check_run( struct draw_pt_front_end *frontend,
if (vcache->middle_prim != vcache->input_prim) {
vcache->middle_prim = vcache->input_prim;
vcache->middle->prepare( vcache->middle,
- vcache->input_prim,
vcache->middle_prim,
vcache->opt,
&vcache->fetch_max );
}
-
assert((elt_bias >= 0 && min_index + elt_bias >= min_index) ||
(elt_bias < 0 && min_index + elt_bias < min_index));
if (min_index == 0 &&
- index_size == 2)
- {
+ index_size == 2) {
transformed_elts = (const ushort *)elts;
}
- else
- {
+ else {
storage = MALLOC( draw_count * sizeof(ushort) );
if (!storage)
goto fail;
@@ -419,23 +446,23 @@ vcache_check_run( struct draw_pt_front_end *frontend,
switch(index_size) {
case 1:
rebase_ubyte_elts( (const ubyte *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
+ draw_count,
+ 0 - (int)min_index,
+ storage );
break;
case 2:
rebase_ushort_elts( (const ushort *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
+ draw_count,
+ 0 - (int)min_index,
+ storage );
break;
case 4:
rebase_uint_elts( (const uint *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
+ draw_count,
+ 0 - (int)min_index,
+ storage );
break;
default:
@@ -462,7 +489,7 @@ vcache_check_run( struct draw_pt_front_end *frontend,
debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n",
fetch_count, draw_count);
- fail:
+fail:
vcache_run( frontend, get_elt, elts, elt_bias, draw_count );
}
@@ -472,23 +499,26 @@ vcache_check_run( struct draw_pt_front_end *frontend,
static void
vcache_prepare( struct draw_pt_front_end *frontend,
unsigned in_prim,
- unsigned out_prim,
struct draw_pt_middle_end *middle,
unsigned opt )
{
struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- if (opt & PT_PIPELINE)
- {
+ if (opt & PT_PIPELINE) {
vcache->base.run = vcache_run_extras;
}
- else
- {
+ else {
vcache->base.run = vcache_check_run;
}
+ /* VCache will always emit the reduced version of its input
+ * primitive, ie STRIP/FANS become TRIS, etc.
+ *
+ * This is not to be confused with what the GS might be up to,
+ * which is a separate issue.
+ */
vcache->input_prim = in_prim;
- vcache->output_prim = u_reduced_prim(out_prim);
+ vcache->output_prim = u_reduced_prim(in_prim);
vcache->middle = middle;
vcache->opt = opt;
@@ -496,12 +526,13 @@ vcache_prepare( struct draw_pt_front_end *frontend,
/* Have to run prepare here, but try and guess a good prim for
* doing so:
*/
- vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim;
- middle->prepare( middle, vcache->input_prim,
- vcache->middle_prim, opt, &vcache->fetch_max );
-}
-
+ vcache->middle_prim = (opt & PT_PIPELINE)
+ ? vcache->output_prim : vcache->input_prim;
+ middle->prepare( middle,
+ vcache->middle_prim,
+ opt, &vcache->fetch_max );
+}
static void
@@ -512,6 +543,7 @@ vcache_finish( struct draw_pt_front_end *frontend )
vcache->middle = NULL;
}
+
static void
vcache_destroy( struct draw_pt_front_end *frontend )
{
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index b9db886a24..57ea63fc06 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -98,6 +98,11 @@ draw_create_vertex_shader(struct draw_context *draw,
vs = draw_create_vs_ppc( draw, shader );
#endif
}
+#if HAVE_LLVM
+ else {
+ vs = draw_create_vs_llvm(draw, shader);
+ }
+#endif
if (!vs) {
vs = draw_create_vs_exec( draw, shader );
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 6c7e94db43..a731994523 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -165,7 +165,6 @@ draw_create_vs_ppc(struct draw_context *draw,
const struct pipe_shader_state *templ);
-
struct draw_vs_varient_key;
struct draw_vertex_shader;
@@ -173,6 +172,11 @@ struct draw_vs_varient *
draw_vs_create_varient_aos_sse( struct draw_vertex_shader *vs,
const struct draw_vs_varient_key *key );
+#if HAVE_LLVM
+struct draw_vertex_shader *
+draw_create_vs_llvm(struct draw_context *draw,
+ const struct pipe_shader_state *state);
+#endif
/********************************************************************************
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
new file mode 100644
index 0000000000..6c13df7913
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -0,0 +1,120 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+#include "draw_vs.h"
+#include "draw_llvm.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+
+static void
+vs_llvm_prepare(struct draw_vertex_shader *shader,
+ struct draw_context *draw)
+{
+ /*struct llvm_vertex_shader *evs = llvm_vertex_shader(shader);*/
+}
+
+static void
+vs_llvm_run_linear( struct draw_vertex_shader *shader,
+ const float (*input)[4],
+ float (*output)[4],
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ unsigned count,
+ unsigned input_stride,
+ unsigned output_stride )
+{
+ /* we should never get here since the entire pipeline is
+ * generated in draw_pt_fetch_shade_pipeline_llvm.c */
+ debug_assert(0);
+}
+
+
+static void
+vs_llvm_delete( struct draw_vertex_shader *dvs )
+{
+ struct llvm_vertex_shader *shader = llvm_vertex_shader(dvs);
+ struct pipe_fence_handle *fence = NULL;
+ struct draw_llvm_variant_list_item *li;
+ struct pipe_context *pipe = dvs->draw->pipe;
+
+ /*
+ * XXX: This might be not neccessary at all.
+ */
+ pipe->flush(pipe, 0, &fence);
+ if (fence) {
+ pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_reference(pipe->screen, &fence, NULL);
+ }
+
+
+ li = first_elem(&shader->variants);
+ while(!at_end(&shader->variants, li)) {
+ struct draw_llvm_variant_list_item *next = next_elem(li);
+ draw_llvm_destroy_variant(li->base);
+ li = next;
+ }
+
+ assert(shader->variants_cached == 0);
+ FREE((void*) dvs->state.tokens);
+ FREE( dvs );
+}
+
+
+struct draw_vertex_shader *
+draw_create_vs_llvm(struct draw_context *draw,
+ const struct pipe_shader_state *state)
+{
+ struct llvm_vertex_shader *vs = CALLOC_STRUCT( llvm_vertex_shader );
+
+ if (vs == NULL)
+ return NULL;
+
+ /* we make a private copy of the tokens */
+ vs->base.state.tokens = tgsi_dup_tokens(state->tokens);
+ if (!vs->base.state.tokens) {
+ FREE(vs);
+ return NULL;
+ }
+
+ tgsi_scan_shader(state->tokens, &vs->base.info);
+
+ vs->base.draw = draw;
+ vs->base.prepare = vs_llvm_prepare;
+ vs->base.run_linear = vs_llvm_run_linear;
+ vs->base.delete = vs_llvm_delete;
+ vs->base.create_varient = draw_vs_create_varient_generic;
+
+ make_empty_list(&vs->variants);
+
+ return &vs->base;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index d926b2de18..f5f2623e46 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -56,7 +56,6 @@
#include "lp_bld_intr.h"
#include "lp_bld_logic.h"
#include "lp_bld_pack.h"
-#include "lp_bld_debug.h"
#include "lp_bld_arit.h"
@@ -847,6 +846,11 @@ lp_build_round_sse41(struct lp_build_context *bld,
}
+/**
+ * Return the integer part of a float (vector) value. The returned value is
+ * a float (vector).
+ * Ex: trunc(-1.5) = 1.0
+ */
LLVMValueRef
lp_build_trunc(struct lp_build_context *bld,
LLVMValueRef a)
@@ -869,6 +873,12 @@ lp_build_trunc(struct lp_build_context *bld,
}
+/**
+ * Return float (vector) rounded to nearest integer (vector). The returned
+ * value is a float (vector).
+ * Ex: round(0.9) = 1.0
+ * Ex: round(-1.5) = -2.0
+ */
LLVMValueRef
lp_build_round(struct lp_build_context *bld,
LLVMValueRef a)
@@ -890,6 +900,11 @@ lp_build_round(struct lp_build_context *bld,
}
+/**
+ * Return floor of float (vector), result is a float (vector)
+ * Ex: floor(1.1) = 1.0
+ * Ex: floor(-1.1) = -2.0
+ */
LLVMValueRef
lp_build_floor(struct lp_build_context *bld,
LLVMValueRef a)
@@ -911,6 +926,11 @@ lp_build_floor(struct lp_build_context *bld,
}
+/**
+ * Return ceiling of float (vector), returning float (vector).
+ * Ex: ceil( 1.1) = 2.0
+ * Ex: ceil(-1.1) = -1.0
+ */
LLVMValueRef
lp_build_ceil(struct lp_build_context *bld,
LLVMValueRef a)
@@ -933,7 +953,7 @@ lp_build_ceil(struct lp_build_context *bld,
/**
- * Return fractional part of 'a' computed as a - floor(f)
+ * Return fractional part of 'a' computed as a - floor(a)
* Typically used in texture coord arithmetic.
*/
LLVMValueRef
@@ -946,8 +966,9 @@ lp_build_fract(struct lp_build_context *bld,
/**
- * Convert to integer, through whichever rounding method that's fastest,
- * typically truncating toward zero.
+ * Return the integer part of a float (vector) value. The returned value is
+ * an integer (vector).
+ * Ex: itrunc(-1.5) = 1
*/
LLVMValueRef
lp_build_itrunc(struct lp_build_context *bld,
@@ -964,7 +985,10 @@ lp_build_itrunc(struct lp_build_context *bld,
/**
- * Convert float[] to int[] with round().
+ * Return float (vector) rounded to nearest integer (vector). The returned
+ * value is an integer (vector).
+ * Ex: iround(0.9) = 1
+ * Ex: iround(-1.5) = -2
*/
LLVMValueRef
lp_build_iround(struct lp_build_context *bld,
@@ -1007,7 +1031,9 @@ lp_build_iround(struct lp_build_context *bld,
/**
- * Convert float[] to int[] with floor().
+ * Return floor of float (vector), result is an int (vector)
+ * Ex: ifloor(1.1) = 1.0
+ * Ex: ifloor(-1.1) = -2.0
*/
LLVMValueRef
lp_build_ifloor(struct lp_build_context *bld,
@@ -1034,29 +1060,31 @@ lp_build_ifloor(struct lp_build_context *bld,
/* sign = a < 0 ? ~0 : 0 */
sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
sign = LLVMBuildAnd(bld->builder, sign, mask, "");
- sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "");
- lp_build_name(sign, "floor.sign");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign");
/* offset = -0.99999(9)f */
- offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa));
+ offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
offset = LLVMConstBitCast(offset, int_vec_type);
- /* offset = a < 0 ? -0.99999(9)f : 0.0f */
+ /* offset = a < 0 ? offset : 0.0f */
offset = LLVMBuildAnd(bld->builder, offset, sign, "");
- offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "");
- lp_build_name(offset, "floor.offset");
+ offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");
- res = LLVMBuildAdd(bld->builder, a, offset, "");
- lp_build_name(res, "floor.res");
+ res = LLVMBuildAdd(bld->builder, a, offset, "ifloor.res");
}
- res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
- lp_build_name(res, "floor");
+ /* round to nearest (toward zero) */
+ res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "ifloor.res");
return res;
}
+/**
+ * Return ceiling of float (vector), returning int (vector).
+ * Ex: iceil( 1.1) = 2
+ * Ex: iceil(-1.1) = -1
+ */
LLVMValueRef
lp_build_iceil(struct lp_build_context *bld,
LLVMValueRef a)
@@ -1072,12 +1100,31 @@ lp_build_iceil(struct lp_build_context *bld,
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
}
else {
- /* TODO: mimic lp_build_ifloor() here */
- assert(0);
- res = bld->undef;
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+ LLVMValueRef offset;
+
+ /* sign = a < 0 ? 0 : ~0 */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign");
+ sign = LLVMBuildNot(bld->builder, sign, "iceil.not");
+
+ /* offset = 0.99999(9)f */
+ offset = lp_build_const_vec(type, (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
+ offset = LLVMConstBitCast(offset, int_vec_type);
+
+ /* offset = a < 0 ? 0.0 : offset */
+ offset = LLVMBuildAnd(bld->builder, offset, sign, "");
+ offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
+
+ res = LLVMBuildAdd(bld->builder, a, offset, "iceil.res");
}
- res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
+ /* round to nearest (toward zero) */
+ res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "iceil.res");
return res;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h
index d46b9f882b..7ee8fff140 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h
@@ -107,4 +107,12 @@ lp_build_const_mask_aos(struct lp_type type,
const boolean cond[4]);
+static INLINE LLVMValueRef
+lp_build_const_int32(int i)
+{
+ return LLVMConstInt(LLVMInt32Type(), i, 0);
+}
+
+
+
#endif /* !LP_BLD_CONST_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 3f7f2ebde9..77012f1fac 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -83,6 +83,9 @@
*
* Although the result values can be scaled to an arbitrary bit width specified
* by dst_width, the actual result type will have the same width.
+ *
+ * Ex: src = { float, float, float, float }
+ * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
*/
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
@@ -152,6 +155,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
/**
* Inverse of lp_build_clamped_float_to_unsigned_norm above.
+ * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
+ * return {float, float, float, float} with values in range [0, 1].
*/
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
@@ -219,18 +224,19 @@ lp_build_conv(LLVMBuilderRef builder,
unsigned num_tmps;
unsigned i;
- /* Register width must remain constant */
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
/* We must not loose or gain channels. Only precision */
assert(src_type.length * num_srcs == dst_type.length * num_dsts);
assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
+ assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
tmp_type = src_type;
- for(i = 0; i < num_srcs; ++i)
+ for(i = 0; i < num_srcs; ++i) {
+ assert(lp_check_value(src_type, src[i]));
tmp[i] = src[i];
+ }
num_tmps = num_srcs;
/*
@@ -326,30 +332,25 @@ lp_build_conv(LLVMBuilderRef builder,
/*
* Truncate or expand bit width
+ *
+ * No data conversion should happen here, although the sign bits are
+ * crucial to avoid bad clamping.
*/
- assert(!tmp_type.floating || tmp_type.width == dst_type.width);
+ {
+ struct lp_type new_type;
- if(tmp_type.width > dst_type.width) {
- assert(num_dsts == 1);
- tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
- tmp_type.width = dst_type.width;
- tmp_type.length = dst_type.length;
- num_tmps = 1;
- }
+ new_type = tmp_type;
+ new_type.sign = dst_type.sign;
+ new_type.width = dst_type.width;
+ new_type.length = dst_type.length;
+
+ lp_build_resize(builder, tmp_type, new_type, tmp, num_srcs, tmp, num_dsts);
- if(tmp_type.width < dst_type.width) {
- assert(num_tmps == 1);
- lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
- tmp_type.width = dst_type.width;
- tmp_type.length = dst_type.length;
+ tmp_type = new_type;
num_tmps = num_dsts;
}
- assert(tmp_type.width == dst_type.width);
- assert(tmp_type.length == dst_type.length);
- assert(num_tmps == num_dsts);
-
/*
* Scale to the widest range
*/
@@ -406,8 +407,10 @@ lp_build_conv(LLVMBuilderRef builder,
}
}
- for(i = 0; i < num_dsts; ++i)
+ for(i = 0; i < num_dsts; ++i) {
dst[i] = tmp[i];
+ assert(lp_check_value(dst_type, dst[i]));
+ }
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
index 5f5036e7bd..60e22d727a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -48,9 +48,9 @@ struct lp_build_context;
*/
LLVMValueRef
-lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
- const struct util_format_description *desc,
- LLVMValueRef packed);
+lp_build_format_swizzle_aos(const struct util_format_description *desc,
+ struct lp_build_context *bld,
+ LLVMValueRef unswizzled);
LLVMValueRef
lp_build_pack_rgba_aos(LLVMBuilderRef builder,
@@ -60,7 +60,9 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
LLVMValueRef
lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
const struct util_format_description *format_desc,
- LLVMValueRef ptr,
+ struct lp_type type,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
LLVMValueRef i,
LLVMValueRef j);
@@ -72,7 +74,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
void
lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
struct lp_build_context *bld,
- const LLVMValueRef *unswizzled,
+ const LLVMValueRef unswizzled[4],
LLVMValueRef swizzled_out[4]);
void
@@ -82,6 +84,11 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
LLVMValueRef packed,
LLVMValueRef rgba_out[4]);
+void
+lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
+ struct lp_type dst_type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba);
void
lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
@@ -93,5 +100,18 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
LLVMValueRef j,
LLVMValueRef rgba_out[4]);
+/*
+ * YUV
+ */
+
+
+LLVMValueRef
+lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ unsigned n,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
+ LLVMValueRef i,
+ LLVMValueRef j);
#endif /* !LP_BLD_FORMAT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 87e3e72a6e..0f01fc1d75 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -38,33 +38,122 @@
#include "util/u_math.h"
#include "util/u_string.h"
+#include "lp_bld_arit.h"
#include "lp_bld_init.h"
#include "lp_bld_type.h"
#include "lp_bld_flow.h"
+#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_gather.h"
#include "lp_bld_format.h"
/**
+ * Basic swizzling. Rearrange the order of the unswizzled array elements
+ * according to the format description. PIPE_SWIZZLE_ZERO/ONE are supported
+ * too.
+ * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}.
+ */
+LLVMValueRef
+lp_build_format_swizzle_aos(const struct util_format_description *desc,
+ struct lp_build_context *bld,
+ LLVMValueRef unswizzled)
+{
+ unsigned char swizzles[4];
+ unsigned chan;
+
+ assert(bld->type.length % 4 == 0);
+
+ for (chan = 0; chan < 4; ++chan) {
+ enum util_format_swizzle swizzle;
+
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ /*
+ * For ZS formats do RGBA = ZZZ1
+ */
+ if (chan == 3) {
+ swizzle = UTIL_FORMAT_SWIZZLE_1;
+ } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
+ swizzle = UTIL_FORMAT_SWIZZLE_0;
+ } else {
+ swizzle = desc->swizzle[0];
+ }
+ } else {
+ swizzle = desc->swizzle[chan];
+ }
+ swizzles[chan] = swizzle;
+ }
+
+ return lp_build_swizzle_aos(bld, unswizzled, swizzles);
+}
+
+
+/**
+ * Whether the format matches the vector type, apart of swizzles.
+ */
+static INLINE boolean
+format_matches_type(const struct util_format_description *desc,
+ struct lp_type type)
+{
+ enum util_format_type chan_type;
+ unsigned chan;
+
+ assert(type.length % 4 == 0);
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN ||
+ desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB ||
+ desc->block.width != 1 ||
+ desc->block.height != 1) {
+ return FALSE;
+ }
+
+ if (type.floating) {
+ chan_type = UTIL_FORMAT_TYPE_FLOAT;
+ } else if (type.fixed) {
+ chan_type = UTIL_FORMAT_TYPE_FIXED;
+ } else if (type.sign) {
+ chan_type = UTIL_FORMAT_TYPE_SIGNED;
+ } else {
+ chan_type = UTIL_FORMAT_TYPE_UNSIGNED;
+ }
+
+ for (chan = 0; chan < desc->nr_channels; ++chan) {
+ if (desc->channel[chan].size != type.width) {
+ return FALSE;
+ }
+
+ if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
+ if (desc->channel[chan].type != chan_type ||
+ desc->channel[chan].normalized != type.norm) {
+ return FALSE;
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+
+/**
* Unpack a single pixel into its RGBA components.
*
* @param desc the pixel format for the packed pixel value
* @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
*
- * @return RGBA in a 4 floats vector.
+ * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
*/
-LLVMValueRef
-lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
- const struct util_format_description *desc,
- LLVMValueRef packed)
+static INLINE LLVMValueRef
+lp_build_unpack_arith_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ LLVMValueRef packed)
{
LLVMValueRef shifted, casted, scaled, masked;
LLVMValueRef shifts[4];
LLVMValueRef masks[4];
LLVMValueRef scales[4];
- LLVMValueRef swizzles[4];
- LLVMValueRef aux[4];
+
boolean normalized;
- int empty_channel;
boolean needs_uitofp;
unsigned shift;
unsigned i;
@@ -77,8 +166,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
/* Do the intermediate integer computations with 32bit integers since it
* matches floating point size */
- if (desc->block.bits < 32)
- packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
+ assert (LLVMTypeOf(packed) == LLVMInt32Type());
/* Broadcast the packed value to all four channels
* before: packed = BGRA
@@ -98,7 +186,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
/* Initialize vector constants */
normalized = FALSE;
needs_uitofp = FALSE;
- empty_channel = -1;
shift = 0;
/* Loop over 4 color components */
@@ -109,7 +196,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
shifts[i] = LLVMGetUndef(LLVMInt32Type());
masks[i] = LLVMConstNull(LLVMInt32Type());
scales[i] = LLVMConstNull(LLVMFloatType());
- empty_channel = i;
}
else {
unsigned long long mask = (1ULL << bits) - 1;
@@ -158,52 +244,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
else
scaled = casted;
- for (i = 0; i < 4; ++i)
- aux[i] = LLVMGetUndef(LLVMFloatType());
-
- /* Build swizzles vector to put components into R,G,B,A order */
- for (i = 0; i < 4; ++i) {
- enum util_format_swizzle swizzle;
-
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- /*
- * For ZS formats do RGBA = ZZZ1
- */
- if (i == 3) {
- swizzle = UTIL_FORMAT_SWIZZLE_1;
- } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) {
- swizzle = UTIL_FORMAT_SWIZZLE_0;
- } else {
- swizzle = desc->swizzle[0];
- }
- } else {
- swizzle = desc->swizzle[i];
- }
-
- switch (swizzle) {
- case UTIL_FORMAT_SWIZZLE_X:
- case UTIL_FORMAT_SWIZZLE_Y:
- case UTIL_FORMAT_SWIZZLE_Z:
- case UTIL_FORMAT_SWIZZLE_W:
- swizzles[i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
- break;
- case UTIL_FORMAT_SWIZZLE_0:
- assert(empty_channel >= 0);
- swizzles[i] = LLVMConstInt(LLVMInt32Type(), empty_channel, 0);
- break;
- case UTIL_FORMAT_SWIZZLE_1:
- swizzles[i] = LLVMConstInt(LLVMInt32Type(), 4, 0);
- aux[0] = LLVMConstReal(LLVMFloatType(), 1.0);
- break;
- case UTIL_FORMAT_SWIZZLE_NONE:
- swizzles[i] = LLVMGetUndef(LLVMFloatType());
- assert(0);
- break;
- }
- }
-
- return LLVMBuildShuffleVector(builder, scaled, LLVMConstVector(aux, 4),
- LLVMConstVector(swizzles, 4), "");
+ return scaled;
}
@@ -310,22 +351,65 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
}
+
+
/**
* Fetch a pixel into a 4 float AoS.
*
* \param format_desc describes format of the image we're fetching from
* \param ptr address of the pixel block (or the texel if uncompressed)
* \param i, j the sub-block pixel coordinates. For non-compressed formats
- * these will always be (0,).
- * \return valueRef with the float[4] RGBA pixel
+ * these will always be (0, 0).
+ * \return a 4 element vector with the pixel's RGBA values.
*/
LLVMValueRef
lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
const struct util_format_description *format_desc,
- LLVMValueRef ptr,
+ struct lp_type type,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
LLVMValueRef i,
LLVMValueRef j)
{
+ unsigned num_pixels = type.length / 4;
+ struct lp_build_context bld;
+
+ assert(type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(type.length % 4 == 0);
+
+ lp_build_context_init(&bld, builder, type);
+
+ /*
+ * Trivial case
+ *
+ * The format matches the type (apart of a swizzle) so no need for
+ * scaling or converting.
+ */
+
+ if (format_matches_type(format_desc, type) &&
+ format_desc->block.bits <= type.width * 4 &&
+ util_is_pot(format_desc->block.bits)) {
+ LLVMValueRef packed;
+
+ /*
+ * The format matches the type (apart of a swizzle) so no need for
+ * scaling or converting.
+ */
+
+ packed = lp_build_gather(builder, type.length/4,
+ format_desc->block.bits, type.width*4,
+ base_ptr, offset);
+
+ assert(format_desc->block.bits <= type.width * type.length);
+
+ packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(type), "");
+
+ return lp_build_format_swizzle_aos(format_desc, &bld, packed);
+ }
+
+ /*
+ * Bit arithmetic
+ */
if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
@@ -337,21 +421,77 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
format_desc->is_bitmask &&
!format_desc->is_mixed &&
(format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
- format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
- {
- LLVMValueRef packed;
+ format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {
+
+ LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
+ LLVMValueRef res;
+ unsigned k;
+
+ /*
+ * Unpack a pixel at a time into a <4 x float> RGBA vector
+ */
+
+ for (k = 0; k < num_pixels; ++k) {
+ LLVMValueRef packed;
+
+ packed = lp_build_gather_elem(builder, num_pixels,
+ format_desc->block.bits, 32,
+ base_ptr, offset, k);
- ptr = LLVMBuildBitCast(builder, ptr,
- LLVMPointerType(LLVMIntType(format_desc->block.bits), 0) ,
- "");
+ tmps[k] = lp_build_unpack_arith_rgba_aos(builder, format_desc,
+ packed);
+ }
+
+ /*
+ * Type conversion.
+ *
+ * TODO: We could avoid floating conversion for integer to
+ * integer conversions.
+ */
- packed = LLVMBuildLoad(builder, ptr, "packed");
+ lp_build_conv(builder,
+ lp_float32_vec4_type(),
+ type,
+ tmps, num_pixels, &res, 1);
- return lp_build_unpack_rgba_aos(builder, format_desc, packed);
+ return lp_build_format_swizzle_aos(format_desc, &bld, res);
}
- else if (format_desc->fetch_rgba_float) {
+
+ /*
+ * YUV / subsampled formats
+ */
+
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ struct lp_type tmp_type;
+ LLVMValueRef tmp;
+
+ memset(&tmp_type, 0, sizeof tmp_type);
+ tmp_type.width = 8;
+ tmp_type.length = num_pixels * 4;
+ tmp_type.norm = TRUE;
+
+ tmp = lp_build_fetch_subsampled_rgba_aos(builder,
+ format_desc,
+ num_pixels,
+ base_ptr,
+ offset,
+ i, j);
+
+ lp_build_conv(builder,
+ tmp_type, type,
+ &tmp, 1, &tmp, 1);
+
+ return tmp;
+ }
+
+ /*
+ * Fallback to util_format_description::fetch_rgba_8unorm().
+ */
+
+ if (format_desc->fetch_rgba_8unorm &&
+ !type.floating && type.width == 8 && !type.sign && type.norm) {
/*
- * Fallback to calling util_format_description::fetch_rgba_float.
+ * Fallback to calling util_format_description::fetch_rgba_8unorm.
*
* This is definitely not the most efficient way of fetching pixels, as
* we miss the opportunity to do vectorization, but this it is a
@@ -361,9 +501,113 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
char name[256];
+ LLVMTypeRef i8t = LLVMInt8Type();
+ LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
+ LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef function;
+ LLVMValueRef tmp_ptr;
LLVMValueRef tmp;
- LLVMValueRef args[4];
+ LLVMValueRef res;
+ unsigned k;
+
+ util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
+ format_desc->short_name);
+
+ /*
+ * Declare and bind format_desc->fetch_rgba_8unorm().
+ */
+
+ function = LLVMGetNamedFunction(module, name);
+ if (!function) {
+ LLVMTypeRef ret_type;
+ LLVMTypeRef arg_types[4];
+ LLVMTypeRef function_type;
+
+ ret_type = LLVMVoidType();
+ arg_types[0] = pi8t;
+ arg_types[1] = pi8t;
+ arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
+ function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
+ function = LLVMAddFunction(module, name, function_type);
+
+ LLVMSetFunctionCallConv(function, LLVMCCallConv);
+ LLVMSetLinkage(function, LLVMExternalLinkage);
+
+ assert(LLVMIsDeclaration(function));
+
+ LLVMAddGlobalMapping(lp_build_engine, function,
+ func_to_pointer((func_pointer)format_desc->fetch_rgba_8unorm));
+ }
+
+ tmp_ptr = lp_build_alloca(builder, i32t, "");
+
+ res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));
+
+ /*
+ * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
+ * in the SoA vectors.
+ */
+
+ for (k = 0; k < num_pixels; ++k) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
+ LLVMValueRef args[4];
+
+ args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
+ args[1] = lp_build_gather_elem_ptr(builder, num_pixels,
+ base_ptr, offset, k);
+
+ if (num_pixels == 1) {
+ args[2] = i;
+ args[3] = j;
+ }
+ else {
+ args[2] = LLVMBuildExtractElement(builder, i, index, "");
+ args[3] = LLVMBuildExtractElement(builder, j, index, "");
+ }
+
+ LLVMBuildCall(builder, function, args, Elements(args), "");
+
+ tmp = LLVMBuildLoad(builder, tmp_ptr, "");
+
+ if (num_pixels == 1) {
+ res = tmp;
+ }
+ else {
+ res = LLVMBuildInsertElement(builder, res, tmp, index, "");
+ }
+ }
+
+ /* Bitcast from <n x i32> to <4n x i8> */
+ res = LLVMBuildBitCast(builder, res, bld.vec_type, "");
+
+ return res;
+ }
+
+
+ /*
+ * Fallback to util_format_description::fetch_rgba_float().
+ */
+
+ if (format_desc->fetch_rgba_float) {
+ /*
+ * Fallback to calling util_format_description::fetch_rgba_float.
+ *
+ * This is definitely not the most efficient way of fetching pixels, as
+ * we miss the opportunity to do vectorization, but this it is a
+ * convenient for formats or scenarios for which there was no opportunity
+ * or incentive to optimize.
+ */
+
+ LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
+ char name[256];
+ LLVMTypeRef f32t = LLVMFloatType();
+ LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
+ LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
+ LLVMValueRef function;
+ LLVMValueRef tmp_ptr;
+ LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
+ LLVMValueRef res;
+ unsigned k;
util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
format_desc->short_name);
@@ -379,7 +623,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
LLVMTypeRef function_type;
ret_type = LLVMVoidType();
- arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
+ arg_types[0] = pf32t;
arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
@@ -394,25 +638,43 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
func_to_pointer((func_pointer)format_desc->fetch_rgba_float));
}
- tmp = lp_build_alloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
+ tmp_ptr = lp_build_alloca(builder, f32x4t, "");
/*
* Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
* in the SoA vectors.
*/
- args[0] = LLVMBuildBitCast(builder, tmp,
- LLVMPointerType(LLVMFloatType(), 0), "");
- args[1] = ptr;
- args[2] = i;
- args[3] = j;
+ for (k = 0; k < num_pixels; ++k) {
+ LLVMValueRef args[4];
- LLVMBuildCall(builder, function, args, Elements(args), "");
+ args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
+ args[1] = lp_build_gather_elem_ptr(builder, num_pixels,
+ base_ptr, offset, k);
- return LLVMBuildLoad(builder, tmp, "");
- }
- else {
- assert(0);
- return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
+ if (num_pixels == 1) {
+ args[2] = i;
+ args[3] = j;
+ }
+ else {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
+ args[2] = LLVMBuildExtractElement(builder, i, index, "");
+ args[3] = LLVMBuildExtractElement(builder, j, index, "");
+ }
+
+ LLVMBuildCall(builder, function, args, Elements(args), "");
+
+ tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
+ }
+
+ lp_build_conv(builder,
+ lp_float32_vec4_type(),
+ type,
+ tmps, num_pixels, &res, 1);
+
+ return res;
}
+
+ assert(0);
+ return lp_build_undef(type);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index e1b94adc85..9f405921b0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -36,7 +36,7 @@
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_swizzle.h"
-#include "lp_bld_sample.h" /* for lp_build_gather */
+#include "lp_bld_gather.h"
#include "lp_bld_format.h"
@@ -251,6 +251,41 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
}
+void
+lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
+ struct lp_type dst_type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba)
+{
+ LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
+ unsigned chan;
+
+ packed = LLVMBuildBitCast(builder, packed,
+ lp_build_int_vec_type(dst_type), "");
+
+ /* Decode the input vector components */
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned start = chan*8;
+ unsigned stop = start + 8;
+ LLVMValueRef input;
+
+ input = packed;
+
+ if (start)
+ input = LLVMBuildLShr(builder, input,
+ lp_build_const_int_vec(dst_type, start), "");
+
+ if (stop < 32)
+ input = LLVMBuildAnd(builder, input, mask, "");
+
+ input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
+
+ rgba[chan] = input;
+ }
+}
+
+
+
/**
* Fetch a texels from a texture, returning them in SoA layout.
*
@@ -311,20 +346,49 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
format_desc,
type,
packed, rgba_out);
+ return;
}
- else {
- /*
- * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
- *
- * This is not the most efficient way of fetching pixels, as we
- * miss some opportunities to do vectorization, but this is
- * convenient for formats or scenarios for which there was no
- * opportunity or incentive to optimize.
- */
+ /*
+ * Try calling lp_build_fetch_rgba_aos for all pixels.
+ */
+
+ if (util_format_fits_8unorm(format_desc) &&
+ type.floating && type.width == 32 && type.length == 4) {
+ struct lp_type tmp_type;
+ LLVMValueRef tmp;
+
+ memset(&tmp_type, 0, sizeof tmp_type);
+ tmp_type.width = 8;
+ tmp_type.length = type.length * 4;
+ tmp_type.norm = TRUE;
+
+ tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type,
+ base_ptr, offset, i, j);
+
+ lp_build_rgba8_to_f32_soa(builder,
+ type,
+ tmp,
+ rgba_out);
+
+ return;
+ }
+
+ /*
+ * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
+ *
+ * This is not the most efficient way of fetching pixels, as we
+ * miss some opportunities to do vectorization, but this is
+ * convenient for formats or scenarios for which there was no
+ * opportunity or incentive to optimize.
+ */
+
+ {
unsigned k, chan;
+ struct lp_type tmp_type;
- assert(type.floating);
+ tmp_type = type;
+ tmp_type.length = 4;
for (chan = 0; chan < 4; ++chan) {
rgba_out[chan] = lp_build_undef(type);
@@ -334,18 +398,17 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
for(k = 0; k < type.length; ++k) {
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
LLVMValueRef offset_elem;
- LLVMValueRef ptr;
LLVMValueRef i_elem, j_elem;
LLVMValueRef tmp;
offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
- ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
i_elem = LLVMBuildExtractElement(builder, i, index, "");
j_elem = LLVMBuildExtractElement(builder, j, index, "");
/* Get a single float[4]={R,G,B,A} pixel */
- tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr,
+ tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type,
+ base_ptr, offset_elem,
i_elem, j_elem);
/*
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
new file mode 100644
index 0000000000..0a5038bc98
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
@@ -0,0 +1,399 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * YUV pixel format manipulation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_format.h"
+
+#include "lp_bld_arit.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_gather.h"
+#include "lp_bld_format.h"
+
+
+/**
+ * Extract Y, U, V channels from packed UYVY.
+ * @param packed is a <n x i32> vector with the packed UYVY blocks
+ * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
+ */
+static void
+uyvy_to_yuv_soa(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i,
+ LLVMValueRef *y,
+ LLVMValueRef *u,
+ LLVMValueRef *v)
+{
+ struct lp_type type;
+ LLVMValueRef shift, mask;
+
+ memset(&type, 0, sizeof type);
+ type.width = 32;
+ type.length = n;
+
+ assert(lp_check_value(type, packed));
+ assert(lp_check_value(type, i));
+
+ /*
+ * y = (uyvy >> 16*i) & 0xff
+ * u = (uyvy ) & 0xff
+ * v = (uyvy >> 16 ) & 0xff
+ */
+
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
+ shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
+ *y = LLVMBuildLShr(builder, packed, shift, "");
+ *u = packed;
+ *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
+
+ mask = lp_build_const_int_vec(type, 0xff);
+
+ *y = LLVMBuildAnd(builder, *y, mask, "y");
+ *u = LLVMBuildAnd(builder, *u, mask, "u");
+ *v = LLVMBuildAnd(builder, *v, mask, "v");
+}
+
+
+/**
+ * Extract Y, U, V channels from packed YUYV.
+ * @param packed is a <n x i32> vector with the packed YUYV blocks
+ * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
+ */
+static void
+yuyv_to_yuv_soa(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i,
+ LLVMValueRef *y,
+ LLVMValueRef *u,
+ LLVMValueRef *v)
+{
+ struct lp_type type;
+ LLVMValueRef shift, mask;
+
+ memset(&type, 0, sizeof type);
+ type.width = 32;
+ type.length = n;
+
+ assert(lp_check_value(type, packed));
+ assert(lp_check_value(type, i));
+
+ /*
+ * y = (yuyv >> 16*i) & 0xff
+ * u = (yuyv >> 8 ) & 0xff
+ * v = (yuyv >> 24 ) & 0xff
+ */
+
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
+ *y = LLVMBuildLShr(builder, packed, shift, "");
+ *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
+ *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), "");
+
+ mask = lp_build_const_int_vec(type, 0xff);
+
+ *y = LLVMBuildAnd(builder, *y, mask, "y");
+ *u = LLVMBuildAnd(builder, *u, mask, "u");
+ *v = LLVMBuildAnd(builder, *v, mask, "v");
+}
+
+
+static INLINE void
+yuv_to_rgb_soa(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
+ LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b)
+{
+ struct lp_type type;
+ struct lp_build_context bld;
+
+ LLVMValueRef c0;
+ LLVMValueRef c8;
+ LLVMValueRef c16;
+ LLVMValueRef c128;
+ LLVMValueRef c255;
+
+ LLVMValueRef cy;
+ LLVMValueRef cug;
+ LLVMValueRef cub;
+ LLVMValueRef cvr;
+ LLVMValueRef cvg;
+
+ memset(&type, 0, sizeof type);
+ type.sign = TRUE;
+ type.width = 32;
+ type.length = n;
+
+ lp_build_context_init(&bld, builder, type);
+
+ assert(lp_check_value(type, y));
+ assert(lp_check_value(type, u));
+ assert(lp_check_value(type, v));
+
+ /*
+ * Constants
+ */
+
+ c0 = lp_build_const_int_vec(type, 0);
+ c8 = lp_build_const_int_vec(type, 8);
+ c16 = lp_build_const_int_vec(type, 16);
+ c128 = lp_build_const_int_vec(type, 128);
+ c255 = lp_build_const_int_vec(type, 255);
+
+ cy = lp_build_const_int_vec(type, 298);
+ cug = lp_build_const_int_vec(type, -100);
+ cub = lp_build_const_int_vec(type, 516);
+ cvr = lp_build_const_int_vec(type, 409);
+ cvg = lp_build_const_int_vec(type, -208);
+
+ /*
+ * y -= 16;
+ * u -= 128;
+ * v -= 128;
+ */
+
+ y = LLVMBuildSub(builder, y, c16, "");
+ u = LLVMBuildSub(builder, u, c128, "");
+ v = LLVMBuildSub(builder, v, c128, "");
+
+ /*
+ * r = 298 * _y + 409 * _v + 128;
+ * g = 298 * _y - 100 * _u - 208 * _v + 128;
+ * b = 298 * _y + 516 * _u + 128;
+ */
+
+ y = LLVMBuildMul(builder, y, cy, "");
+ y = LLVMBuildAdd(builder, y, c128, "");
+
+ *r = LLVMBuildMul(builder, v, cvr, "");
+ *g = LLVMBuildAdd(builder,
+ LLVMBuildMul(builder, u, cug, ""),
+ LLVMBuildMul(builder, v, cvg, ""),
+ "");
+ *b = LLVMBuildMul(builder, u, cub, "");
+
+ *r = LLVMBuildAdd(builder, *r, y, "");
+ *g = LLVMBuildAdd(builder, *g, y, "");
+ *b = LLVMBuildAdd(builder, *b, y, "");
+
+ /*
+ * r >>= 8;
+ * g >>= 8;
+ * b >>= 8;
+ */
+
+ *r = LLVMBuildAShr(builder, *r, c8, "r");
+ *g = LLVMBuildAShr(builder, *g, c8, "g");
+ *b = LLVMBuildAShr(builder, *b, c8, "b");
+
+ /*
+ * Clamp
+ */
+
+ *r = lp_build_clamp(&bld, *r, c0, c255);
+ *g = lp_build_clamp(&bld, *g, c0, c255);
+ *b = lp_build_clamp(&bld, *b, c0, c255);
+}
+
+
+static LLVMValueRef
+rgb_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef r, LLVMValueRef g, LLVMValueRef b)
+{
+ struct lp_type type;
+ LLVMValueRef a;
+ LLVMValueRef rgba;
+
+ memset(&type, 0, sizeof type);
+ type.sign = TRUE;
+ type.width = 32;
+ type.length = n;
+
+ assert(lp_check_value(type, r));
+ assert(lp_check_value(type, g));
+ assert(lp_check_value(type, b));
+
+ /*
+ * Make a 4 x unorm8 vector
+ */
+
+ r = r;
+ g = LLVMBuildShl(builder, g, lp_build_const_int_vec(type, 8), "");
+ b = LLVMBuildShl(builder, b, lp_build_const_int_vec(type, 16), "");
+ a = lp_build_const_int_vec(type, 0xff000000);
+
+ rgba = r;
+ rgba = LLVMBuildOr(builder, rgba, g, "");
+ rgba = LLVMBuildOr(builder, rgba, b, "");
+ rgba = LLVMBuildOr(builder, rgba, a, "");
+
+ rgba = LLVMBuildBitCast(builder, rgba,
+ LLVMVectorType(LLVMInt8Type(), 4*n), "");
+
+ return rgba;
+}
+
+
+/**
+ * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS
+ */
+static LLVMValueRef
+uyvy_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef y, u, v;
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ uyvy_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
+ yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
+ rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+
+ return rgba;
+}
+
+
+/**
+ * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS
+ */
+static LLVMValueRef
+yuyv_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef y, u, v;
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ yuyv_to_yuv_soa(builder, n, packed, i, &y, &u, &v);
+ yuv_to_rgb_soa(builder, n, y, u, v, &r, &g, &b);
+ rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+
+ return rgba;
+}
+
+
+/**
+ * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS
+ */
+static LLVMValueRef
+rgbg_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ uyvy_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
+ rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+
+ return rgba;
+}
+
+
+/**
+ * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS
+ */
+static LLVMValueRef
+grgb_to_rgba_aos(LLVMBuilderRef builder,
+ unsigned n,
+ LLVMValueRef packed,
+ LLVMValueRef i)
+{
+ LLVMValueRef r, g, b;
+ LLVMValueRef rgba;
+
+ yuyv_to_yuv_soa(builder, n, packed, i, &g, &r, &b);
+ rgba = rgb_to_rgba_aos(builder, n, r, g, b);
+
+ return rgba;
+}
+
+
+/**
+ * @param n is the number of pixels processed
+ * @param packed is a <n x i32> vector with the packed YUYV blocks
+ * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1)
+ * @return a <4*n x i8> vector with the pixel RGBA values in AoS
+ */
+LLVMValueRef
+lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *format_desc,
+ unsigned n,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
+ LLVMValueRef i,
+ LLVMValueRef j)
+{
+ LLVMValueRef packed;
+ LLVMValueRef rgba;
+
+ assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
+ assert(format_desc->block.bits == 32);
+ assert(format_desc->block.width == 2);
+ assert(format_desc->block.height == 1);
+
+ packed = lp_build_gather(builder, n, 32, 32, base_ptr, offset);
+
+ (void)j;
+
+ switch (format_desc->format) {
+ case PIPE_FORMAT_UYVY:
+ rgba = uyvy_to_rgba_aos(builder, n, packed, i);
+ break;
+ case PIPE_FORMAT_YUYV:
+ rgba = yuyv_to_rgba_aos(builder, n, packed, i);
+ break;
+ case PIPE_FORMAT_R8G8_B8G8_UNORM:
+ rgba = rgbg_to_rgba_aos(builder, n, packed, i);
+ break;
+ case PIPE_FORMAT_G8R8_G8B8_UNORM:
+ rgba = grgb_to_rgba_aos(builder, n, packed, i);
+ break;
+ default:
+ assert(0);
+ rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8Type(), 4*n));
+ break;
+ }
+
+ return rgba;
+}
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
new file mode 100644
index 0000000000..d60472e065
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
@@ -0,0 +1,148 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#include "util/u_debug.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_const.h"
+#include "lp_bld_format.h"
+#include "lp_bld_gather.h"
+
+
+/**
+ * Get the pointer to one element from scatter positions in memory.
+ *
+ * @sa lp_build_gather()
+ */
+LLVMValueRef
+lp_build_gather_elem_ptr(LLVMBuilderRef builder,
+ unsigned length,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ unsigned i)
+{
+ LLVMValueRef offset;
+ LLVMValueRef ptr;
+
+ assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8Type(), 0));
+
+ if (length == 1) {
+ assert(i == 0);
+ offset = offsets;
+ } else {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ offset = LLVMBuildExtractElement(builder, offsets, index, "");
+ }
+
+ ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
+
+ return ptr;
+}
+
+
+/**
+ * Gather one element from scatter positions in memory.
+ *
+ * @sa lp_build_gather()
+ */
+LLVMValueRef
+lp_build_gather_elem(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ unsigned i)
+{
+ LLVMTypeRef src_type = LLVMIntType(src_width);
+ LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
+ LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+ LLVMValueRef ptr;
+ LLVMValueRef res;
+
+ assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8Type(), 0));
+
+ ptr = lp_build_gather_elem_ptr(builder, length, base_ptr, offsets, i);
+ ptr = LLVMBuildBitCast(builder, ptr, src_ptr_type, "");
+ res = LLVMBuildLoad(builder, ptr, "");
+
+ assert(src_width <= dst_width);
+ if (src_width > dst_width)
+ res = LLVMBuildTrunc(builder, res, dst_elem_type, "");
+ if (src_width < dst_width)
+ res = LLVMBuildZExt(builder, res, dst_elem_type, "");
+
+ return res;
+}
+
+
+/**
+ * Gather elements from scatter positions in memory into a single vector.
+ * Use for fetching texels from a texture.
+ * For SSE, typical values are length=4, src_width=32, dst_width=32.
+ *
+ * @param length length of the offsets
+ * @param src_width src element width in bits
+ * @param dst_width result element width in bits (src will be expanded to fit)
+ * @param base_ptr base pointer, should be a i8 pointer type.
+ * @param offsets vector with offsets
+ */
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets)
+{
+ LLVMValueRef res;
+
+ if (length == 1) {
+ /* Scalar */
+ return lp_build_gather_elem(builder, length,
+ src_width, dst_width,
+ base_ptr, offsets, 0);
+ } else {
+ /* Vector */
+
+ LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+ LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
+ unsigned i;
+
+ res = LLVMGetUndef(dst_vec_type);
+ for (i = 0; i < length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef elem;
+ elem = lp_build_gather_elem(builder, length,
+ src_width, dst_width,
+ base_ptr, offsets, i);
+ res = LLVMBuildInsertElement(builder, res, elem, index, "");
+ }
+ }
+
+ return res;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.h b/src/gallium/auxiliary/gallivm/lp_bld_gather.h
new file mode 100644
index 0000000000..131af8ea07
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.h
@@ -0,0 +1,61 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_BLD_GATHER_H_
+#define LP_BLD_GATHER_H_
+
+
+#include "gallivm/lp_bld.h"
+
+
+LLVMValueRef
+lp_build_gather_elem_ptr(LLVMBuilderRef builder,
+ unsigned length,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ unsigned i);
+
+LLVMValueRef
+lp_build_gather_elem(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets,
+ unsigned i);
+
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets);
+
+
+#endif /* LP_BLD_GATHER_H_ */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 44cfdc4d3f..69353dea09 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -32,6 +32,8 @@
#include "lp_bld_debug.h"
#include "lp_bld_init.h"
+#include <llvm-c/Transforms/Scalar.h>
+
#ifdef DEBUG
unsigned gallivm_debug = 0;
@@ -50,6 +52,7 @@ LLVMModuleRef lp_build_module = NULL;
LLVMExecutionEngineRef lp_build_engine = NULL;
LLVMModuleProviderRef lp_build_provider = NULL;
LLVMTargetDataRef lp_build_target = NULL;
+LLVMPassManagerRef lp_build_pass = NULL;
/*
@@ -127,6 +130,33 @@ lp_build_init(void)
if (!lp_build_target)
lp_build_target = LLVMGetExecutionEngineTargetData(lp_build_engine);
+ if (!lp_build_pass) {
+ lp_build_pass = LLVMCreateFunctionPassManager(lp_build_provider);
+ LLVMAddTargetData(lp_build_target, lp_build_pass);
+
+ if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
+ /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+ * but there are more on SVN. */
+ /* TODO: Add more passes */
+ LLVMAddCFGSimplificationPass(lp_build_pass);
+ LLVMAddPromoteMemoryToRegisterPass(lp_build_pass);
+ LLVMAddConstantPropagationPass(lp_build_pass);
+ if(util_cpu_caps.has_sse4_1) {
+ /* FIXME: There is a bug in this pass, whereby the combination of fptosi
+ * and sitofp (necessary for trunc/floor/ceil/round implementation)
+ * somehow becomes invalid code.
+ */
+ LLVMAddInstructionCombiningPass(lp_build_pass);
+ }
+ LLVMAddGVNPass(lp_build_pass);
+ } else {
+ /* We need at least this pass to prevent the backends to fail in
+ * unexpected ways.
+ */
+ LLVMAddPromoteMemoryToRegisterPass(lp_build_pass);
+ }
+ }
+
util_cpu_detect();
#if 0
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h
index 0ec2afcd1b..a32ced9b4c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h
@@ -38,6 +38,7 @@ extern LLVMModuleRef lp_build_module;
extern LLVMExecutionEngineRef lp_build_engine;
extern LLVMModuleProviderRef lp_build_provider;
extern LLVMTargetDataRef lp_build_target;
+extern LLVMPassManagerRef lp_build_pass;
void
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index d13fa1a5d0..39854e43b1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -34,6 +34,7 @@
#include "util/u_cpu_detect.h"
+#include "util/u_memory.h"
#include "util/u_debug.h"
#include "lp_bld_type.h"
@@ -187,12 +188,10 @@ lp_build_compare(LLVMBuilderRef builder,
return lp_build_undef(type);
}
- /* There are no signed byte and unsigned word/dword comparison
- * instructions. So flip the sign bit so that the results match.
+ /* There are no unsigned comparison instructions. So flip the sign bit
+ * so that the results match.
*/
- if(table[func].gt &&
- ((type.width == 8 && type.sign) ||
- (type.width != 8 && !type.sign))) {
+ if (table[func].gt && !type.sign) {
LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
a = LLVMBuildXor(builder, a, msb, "");
b = LLVMBuildXor(builder, b, msb, "");
@@ -384,6 +383,46 @@ lp_build_select(struct lp_build_context *bld,
mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), "");
res = LLVMBuildSelect(bld->builder, mask, a, b, "");
}
+ else if (util_cpu_caps.has_sse4_1 &&
+ type.width * type.length == 128 &&
+ !LLVMIsConstant(a) &&
+ !LLVMIsConstant(b) &&
+ !LLVMIsConstant(mask)) {
+ const char *intrinsic;
+ LLVMTypeRef arg_type;
+ LLVMValueRef args[3];
+
+ if (type.width == 64) {
+ intrinsic = "llvm.x86.sse41.blendvpd";
+ arg_type = LLVMVectorType(LLVMDoubleType(), 2);
+ } else if (type.width == 32) {
+ intrinsic = "llvm.x86.sse41.blendvps";
+ arg_type = LLVMVectorType(LLVMFloatType(), 4);
+ } else {
+ intrinsic = "llvm.x86.sse41.pblendvb";
+ arg_type = LLVMVectorType(LLVMInt8Type(), 16);
+ }
+
+ if (arg_type != bld->int_vec_type) {
+ mask = LLVMBuildBitCast(bld->builder, mask, arg_type, "");
+ }
+
+ if (arg_type != bld->vec_type) {
+ a = LLVMBuildBitCast(bld->builder, a, arg_type, "");
+ b = LLVMBuildBitCast(bld->builder, b, arg_type, "");
+ }
+
+ args[0] = b;
+ args[1] = a;
+ args[2] = mask;
+
+ res = lp_build_intrinsic(bld->builder, intrinsic,
+ arg_type, args, Elements(args));
+
+ if (arg_type != bld->vec_type) {
+ res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, "");
+ }
+ }
else {
if(type.floating) {
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index 186f8849b8..7748f8f099 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -427,3 +427,123 @@ lp_build_pack(LLVMBuilderRef builder,
return tmp[0];
}
+
+
+/**
+ * Truncate or expand the bitwidth.
+ *
+ * NOTE: Getting the right sign flags is crucial here, as we employ some
+ * intrinsics that do saturation.
+ */
+void
+lp_build_resize(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ const LLVMValueRef *src, unsigned num_srcs,
+ LLVMValueRef *dst, unsigned num_dsts)
+{
+ LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+
+ /*
+ * We don't support float <-> int conversion here. That must be done
+ * before/after calling this function.
+ */
+ assert(src_type.floating == dst_type.floating);
+
+ /*
+ * We don't support double <-> float conversion yet, although it could be
+ * added with little effort.
+ */
+ assert((!src_type.floating && !dst_type.floating) ||
+ src_type.width == dst_type.width);
+
+ /* We must not loose or gain channels. Only precision */
+ assert(src_type.length * num_srcs == dst_type.length * num_dsts);
+
+ /* We don't support M:N conversion, only 1:N, M:1, or 1:1 */
+ assert(num_srcs == 1 || num_dsts == 1);
+
+ assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
+ assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
+
+ if (src_type.width > dst_type.width) {
+ /*
+ * Truncate bit width.
+ */
+
+ assert(num_dsts == 1);
+
+ if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
+ /*
+ * Register width remains constant -- use vector packing intrinsics
+ */
+
+ tmp[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);
+ }
+ else {
+ /*
+ * Do it element-wise.
+ */
+
+ assert(src_type.length == dst_type.length);
+ tmp[0] = lp_build_undef(dst_type);
+ for (i = 0; i < dst_type.length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, "");
+ val = LLVMBuildTrunc(builder, val, lp_build_elem_type(dst_type), "");
+ tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, "");
+ }
+ }
+ }
+ else if (src_type.width < dst_type.width) {
+ /*
+ * Expand bit width.
+ */
+
+ assert(num_srcs == 1);
+
+ if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
+ /*
+ * Register width remains constant -- use vector unpack intrinsics
+ */
+ lp_build_unpack(builder, src_type, dst_type, src[0], tmp, num_dsts);
+ }
+ else {
+ /*
+ * Do it element-wise.
+ */
+
+ assert(src_type.length == dst_type.length);
+ tmp[0] = lp_build_undef(dst_type);
+ for (i = 0; i < dst_type.length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, "");
+
+ if (src_type.sign && dst_type.sign) {
+ val = LLVMBuildSExt(builder, val, lp_build_elem_type(dst_type), "");
+ } else {
+ val = LLVMBuildZExt(builder, val, lp_build_elem_type(dst_type), "");
+ }
+ tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, "");
+ }
+ }
+ }
+ else {
+ /*
+ * No-op
+ */
+
+ assert(num_srcs == 1);
+ assert(num_dsts == 1);
+
+ tmp[0] = src[0];
+ }
+
+ for(i = 0; i < num_dsts; ++i)
+ dst[i] = tmp[i];
+}
+
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
index 41adeed220..e470082b97 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -92,4 +92,12 @@ lp_build_pack(LLVMBuilderRef builder,
const LLVMValueRef *src, unsigned num_srcs);
+void
+lp_build_resize(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ const LLVMValueRef *src, unsigned num_srcs,
+ LLVMValueRef *dst, unsigned num_dsts);
+
+
#endif /* !LP_BLD_PACK_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
index 38fd5a39ef..ca36046d22 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
@@ -61,8 +61,8 @@ LLVMValueRef
lp_build_ddx(struct lp_build_context *bld,
LLVMValueRef a)
{
- LLVMValueRef a_left = lp_build_swizzle1_aos(bld, a, swizzle_left);
- LLVMValueRef a_right = lp_build_swizzle1_aos(bld, a, swizzle_right);
+ LLVMValueRef a_left = lp_build_swizzle_aos(bld, a, swizzle_left);
+ LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right);
return lp_build_sub(bld, a_right, a_left);
}
@@ -71,8 +71,8 @@ LLVMValueRef
lp_build_ddy(struct lp_build_context *bld,
LLVMValueRef a)
{
- LLVMValueRef a_top = lp_build_swizzle1_aos(bld, a, swizzle_top);
- LLVMValueRef a_bottom = lp_build_swizzle1_aos(bld, a, swizzle_bottom);
+ LLVMValueRef a_top = lp_build_swizzle_aos(bld, a, swizzle_top);
+ LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom);
return lp_build_sub(bld, a_bottom, a_top);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 946c23e317..0fd014ab9b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -40,7 +40,6 @@
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_type.h"
-#include "lp_bld_format.h"
#include "lp_bld_sample.h"
@@ -125,73 +124,53 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
/**
- * Gather elements from scatter positions in memory into a single vector.
- * Use for fetching texels from a texture.
- * For SSE, typical values are length=4, src_width=32, dst_width=32.
- *
- * @param length length of the offsets
- * @param src_width src element width in bits
- * @param dst_width result element width in bits (src will be expanded to fit)
- * @param base_ptr base pointer, should be a i8 pointer type.
- * @param offsets vector with offsets
- */
-LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
- unsigned length,
- unsigned src_width,
- unsigned dst_width,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets)
-{
- LLVMTypeRef src_type = LLVMIntType(src_width);
- LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
- LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
- LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
- LLVMValueRef res;
- unsigned i;
-
- res = LLVMGetUndef(dst_vec_type);
- for(i = 0; i < length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef elem_offset;
- LLVMValueRef elem_ptr;
- LLVMValueRef elem;
-
- elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
- elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
- elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
- elem = LLVMBuildLoad(builder, elem_ptr, "");
-
- assert(src_width <= dst_width);
- if(src_width > dst_width)
- elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
- if(src_width < dst_width)
- elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
-
- res = LLVMBuildInsertElement(builder, res, elem, index, "");
- }
-
- return res;
-}
-
-
-/**
* Compute the offset of a pixel block.
*
- * x, y, z, y_stride, z_stride are vectors, and they refer to pixel blocks, as
- * per format description, and not individual pixels.
+ * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
+ *
+ * Returns the relative offset and i,j sub-block coordinates
*/
-LLVMValueRef
+void
lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,
LLVMValueRef x,
LLVMValueRef y,
LLVMValueRef z,
LLVMValueRef y_stride,
- LLVMValueRef z_stride)
+ LLVMValueRef z_stride,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i,
+ LLVMValueRef *out_j)
{
LLVMValueRef x_stride;
LLVMValueRef offset;
+ LLVMValueRef i;
+ LLVMValueRef j;
+
+ /*
+ * Describe the coordinates in terms of pixel blocks.
+ *
+ * TODO: pixel blocks are power of two. LLVM should convert rem/div to
+ * bit arithmetic. Verify this.
+ */
+
+ if (format_desc->block.width == 1) {
+ i = bld->zero;
+ }
+ else {
+ LLVMValueRef block_width = lp_build_const_int_vec(bld->type, format_desc->block.width);
+ i = LLVMBuildURem(bld->builder, x, block_width, "");
+ x = LLVMBuildUDiv(bld->builder, x, block_width, "");
+ }
+
+ if (format_desc->block.height == 1) {
+ j = bld->zero;
+ }
+ else {
+ LLVMValueRef block_height = lp_build_const_int_vec(bld->type, format_desc->block.height);
+ j = LLVMBuildURem(bld->builder, y, block_height, "");
+ y = LLVMBuildUDiv(bld->builder, y, block_height, "");
+ }
x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8);
offset = lp_build_mul(bld, x, x_stride);
@@ -206,5 +185,7 @@ lp_build_sample_offset(struct lp_build_context *bld,
offset = lp_build_add(bld, offset, z_offset);
}
- return offset;
+ *out_offset = offset;
+ *out_i = i;
+ *out_j = j;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 51e98ab2f9..5b8f478094 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -146,23 +146,17 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_sampler_state *sampler);
-LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
- unsigned length,
- unsigned src_width,
- unsigned dst_width,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets);
-
-
-LLVMValueRef
+void
lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,
LLVMValueRef x,
LLVMValueRef y,
LLVMValueRef z,
LLVMValueRef y_stride,
- LLVMValueRef z_stride);
+ LLVMValueRef z_stride,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i,
+ LLVMValueRef *out_j);
void
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 84c04fe272..1a20d74cac 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -50,8 +50,10 @@
#include "lp_bld_swizzle.h"
#include "lp_bld_pack.h"
#include "lp_bld_flow.h"
+#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
+#include "lp_bld_quad.h"
/**
@@ -264,35 +266,11 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
}
}
- /*
- * Describe the coordinates in terms of pixel blocks.
- *
- * TODO: pixel blocks are power of two. LLVM should convert rem/div to
- * bit arithmetic. Verify this.
- */
-
- if (bld->format_desc->block.width == 1) {
- i = bld->uint_coord_bld.zero;
- }
- else {
- LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
- i = LLVMBuildURem(bld->builder, x, block_width, "");
- x = LLVMBuildUDiv(bld->builder, x, block_width, "");
- }
-
- if (bld->format_desc->block.height == 1) {
- j = bld->uint_coord_bld.zero;
- }
- else {
- LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
- j = LLVMBuildURem(bld->builder, y, block_height, "");
- y = LLVMBuildUDiv(bld->builder, y, block_height, "");
- }
-
/* convert x,y,z coords to linear offset from start of texture, in bytes */
- offset = lp_build_sample_offset(&bld->uint_coord_bld,
- bld->format_desc,
- x, y, z, y_stride, z_stride);
+ lp_build_sample_offset(&bld->uint_coord_bld,
+ bld->format_desc,
+ x, y, z, y_stride, z_stride,
+ &offset, &i, &j);
if (use_border) {
/* If we can sample the border color, it means that texcoords may
@@ -344,6 +322,9 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
}
+/**
+ * Fetch the texels as <4n x i8> in AoS form.
+ */
static LLVMValueRef
lp_build_sample_packed(struct lp_build_sample_context *bld,
LLVMValueRef x,
@@ -351,25 +332,46 @@ lp_build_sample_packed(struct lp_build_sample_context *bld,
LLVMValueRef y_stride,
LLVMValueRef data_array)
{
- LLVMValueRef offset;
+ LLVMValueRef offset, i, j;
LLVMValueRef data_ptr;
+ LLVMValueRef res;
- offset = lp_build_sample_offset(&bld->uint_coord_bld,
- bld->format_desc,
- x, y, NULL, y_stride, NULL);
-
- assert(bld->format_desc->block.width == 1);
- assert(bld->format_desc->block.height == 1);
- assert(bld->format_desc->block.bits <= bld->texel_type.width);
+ /* convert x,y,z coords to linear offset from start of texture, in bytes */
+ lp_build_sample_offset(&bld->uint_coord_bld,
+ bld->format_desc,
+ x, y, NULL, y_stride, NULL,
+ &offset, &i, &j);
/* get pointer to mipmap level 0 data */
data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
- return lp_build_gather(bld->builder,
- bld->texel_type.length,
- bld->format_desc->block.bits,
- bld->texel_type.width,
- data_ptr, offset);
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ /* Just fetch the data directly without swizzling */
+ assert(bld->format_desc->block.width == 1);
+ assert(bld->format_desc->block.height == 1);
+ assert(bld->format_desc->block.bits <= bld->texel_type.width);
+
+ res = lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset);
+ }
+ else {
+ struct lp_type type;
+
+ assert(bld->texel_type.width == 32);
+
+ memset(&type, 0, sizeof type);
+ type.width = 8;
+ type.length = bld->texel_type.length*4;
+ type.norm = TRUE;
+
+ res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type,
+ data_ptr, offset, i, j);
+ }
+
+ return res;
}
@@ -817,9 +819,8 @@ lp_build_minify(struct lp_build_sample_context *bld,
/**
* Generate code to compute texture level of detail (lambda).
- * \param s vector of texcoord s values
- * \param t vector of texcoord t values
- * \param r vector of texcoord r values
+ * \param ddx partial derivatives of (s, t, r, q) with respect to X
+ * \param ddy partial derivatives of (s, t, r, q) with respect to Y
* \param lod_bias optional float vector with the shader lod bias
* \param explicit_lod optional float vector with the explicit lod
* \param width scalar int texture width
@@ -831,11 +832,8 @@ lp_build_minify(struct lp_build_sample_context *bld,
*/
static LLVMValueRef
lp_build_lod_selector(struct lp_build_sample_context *bld,
- LLVMValueRef s,
- LLVMValueRef t,
- LLVMValueRef r,
- const LLVMValueRef *ddx,
- const LLVMValueRef *ddy,
+ const LLVMValueRef ddx[4],
+ const LLVMValueRef ddy[4],
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef width,
@@ -870,14 +868,6 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
LLVMValueRef rho;
- /*
- * dsdx = abs(s[1] - s[0]);
- * dsdy = abs(s[2] - s[0]);
- * dtdx = abs(t[1] - t[0]);
- * dtdy = abs(t[2] - t[0]);
- * drdx = abs(r[1] - r[0]);
- * drdy = abs(r[2] - r[0]);
- */
dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
dsdx = lp_build_abs(float_bld, dsdx);
dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
@@ -1287,7 +1277,7 @@ lp_build_cube_face(struct lp_build_sample_context *bld,
/**
- * Generate code to do cube face selection and per-face texcoords.
+ * Generate code to do cube face selection and compute per-face texcoords.
*/
static void
lp_build_cube_lookup(struct lp_build_sample_context *bld,
@@ -1411,7 +1401,6 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
lp_build_endif(&if_ctx2);
lp_build_flow_scope_end(flow_ctx2);
lp_build_flow_destroy(flow_ctx2);
-
*face_s = face_s2;
*face_t = face_t2;
*face = face2;
@@ -1457,13 +1446,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
int chan;
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+ /* sample the first mipmap level */
lp_build_sample_image_nearest(bld,
width0_vec, height0_vec, depth0_vec,
row_stride0_vec, img_stride0_vec,
data_ptr0, s, t, r, colors0);
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level, and interp */
+ /* sample the second mipmap level */
lp_build_sample_image_nearest(bld,
width1_vec, height1_vec, depth1_vec,
row_stride1_vec, img_stride1_vec,
@@ -1473,13 +1463,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
else {
assert(img_filter == PIPE_TEX_FILTER_LINEAR);
+ /* sample the first mipmap level */
lp_build_sample_image_linear(bld,
width0_vec, height0_vec, depth0_vec,
row_stride0_vec, img_stride0_vec,
data_ptr0, s, t, r, colors0);
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level, and interp */
+ /* sample the second mipmap level */
lp_build_sample_image_linear(bld,
width1_vec, height1_vec, depth1_vec,
row_stride1_vec, img_stride1_vec,
@@ -1542,6 +1533,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
LLVMValueRef data_ptr0, data_ptr1 = NULL;
+ LLVMValueRef face_ddx[4], face_ddy[4];
/*
printf("%s mip %d min %d mag %d\n", __FUNCTION__,
@@ -1549,6 +1541,30 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
*/
/*
+ * Choose cube face, recompute texcoords and derivatives for the chosen face.
+ */
+ if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ LLVMValueRef face, face_s, face_t;
+ lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
+ s = face_s; /* vec */
+ t = face_t; /* vec */
+ /* use 'r' to indicate cube face */
+ r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
+
+ /* recompute ddx, ddy using the new (s,t) face texcoords */
+ face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
+ face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
+ face_ddx[2] = NULL;
+ face_ddx[3] = NULL;
+ face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
+ face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
+ face_ddy[2] = NULL;
+ face_ddy[3] = NULL;
+ ddx = face_ddx;
+ ddy = face_ddy;
+ }
+
+ /*
* Compute the level of detail (float).
*/
if (min_filter != mag_filter ||
@@ -1556,7 +1572,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
- lod = lp_build_lod_selector(bld, s, t, r, ddx, ddy,
+ lod = lp_build_lod_selector(bld, ddx, ddy,
lod_bias, explicit_lod,
width, height, depth);
}
@@ -1566,9 +1582,20 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
*/
if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
/* always use mip level 0 */
- ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ /* XXX this is a work-around for an apparent bug in LLVM 2.7.
+ * We should be able to set ilevel0 = const(0) but that causes
+ * bad x86 code to be emitted.
+ */
+ lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
+ lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ }
+ else {
+ ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ }
}
else {
+ assert(lod);
if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
}
@@ -1623,18 +1650,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
}
/*
- * Choose cube face, recompute per-face texcoords.
- */
- if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
- LLVMValueRef face, face_s, face_t;
- lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
- s = face_s; /* vec */
- t = face_t; /* vec */
- /* use 'r' to indicate cube face */
- r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
- }
-
- /*
* Get pointer(s) to image data for mipmap level(s).
*/
data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
@@ -1712,36 +1727,6 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
static void
-lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
- struct lp_type dst_type,
- LLVMValueRef packed,
- LLVMValueRef *rgba)
-{
- LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
- unsigned chan;
-
- /* Decode the input vector components */
- for (chan = 0; chan < 4; ++chan) {
- unsigned start = chan*8;
- unsigned stop = start + 8;
- LLVMValueRef input;
-
- input = packed;
-
- if(start)
- input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), "");
-
- if(stop < 32)
- input = LLVMBuildAnd(builder, input, mask, "");
-
- input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
-
- rgba[chan] = input;
- }
-}
-
-
-static void
lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef s,
LLVMValueRef t,
@@ -1935,15 +1920,20 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
* Convert to SoA and swizzle.
*/
- packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
-
lp_build_rgba8_to_f32_soa(bld->builder,
bld->texel_type,
packed, unswizzled);
- lp_build_format_swizzle_soa(bld->format_desc,
- &bld->texel_bld,
- unswizzled, texel_out);
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ lp_build_format_swizzle_soa(bld->format_desc,
+ &bld->texel_bld,
+ unswizzled, texel_out);
+ } else {
+ texel_out[0] = unswizzled[0];
+ texel_out[1] = unswizzled[1];
+ texel_out[2] = unswizzled[2];
+ texel_out[3] = unswizzled[3];
+ }
apply_sampler_swizzle(bld, texel_out);
}
@@ -2007,6 +1997,8 @@ lp_build_sample_nop(struct lp_build_sample_context *bld,
* 'texel' will return a vector of four LLVMValueRefs corresponding to
* R, G, B, A.
* \param type vector float type to use for coords, etc.
+ * \param ddx partial derivatives of (s,t,r,q) with respect to x
+ * \param ddy partial derivatives of (s,t,r,q) with respect to y
*/
void
lp_build_sample_soa(LLVMBuilderRef builder,
@@ -2016,8 +2008,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
- const LLVMValueRef *ddx,
- const LLVMValueRef *ddy,
+ const LLVMValueRef ddx[4],
+ const LLVMValueRef ddy[4],
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef texel_out[4])
@@ -2079,7 +2071,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
/* For debug: no-op texture sampling */
lp_build_sample_nop(&bld, texel_out);
}
- else if (util_format_is_rgba8_variant(bld.format_desc) &&
+ else if (util_format_fits_8unorm(bld.format_desc) &&
+ bld.format_desc->nr_channels > 1 &&
static_state->target == PIPE_TEXTURE_2D &&
static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
index 3c8a7bc09e..20cf96ca66 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
@@ -110,7 +110,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
/* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
* using shuffles here actually causes worst results. More investigation is
* needed. */
- if (n <= 4) {
+ if (type.width >= 16) {
/*
* Shuffle.
*/
@@ -132,7 +132,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
* YY00 YY00 .... YY00
* YYYY YYYY .... YYYY <= output
*/
- struct lp_type type4 = type;
+ struct lp_type type4;
const char shifts[4][2] = {
{ 1, 2},
{-1, 2},
@@ -147,6 +147,13 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), "");
+ /*
+ * Build a type where each element is an integer that cover the four
+ * channels.
+ */
+
+ type4 = type;
+ type4.floating = FALSE;
type4.width *= 4;
type4.length /= 4;
@@ -176,80 +183,170 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
LLVMValueRef
-lp_build_swizzle1_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- const unsigned char swizzle[4])
+lp_build_swizzle_aos(struct lp_build_context *bld,
+ LLVMValueRef a,
+ const unsigned char swizzles[4])
{
- const unsigned n = bld->type.length;
+ const struct lp_type type = bld->type;
+ const unsigned n = type.length;
unsigned i, j;
- if(a == bld->undef || a == bld->zero || a == bld->one)
+ if (swizzles[0] == PIPE_SWIZZLE_RED &&
+ swizzles[1] == PIPE_SWIZZLE_GREEN &&
+ swizzles[2] == PIPE_SWIZZLE_BLUE &&
+ swizzles[3] == PIPE_SWIZZLE_ALPHA) {
return a;
+ }
- if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3])
- return lp_build_broadcast_aos(bld, a, swizzle[0]);
+ if (swizzles[0] == swizzles[1] &&
+ swizzles[1] == swizzles[2] &&
+ swizzles[2] == swizzles[3]) {
+ switch (swizzles[0]) {
+ case PIPE_SWIZZLE_RED:
+ case PIPE_SWIZZLE_GREEN:
+ case PIPE_SWIZZLE_BLUE:
+ case PIPE_SWIZZLE_ALPHA:
+ return lp_build_broadcast_aos(bld, a, swizzles[0]);
+ case PIPE_SWIZZLE_ZERO:
+ return bld->zero;
+ case PIPE_SWIZZLE_ONE:
+ return bld->one;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+ }
- {
+ if (type.width >= 16) {
/*
* Shuffle.
*/
- LLVMTypeRef elem_type = LLVMInt32Type();
+ LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(type));
+ LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
+
+ memset(aux, 0, sizeof aux);
+
+ for(j = 0; j < n; j += 4) {
+ for(i = 0; i < 4; ++i) {
+ unsigned shuffle;
+ switch (swizzles[i]) {
+ default:
+ assert(0);
+ /* fall through */
+ case PIPE_SWIZZLE_RED:
+ case PIPE_SWIZZLE_GREEN:
+ case PIPE_SWIZZLE_BLUE:
+ case PIPE_SWIZZLE_ALPHA:
+ shuffle = j + swizzles[i];
+ break;
+ case PIPE_SWIZZLE_ZERO:
+ shuffle = type.length + 0;
+ if (!aux[0]) {
+ aux[0] = lp_build_const_elem(type, 0.0);
+ }
+ break;
+ case PIPE_SWIZZLE_ONE:
+ shuffle = type.length + 1;
+ if (!aux[1]) {
+ aux[1] = lp_build_const_elem(type, 1.0);
+ }
+ break;
+ }
+ shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
+ }
+ }
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
- shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0);
-
- return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
- }
-}
-
+ for (i = 0; i < n; ++i) {
+ if (!aux[i]) {
+ aux[i] = undef;
+ }
+ }
-LLVMValueRef
-lp_build_swizzle2_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- LLVMValueRef b,
- const unsigned char swizzle[4])
-{
- const unsigned n = bld->type.length;
- unsigned i, j;
+ return LLVMBuildShuffleVector(bld->builder, a,
+ LLVMConstVector(aux, n),
+ LLVMConstVector(shuffles, n), "");
+ } else {
+ /*
+ * Bit mask and shifts.
+ *
+ * For example, this will convert BGRA to RGBA by doing
+ *
+ * rgba = (bgra & 0x00ff0000) >> 16
+ * | (bgra & 0xff00ff00)
+ * | (bgra & 0x000000ff) << 16
+ *
+ * This is necessary not only for faster cause, but because X86 backend
+ * will refuse shuffles of <4 x i8> vectors
+ */
+ LLVMValueRef res;
+ struct lp_type type4;
+ boolean cond[4];
+ unsigned chan;
+ int shift;
- if(swizzle[0] < 4 && swizzle[1] < 4 && swizzle[2] < 4 && swizzle[3] < 4)
- return lp_build_swizzle1_aos(bld, a, swizzle);
+ /*
+ * Start with a mixture of 1 and 0.
+ */
+ for (chan = 0; chan < 4; ++chan) {
+ cond[chan] = swizzles[chan] == PIPE_SWIZZLE_ONE ? TRUE : FALSE;
+ }
+ res = lp_build_select_aos(bld, bld->one, bld->zero, cond);
- if(a == b) {
- unsigned char swizzle1[4];
- swizzle1[0] = swizzle[0] % 4;
- swizzle1[1] = swizzle[1] % 4;
- swizzle1[2] = swizzle[2] % 4;
- swizzle1[3] = swizzle[3] % 4;
- return lp_build_swizzle1_aos(bld, a, swizzle1);
- }
+ /*
+ * Build a type where each element is an integer that cover the four
+ * channels.
+ */
+ type4 = type;
+ type4.floating = FALSE;
+ type4.width *= 4;
+ type4.length /= 4;
- if(swizzle[0] % 4 == 0 &&
- swizzle[1] % 4 == 1 &&
- swizzle[2] % 4 == 2 &&
- swizzle[3] % 4 == 3) {
- boolean cond[4];
- cond[0] = swizzle[0] / 4;
- cond[1] = swizzle[1] / 4;
- cond[2] = swizzle[2] / 4;
- cond[3] = swizzle[3] / 4;
- return lp_build_select_aos(bld, a, b, cond);
- }
+ a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
+ res = LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type4), "");
- {
/*
- * Shuffle.
+ * Mask and shift the channels, trying to group as many channels in the
+ * same shift as possible
*/
- LLVMTypeRef elem_type = LLVMInt32Type();
- LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
-
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
- shuffles[j + i] = LLVMConstInt(elem_type, j + (swizzle[i] % 4) + (swizzle[i] / 4 * n), 0);
+ for (shift = -3; shift <= 3; ++shift) {
+ unsigned long long mask = 0;
+
+ assert(type4.width <= sizeof(mask)*8);
+
+ for (chan = 0; chan < 4; ++chan) {
+ /* FIXME: big endian */
+ if (swizzles[chan] < 4 &&
+ chan - swizzles[chan] == shift) {
+ mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
+ }
+ }
+
+ if (mask) {
+ LLVMValueRef masked;
+ LLVMValueRef shifted;
+
+ if (0)
+ debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask);
+
+ masked = LLVMBuildAnd(bld->builder, a,
+ lp_build_const_int_vec(type4, mask), "");
+ if (shift > 0) {
+ shifted = LLVMBuildShl(bld->builder, masked,
+ lp_build_const_int_vec(type4, shift*type.width), "");
+ } else if (shift < 0) {
+ shifted = LLVMBuildLShr(bld->builder, masked,
+ lp_build_const_int_vec(type4, -shift*type.width), "");
+ } else {
+ shifted = masked;
+ }
+
+ res = LLVMBuildOr(bld->builder, res, shifted, "");
+ }
+ }
- return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
+ return LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type), "");
}
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
index 4f4fa777c9..315e1bcb54 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
@@ -68,24 +68,12 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
/**
* Swizzle a vector consisting of an array of XYZW structs.
*
- * @param swizzle is the in [0,4[ range.
+ * @param swizzles is the in [0,4[ range.
*/
LLVMValueRef
-lp_build_swizzle1_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- const unsigned char swizzle[4]);
-
-
-/**
- * Swizzle two vector consisting of an array of XYZW structs.
- *
- * @param swizzle is the in [0,8[ range. Values in [4,8[ range refer to b.
- */
-LLVMValueRef
-lp_build_swizzle2_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- LLVMValueRef b,
- const unsigned char swizzle[4]);
+lp_build_swizzle_aos(struct lp_build_context *bld,
+ LLVMValueRef a,
+ const unsigned char swizzles[4]);
LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index dec7556138..21236839fb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -49,6 +49,7 @@
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
+#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
@@ -132,10 +133,14 @@ struct lp_build_tgsi_soa_context
LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];
- /* we allocate an array of temps if we have indirect
- * addressing and then the temps above is unused */
+ /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
+ * set in the indirect_files field.
+ * The temps[] array above is unused then.
+ */
LLVMValueRef temps_array;
- boolean has_indirect_addressing;
+
+ /** bitmask indicating which register files are accessed indirectly */
+ unsigned indirect_files;
struct lp_build_mask_context *mask;
struct lp_exec_mask exec_mask;
@@ -404,25 +409,92 @@ static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
lp_exec_mask_update(mask);
}
+
+/**
+ * Return pointer to a temporary register channel (src or dest).
+ * Note that indirect addressing cannot be handled here.
+ * \param index which temporary register
+ * \param chan which channel of the temp register.
+ */
static LLVMValueRef
get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
unsigned index,
- unsigned chan,
- boolean is_indirect,
- LLVMValueRef addr)
+ unsigned chan)
{
assert(chan < 4);
- if (!bld->has_indirect_addressing) {
- return bld->temps[index][chan];
- } else {
- LLVMValueRef lindex =
- LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0);
- if (is_indirect)
- lindex = lp_build_add(&bld->base, lindex, addr);
+ if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
+ LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
}
+ else {
+ return bld->temps[index][chan];
+ }
}
+
+/**
+ * Gather vector.
+ * XXX the lp_build_gather() function should be capable of doing this
+ * with a little work.
+ */
+static LLVMValueRef
+build_gather(struct lp_build_tgsi_soa_context *bld,
+ LLVMValueRef base_ptr,
+ LLVMValueRef indexes)
+{
+ LLVMValueRef res = bld->base.undef;
+ unsigned i;
+
+ /*
+ * Loop over elements of index_vec, load scalar value, insert it into 'res'.
+ */
+ for (i = 0; i < bld->base.type.length; i++) {
+ LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
+ indexes, ii, "");
+ LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
+ &index, 1, "");
+ LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
+
+ res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
+ }
+
+ return res;
+}
+
+
+/**
+ * Read the current value of the ADDR register, convert the floats to
+ * ints, multiply by four and return the vector of offsets.
+ * The offsets will be used to index into the constant buffer or
+ * temporary register file.
+ */
+static LLVMValueRef
+get_indirect_offsets(struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_src_register *indirect_reg)
+{
+ /* always use X component of address register */
+ const int x = indirect_reg->SwizzleX;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
+ uint swizzle = tgsi_util_get_src_register_swizzle(indirect_reg, x);
+ LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4);
+ LLVMValueRef addr_vec;
+
+ addr_vec = LLVMBuildLoad(bld->base.builder,
+ bld->addr[indirect_reg->Index][swizzle],
+ "load addr reg");
+
+ /* for indexing we want integers */
+ addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec,
+ int_vec_type, "");
+
+ /* addr_vec = addr_vec * 4 */
+ addr_vec = lp_build_mul(&bld->base, addr_vec, vec4);
+
+ return addr_vec;
+}
+
+
/**
* Register fetch.
*/
@@ -430,14 +502,14 @@ static LLVMValueRef
emit_fetch(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
- unsigned index,
+ unsigned src_op,
const unsigned chan_index )
{
- const struct tgsi_full_src_register *reg = &inst->Src[index];
+ const struct tgsi_full_src_register *reg = &inst->Src[src_op];
const unsigned swizzle =
tgsi_util_get_full_src_register_swizzle(reg, chan_index);
LLVMValueRef res;
- LLVMValueRef addr = NULL;
+ LLVMValueRef addr_vec = NULL;
if (swizzle > 3) {
assert(0 && "invalid swizzle in emit_fetch()");
@@ -445,32 +517,33 @@ emit_fetch(
}
if (reg->Register.Indirect) {
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
- unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
- addr = LLVMBuildLoad(bld->base.builder,
- bld->addr[reg->Indirect.Index][swizzle],
- "");
- /* for indexing we want integers */
- addr = LLVMBuildFPToSI(bld->base.builder, addr,
- int_vec_type, "");
- addr = LLVMBuildExtractElement(bld->base.builder,
- addr, LLVMConstInt(LLVMInt32Type(), 0, 0),
- "");
- addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0));
+ assert(bld->indirect_files);
+ addr_vec = get_indirect_offsets(bld, &reg->Indirect);
}
switch (reg->Register.File) {
case TGSI_FILE_CONSTANT:
- {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(),
- reg->Register.Index*4 + swizzle, 0);
+ if (reg->Register.Indirect) {
+ LLVMValueRef index_vec; /* index into the const buffer */
+
+ assert(bld->indirect_files & (1 << TGSI_FILE_CONSTANT));
+
+ /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
+ index_vec = lp_build_const_int_vec(bld->int_bld.type,
+ reg->Register.Index * 4 + swizzle);
+
+ /* index_vec = index_vec + addr_vec */
+ index_vec = lp_build_add(&bld->base, index_vec, addr_vec);
+
+ /* Gather values from the constant buffer */
+ res = build_gather(bld, bld->consts_ptr, index_vec);
+ }
+ else {
+ LLVMValueRef index; /* index into the const buffer */
LLVMValueRef scalar, scalar_ptr;
- if (reg->Register.Indirect) {
- /*lp_build_printf(bld->base.builder,
- "\taddr = %d\n", addr);*/
- index = lp_build_add(&bld->base, index, addr);
- }
+ index = lp_build_const_int32(reg->Register.Index*4 + swizzle);
+
scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
&index, 1, "");
scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
@@ -490,13 +563,38 @@ emit_fetch(
break;
case TGSI_FILE_TEMPORARY:
- {
- LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
- swizzle,
- reg->Register.Indirect,
- addr);
+ if (reg->Register.Indirect) {
+ LLVMValueRef vec_len =
+ lp_build_const_int_vec(bld->int_bld.type, bld->base.type.length);
+ LLVMValueRef index_vec; /* index into the const buffer */
+ LLVMValueRef temps_array;
+ LLVMTypeRef float4_ptr_type;
+
+ assert(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));
+
+ /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
+ index_vec = lp_build_const_int_vec(bld->int_bld.type,
+ reg->Register.Index * 4 + swizzle);
+
+ /* index_vec += addr_vec */
+ index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);
+
+ /* index_vec *= vector_length */
+ index_vec = lp_build_mul(&bld->int_bld, index_vec, vec_len);
+
+ /* cast temps_array pointer to float* */
+ float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
+ temps_array = LLVMBuildBitCast(bld->int_bld.builder, bld->temps_array,
+ float4_ptr_type, "");
+
+ /* Gather values from the temporary register array */
+ res = build_gather(bld, temps_array, index_vec);
+ }
+ else {
+ LLVMValueRef temp_ptr;
+ temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
- if(!res)
+ if (!res)
return bld->base.undef;
}
break;
@@ -660,8 +758,12 @@ emit_store(
}
if (reg->Register.Indirect) {
+ /* XXX use get_indirect_offsets() here eventually */
LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
unsigned swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, chan_index );
+
+ assert(bld->indirect_files);
+
addr = LLVMBuildLoad(bld->base.builder,
bld->addr[reg->Indirect.Index][swizzle],
"");
@@ -680,14 +782,18 @@ emit_store(
bld->outputs[reg->Register.Index][chan_index]);
break;
- case TGSI_FILE_TEMPORARY: {
- LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
- chan_index,
- reg->Register.Indirect,
- addr);
- lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
+ case TGSI_FILE_TEMPORARY:
+ if (reg->Register.Indirect) {
+ /* XXX not done yet */
+ debug_printf("WARNING: LLVM scatter store of temp regs"
+ " not implemented\n");
+ }
+ else {
+ LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
+ chan_index);
+ lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
+ }
break;
- }
case TGSI_FILE_ADDRESS:
lp_exec_mask_store(&bld->exec_mask, pred, value,
@@ -905,7 +1011,7 @@ emit_declaration(
switch (decl->Declaration.File) {
case TGSI_FILE_TEMPORARY:
assert(idx < LP_MAX_TGSI_TEMPS);
- if (bld->has_indirect_addressing) {
+ if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
last*4 + 4, 0);
bld->temps_array = lp_build_array_alloca(bld->base.builder,
@@ -1929,8 +2035,7 @@ lp_build_tgsi_soa(LLVMBuilderRef builder,
bld.outputs = outputs;
bld.consts_ptr = consts_ptr;
bld.sampler = sampler;
- bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
- info->opcode_count[TGSI_OPCODE_ARL] > 0;
+ bld.indirect_files = info->indirect_files;
bld.instructions = (struct tgsi_full_instruction *)
MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
bld.max_instructions = LP_MAX_INSTRUCTIONS;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index df77ef2155..3ffe916f8e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -316,6 +316,54 @@ LLVMTypeRef
lp_build_int32_vec4_type(void);
+static INLINE struct lp_type
+lp_float32_vec4_type(void)
+{
+ struct lp_type type;
+
+ memset(&type, 0, sizeof(type));
+ type.floating = TRUE;
+ type.sign = TRUE;
+ type.norm = FALSE;
+ type.width = 32;
+ type.length = 4;
+
+ return type;
+}
+
+
+static INLINE struct lp_type
+lp_int32_vec4_type(void)
+{
+ struct lp_type type;
+
+ memset(&type, 0, sizeof(type));
+ type.floating = FALSE;
+ type.sign = TRUE;
+ type.norm = FALSE;
+ type.width = 32;
+ type.length = 4;
+
+ return type;
+}
+
+
+static INLINE struct lp_type
+lp_unorm8_vec4_type(void)
+{
+ struct lp_type type;
+
+ memset(&type, 0, sizeof(type));
+ type.floating = FALSE;
+ type.sign = FALSE;
+ type.norm = TRUE;
+ type.width = 8;
+ type.length = 4;
+
+ return type;
+}
+
+
struct lp_type
lp_uint_type(struct lp_type type);
diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h
index 0238308d20..a084310d4f 100644
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@@ -45,7 +45,6 @@
#include <pthread.h> /* POSIX threads headers */
#include <stdio.h> /* for perror() */
-#define PIPE_THREAD_HAVE_CONDVAR
/* pipe_thread
*/
@@ -168,19 +167,59 @@ typedef CRITICAL_SECTION pipe_mutex;
#define pipe_mutex_unlock(mutex) \
LeaveCriticalSection(&mutex)
+/* TODO: Need a macro to declare "I don't care about WinXP compatibilty" */
+#if 0 && defined (_WIN32_WINNT) && (_WIN32_WINNT >= 0x0600)
+/* CONDITION_VARIABLE is only available on newer versions of Windows
+ * (Server 2008/Vista or later).
+ * http://msdn.microsoft.com/en-us/library/ms682052(VS.85).aspx
+ *
+ * pipe_condvar
+ */
+typedef CONDITION_VARIABLE pipe_condvar;
+
+#define pipe_static_condvar(cond) \
+ /*static*/ pipe_condvar cond = CONDITION_VARIABLE_INIT
+
+#define pipe_condvar_init(cond) \
+ InitializeConditionVariable(&(cond))
+
+#define pipe_condvar_destroy(cond) \
+ (void) cond /* nothing to do */
+
+#define pipe_condvar_wait(cond, mutex) \
+ SleepConditionVariableCS(&(cond), &(mutex), INFINITE)
+
+#define pipe_condvar_signal(cond) \
+ WakeConditionVariable(&(cond))
+
+#define pipe_condvar_broadcast(cond) \
+ WakeAllConditionVariable(&(cond))
+
+#else /* need compatibility with pre-Vista Win32 */
/* pipe_condvar (XXX FIX THIS)
+ * See http://www.cs.wustl.edu/~schmidt/win32-cv-1.html
+ * for potential pitfalls in implementation.
*/
-typedef unsigned pipe_condvar;
+typedef DWORD pipe_condvar;
+
+#define pipe_static_condvar(cond) \
+ /*static*/ pipe_condvar cond = 1
#define pipe_condvar_init(cond) \
- (void) cond
+ (void) (cond = 1)
#define pipe_condvar_destroy(cond) \
(void) cond
+/* Poor man's pthread_cond_wait():
+ Just release the mutex and sleep for one millisecond.
+ The caller's while() loop does all the work. */
#define pipe_condvar_wait(cond, mutex) \
- (void) cond; (void) mutex
+ do { pipe_mutex_unlock(mutex); \
+ Sleep(cond); \
+ pipe_mutex_lock(mutex); \
+ } while (0)
#define pipe_condvar_signal(cond) \
(void) cond
@@ -188,9 +227,12 @@ typedef unsigned pipe_condvar;
#define pipe_condvar_broadcast(cond) \
(void) cond
+#endif /* pre-Vista win32 */
#else
+#include "os/os_time.h"
+
/** Dummy definitions */
typedef unsigned pipe_thread;
@@ -214,7 +256,6 @@ static INLINE int pipe_thread_destroy( pipe_thread thread )
}
typedef unsigned pipe_mutex;
-typedef unsigned pipe_condvar;
#define pipe_static_mutex(mutex) \
static pipe_mutex mutex = 0
@@ -231,17 +272,25 @@ typedef unsigned pipe_condvar;
#define pipe_mutex_unlock(mutex) \
(void) mutex
+typedef int64_t pipe_condvar;
+
#define pipe_static_condvar(condvar) \
- static unsigned condvar = 0
+ static pipe_condvar condvar = 1000
#define pipe_condvar_init(condvar) \
- (void) condvar
+ (void) (condvar = 1000)
#define pipe_condvar_destroy(condvar) \
(void) condvar
+/* Poor man's pthread_cond_wait():
+ Just release the mutex and sleep for one millisecond.
+ The caller's while() loop does all the work. */
#define pipe_condvar_wait(condvar, mutex) \
- (void) condvar
+ do { pipe_mutex_unlock(mutex); \
+ os_time_sleep(condvar); \
+ pipe_mutex_lock(mutex); \
+ } while (0)
#define pipe_condvar_signal(condvar) \
(void) condvar
@@ -277,27 +326,7 @@ static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
}
-#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
-
-/* XXX FIX THIS */
-typedef unsigned pipe_barrier;
-
-static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
-{
- /* XXX we could implement barriers with a mutex and condition var */
-}
-
-static INLINE void pipe_barrier_destroy(pipe_barrier *barrier)
-{
-}
-
-static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
-{
- assert(0);
-}
-
-
-#else
+#else /* If the OS doesn't have its own, implement barriers using a mutex and a condvar */
typedef struct {
unsigned count;
diff --git a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
new file mode 100644
index 0000000000..0433da6141
--- /dev/null
+++ b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
@@ -0,0 +1,44 @@
+
+#ifndef INLINE_DEBUG_HELPER_H
+#define INLINE_DEBUG_HELPER_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+
+
+/* Helper function to wrap a screen with
+ * one or more debug driver: rbug, trace.
+ */
+
+#ifdef GALLIUM_TRACE
+#include "trace/tr_public.h"
+#endif
+
+#ifdef GALLIUM_RBUG
+#include "rbug/rbug_public.h"
+#endif
+
+#ifdef GALLIUM_GALAHAD
+#include "galahad/glhd_public.h"
+#endif
+
+static INLINE struct pipe_screen *
+debug_screen_wrap(struct pipe_screen *screen)
+{
+
+#if defined(GALLIUM_RBUG)
+ screen = rbug_screen_create(screen);
+#endif
+
+#if defined(GALLIUM_TRACE)
+ screen = trace_screen_create(screen);
+#endif
+
+#if defined(GALLIUM_GALAHAD)
+ screen = galahad_screen_create(screen);
+#endif
+
+ return screen;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
new file mode 100644
index 0000000000..036c1ee48a
--- /dev/null
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -0,0 +1,63 @@
+
+#ifndef INLINE_SW_HELPER_H
+#define INLINE_SW_HELPER_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+#include "state_tracker/sw_winsys.h"
+
+
+/* Helper function to choose and instantiate one of the software rasterizers:
+ * cell, llvmpipe, softpipe.
+ */
+
+#ifdef GALLIUM_SOFTPIPE
+#include "softpipe/sp_public.h"
+#endif
+
+#ifdef GALLIUM_LLVMPIPE
+#include "llvmpipe/lp_public.h"
+#endif
+
+#ifdef GALLIUM_CELL
+#include "cell/ppu/cell_public.h"
+#endif
+
+static INLINE struct pipe_screen *
+sw_screen_create(struct sw_winsys *winsys)
+{
+ const char *default_driver;
+ const char *driver;
+ struct pipe_screen *screen = NULL;
+
+#if defined(GALLIUM_CELL)
+ default_driver = "cell";
+#elif defined(GALLIUM_LLVMPIPE)
+ default_driver = "llvmpipe";
+#elif defined(GALLIUM_SOFTPIPE)
+ default_driver = "softpipe";
+#else
+ default_driver = "";
+#endif
+
+ driver = debug_get_option("GALLIUM_DRIVER", default_driver);
+
+#if defined(GALLIUM_CELL)
+ if (screen == NULL && strcmp(driver, "cell") == 0)
+ screen = cell_create_screen(winsys);
+#endif
+
+#if defined(GALLIUM_LLVMPIPE)
+ if (screen == NULL && strcmp(driver, "llvmpipe") == 0)
+ screen = llvmpipe_create_screen(winsys);
+#endif
+
+#if defined(GALLIUM_SOFTPIPE)
+ if (screen == NULL)
+ screen = softpipe_create_screen(winsys);
+#endif
+
+ return screen;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
new file mode 100644
index 0000000000..0b4e740403
--- /dev/null
+++ b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
@@ -0,0 +1,34 @@
+
+#ifndef INLINE_WRAPPER_SW_HELPER_H
+#define INLINE_WRAPPER_SW_HELPER_H
+
+#include "target-helpers/inline_sw_helper.h"
+#include "sw/wrapper/wrapper_sw_winsys.h"
+
+/**
+ * Try to wrap a hw screen with a software screen.
+ * On failure will return given screen.
+ */
+static INLINE struct pipe_screen *
+sw_screen_wrap(struct pipe_screen *screen)
+{
+ struct sw_winsys *sws;
+ struct pipe_screen *sw_screen;
+
+ sws = wrapper_sw_winsys_warp_pipe_screen(screen);
+ if (!sws)
+ goto err;
+
+ sw_screen = sw_screen_create(sws);
+ if (sw_screen == screen)
+ goto err_winsys;
+
+ return sw_screen;
+
+err_winsys:
+ sws->destroy(sws);
+err:
+ return screen;
+}
+
+#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 9fcc28f4c9..f71ffb7030 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -176,7 +176,11 @@ static const char *primitive_names[] =
"TRIANGLE_FAN",
"QUADS",
"QUAD_STRIP",
- "POLYGON"
+ "POLYGON",
+ "LINES_ADJACENCY",
+ "LINE_STRIP_ADJACENCY",
+ "TRIANGLES_ADJACENCY",
+ "TRIANGLE_STRIP_ADJACENCY"
};
static const char *fs_coord_origin_names[] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index ced9c94f46..90198a4f60 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -109,6 +109,19 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->input_usage_mask[ind] |= usage_mask;
}
}
+
+ /* check for indirect register reads */
+ if (src->Register.Indirect) {
+ info->indirect_files |= (1 << src->Register.File);
+ }
+ }
+
+ /* check for indirect register writes */
+ for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) {
+ const struct tgsi_full_dst_register *dst = &fullinst->Dst[i];
+ if (dst->Register.Indirect) {
+ info->indirect_files |= (1 << dst->Register.File);
+ }
}
info->num_instructions++;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index e75280336f..f8aa90cf06 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -63,6 +63,12 @@ struct tgsi_shader_info
boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
boolean uses_kill; /**< KIL or KILP instruction used? */
+ /**
+ * Bitmask indicating which register files are accessed with
+ * indirect addressing. The bits are (1 << TGSI_FILE_x), etc.
+ */
+ unsigned indirect_files;
+
struct {
unsigned name;
unsigned data[8];
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 55fccba4d8..b01d2ff468 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -58,7 +58,7 @@ static boolean is_digit_alpha_underscore( const char *cur )
static char uprcase( char c )
{
if (c >= 'a' && c <= 'z')
- return c += 'A' - 'a';
+ return c + 'A' - 'a';
return c;
}
@@ -732,7 +732,7 @@ parse_optional_swizzle(
else if (uprcase( *cur ) == 'W')
swizzle[i] = TGSI_SWIZZLE_W;
else {
- report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" );
+ report_error( ctx, "Expected register swizzle component `x', `y', `z' or `w'" );
return FALSE;
}
cur++;
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index dfe2101c2e..0d94aaae95 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -52,9 +52,8 @@
struct blitter_context_priv
{
- struct blitter_context blitter;
+ struct blitter_context base;
- struct pipe_context *pipe; /**< pipe context */
struct pipe_resource *vbuf; /**< quad */
float vertices[4][2][4]; /**< {pos, color} or {pos, texcoord} */
@@ -97,15 +96,25 @@ struct blitter_context_priv
/* Rasterizer state. */
void *rs_state;
- struct pipe_sampler_view *sampler_view;
-
/* Viewport state. */
struct pipe_viewport_state viewport;
/* Clip state. */
struct pipe_clip_state clip;
+
+ /* Destination surface dimensions. */
+ unsigned dst_width;
+ unsigned dst_height;
};
+static void blitter_draw_rectangle(struct blitter_context *blitter,
+ unsigned x, unsigned y,
+ unsigned width, unsigned height,
+ float depth,
+ enum blitter_attrib_type type,
+ const float attrib[4]);
+
+
struct blitter_context *util_blitter_create(struct pipe_context *pipe)
{
struct blitter_context_priv *ctx;
@@ -120,19 +129,20 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
if (!ctx)
return NULL;
- ctx->pipe = pipe;
+ ctx->base.pipe = pipe;
+ ctx->base.draw_rectangle = blitter_draw_rectangle;
/* init state objects for them to be considered invalid */
- ctx->blitter.saved_blend_state = INVALID_PTR;
- ctx->blitter.saved_dsa_state = INVALID_PTR;
- ctx->blitter.saved_rs_state = INVALID_PTR;
- ctx->blitter.saved_fs = INVALID_PTR;
- ctx->blitter.saved_vs = INVALID_PTR;
- ctx->blitter.saved_velem_state = INVALID_PTR;
- ctx->blitter.saved_fb_state.nr_cbufs = ~0;
- ctx->blitter.saved_num_sampler_views = ~0;
- ctx->blitter.saved_num_sampler_states = ~0;
- ctx->blitter.saved_num_vertex_buffers = ~0;
+ ctx->base.saved_blend_state = INVALID_PTR;
+ ctx->base.saved_dsa_state = INVALID_PTR;
+ ctx->base.saved_rs_state = INVALID_PTR;
+ ctx->base.saved_fs = INVALID_PTR;
+ ctx->base.saved_vs = INVALID_PTR;
+ ctx->base.saved_velem_state = INVALID_PTR;
+ ctx->base.saved_fb_state.nr_cbufs = ~0;
+ ctx->base.saved_num_sampler_views = ~0;
+ ctx->base.saved_num_sampler_states = ~0;
+ ctx->base.saved_num_vertex_buffers = ~0;
/* blend state objects */
memset(&blend, 0, sizeof(blend));
@@ -219,17 +229,17 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
ctx->vertices[i][0][3] = 1; /*v.w*/
/* create the vertex buffer */
- ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
+ ctx->vbuf = pipe_buffer_create(ctx->base.pipe->screen,
PIPE_BIND_VERTEX_BUFFER,
sizeof(ctx->vertices));
- return &ctx->blitter;
+ return &ctx->base;
}
void util_blitter_destroy(struct blitter_context *blitter)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = blitter->pipe;
int i;
pipe->delete_blend_state(pipe, ctx->blend_write_color);
@@ -260,10 +270,6 @@ void util_blitter_destroy(struct blitter_context *blitter)
if (ctx->sampler_state[i])
pipe->delete_sampler_state(pipe, ctx->sampler_state[i]);
- if (ctx->sampler_view) {
- pipe_sampler_view_reference(&ctx->sampler_view, NULL);
- }
-
pipe_resource_reference(&ctx->vbuf, NULL);
FREE(ctx);
}
@@ -271,112 +277,117 @@ void util_blitter_destroy(struct blitter_context *blitter)
static void blitter_check_saved_CSOs(struct blitter_context_priv *ctx)
{
/* make sure these CSOs have been saved */
- assert(ctx->blitter.saved_blend_state != INVALID_PTR &&
- ctx->blitter.saved_dsa_state != INVALID_PTR &&
- ctx->blitter.saved_rs_state != INVALID_PTR &&
- ctx->blitter.saved_fs != INVALID_PTR &&
- ctx->blitter.saved_vs != INVALID_PTR &&
- ctx->blitter.saved_velem_state != INVALID_PTR);
+ assert(ctx->base.saved_blend_state != INVALID_PTR &&
+ ctx->base.saved_dsa_state != INVALID_PTR &&
+ ctx->base.saved_rs_state != INVALID_PTR &&
+ ctx->base.saved_fs != INVALID_PTR &&
+ ctx->base.saved_vs != INVALID_PTR &&
+ ctx->base.saved_velem_state != INVALID_PTR);
}
static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
unsigned i;
/* restore the state objects which are always required to be saved */
- pipe->bind_blend_state(pipe, ctx->blitter.saved_blend_state);
- pipe->bind_depth_stencil_alpha_state(pipe, ctx->blitter.saved_dsa_state);
- pipe->bind_rasterizer_state(pipe, ctx->blitter.saved_rs_state);
- pipe->bind_fs_state(pipe, ctx->blitter.saved_fs);
- pipe->bind_vs_state(pipe, ctx->blitter.saved_vs);
- pipe->bind_vertex_elements_state(pipe, ctx->blitter.saved_velem_state);
+ pipe->bind_blend_state(pipe, ctx->base.saved_blend_state);
+ pipe->bind_depth_stencil_alpha_state(pipe, ctx->base.saved_dsa_state);
+ pipe->bind_rasterizer_state(pipe, ctx->base.saved_rs_state);
+ pipe->bind_fs_state(pipe, ctx->base.saved_fs);
+ pipe->bind_vs_state(pipe, ctx->base.saved_vs);
+ pipe->bind_vertex_elements_state(pipe, ctx->base.saved_velem_state);
- ctx->blitter.saved_blend_state = INVALID_PTR;
- ctx->blitter.saved_dsa_state = INVALID_PTR;
- ctx->blitter.saved_rs_state = INVALID_PTR;
- ctx->blitter.saved_fs = INVALID_PTR;
- ctx->blitter.saved_vs = INVALID_PTR;
- ctx->blitter.saved_velem_state = INVALID_PTR;
+ ctx->base.saved_blend_state = INVALID_PTR;
+ ctx->base.saved_dsa_state = INVALID_PTR;
+ ctx->base.saved_rs_state = INVALID_PTR;
+ ctx->base.saved_fs = INVALID_PTR;
+ ctx->base.saved_vs = INVALID_PTR;
+ ctx->base.saved_velem_state = INVALID_PTR;
- pipe->set_stencil_ref(pipe, &ctx->blitter.saved_stencil_ref);
+ pipe->set_stencil_ref(pipe, &ctx->base.saved_stencil_ref);
- pipe->set_viewport_state(pipe, &ctx->blitter.saved_viewport);
- pipe->set_clip_state(pipe, &ctx->blitter.saved_clip);
+ pipe->set_viewport_state(pipe, &ctx->base.saved_viewport);
+ pipe->set_clip_state(pipe, &ctx->base.saved_clip);
/* restore the state objects which are required to be saved before copy/fill
*/
- if (ctx->blitter.saved_fb_state.nr_cbufs != ~0) {
- pipe->set_framebuffer_state(pipe, &ctx->blitter.saved_fb_state);
- ctx->blitter.saved_fb_state.nr_cbufs = ~0;
+ if (ctx->base.saved_fb_state.nr_cbufs != ~0) {
+ pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state);
+ util_assign_framebuffer_state(&ctx->base.saved_fb_state, NULL);
+ ctx->base.saved_fb_state.nr_cbufs = ~0;
}
- if (ctx->blitter.saved_num_sampler_states != ~0) {
+ if (ctx->base.saved_num_sampler_states != ~0) {
pipe->bind_fragment_sampler_states(pipe,
- ctx->blitter.saved_num_sampler_states,
- ctx->blitter.saved_sampler_states);
- ctx->blitter.saved_num_sampler_states = ~0;
+ ctx->base.saved_num_sampler_states,
+ ctx->base.saved_sampler_states);
+ ctx->base.saved_num_sampler_states = ~0;
}
- if (ctx->blitter.saved_num_sampler_views != ~0) {
+ if (ctx->base.saved_num_sampler_views != ~0) {
pipe->set_fragment_sampler_views(pipe,
- ctx->blitter.saved_num_sampler_views,
- ctx->blitter.saved_sampler_views);
- ctx->blitter.saved_num_sampler_views = ~0;
+ ctx->base.saved_num_sampler_views,
+ ctx->base.saved_sampler_views);
+
+ for (i = 0; i < ctx->base.saved_num_sampler_views; i++)
+ pipe_sampler_view_reference(&ctx->base.saved_sampler_views[i],
+ NULL);
+
+ ctx->base.saved_num_sampler_views = ~0;
}
- if (ctx->blitter.saved_num_vertex_buffers != ~0) {
+ if (ctx->base.saved_num_vertex_buffers != ~0) {
pipe->set_vertex_buffers(pipe,
- ctx->blitter.saved_num_vertex_buffers,
- ctx->blitter.saved_vertex_buffers);
+ ctx->base.saved_num_vertex_buffers,
+ ctx->base.saved_vertex_buffers);
- for (i = 0; i < ctx->blitter.saved_num_vertex_buffers; i++) {
- if (ctx->blitter.saved_vertex_buffers[i].buffer) {
- pipe_resource_reference(&ctx->blitter.saved_vertex_buffers[i].buffer,
+ for (i = 0; i < ctx->base.saved_num_vertex_buffers; i++) {
+ if (ctx->base.saved_vertex_buffers[i].buffer) {
+ pipe_resource_reference(&ctx->base.saved_vertex_buffers[i].buffer,
NULL);
}
}
- ctx->blitter.saved_num_vertex_buffers = ~0;
+ ctx->base.saved_num_vertex_buffers = ~0;
}
}
static void blitter_set_rectangle(struct blitter_context_priv *ctx,
unsigned x1, unsigned y1,
unsigned x2, unsigned y2,
- unsigned width, unsigned height,
float depth)
{
int i;
/* set vertex positions */
- ctx->vertices[0][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v0.x*/
- ctx->vertices[0][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v0.y*/
+ ctx->vertices[0][0][0] = (float)x1 / ctx->dst_width * 2.0f - 1.0f; /*v0.x*/
+ ctx->vertices[0][0][1] = (float)y1 / ctx->dst_height * 2.0f - 1.0f; /*v0.y*/
- ctx->vertices[1][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v1.x*/
- ctx->vertices[1][0][1] = (float)y1 / height * 2.0f - 1.0f; /*v1.y*/
+ ctx->vertices[1][0][0] = (float)x2 / ctx->dst_width * 2.0f - 1.0f; /*v1.x*/
+ ctx->vertices[1][0][1] = (float)y1 / ctx->dst_height * 2.0f - 1.0f; /*v1.y*/
- ctx->vertices[2][0][0] = (float)x2 / width * 2.0f - 1.0f; /*v2.x*/
- ctx->vertices[2][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v2.y*/
+ ctx->vertices[2][0][0] = (float)x2 / ctx->dst_width * 2.0f - 1.0f; /*v2.x*/
+ ctx->vertices[2][0][1] = (float)y2 / ctx->dst_height * 2.0f - 1.0f; /*v2.y*/
- ctx->vertices[3][0][0] = (float)x1 / width * 2.0f - 1.0f; /*v3.x*/
- ctx->vertices[3][0][1] = (float)y2 / height * 2.0f - 1.0f; /*v3.y*/
+ ctx->vertices[3][0][0] = (float)x1 / ctx->dst_width * 2.0f - 1.0f; /*v3.x*/
+ ctx->vertices[3][0][1] = (float)y2 / ctx->dst_height * 2.0f - 1.0f; /*v3.y*/
for (i = 0; i < 4; i++)
ctx->vertices[i][0][2] = depth; /*z*/
/* viewport */
- ctx->viewport.scale[0] = 0.5f * width;
- ctx->viewport.scale[1] = 0.5f * height;
+ ctx->viewport.scale[0] = 0.5f * ctx->dst_width;
+ ctx->viewport.scale[1] = 0.5f * ctx->dst_height;
ctx->viewport.scale[2] = 1.0f;
ctx->viewport.scale[3] = 1.0f;
- ctx->viewport.translate[0] = 0.5f * width;
- ctx->viewport.translate[1] = 0.5f * height;
+ ctx->viewport.translate[0] = 0.5f * ctx->dst_width;
+ ctx->viewport.translate[1] = 0.5f * ctx->dst_height;
ctx->viewport.translate[2] = 0.0f;
ctx->viewport.translate[3] = 0.0f;
- ctx->pipe->set_viewport_state(ctx->pipe, &ctx->viewport);
+ ctx->base.pipe->set_viewport_state(ctx->base.pipe, &ctx->viewport);
/* clip */
- ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip);
+ ctx->base.pipe->set_clip_state(ctx->base.pipe, &ctx->clip);
}
static void blitter_set_clear_color(struct blitter_context_priv *ctx,
@@ -401,29 +412,45 @@ static void blitter_set_clear_color(struct blitter_context_priv *ctx,
}
}
+static void get_normalized_texcoords(struct pipe_resource *src,
+ struct pipe_subresource subsrc,
+ unsigned x1, unsigned y1,
+ unsigned x2, unsigned y2,
+ float out[4])
+{
+ out[0] = x1 / (float)u_minify(src->width0, subsrc.level);
+ out[1] = y1 / (float)u_minify(src->height0, subsrc.level);
+ out[2] = x2 / (float)u_minify(src->width0, subsrc.level);
+ out[3] = y2 / (float)u_minify(src->height0, subsrc.level);
+}
+
+static void set_texcoords_in_vertices(const float coord[4],
+ float *out, unsigned stride)
+{
+ out[0] = coord[0]; /*t0.s*/
+ out[1] = coord[1]; /*t0.t*/
+ out += stride;
+ out[0] = coord[2]; /*t1.s*/
+ out[1] = coord[1]; /*t1.t*/
+ out += stride;
+ out[0] = coord[2]; /*t2.s*/
+ out[1] = coord[3]; /*t2.t*/
+ out += stride;
+ out[0] = coord[0]; /*t3.s*/
+ out[1] = coord[3]; /*t3.t*/
+}
+
static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx,
struct pipe_resource *src,
struct pipe_subresource subsrc,
unsigned x1, unsigned y1,
unsigned x2, unsigned y2)
{
- int i;
- float s1 = x1 / (float)u_minify(src->width0, subsrc.level);
- float t1 = y1 / (float)u_minify(src->height0, subsrc.level);
- float s2 = x2 / (float)u_minify(src->width0, subsrc.level);
- float t2 = y2 / (float)u_minify(src->height0, subsrc.level);
-
- ctx->vertices[0][1][0] = s1; /*t0.s*/
- ctx->vertices[0][1][1] = t1; /*t0.t*/
-
- ctx->vertices[1][1][0] = s2; /*t1.s*/
- ctx->vertices[1][1][1] = t1; /*t1.t*/
-
- ctx->vertices[2][1][0] = s2; /*t2.s*/
- ctx->vertices[2][1][1] = t2; /*t2.t*/
+ unsigned i;
+ float coord[4];
- ctx->vertices[3][1][0] = s1; /*t3.s*/
- ctx->vertices[3][1][1] = t2; /*t3.t*/
+ get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord);
+ set_texcoords_in_vertices(coord, &ctx->vertices[0][1][0], 8);
for (i = 0; i < 4; i++) {
ctx->vertices[i][1][2] = 0; /*r*/
@@ -454,20 +481,11 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx,
unsigned x2, unsigned y2)
{
int i;
- float s1 = x1 / (float)u_minify(src->width0, subsrc.level);
- float t1 = y1 / (float)u_minify(src->height0, subsrc.level);
- float s2 = x2 / (float)u_minify(src->width0, subsrc.level);
- float t2 = y2 / (float)u_minify(src->height0, subsrc.level);
+ float coord[4];
float st[4][2];
- st[0][0] = s1;
- st[0][1] = t1;
- st[1][0] = s2;
- st[1][1] = t1;
- st[2][0] = s2;
- st[2][1] = t2;
- st[3][0] = s1;
- st[3][1] = t2;
+ get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord);
+ set_texcoords_in_vertices(coord, &st[0][0], 2);
util_map_texcoords2d_onto_cubemap(subsrc.face,
/* pointer, stride in floats */
@@ -478,9 +496,16 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx,
ctx->vertices[i][1][3] = 1; /*q*/
}
+static void blitter_set_dst_dimensions(struct blitter_context_priv *ctx,
+ unsigned width, unsigned height)
+{
+ ctx->dst_width = width;
+ ctx->dst_height = height;
+}
+
static void blitter_draw_quad(struct blitter_context_priv *ctx)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
/* write vertices and draw them */
pipe_buffer_write(pipe, ctx->vbuf,
@@ -495,7 +520,7 @@ static INLINE
void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
int miplevel)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
struct pipe_sampler_state *sampler_state = &ctx->template_sampler_state;
assert(miplevel < PIPE_MAX_TEXTURE_LEVELS);
@@ -518,7 +543,7 @@ void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
static INLINE
void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);
@@ -531,7 +556,7 @@ void *blitter_get_fs_col(struct blitter_context_priv *ctx, unsigned num_cbufs)
/** Convert PIPE_TEXTURE_x to TGSI_TEXTURE_x */
static unsigned
-pipe_tex_to_tgsi_tex(unsigned pipe_tex_target)
+pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target)
{
switch (pipe_tex_target) {
case PIPE_TEXTURE_1D:
@@ -553,7 +578,7 @@ static INLINE
void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx,
unsigned tex_target)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
assert(tex_target < PIPE_MAX_TEXTURE_TYPES);
@@ -572,7 +597,7 @@ static INLINE
void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
unsigned tex_target)
{
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
assert(tex_target < PIPE_MAX_TEXTURE_TYPES);
@@ -588,6 +613,31 @@ void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
return ctx->fs_texfetch_depth[tex_target];
}
+static void blitter_draw_rectangle(struct blitter_context *blitter,
+ unsigned x1, unsigned y1,
+ unsigned x2, unsigned y2,
+ float depth,
+ enum blitter_attrib_type type,
+ const float attrib[4])
+{
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+
+ switch (type) {
+ case UTIL_BLITTER_ATTRIB_COLOR:
+ blitter_set_clear_color(ctx, attrib);
+ break;
+
+ case UTIL_BLITTER_ATTRIB_TEXCOORD:
+ set_texcoords_in_vertices(attrib, &ctx->vertices[0][1][0], 8);
+ break;
+
+ default:;
+ }
+
+ blitter_set_rectangle(ctx, x1, y1, x2, y2, depth);
+ blitter_draw_quad(ctx);
+}
+
void util_blitter_clear(struct blitter_context *blitter,
unsigned width, unsigned height,
unsigned num_cbufs,
@@ -596,7 +646,7 @@ void util_blitter_clear(struct blitter_context *blitter,
double depth, unsigned stencil)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
struct pipe_stencil_ref sr = { { 0 } };
assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);
@@ -630,9 +680,9 @@ void util_blitter_clear(struct blitter_context *blitter,
pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs));
pipe->bind_vs_state(pipe, ctx->vs_col);
- blitter_set_clear_color(ctx, rgba);
- blitter_set_rectangle(ctx, 0, 0, width, height, width, height, depth);
- blitter_draw_quad(ctx);
+ blitter_set_dst_dimensions(ctx, width, height);
+ blitter->draw_rectangle(blitter, 0, 0, width, height, depth,
+ UTIL_BLITTER_ATTRIB_COLOR, rgba);
blitter_restore_CSOs(ctx);
}
@@ -654,7 +704,7 @@ void util_blitter_copy_region(struct blitter_context *blitter,
boolean ignore_stencil)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
struct pipe_screen *screen = pipe->screen;
struct pipe_surface *dstsurf;
struct pipe_framebuffer_state fb_state;
@@ -736,11 +786,6 @@ void util_blitter_copy_region(struct blitter_context *blitter,
u_sampler_view_default_template(&viewTempl, src, src->format);
view = pipe->create_sampler_view(pipe, src, &viewTempl);
- if (ctx->sampler_view) {
- pipe_sampler_view_reference(&ctx->sampler_view, NULL);
- }
- ctx->sampler_view = view;
-
/* Set rasterizer state, shaders, and textures. */
pipe->bind_rasterizer_state(pipe, ctx->rs_state);
pipe->bind_vs_state(pipe, ctx->vs_tex);
@@ -750,32 +795,49 @@ void util_blitter_copy_region(struct blitter_context *blitter,
pipe->set_fragment_sampler_views(pipe, 1, &view);
pipe->set_framebuffer_state(pipe, &fb_state);
- /* Set texture coordinates. */
+ blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
+
switch (src->target) {
+ /* Draw the quad with the draw_rectangle callback. */
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
- blitter_set_texcoords_2d(ctx, src, subsrc,
- srcx, srcy, srcx+width, srcy+height);
+ {
+ /* Set texture coordinates. */
+ float coord[4];
+ get_normalized_texcoords(src, subsrc, srcx, srcy,
+ srcx+width, srcy+height, coord);
+
+ /* Draw. */
+ blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0,
+ UTIL_BLITTER_ATTRIB_TEXCOORD, coord);
+ }
break;
+
+ /* Draw the quad with the generic codepath. */
case PIPE_TEXTURE_3D:
- blitter_set_texcoords_3d(ctx, src, subsrc, srcz,
- srcx, srcy, srcx+width, srcy+height);
- break;
case PIPE_TEXTURE_CUBE:
- blitter_set_texcoords_cube(ctx, src, subsrc,
- srcx, srcy, srcx+width, srcy+height);
+ /* Set texture coordinates. */
+ if (src->target == PIPE_TEXTURE_3D)
+ blitter_set_texcoords_3d(ctx, src, subsrc, srcz,
+ srcx, srcy, srcx+width, srcy+height);
+ else
+ blitter_set_texcoords_cube(ctx, src, subsrc,
+ srcx, srcy, srcx+width, srcy+height);
+
+ /* Draw. */
+ blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height, 0);
+ blitter_draw_quad(ctx);
break;
+
default:
assert(0);
return;
}
- blitter_set_rectangle(ctx, dstx, dsty, dstx+width, dsty+height,
- dstsurf->width, dstsurf->height, 0);
- blitter_draw_quad(ctx);
blitter_restore_CSOs(ctx);
pipe_surface_reference(&dstsurf, NULL);
+ pipe_sampler_view_reference(&view, NULL);
}
/* Clear a region of a color surface to a constant value. */
@@ -786,7 +848,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter,
unsigned width, unsigned height)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
struct pipe_framebuffer_state fb_state;
assert(dstsurf->texture);
@@ -813,9 +875,9 @@ void util_blitter_clear_render_target(struct blitter_context *blitter,
fb_state.zsbuf = 0;
pipe->set_framebuffer_state(pipe, &fb_state);
- blitter_set_clear_color(ctx, rgba);
- blitter_set_rectangle(ctx, 0, 0, width, height, dstsurf->width, dstsurf->height, 0);
- blitter_draw_quad(ctx);
+ blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
+ blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0,
+ UTIL_BLITTER_ATTRIB_COLOR, rgba);
blitter_restore_CSOs(ctx);
}
@@ -829,7 +891,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
unsigned width, unsigned height)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
- struct pipe_context *pipe = ctx->pipe;
+ struct pipe_context *pipe = ctx->base.pipe;
struct pipe_framebuffer_state fb_state;
struct pipe_stencil_ref sr = { { 0 } };
@@ -873,7 +935,8 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
fb_state.zsbuf = dstsurf;
pipe->set_framebuffer_state(pipe, &fb_state);
- blitter_set_rectangle(ctx, 0, 0, width, height, dstsurf->width, dstsurf->height, depth);
- blitter_draw_quad(ctx);
+ blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
+ blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, depth,
+ UTIL_BLITTER_ATTRIB_NONE, NULL);
blitter_restore_CSOs(ctx);
}
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index 22849280ab..ba3f92eca8 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -39,9 +39,48 @@ extern "C" {
struct pipe_context;
+enum blitter_attrib_type {
+ UTIL_BLITTER_ATTRIB_NONE,
+ UTIL_BLITTER_ATTRIB_COLOR,
+ UTIL_BLITTER_ATTRIB_TEXCOORD
+};
+
struct blitter_context
{
+ /**
+ * Draw a rectangle.
+ *
+ * \param x1 An X coordinate of the top-left corner.
+ * \param y1 A Y coordinate of the top-left corner.
+ * \param x2 An X coordinate of the bottom-right corner.
+ * \param y2 A Y coordinate of the bottom-right corner.
+ * \param depth A depth which the rectangle is rendered at.
+ *
+ * \param type Semantics of the attributes "attrib".
+ * If type is UTIL_BLITTER_ATTRIB_NONE, ignore them.
+ * If type is UTIL_BLITTER_ATTRIB_COLOR, the attributes
+ * make up a constant RGBA color, and should go to the COLOR0
+ * varying slot of a fragment shader.
+ * If type is UTIL_BLITTER_ATTRIB_TEXCOORD, {a1, a2} and
+ * {a3, a4} specify top-left and bottom-right texture
+ * coordinates of the rectangle, respectively, and should go
+ * to the GENERIC0 varying slot of a fragment shader.
+ *
+ * \param attrib See type.
+ *
+ * \note A driver may optionally override this callback to implement
+ * a specialized hardware path for drawing a rectangle, e.g. using
+ * a rectangular point sprite.
+ */
+ void (*draw_rectangle)(struct blitter_context *blitter,
+ unsigned x1, unsigned y1, unsigned x2, unsigned y2,
+ float depth,
+ enum blitter_attrib_type type,
+ const float attrib[4]);
+
/* Private members, really. */
+ struct pipe_context *pipe; /**< pipe context */
+
void *saved_blend_state; /**< blend state */
void *saved_dsa_state; /**< depth stencil alpha state */
void *saved_velem_state; /**< vertex elements state */
@@ -73,6 +112,15 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe);
*/
void util_blitter_destroy(struct blitter_context *blitter);
+/**
+ * Return the pipe context associated with a blitter context.
+ */
+static INLINE
+struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter)
+{
+ return blitter->pipe;
+}
+
/*
* These CSOs must be saved before any of the following functions is called:
* - blend state
@@ -208,11 +256,45 @@ void util_blitter_save_vertex_shader(struct blitter_context *blitter,
blitter->saved_vs = vs;
}
+/* XXX This should probably be moved elsewhere. */
+static INLINE
+void util_assign_framebuffer_state(struct pipe_framebuffer_state *dst,
+ const struct pipe_framebuffer_state *src)
+{
+ unsigned i;
+
+ if (src) {
+ /* Reference all surfaces. */
+ for (i = 0; i < src->nr_cbufs; i++) {
+ pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]);
+ }
+ for (; i < dst->nr_cbufs; i++) {
+ pipe_surface_reference(&dst->cbufs[i], NULL);
+ }
+
+ pipe_surface_reference(&dst->zsbuf, src->zsbuf);
+
+ dst->nr_cbufs = src->nr_cbufs;
+ dst->width = src->width;
+ dst->height = src->height;
+ } else {
+ /* Set all surfaces to NULL. */
+ for (i = 0; i < dst->nr_cbufs; i++) {
+ pipe_surface_reference(&dst->cbufs[i], NULL);
+ }
+
+ pipe_surface_reference(&dst->zsbuf, NULL);
+
+ dst->nr_cbufs = 0;
+ }
+}
+
static INLINE
void util_blitter_save_framebuffer(struct blitter_context *blitter,
- struct pipe_framebuffer_state *state)
+ const struct pipe_framebuffer_state *state)
{
- blitter->saved_fb_state = *state;
+ blitter->saved_fb_state.nr_cbufs = 0; /* It's ~0 now, meaning it's unsaved. */
+ util_assign_framebuffer_state(&blitter->saved_fb_state, state);
}
static INLINE
@@ -247,12 +329,13 @@ util_blitter_save_fragment_sampler_views(struct blitter_context *blitter,
int num_views,
struct pipe_sampler_view **views)
{
+ unsigned i;
assert(num_views <= Elements(blitter->saved_sampler_views));
blitter->saved_num_sampler_views = num_views;
- memcpy(blitter->saved_sampler_views,
- views,
- num_views * sizeof(struct pipe_sampler_view *));
+ for (i = 0; i < num_views; i++)
+ pipe_sampler_view_reference(&blitter->saved_sampler_views[i],
+ views[i]);
}
static INLINE void
diff --git a/src/gallium/auxiliary/util/u_caps.c b/src/gallium/auxiliary/util/u_caps.c
index 294ee37033..94d5bd3027 100644
--- a/src/gallium/auxiliary/util/u_caps.c
+++ b/src/gallium/auxiliary/util/u_caps.c
@@ -186,6 +186,22 @@ static unsigned caps_opengl_2_1[] = {
/* OpenGL 3.0 */
/* UTIL_CHECK_INT(MAX_RENDER_TARGETS, 8), */
+/* Shader Model 3 */
+static unsigned caps_sm3[] = {
+ UTIL_CHECK_INT(MAX_FS_INSTRUCTIONS, 512),
+ UTIL_CHECK_INT(MAX_FS_INPUTS, 10),
+ UTIL_CHECK_INT(MAX_FS_TEMPS, 32),
+ UTIL_CHECK_INT(MAX_FS_ADDRS, 1),
+ UTIL_CHECK_INT(MAX_FS_CONSTS, 224),
+
+ UTIL_CHECK_INT(MAX_VS_INSTRUCTIONS, 512),
+ UTIL_CHECK_INT(MAX_VS_INPUTS, 16),
+ UTIL_CHECK_INT(MAX_VS_TEMPS, 32),
+ UTIL_CHECK_INT(MAX_VS_ADDRS, 2),
+ UTIL_CHECK_INT(MAX_VS_CONSTS, 256),
+
+ UTIL_CHECK_TERMINATE
+};
/**
* Demo function which checks against theoretical caps needed for different APIs.
@@ -203,6 +219,7 @@ void util_caps_demo_print(struct pipe_screen *screen)
{"DX 11", caps_dx_11},
{"OpenGL 2.1", caps_opengl_2_1},
/* {"OpenGL 3.0", caps_opengl_3_0},*/
+ {"SM3", caps_sm3},
{NULL, NULL}
};
int i, out = 0;
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index a08241971c..23d33af4e4 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -38,7 +38,7 @@
#include "u_cpu_detect.h"
#if defined(PIPE_ARCH_PPC)
-#if defined(PIPE_OS_DARWIN)
+#if defined(PIPE_OS_APPLE)
#include <sys/sysctl.h>
#else
#include <signal.h>
@@ -132,7 +132,7 @@ win32_sig_handler_sse(EXCEPTION_POINTERS* ep)
#endif /* PIPE_ARCH_X86 */
-#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_DARWIN)
+#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE)
static jmp_buf __lv_powerpc_jmpbuf;
static volatile sig_atomic_t __lv_powerpc_canjump = 0;
@@ -153,7 +153,7 @@ sigill_handler(int sig)
static void
check_os_altivec_support(void)
{
-#if defined(PIPE_OS_DARWIN)
+#if defined(PIPE_OS_APPLE)
int sels[2] = {CTL_HW, HW_VECTORUNIT};
int has_vu = 0;
int len = sizeof (has_vu);
@@ -166,8 +166,8 @@ check_os_altivec_support(void)
util_cpu_caps.has_altivec = 1;
}
}
-#else /* !PIPE_OS_DARWIN */
- /* no Darwin, do it the brute-force way */
+#else /* !PIPE_OS_APPLE */
+ /* not on Apple/Darwin, do it the brute-force way */
/* this is borrowed from the libmpeg2 library */
signal(SIGILL, sigill_handler);
if (setjmp(__lv_powerpc_jmpbuf)) {
@@ -184,7 +184,7 @@ check_os_altivec_support(void)
signal(SIGILL, SIG_DFL);
util_cpu_caps.has_altivec = 1;
}
-#endif /* PIPE_OS_DARWIN */
+#endif /* !PIPE_OS_APPLE */
}
#endif /* PIPE_ARCH_PPC */
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 5e373ff24c..ad162558bc 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -190,11 +190,11 @@ debug_get_flags_option(const char *name,
result = dfault;
else if (!util_strcmp(str, "help")) {
result = dfault;
- debug_printf("%s: help for %s:\n", __FUNCTION__, name);
+ _debug_printf("%s: help for %s:\n", __FUNCTION__, name);
for (; flags->name; ++flags)
namealign = MAX2(namealign, strlen(flags->name));
for (flags = orig; flags->name; ++flags)
- debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name,
+ _debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name,
(int)sizeof(unsigned long)*CHAR_BIT/4, flags->value,
flags->desc ? " " : "", flags->desc ? flags->desc : "");
}
diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h
index 53bb1342dd..42adb1f069 100644
--- a/src/gallium/auxiliary/util/u_double_list.h
+++ b/src/gallium/auxiliary/util/u_double_list.h
@@ -98,5 +98,20 @@ struct list_head
#define LIST_IS_EMPTY(__list) \
((__list)->next == (__list))
-
+#ifndef container_of
+#define container_of(ptr, sample, member) \
+ (void *)((char *)(ptr) \
+ - ((char *)&(sample)->member - (char *)(sample)))
+#endif
+
+#define LIST_FOR_EACH_ENTRY(pos, head, member) \
+ for (pos = container_of((head)->next, pos, member); \
+ &pos->member != (head); \
+ pos = container_of(pos->member.next, pos, member))
+
+#define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member) \
+ for (pos = container_of((head)->next, pos, member), \
+ storage = container_of(pos->member.next, pos, member); \
+ &pos->member != (head); \
+ pos = storage, storage = container_of(storage->member.next, storage, member))
#endif /*_U_DOUBLE_LIST_H_*/
diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c
index 3168a1fab4..43d09f1960 100644
--- a/src/gallium/auxiliary/util/u_format.c
+++ b/src/gallium/auxiliary/util/u_format.c
@@ -120,7 +120,7 @@ util_format_write_4ub(enum pipe_format format, const uint8_t *src, unsigned src_
}
-static INLINE boolean
+boolean
util_format_fits_8unorm(const struct util_format_description *format_desc)
{
unsigned chan;
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index fd95bea1a7..38254b1096 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -213,6 +213,16 @@ struct util_format_description
unsigned width, unsigned height);
/**
+ * Fetch a single pixel (i, j) from a block.
+ *
+ * XXX: Only defined for a very few select formats.
+ */
+ void
+ (*fetch_rgba_8unorm)(uint8_t *dst,
+ const uint8_t *src,
+ unsigned i, unsigned j);
+
+ /**
* Unpack pixel blocks to R32G32B32A32_FLOAT.
* Note: strides are in bytes.
*
@@ -663,6 +673,9 @@ util_format_write_4ub(enum pipe_format format,
* Generic format conversion;
*/
+boolean
+util_format_fits_8unorm(const struct util_format_description *format_desc);
+
void
util_format_translate(enum pipe_format dst_format,
void *dst, unsigned dst_stride,
diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py
index ae9a598197..f0b407b8b8 100755
--- a/src/gallium/auxiliary/util/u_format_table.py
+++ b/src/gallium/auxiliary/util/u_format_table.py
@@ -132,12 +132,17 @@ def write_format_table(formats):
if format.colorspace != ZS:
print " &util_format_%s_unpack_rgba_8unorm," % format.short_name()
print " &util_format_%s_pack_rgba_8unorm," % format.short_name()
+ if format.layout == 's3tc':
+ print " &util_format_%s_fetch_rgba_8unorm," % format.short_name()
+ else:
+ print " NULL, /* fetch_rgba_8unorm */"
print " &util_format_%s_unpack_rgba_float," % format.short_name()
print " &util_format_%s_pack_rgba_float," % format.short_name()
print " &util_format_%s_fetch_rgba_float," % format.short_name()
else:
print " NULL, /* unpack_rgba_8unorm */"
print " NULL, /* pack_rgba_8unorm */"
+ print " NULL, /* fetch_rgba_8unorm */"
print " NULL, /* unpack_rgba_float */"
print " NULL, /* pack_rgba_float */"
print " NULL, /* fetch_rgba_float */"
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 6370e77986..fe19466436 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -567,12 +567,26 @@ util_bswap16(uint16_t n)
#define MAX3( A, B, C ) MAX2( MAX2( A, B ), C )
+/**
+ * Align a value, only works pot alignemnts.
+ */
static INLINE int
align(int value, int alignment)
{
return (value + alignment - 1) & ~(alignment - 1);
}
+/**
+ * Works like align but on npot alignments.
+ */
+static INLINE size_t
+util_align_npot(size_t value, size_t alignment)
+{
+ if (value % alignment)
+ return value + (alignment - (value % alignment));
+ return value;
+}
+
static INLINE unsigned
u_minify(unsigned value, unsigned levels)
{
diff --git a/src/gallium/auxiliary/util/u_mempool.c b/src/gallium/auxiliary/util/u_mempool.c
new file mode 100644
index 0000000000..1f336b39a1
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_mempool.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "util/u_mempool.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+
+#include <stdio.h>
+
+#define UTIL_MEMPOOL_MAGIC 0xcafe4321
+
+/* The block is either allocated memory or free space. */
+struct util_mempool_block {
+ /* The header. */
+ /* The first next free block. */
+ struct util_mempool_block *next_free;
+
+ intptr_t magic;
+
+ /* Memory after the last member is dedicated to the block itself.
+ * The allocated size is always larger than this structure. */
+};
+
+static struct util_mempool_block *
+util_mempool_get_block(struct util_mempool *pool,
+ struct util_mempool_page *page, unsigned index)
+{
+ return (struct util_mempool_block*)
+ ((uint8_t*)page + sizeof(struct util_mempool_page) +
+ (pool->block_size * index));
+}
+
+static void util_mempool_add_new_page(struct util_mempool *pool)
+{
+ struct util_mempool_page *page;
+ struct util_mempool_block *block;
+ int i;
+
+ page = MALLOC(pool->page_size);
+ insert_at_tail(&pool->list, page);
+
+ /* Mark all blocks as free. */
+ for (i = 0; i < pool->num_blocks-1; i++) {
+ block = util_mempool_get_block(pool, page, i);
+ block->next_free = util_mempool_get_block(pool, page, i+1);
+ block->magic = UTIL_MEMPOOL_MAGIC;
+ }
+
+ block = util_mempool_get_block(pool, page, pool->num_blocks-1);
+ block->next_free = pool->first_free;
+ block->magic = UTIL_MEMPOOL_MAGIC;
+ pool->first_free = util_mempool_get_block(pool, page, 0);
+ pool->num_pages++;
+
+#if 0
+ fprintf(stderr, "New page! Num of pages: %i\n", pool->num_pages);
+#endif
+}
+
+static void *util_mempool_malloc_st(struct util_mempool *pool)
+{
+ struct util_mempool_block *block;
+
+ if (!pool->first_free)
+ util_mempool_add_new_page(pool);
+
+ block = pool->first_free;
+ assert(block->magic == UTIL_MEMPOOL_MAGIC);
+ pool->first_free = block->next_free;
+
+ return (uint8_t*)block + sizeof(struct util_mempool_block);
+}
+
+static void util_mempool_free_st(struct util_mempool *pool, void *ptr)
+{
+ struct util_mempool_block *block =
+ (struct util_mempool_block*)
+ ((uint8_t*)ptr - sizeof(struct util_mempool_block));
+
+ assert(block->magic == UTIL_MEMPOOL_MAGIC);
+ block->next_free = pool->first_free;
+ pool->first_free = block;
+}
+
+static void *util_mempool_malloc_mt(struct util_mempool *pool)
+{
+ void *mem;
+
+ pipe_mutex_lock(pool->mutex);
+ mem = util_mempool_malloc_st(pool);
+ pipe_mutex_unlock(pool->mutex);
+ return mem;
+}
+
+static void util_mempool_free_mt(struct util_mempool *pool, void *ptr)
+{
+ pipe_mutex_lock(pool->mutex);
+ util_mempool_free_st(pool, ptr);
+ pipe_mutex_unlock(pool->mutex);
+}
+
+void util_mempool_set_thread_safety(struct util_mempool *pool,
+ enum util_mempool_threading threading)
+{
+ pool->threading = threading;
+
+ if (threading) {
+ pool->malloc = util_mempool_malloc_mt;
+ pool->free = util_mempool_free_mt;
+ } else {
+ pool->malloc = util_mempool_malloc_st;
+ pool->free = util_mempool_free_st;
+ }
+}
+
+void util_mempool_create(struct util_mempool *pool,
+ unsigned item_size,
+ unsigned num_blocks,
+ enum util_mempool_threading threading)
+{
+ item_size = align(item_size, sizeof(intptr_t));
+
+ pool->num_pages = 0;
+ pool->num_blocks = num_blocks;
+ pool->block_size = sizeof(struct util_mempool_block) + item_size;
+ pool->block_size = align(pool->block_size, sizeof(intptr_t));
+ pool->page_size = sizeof(struct util_mempool_page) +
+ num_blocks * pool->block_size;
+ pool->first_free = NULL;
+
+ make_empty_list(&pool->list);
+
+ pipe_mutex_init(pool->mutex);
+
+ util_mempool_set_thread_safety(pool, threading);
+}
+
+void util_mempool_destroy(struct util_mempool *pool)
+{
+ struct util_mempool_page *page, *temp;
+
+ foreach_s(page, temp, &pool->list) {
+ remove_from_list(page);
+ FREE(page);
+ }
+
+ pipe_mutex_destroy(pool->mutex);
+}
diff --git a/src/gallium/auxiliary/util/u_mempool.h b/src/gallium/auxiliary/util/u_mempool.h
new file mode 100644
index 0000000000..a5b5d6a9b7
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_mempool.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/**
+ * @file
+ * Simple memory pool for equally sized memory allocations.
+ * util_mempool_malloc and util_mempool_free are in O(1).
+ *
+ * Good for allocations which have very low lifetime and are allocated
+ * and freed very often. Use a profiler first!
+ *
+ * Candidates: get_transfer, user_buffer_create
+ *
+ * @author Marek Olšák
+ */
+
+#ifndef U_MEMPOOL_H
+#define U_MEMPOOL_H
+
+#include "os/os_thread.h"
+
+enum util_mempool_threading {
+ UTIL_MEMPOOL_SINGLETHREADED = FALSE,
+ UTIL_MEMPOOL_MULTITHREADED = TRUE
+};
+
+/* The page is an array of blocks (allocations). */
+struct util_mempool_page {
+ /* The header (linked-list pointers). */
+ struct util_mempool_page *prev, *next;
+
+ /* Memory after the last member is dedicated to the page itself.
+ * The allocated size is always larger than this structure. */
+};
+
+struct util_mempool {
+ /* Public members. */
+ void *(*malloc)(struct util_mempool *pool);
+ void (*free)(struct util_mempool *pool, void *ptr);
+
+ /* Private members. */
+ struct util_mempool_block *first_free;
+
+ struct util_mempool_page list;
+
+ unsigned block_size;
+ unsigned page_size;
+ unsigned num_blocks;
+ unsigned num_pages;
+ enum util_mempool_threading threading;
+
+ pipe_mutex mutex;
+};
+
+void util_mempool_create(struct util_mempool *pool,
+ unsigned item_size,
+ unsigned num_blocks,
+ enum util_mempool_threading threading);
+
+void util_mempool_destroy(struct util_mempool *pool);
+
+void util_mempool_set_thread_safety(struct util_mempool *pool,
+ enum util_mempool_threading threading);
+
+#define util_mempool_malloc(pool) (pool)->malloc(pool)
+#define util_mempool_free(pool, ptr) (pool)->free(pool, ptr)
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_network.c b/src/gallium/auxiliary/util/u_network.c
index 87ee0e4768..77f2c5fc7d 100644
--- a/src/gallium/auxiliary/util/u_network.c
+++ b/src/gallium/auxiliary/util/u_network.c
@@ -6,7 +6,7 @@
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
# include <winsock2.h>
# include <windows.h>
-#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE)
+#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_HAIKU) || defined(PIPE_OS_APPLE) || defined(PIPE_OS_CYGWIN)
# include <sys/socket.h>
# include <netinet/in.h>
# include <unistd.h>