summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
authorKeith Whitwell <keithw@vmware.com>2010-10-17 19:03:42 -0700
committerKeith Whitwell <keithw@vmware.com>2010-10-17 19:09:42 -0700
commit0072acd447dc6be652e63752e50215c3105322c8 (patch)
tree847d1763b54772d336a04e606f8248291c3092b7 /src/gallium/auxiliary
parent543fb77ddece7e1806e8eaa0d65bb2a945ef9a75 (diff)
parentca2b2ac131933b4171b519813df1aaa3a81621cd (diff)
Merge remote branch 'origin/master' into lp-setup-llvm
Conflicts: src/gallium/drivers/llvmpipe/lp_setup_coef.c src/gallium/drivers/llvmpipe/lp_setup_coef.h src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c src/gallium/drivers/llvmpipe/lp_setup_point.c src/gallium/drivers/llvmpipe/lp_setup_tri.c src/gallium/drivers/llvmpipe/lp_state_derived.c src/gallium/drivers/llvmpipe/lp_state_fs.h
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/Makefile3
-rw-r--r--src/gallium/auxiliary/SConscript3
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c67
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h19
-rw-r--r--src/gallium/auxiliary/draw/draw_fs.c73
-rw-r--r--src/gallium/auxiliary/draw/draw_fs.h42
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c49
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.h24
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm_sample.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c9
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aapoint.c8
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_clip.c49
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_validate.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_point.c118
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h17
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.c511
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_arit.h23
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_conv.c218
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.c19
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_debug.h11
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_flow.c604
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_flow.h59
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c64
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.c1
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_init.h6
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_logic.c26
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_misc.cpp10
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_printf.c21
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_printf.h4
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_quad.c28
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.c755
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample.h135
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c621
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h9
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c694
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.c77
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_swizzle.h8
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi.h77
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c479
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c72
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c33
-rw-r--r--src/gallium/auxiliary/rbug/rbug_context.c20
-rw-r--r--src/gallium/auxiliary/rbug/rbug_core.c10
-rw-r--r--src/gallium/auxiliary/rbug/rbug_demarshal.c64
-rw-r--r--src/gallium/auxiliary/rbug/rbug_proto.h5
-rw-r--r--src/gallium/auxiliary/rbug/rbug_shader.c12
-rw-r--r--src/gallium/auxiliary/rbug/rbug_texture.c14
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_execmem.c1
-rw-r--r--src/gallium/auxiliary/target-helpers/inline_sw_helper.h39
-rw-r--r--src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h18
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c52
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.h9
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c6
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c9
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.h2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c56
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c16
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h24
-rw-r--r--src/gallium/auxiliary/util/u_atomic.h47
-rw-r--r--src/gallium/auxiliary/util/u_blitter.c47
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h6
-rw-r--r--src/gallium/auxiliary/util/u_format.csv3
-rw-r--r--src/gallium/auxiliary/util/u_format_zs.c53
-rw-r--r--src/gallium/auxiliary/util/u_format_zs.h16
-rw-r--r--src/gallium/auxiliary/util/u_index_modify.c127
-rw-r--r--src/gallium/auxiliary/util/u_index_modify.h41
-rw-r--r--src/gallium/auxiliary/util/u_math.h5
-rw-r--r--src/gallium/auxiliary/util/u_pack_color.h52
-rw-r--r--src/gallium/auxiliary/util/u_simple_list.h2
-rw-r--r--src/gallium/auxiliary/util/u_sse.h156
-rw-r--r--src/gallium/auxiliary/util/u_surface.c2
-rw-r--r--src/gallium/auxiliary/util/u_tile.c84
73 files changed, 4166 insertions, 1894 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 2de764c4ee..abd33f6eef 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -8,6 +8,7 @@ C_SOURCES = \
cso_cache/cso_context.c \
cso_cache/cso_hash.c \
draw/draw_context.c \
+ draw/draw_fs.c \
draw/draw_gs.c \
draw/draw_pipe.c \
draw/draw_pipe_aaline.c \
@@ -121,6 +122,7 @@ C_SOURCES = \
util/u_handle_table.c \
util/u_hash.c \
util/u_hash_table.c \
+ util/u_index_modify.c \
util/u_keymap.c \
util/u_linear.c \
util/u_linkage.c \
@@ -174,6 +176,7 @@ GALLIVM_SOURCES = \
gallivm/lp_bld_struct.c \
gallivm/lp_bld_swizzle.c \
gallivm/lp_bld_tgsi_aos.c \
+ gallivm/lp_bld_tgsi_info.c \
gallivm/lp_bld_tgsi_soa.c \
gallivm/lp_bld_type.c \
draw/draw_llvm.c \
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 294df30094..94cd74424a 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -54,6 +54,7 @@ source = [
'cso_cache/cso_context.c',
'cso_cache/cso_hash.c',
'draw/draw_context.c',
+ 'draw/draw_fs.c',
'draw/draw_gs.c',
'draw/draw_pipe.c',
'draw/draw_pipe_aaline.c',
@@ -170,6 +171,7 @@ source = [
'util/u_handle_table.c',
'util/u_hash.c',
'util/u_hash_table.c',
+ 'util/u_index_modify.c',
'util/u_keymap.c',
'util/u_linear.c',
'util/u_linkage.c',
@@ -225,6 +227,7 @@ if env['llvm']:
'gallivm/lp_bld_struct.c',
'gallivm/lp_bld_swizzle.c',
'gallivm/lp_bld_tgsi_aos.c',
+ 'gallivm/lp_bld_tgsi_info.c',
'gallivm/lp_bld_tgsi_soa.c',
'gallivm/lp_bld_type.c',
'draw/draw_llvm.c',
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 937b093479..40f654643b 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -413,6 +413,42 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable )
}
+
+/**
+ * Allocate an extra vertex/geometry shader vertex attribute.
+ * This is used by some of the optional draw module stages such
+ * as wide_point which may need to allocate additional generic/texcoord
+ * attributes.
+ */
+int
+draw_alloc_extra_vertex_attrib(struct draw_context *draw,
+ uint semantic_name, uint semantic_index)
+{
+ const int num_outputs = draw_current_shader_outputs(draw);
+ const int n = draw->extra_shader_outputs.num;
+
+ assert(n < Elements(draw->extra_shader_outputs.semantic_name));
+
+ draw->extra_shader_outputs.semantic_name[n] = semantic_name;
+ draw->extra_shader_outputs.semantic_index[n] = semantic_index;
+ draw->extra_shader_outputs.slot[n] = num_outputs + n;
+ draw->extra_shader_outputs.num++;
+
+ return draw->extra_shader_outputs.slot[n];
+}
+
+
+/**
+ * Remove all extra vertex attributes that were allocated with
+ * draw_alloc_extra_vertex_attrib().
+ */
+void
+draw_remove_extra_vertex_attribs(struct draw_context *draw)
+{
+ draw->extra_shader_outputs.num = 0;
+}
+
+
/**
* Ask the draw module for the location/slot of the given vertex attribute in
* a post-transformed vertex.
@@ -446,12 +482,12 @@ draw_find_shader_output(const struct draw_context *draw,
return i;
}
- /* XXX there may be more than one extra vertex attrib.
- * For example, simulated gl_FragCoord and gl_PointCoord.
- */
- if (draw->extra_shader_outputs.semantic_name == semantic_name &&
- draw->extra_shader_outputs.semantic_index == semantic_index) {
- return draw->extra_shader_outputs.slot;
+ /* Search the extra vertex attributes */
+ for (i = 0; i < draw->extra_shader_outputs.num; i++) {
+ if (draw->extra_shader_outputs.semantic_name[i] == semantic_name &&
+ draw->extra_shader_outputs.semantic_index[i] == semantic_index) {
+ return draw->extra_shader_outputs.slot[i];
+ }
}
return 0;
@@ -470,16 +506,18 @@ draw_find_shader_output(const struct draw_context *draw,
uint
draw_num_shader_outputs(const struct draw_context *draw)
{
- uint count = draw->vs.vertex_shader->info.num_outputs;
+ uint count;
/* If a geometry shader is present, its outputs go to the
* driver, else the vertex shader's outputs.
*/
if (draw->gs.geometry_shader)
count = draw->gs.geometry_shader->info.num_outputs;
+ else
+ count = draw->vs.vertex_shader->info.num_outputs;
+
+ count += draw->extra_shader_outputs.num;
- if (draw->extra_shader_outputs.slot > 0)
- count++;
return count;
}
@@ -671,6 +709,11 @@ draw_set_samplers(struct draw_context *draw,
draw->samplers[i] = NULL;
draw->num_samplers = num;
+
+#ifdef HAVE_LLVM
+ if (draw->llvm)
+ draw_llvm_set_sampler_state(draw);
+#endif
}
void
@@ -678,9 +721,9 @@ draw_set_mapped_texture(struct draw_context *draw,
unsigned sampler_idx,
uint32_t width, uint32_t height, uint32_t depth,
uint32_t last_level,
- uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
- uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
- const void *data[DRAW_MAX_TEXTURE_LEVELS])
+ uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
+ const void *data[PIPE_MAX_TEXTURE_LEVELS])
{
#ifdef HAVE_LLVM
if(draw->llvm)
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index 4f0d30123a..ff4f753604 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -46,9 +46,9 @@ struct draw_context;
struct draw_stage;
struct draw_vertex_shader;
struct draw_geometry_shader;
+struct draw_fragment_shader;
struct tgsi_sampler;
-#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */
struct draw_context *draw_create( struct pipe_context *pipe );
@@ -119,9 +119,9 @@ draw_set_mapped_texture(struct draw_context *draw,
unsigned sampler_idx,
uint32_t width, uint32_t height, uint32_t depth,
uint32_t last_level,
- uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
- uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
- const void *data[DRAW_MAX_TEXTURE_LEVELS]);
+ uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
+ const void *data[PIPE_MAX_TEXTURE_LEVELS]);
/*
@@ -138,6 +138,17 @@ void draw_delete_vertex_shader(struct draw_context *draw,
/*
+ * Fragment shader functions
+ */
+struct draw_fragment_shader *
+draw_create_fragment_shader(struct draw_context *draw,
+ const struct pipe_shader_state *shader);
+void draw_bind_fragment_shader(struct draw_context *draw,
+ struct draw_fragment_shader *dvs);
+void draw_delete_fragment_shader(struct draw_context *draw,
+ struct draw_fragment_shader *dvs);
+
+/*
* Geometry shader functions
*/
struct draw_geometry_shader *
diff --git a/src/gallium/auxiliary/draw/draw_fs.c b/src/gallium/auxiliary/draw/draw_fs.c
new file mode 100644
index 0000000000..1543bd86f1
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_fs.c
@@ -0,0 +1,73 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "draw_fs.h"
+#include "draw_private.h"
+#include "draw_context.h"
+
+
+struct draw_fragment_shader *
+draw_create_fragment_shader(struct draw_context *draw,
+ const struct pipe_shader_state *shader)
+{
+ struct draw_fragment_shader *dfs;
+
+ dfs = CALLOC_STRUCT(draw_fragment_shader);
+ if (dfs) {
+ dfs->base = *shader;
+ tgsi_scan_shader(shader->tokens, &dfs->info);
+ }
+
+ return dfs;
+}
+
+
+void
+draw_bind_fragment_shader(struct draw_context *draw,
+ struct draw_fragment_shader *dfs)
+{
+ draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
+
+ draw->fs.fragment_shader = dfs;
+}
+
+
+void
+draw_delete_fragment_shader(struct draw_context *draw,
+ struct draw_fragment_shader *dfs)
+{
+ FREE(dfs);
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_fs.h b/src/gallium/auxiliary/draw/draw_fs.h
new file mode 100644
index 0000000000..44995b8277
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_fs.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DRAW_FS_H
+#define DRAW_FS_H
+
+
+#include "tgsi/tgsi_scan.h"
+
+
+struct draw_fragment_shader
+{
+ struct pipe_shader_state base;
+ struct tgsi_shader_info info;
+};
+
+
+#endif /* DRAW_FS_H */
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 8759c38cab..d94340367c 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -44,6 +44,7 @@
#include "tgsi/tgsi_dump.h"
#include "util/u_cpu_detect.h"
+#include "util/u_math.h"
#include "util/u_pointer.h"
#include "util/u_string.h"
@@ -71,12 +72,17 @@ init_globals(struct draw_llvm *llvm)
elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
- LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
+ LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS);
elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
- LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
+ LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS);
elem_types[DRAW_JIT_TEXTURE_DATA] =
LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
- DRAW_MAX_TEXTURE_LEVELS);
+ PIPE_MAX_TEXTURE_LEVELS);
+ elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType();
+ elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType();
+ elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType();
+ elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] =
+ LLVMArrayType(LLVMFloatType(), 4);
texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
@@ -101,6 +107,18 @@ init_globals(struct draw_llvm *llvm)
LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
llvm->target, texture_type,
DRAW_JIT_TEXTURE_DATA);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_MIN_LOD);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_MAX_LOD);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_LOD_BIAS);
+ LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color,
+ llvm->target, texture_type,
+ DRAW_JIT_TEXTURE_BORDER_COLOR);
LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
llvm->target, texture_type);
@@ -1048,9 +1066,9 @@ draw_llvm_set_mapped_texture(struct draw_context *draw,
unsigned sampler_idx,
uint32_t width, uint32_t height, uint32_t depth,
uint32_t last_level,
- uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
- uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
- const void *data[DRAW_MAX_TEXTURE_LEVELS])
+ uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
+ const void *data[PIPE_MAX_TEXTURE_LEVELS])
{
unsigned j;
struct draw_jit_texture *jit_tex;
@@ -1072,6 +1090,25 @@ draw_llvm_set_mapped_texture(struct draw_context *draw,
}
}
+
+void
+draw_llvm_set_sampler_state(struct draw_context *draw)
+{
+ unsigned i;
+
+ for (i = 0; i < draw->num_samplers; i++) {
+ struct draw_jit_texture *jit_tex = &draw->llvm->jit_context.textures[i];
+
+ if (draw->samplers[i]) {
+ jit_tex->min_lod = draw->samplers[i]->min_lod;
+ jit_tex->max_lod = draw->samplers[i]->max_lod;
+ jit_tex->lod_bias = draw->samplers[i]->lod_bias;
+ COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color);
+ }
+ }
+}
+
+
void
draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
{
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index 6196b2f983..de89b657f3 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -41,7 +41,6 @@
#include <llvm-c/Target.h>
#include <llvm-c/ExecutionEngine.h>
-#define DRAW_MAX_TEXTURE_LEVELS 13 /* 4K x 4K for now */
struct draw_llvm;
struct llvm_vertex_shader;
@@ -52,9 +51,13 @@ struct draw_jit_texture
uint32_t height;
uint32_t depth;
uint32_t last_level;
- uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS];
- uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS];
- const void *data[DRAW_MAX_TEXTURE_LEVELS];
+ uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS];
+ uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS];
+ const void *data[PIPE_MAX_TEXTURE_LEVELS];
+ float min_lod;
+ float max_lod;
+ float lod_bias;
+ float border_color[4];
};
enum {
@@ -65,6 +68,10 @@ enum {
DRAW_JIT_TEXTURE_ROW_STRIDE,
DRAW_JIT_TEXTURE_IMG_STRIDE,
DRAW_JIT_TEXTURE_DATA,
+ DRAW_JIT_TEXTURE_MIN_LOD,
+ DRAW_JIT_TEXTURE_MAX_LOD,
+ DRAW_JIT_TEXTURE_LOD_BIAS,
+ DRAW_JIT_TEXTURE_BORDER_COLOR,
DRAW_JIT_TEXTURE_NUM_FIELDS /* number of fields above */
};
@@ -275,12 +282,15 @@ draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
LLVMValueRef context_ptr);
void
+draw_llvm_set_sampler_state(struct draw_context *draw);
+
+void
draw_llvm_set_mapped_texture(struct draw_context *draw,
unsigned sampler_idx,
uint32_t width, uint32_t height, uint32_t depth,
uint32_t last_level,
- uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
- uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
- const void *data[DRAW_MAX_TEXTURE_LEVELS]);
+ uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
+ uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
+ const void *data[PIPE_MAX_TEXTURE_LEVELS]);
#endif
diff --git a/src/gallium/auxiliary/draw/draw_llvm_sample.c b/src/gallium/auxiliary/draw/draw_llvm_sample.c
index e9811010db..ac1fbb179c 100644
--- a/src/gallium/auxiliary/draw/draw_llvm_sample.c
+++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c
@@ -146,6 +146,10 @@ DRAW_LLVM_TEXTURE_MEMBER(last_level, DRAW_JIT_TEXTURE_LAST_LEVEL, TRUE)
DRAW_LLVM_TEXTURE_MEMBER(row_stride, DRAW_JIT_TEXTURE_ROW_STRIDE, FALSE)
DRAW_LLVM_TEXTURE_MEMBER(img_stride, DRAW_JIT_TEXTURE_IMG_STRIDE, FALSE)
DRAW_LLVM_TEXTURE_MEMBER(data_ptr, DRAW_JIT_TEXTURE_DATA, FALSE)
+DRAW_LLVM_TEXTURE_MEMBER(min_lod, DRAW_JIT_TEXTURE_MIN_LOD, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(max_lod, DRAW_JIT_TEXTURE_MAX_LOD, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(lod_bias, DRAW_JIT_TEXTURE_LOD_BIAS, TRUE)
+DRAW_LLVM_TEXTURE_MEMBER(border_color, DRAW_JIT_TEXTURE_BORDER_COLOR, FALSE)
static void
@@ -207,6 +211,10 @@ draw_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
sampler->dynamic_state.base.row_stride = draw_llvm_texture_row_stride;
sampler->dynamic_state.base.img_stride = draw_llvm_texture_img_stride;
sampler->dynamic_state.base.data_ptr = draw_llvm_texture_data_ptr;
+ sampler->dynamic_state.base.min_lod = draw_llvm_texture_min_lod;
+ sampler->dynamic_state.base.max_lod = draw_llvm_texture_max_lod;
+ sampler->dynamic_state.base.lod_bias = draw_llvm_texture_lod_bias;
+ sampler->dynamic_state.base.border_color = draw_llvm_texture_border_color;
sampler->dynamic_state.static_state = static_state;
sampler->dynamic_state.context_ptr = context_ptr;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index eac21110be..d1aba76309 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -688,10 +688,9 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
aaline->tex_slot = draw_current_shader_outputs(draw);
aaline->pos_slot = draw_current_shader_position_output(draw);;
- /* advertise the extra post-transformed vertex attribute */
- draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
- draw->extra_shader_outputs.semantic_index = aaline->fs->generic_attrib;
- draw->extra_shader_outputs.slot = aaline->tex_slot;
+ /* allocate the extra post-transformed vertex attribute */
+ (void) draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC,
+ aaline->fs->generic_attrib);
/* how many samplers? */
/* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
@@ -744,7 +743,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags)
draw->suspend_flushing = FALSE;
- draw->extra_shader_outputs.slot = 0;
+ draw_remove_extra_vertex_attribs(draw);
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index d406a86ccb..5ea552f51c 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -701,9 +701,9 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
aapoint->pos_slot = draw_current_shader_position_output(draw);
- draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
- draw->extra_shader_outputs.semantic_index = aapoint->fs->generic_attrib;
- draw->extra_shader_outputs.slot = aapoint->tex_slot;
+ /* allocate the extra post-transformed vertex attribute */
+ (void) draw_alloc_extra_vertex_attrib(draw, TGSI_SEMANTIC_GENERIC,
+ aapoint->fs->generic_attrib);
/* find psize slot in post-transform vertex */
aapoint->psize_slot = -1;
@@ -754,7 +754,7 @@ aapoint_flush(struct draw_stage *stage, unsigned flags)
draw->suspend_flushing = FALSE;
- draw->extra_shader_outputs.slot = 0;
+ draw_remove_extra_vertex_attribs(draw);
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index 8a3d499feb..a10d8e9edc 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -263,6 +263,8 @@ do_clip_tri( struct draw_stage *stage,
clipmask &= ~(1<<plane_idx);
assert(n < MAX_CLIPPED_VERTICES);
+ if (n >= MAX_CLIPPED_VERTICES)
+ return;
inlist[n] = inlist[0]; /* prevent rotation of vertices */
for (i = 1; i <= n; i++) {
@@ -272,16 +274,22 @@ do_clip_tri( struct draw_stage *stage,
if (!IS_NEGATIVE(dp_prev)) {
assert(outcount < MAX_CLIPPED_VERTICES);
+ if (outcount >= MAX_CLIPPED_VERTICES)
+ return;
outlist[outcount++] = vert_prev;
}
if (DIFFERENT_SIGNS(dp, dp_prev)) {
struct vertex_header *new_vert;
- assert(tmpnr < MAX_CLIPPED_VERTICES+1);
+ assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
+ if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
+ return;
new_vert = clipper->stage.tmp[tmpnr++];
assert(outcount < MAX_CLIPPED_VERTICES);
+ if (outcount >= MAX_CLIPPED_VERTICES)
+ return;
outlist[outcount++] = new_vert;
if (IS_NEGATIVE(dp)) {
@@ -321,27 +329,32 @@ do_clip_tri( struct draw_stage *stage,
/* If flat-shading, copy provoking vertex color to polygon vertex[0]
*/
- if (clipper->flat) {
- if (stage->draw->rasterizer->flatshade_first) {
- if (inlist[0] != header->v[0]) {
- assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
- inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
- copy_colors(stage, inlist[0], header->v[0]);
+ if (n >= 3) {
+ if (clipper->flat) {
+ if (stage->draw->rasterizer->flatshade_first) {
+ if (inlist[0] != header->v[0]) {
+ assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
+ if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
+ return;
+ inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
+ copy_colors(stage, inlist[0], header->v[0]);
+ }
}
- }
- else {
- if (inlist[0] != header->v[2]) {
- assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
- inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
- copy_colors(stage, inlist[0], header->v[2]);
+ else {
+ if (inlist[0] != header->v[2]) {
+ assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
+ if (tmpnr >= MAX_CLIPPED_VERTICES + 1)
+ return;
+ inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
+ copy_colors(stage, inlist[0], header->v[2]);
+ }
}
}
- }
-
- /* Emit the polygon as triangles to the setup stage:
- */
- if (n >= 3)
+
+ /* Emit the polygon as triangles to the setup stage:
+ */
emit_poly( stage, inlist, n, header );
+ }
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c
index 8b92543987..c575a8ac7c 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_validate.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -172,7 +172,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
wide_lines = (rast->line_width > draw->pipeline.wide_line_threshold
&& !rast->line_smooth);
- /* drawing large points? */
+ /* drawing large/sprite points (but not AA points)? */
if (rast->sprite_coord_enable && draw->pipeline.point_sprite)
wide_points = TRUE;
else if (rast->point_smooth && draw->pipeline.aapoint)
@@ -207,7 +207,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
precalc_flat = TRUE;
}
- if (wide_points || rast->sprite_coord_enable) {
+ if (wide_points) {
draw->pipeline.wide_point->next = next;
next = draw->pipeline.wide_point;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index ee2945c7c9..3646c6a714 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -57,26 +57,24 @@
#include "util/u_memory.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
+#include "draw_fs.h"
#include "draw_vs.h"
#include "draw_pipe.h"
struct widepoint_stage {
- struct draw_stage stage;
+ struct draw_stage stage; /**< base class */
float half_point_size;
float xbias;
float ybias;
- uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS];
- uint texcoord_enable[PIPE_MAX_SHADER_OUTPUTS];
- uint num_texcoords;
- uint texcoord_mode;
+ /** for automatic texcoord generation/replacement */
+ uint num_texcoord_gen;
+ uint texcoord_gen_slot[PIPE_MAX_SHADER_OUTPUTS];
int psize_slot;
-
- int point_coord_fs_input; /**< input for pointcoord */
};
@@ -96,30 +94,20 @@ widepoint_stage( struct draw_stage *stage )
static void set_texcoords(const struct widepoint_stage *wide,
struct vertex_header *v, const float tc[4])
{
+ const struct draw_context *draw = wide->stage.draw;
+ const struct pipe_rasterizer_state *rast = draw->rasterizer;
+ const uint texcoord_mode = rast->sprite_coord_mode;
uint i;
- for (i = 0; i < wide->num_texcoords; i++) {
- if (wide->texcoord_enable[i]) {
- uint j = wide->texcoord_slot[i];
- v->data[j][0] = tc[0];
- if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
- v->data[j][1] = 1.0f - tc[1];
- else
- v->data[j][1] = tc[1];
- v->data[j][2] = tc[2];
- v->data[j][3] = tc[3];
- }
- }
- if (wide->point_coord_fs_input >= 0) {
- /* put gl_PointCoord into the extra vertex slot */
- uint slot = wide->stage.draw->extra_shader_outputs.slot;
+ for (i = 0; i < wide->num_texcoord_gen; i++) {
+ const uint slot = wide->texcoord_gen_slot[i];
v->data[slot][0] = tc[0];
- if (wide->texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
+ if (texcoord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
v->data[slot][1] = 1.0f - tc[1];
else
v->data[slot][1] = tc[1];
- v->data[slot][2] = 0.0F;
- v->data[slot][3] = 1.0F;
+ v->data[slot][2] = tc[2];
+ v->data[slot][3] = tc[3];
}
}
@@ -201,18 +189,9 @@ static void widepoint_point( struct draw_stage *stage,
}
-static int
-find_pntc_input_attrib(struct draw_context *draw)
-{
- /* Scan the fragment program's input decls to find the pointcoord
- * attribute. The xy components will store the point coord.
- */
- return 0; /* XXX fix this */
-}
-
-
-static void widepoint_first_point( struct draw_stage *stage,
- struct prim_header *header )
+static void
+widepoint_first_point(struct draw_stage *stage,
+ struct prim_header *header)
{
struct widepoint_stage *wide = widepoint_stage(stage);
struct draw_context *draw = stage->draw;
@@ -244,31 +223,49 @@ static void widepoint_first_point( struct draw_stage *stage,
stage->point = draw_pipe_passthrough_point;
}
+ draw_remove_extra_vertex_attribs(draw);
+
if (rast->point_quad_rasterization) {
- /* find vertex shader texcoord outputs */
- const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
- uint i, j = 0;
- wide->texcoord_mode = rast->sprite_coord_mode;
- for (i = 0; i < vs->info.num_outputs; i++) {
- if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
- wide->texcoord_slot[j] = i;
- wide->texcoord_enable[j] = (rast->sprite_coord_enable >> j) & 1;
- j++;
+ const struct draw_fragment_shader *fs = draw->fs.fragment_shader;
+ uint i;
+
+ wide->num_texcoord_gen = 0;
+
+ /* Loop over fragment shader inputs looking for generic inputs
+ * for which bit 'k' in sprite_coord_enable is set.
+ */
+ for (i = 0; i < fs->info.num_inputs; i++) {
+ if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
+ const int generic_index = fs->info.input_semantic_index[i];
+ /* Note that sprite_coord enable is a bitfield of
+ * PIPE_MAX_SHADER_OUTPUTS bits.
+ */
+ if (generic_index < PIPE_MAX_SHADER_OUTPUTS &&
+ (rast->sprite_coord_enable & (1 << generic_index))) {
+ /* OK, this generic attribute needs to be replaced with a
+ * texcoord (see above).
+ */
+ int slot = draw_find_shader_output(draw,
+ TGSI_SEMANTIC_GENERIC,
+ generic_index);
+
+ if (slot > 0) {
+ /* there's already a post-vertex shader attribute
+ * for this fragment shader input attribute.
+ */
+ }
+ else {
+ /* need to allocate a new post-vertex shader attribute */
+ slot = draw_alloc_extra_vertex_attrib(draw,
+ TGSI_SEMANTIC_GENERIC,
+ generic_index);
+ }
+
+ /* add this slot to the texcoord-gen list */
+ wide->texcoord_gen_slot[wide->num_texcoord_gen++] = slot;
+ }
}
}
- wide->num_texcoords = j;
-
- /* find fragment shader PointCoord input */
- wide->point_coord_fs_input = find_pntc_input_attrib(draw);
-
- /* setup extra vp output (point coord implemented as a texcoord) */
- draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
- draw->extra_shader_outputs.semantic_index = 0;
- draw->extra_shader_outputs.slot = draw_current_shader_outputs(draw);
- }
- else {
- wide->point_coord_fs_input = -1;
- draw->extra_shader_outputs.slot = 0;
}
wide->psize_slot = -1;
@@ -295,7 +292,8 @@ static void widepoint_flush( struct draw_stage *stage, unsigned flags )
stage->point = widepoint_first_point;
stage->next->flush( stage->next, flags );
- stage->draw->extra_shader_outputs.slot = 0;
+
+ draw_remove_extra_vertex_attribs(draw);
/* restore original rasterizer state */
if (draw->rast_handle) {
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 362f563ba6..d417f825a0 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -250,6 +250,11 @@ struct draw_context
struct tgsi_sampler **samplers;
} gs;
+ /** Fragment shader state */
+ struct {
+ struct draw_fragment_shader *fragment_shader;
+ } fs;
+
/** Stream output (vertex feedback) state */
struct {
struct pipe_stream_output_state state;
@@ -266,9 +271,10 @@ struct draw_context
/* If a prim stage introduces new vertex attributes, they'll be stored here
*/
struct {
- uint semantic_name;
- uint semantic_index;
- int slot;
+ uint num;
+ uint semantic_name[10];
+ uint semantic_index[10];
+ uint slot[10];
} extra_shader_outputs;
unsigned reduced_prim;
@@ -362,6 +368,11 @@ void draw_gs_destroy( struct draw_context *draw );
uint draw_current_shader_outputs(const struct draw_context *draw);
uint draw_current_shader_position_output(const struct draw_context *draw);
+int draw_alloc_extra_vertex_attrib(struct draw_context *draw,
+ uint semantic_name, uint semantic_index);
+void draw_remove_extra_vertex_attribs(struct draw_context *draw);
+
+
/*******************************************************************************
* Vertex processing (was passthrough) code:
*/
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index dce3c3745b..00f419a486 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -614,17 +614,15 @@ lp_build_div(struct lp_build_context *bld,
/**
- * Linear interpolation.
- *
- * This also works for integer values with a few caveats.
+ * Linear interpolation -- without any checks.
*
* @sa http://www.stereopsis.com/doubleblend.html
*/
-LLVMValueRef
-lp_build_lerp(struct lp_build_context *bld,
- LLVMValueRef x,
- LLVMValueRef v0,
- LLVMValueRef v1)
+static INLINE LLVMValueRef
+lp_build_lerp_simple(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef v0,
+ LLVMValueRef v1)
{
LLVMValueRef delta;
LLVMValueRef res;
@@ -639,12 +637,80 @@ lp_build_lerp(struct lp_build_context *bld,
res = lp_build_add(bld, v0, res);
- if(bld->type.fixed)
+ if (bld->type.fixed) {
/* XXX: This step is necessary for lerping 8bit colors stored on 16bits,
* but it will be wrong for other uses. Basically we need a more
* powerful lp_type, capable of further distinguishing the values
* interpretation from the value storage. */
res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(bld->type, (1 << bld->type.width/2) - 1), "");
+ }
+
+ return res;
+}
+
+
+/**
+ * Linear interpolation.
+ */
+LLVMValueRef
+lp_build_lerp(struct lp_build_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef v0,
+ LLVMValueRef v1)
+{
+ const struct lp_type type = bld->type;
+ LLVMValueRef res;
+
+ assert(lp_check_value(type, x));
+ assert(lp_check_value(type, v0));
+ assert(lp_check_value(type, v1));
+
+ if (type.norm) {
+ struct lp_type wide_type;
+ struct lp_build_context wide_bld;
+ LLVMValueRef xl, xh, v0l, v0h, v1l, v1h, resl, resh;
+ LLVMValueRef shift;
+
+ assert(type.length >= 2);
+ assert(!type.sign);
+
+ /*
+ * Create a wider type, enough to hold the intermediate result of the
+ * multiplication.
+ */
+ memset(&wide_type, 0, sizeof wide_type);
+ wide_type.fixed = TRUE;
+ wide_type.width = type.width*2;
+ wide_type.length = type.length/2;
+
+ lp_build_context_init(&wide_bld, bld->builder, wide_type);
+
+ lp_build_unpack2(bld->builder, type, wide_type, x, &xl, &xh);
+ lp_build_unpack2(bld->builder, type, wide_type, v0, &v0l, &v0h);
+ lp_build_unpack2(bld->builder, type, wide_type, v1, &v1l, &v1h);
+
+ /*
+ * Scale x from [0, 255] to [0, 256]
+ */
+
+ shift = lp_build_const_int_vec(wide_type, type.width - 1);
+
+ xl = lp_build_add(&wide_bld, xl,
+ LLVMBuildAShr(bld->builder, xl, shift, ""));
+ xh = lp_build_add(&wide_bld, xh,
+ LLVMBuildAShr(bld->builder, xh, shift, ""));
+
+ /*
+ * Lerp both halves.
+ */
+
+ resl = lp_build_lerp_simple(&wide_bld, xl, v0l, v1l);
+ resh = lp_build_lerp_simple(&wide_bld, xh, v0h, v1h);
+
+ res = lp_build_pack2(bld->builder, wide_type, type, resl, resh);
+ } else {
+ res = lp_build_lerp_simple(bld, x, v0, v1);
+ }
return res;
}
@@ -923,35 +989,122 @@ lp_build_round_sse41(struct lp_build_context *bld,
enum lp_build_round_sse41_mode mode)
{
const struct lp_type type = bld->type;
- LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMTypeRef i32t = LLVMInt32Type();
const char *intrinsic;
+ LLVMValueRef res;
assert(type.floating);
- assert(type.width*type.length == 128);
+
assert(lp_check_value(type, a));
assert(util_cpu_caps.has_sse4_1);
- switch(type.width) {
- case 32:
- intrinsic = "llvm.x86.sse41.round.ps";
- break;
- case 64:
- intrinsic = "llvm.x86.sse41.round.pd";
- break;
- default:
- assert(0);
- return bld->undef;
+ if (type.length == 1) {
+ LLVMTypeRef vec_type;
+ LLVMValueRef undef;
+ LLVMValueRef args[3];
+ LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
+
+ switch(type.width) {
+ case 32:
+ intrinsic = "llvm.x86.sse41.round.ss";
+ break;
+ case 64:
+ intrinsic = "llvm.x86.sse41.round.sd";
+ break;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+
+ vec_type = LLVMVectorType(bld->elem_type, 4);
+
+ undef = LLVMGetUndef(vec_type);
+
+ args[0] = undef;
+ args[1] = LLVMBuildInsertElement(bld->builder, undef, a, index0, "");
+ args[2] = LLVMConstInt(i32t, mode, 0);
+
+ res = lp_build_intrinsic(bld->builder, intrinsic,
+ vec_type, args, Elements(args));
+
+ res = LLVMBuildExtractElement(bld->builder, res, index0, "");
+ }
+ else {
+ assert(type.width*type.length == 128);
+
+ switch(type.width) {
+ case 32:
+ intrinsic = "llvm.x86.sse41.round.ps";
+ break;
+ case 64:
+ intrinsic = "llvm.x86.sse41.round.pd";
+ break;
+ default:
+ assert(0);
+ return bld->undef;
+ }
+
+ res = lp_build_intrinsic_binary(bld->builder, intrinsic,
+ bld->vec_type, a,
+ LLVMConstInt(i32t, mode, 0));
+ }
+
+ return res;
+}
+
+
+static INLINE LLVMValueRef
+lp_build_iround_nearest_sse2(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMTypeRef ret_type = lp_build_int_vec_type(type);
+ const char *intrinsic;
+ LLVMValueRef res;
+
+ assert(type.floating);
+ /* using the double precision conversions is a bit more complicated */
+ assert(type.width == 32);
+
+ assert(lp_check_value(type, a));
+ assert(util_cpu_caps.has_sse2);
+
+ /* This is relying on MXCSR rounding mode, which should always be nearest. */
+ if (type.length == 1) {
+ LLVMTypeRef vec_type;
+ LLVMValueRef undef;
+ LLVMValueRef arg;
+ LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
+
+ vec_type = LLVMVectorType(bld->elem_type, 4);
+
+ intrinsic = "llvm.x86.sse.cvtss2si";
+
+ undef = LLVMGetUndef(vec_type);
+
+ arg = LLVMBuildInsertElement(bld->builder, undef, a, index0, "");
+
+ res = lp_build_intrinsic_unary(bld->builder, intrinsic,
+ ret_type, arg);
+ }
+ else {
+ assert(type.width*type.length == 128);
+
+ intrinsic = "llvm.x86.sse2.cvtps2dq";
+
+ res = lp_build_intrinsic_unary(bld->builder, intrinsic,
+ ret_type, a);
}
- return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
- LLVMConstInt(LLVMInt32Type(), mode, 0));
+ return res;
}
/**
- * Return the integer part of a float (vector) value. The returned value is
- * a float (vector).
- * Ex: trunc(-1.5) = 1.0
+ * Return the integer part of a float (vector) value (== round toward zero).
+ * The returned value is a float (vector).
+ * Ex: trunc(-1.5) = -1.0
*/
LLVMValueRef
lp_build_trunc(struct lp_build_context *bld,
@@ -962,8 +1115,10 @@ lp_build_trunc(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
@@ -990,8 +1145,10 @@ lp_build_round(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
@@ -1016,8 +1173,10 @@ lp_build_floor(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
@@ -1042,8 +1201,10 @@ lp_build_ceil(struct lp_build_context *bld,
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128)
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
+ }
else {
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef res;
@@ -1068,9 +1229,9 @@ lp_build_fract(struct lp_build_context *bld,
/**
- * Return the integer part of a float (vector) value. The returned value is
- * an integer (vector).
- * Ex: itrunc(-1.5) = 1
+ * Return the integer part of a float (vector) value (== round toward zero).
+ * The returned value is an integer (vector).
+ * Ex: itrunc(-1.5) = -1
*/
LLVMValueRef
lp_build_itrunc(struct lp_build_context *bld,
@@ -1097,31 +1258,40 @@ lp_build_iround(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
+ if (util_cpu_caps.has_sse2 &&
+ ((type.width == 32) && (type.length == 1 || type.length == 4))) {
+ return lp_build_iround_nearest_sse2(bld, a);
+ }
+ else if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
}
else {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
- LLVMValueRef sign;
LLVMValueRef half;
- /* get sign bit */
- sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- sign = LLVMBuildAnd(bld->builder, sign, mask, "");
-
- /* sign * 0.5 */
half = lp_build_const_vec(type, 0.5);
- half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
- half = LLVMBuildOr(bld->builder, sign, half, "");
- half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
+
+ if (type.sign) {
+ LLVMTypeRef vec_type = bld->vec_type;
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+
+ /* get sign bit */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+
+ /* sign * 0.5 */
+ half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
+ half = LLVMBuildOr(bld->builder, sign, half, "");
+ half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
+ }
res = LLVMBuildFAdd(bld->builder, a, half, "");
}
@@ -1142,37 +1312,42 @@ lp_build_ifloor(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
}
else {
- /* Take the sign bit and add it to 1 constant */
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- unsigned mantissa = lp_mantissa(type);
- LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
- LLVMValueRef sign;
- LLVMValueRef offset;
-
- /* sign = a < 0 ? ~0 : 0 */
- sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- sign = LLVMBuildAnd(bld->builder, sign, mask, "");
- sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign");
-
- /* offset = -0.99999(9)f */
- offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
- offset = LLVMConstBitCast(offset, int_vec_type);
-
- /* offset = a < 0 ? offset : 0.0f */
- offset = LLVMBuildAnd(bld->builder, offset, sign, "");
- offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");
-
- res = LLVMBuildFAdd(bld->builder, a, offset, "ifloor.res");
+ res = a;
+
+ if (type.sign) {
+ /* Take the sign bit and add it to 1 constant */
+ LLVMTypeRef vec_type = bld->vec_type;
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+ LLVMValueRef offset;
+
+ /* sign = a < 0 ? ~0 : 0 */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "ifloor.sign");
+
+ /* offset = -0.99999(9)f */
+ offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
+ offset = LLVMConstBitCast(offset, int_vec_type);
+
+ /* offset = a < 0 ? offset : 0.0f */
+ offset = LLVMBuildAnd(bld->builder, offset, sign, "");
+ offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "ifloor.offset");
+
+ res = LLVMBuildFAdd(bld->builder, res, offset, "ifloor.res");
+ }
}
/* round to nearest (toward zero) */
@@ -1192,35 +1367,39 @@ lp_build_iceil(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMValueRef res;
assert(type.floating);
assert(lp_check_value(type, a));
- if (util_cpu_caps.has_sse4_1 && type.width*type.length == 128) {
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
}
else {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMTypeRef vec_type = bld->vec_type;
unsigned mantissa = lp_mantissa(type);
- LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
- LLVMValueRef sign;
LLVMValueRef offset;
- /* sign = a < 0 ? 0 : ~0 */
- sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- sign = LLVMBuildAnd(bld->builder, sign, mask, "");
- sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign");
- sign = LLVMBuildNot(bld->builder, sign, "iceil.not");
-
/* offset = 0.99999(9)f */
offset = lp_build_const_vec(type, (double)(((unsigned long long)1 << mantissa) - 10)/((unsigned long long)1 << mantissa));
- offset = LLVMConstBitCast(offset, int_vec_type);
- /* offset = a < 0 ? 0.0 : offset */
- offset = LLVMBuildAnd(bld->builder, offset, sign, "");
- offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
+ if (type.sign) {
+ LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+
+ /* sign = a < 0 ? 0 : ~0 */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "iceil.sign");
+ sign = LLVMBuildNot(bld->builder, sign, "iceil.not");
+
+ /* offset = a < 0 ? 0.0 : offset */
+ offset = LLVMConstBitCast(offset, int_vec_type);
+ offset = LLVMBuildAnd(bld->builder, offset, sign, "");
+ offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "iceil.offset");
+ }
res = LLVMBuildFAdd(bld->builder, a, offset, "iceil.res");
}
@@ -1232,6 +1411,46 @@ lp_build_iceil(struct lp_build_context *bld,
}
+/**
+ * Combined ifloor() & fract().
+ *
+ * Preferred to calling the functions separately, as it will ensure that the
+ * stratergy (floor() vs ifloor()) that results in less redundant work is used.
+ */
+void
+lp_build_ifloor_fract(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef *out_ipart,
+ LLVMValueRef *out_fpart)
+{
+ const struct lp_type type = bld->type;
+ LLVMValueRef ipart;
+
+ assert(type.floating);
+ assert(lp_check_value(type, a));
+
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
+ /*
+ * floor() is easier.
+ */
+
+ ipart = lp_build_floor(bld, a);
+ *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart");
+ *out_ipart = LLVMBuildFPToSI(bld->builder, ipart, bld->int_vec_type, "ipart");
+ }
+ else {
+ /*
+ * ifloor() is easier.
+ */
+
+ *out_ipart = lp_build_ifloor(bld, a);
+ ipart = LLVMBuildSIToFP(bld->builder, *out_ipart, bld->vec_type, "ipart");
+ *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart");
+ }
+}
+
+
LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a)
@@ -2041,6 +2260,71 @@ lp_build_exp2(struct lp_build_context *bld,
/**
+ * Extract the exponent of a IEEE-754 floating point value.
+ *
+ * Optionally apply an integer bias.
+ *
+ * Result is an integer value with
+ *
+ * ifloor(log2(x)) + bias
+ */
+LLVMValueRef
+lp_build_extract_exponent(struct lp_build_context *bld,
+ LLVMValueRef x,
+ int bias)
+{
+ const struct lp_type type = bld->type;
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef res;
+
+ assert(type.floating);
+
+ assert(lp_check_value(bld->type, x));
+
+ x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, "");
+
+ res = LLVMBuildLShr(bld->builder, x, lp_build_const_int_vec(type, mantissa), "");
+ res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(type, 255), "");
+ res = LLVMBuildSub(bld->builder, res, lp_build_const_int_vec(type, 127 - bias), "");
+
+ return res;
+}
+
+
+/**
+ * Extract the mantissa of the a floating.
+ *
+ * Result is a floating point value with
+ *
+ * x / floor(log2(x))
+ */
+LLVMValueRef
+lp_build_extract_mantissa(struct lp_build_context *bld,
+ LLVMValueRef x)
+{
+ const struct lp_type type = bld->type;
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef mantmask = lp_build_const_int_vec(type, (1ULL << mantissa) - 1);
+ LLVMValueRef one = LLVMConstBitCast(bld->one, bld->int_vec_type);
+ LLVMValueRef res;
+
+ assert(lp_check_value(bld->type, x));
+
+ assert(type.floating);
+
+ x = LLVMBuildBitCast(bld->builder, x, bld->int_vec_type, "");
+
+ /* res = x / 2**ipart */
+ res = LLVMBuildAnd(bld->builder, x, mantmask, "");
+ res = LLVMBuildOr(bld->builder, res, one, "");
+ res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, "");
+
+ return res;
+}
+
+
+
+/**
* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[
* These coefficients can be generate with
* http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html
@@ -2159,3 +2443,62 @@ lp_build_log2(struct lp_build_context *bld,
lp_build_log2_approx(bld, x, NULL, NULL, &res);
return res;
}
+
+
+/**
+ * Faster (and less accurate) log2.
+ *
+ * log2(x) = floor(log2(x)) - 1 + x / 2**floor(log2(x))
+ *
+ * Piece-wise linear approximation, with exact results when x is a
+ * power of two.
+ *
+ * See http://www.flipcode.com/archives/Fast_log_Function.shtml
+ */
+LLVMValueRef
+lp_build_fast_log2(struct lp_build_context *bld,
+ LLVMValueRef x)
+{
+ LLVMValueRef ipart;
+ LLVMValueRef fpart;
+
+ assert(lp_check_value(bld->type, x));
+
+ assert(bld->type.floating);
+
+ /* ipart = floor(log2(x)) - 1 */
+ ipart = lp_build_extract_exponent(bld, x, -1);
+ ipart = LLVMBuildSIToFP(bld->builder, ipart, bld->vec_type, "");
+
+ /* fpart = x / 2**ipart */
+ fpart = lp_build_extract_mantissa(bld, x);
+
+ /* ipart + fpart */
+ return LLVMBuildFAdd(bld->builder, ipart, fpart, "");
+}
+
+
+/**
+ * Fast implementation of iround(log2(x)).
+ *
+ * Not an approximation -- it should give accurate results all the time.
+ */
+LLVMValueRef
+lp_build_ilog2(struct lp_build_context *bld,
+ LLVMValueRef x)
+{
+ LLVMValueRef sqrt2 = lp_build_const_vec(bld->type, M_SQRT2);
+ LLVMValueRef ipart;
+
+ assert(bld->type.floating);
+
+ assert(lp_check_value(bld->type, x));
+
+ /* x * 2^(0.5) i.e., add 0.5 to the log2(x) */
+ x = LLVMBuildFMul(bld->builder, x, sqrt2, "");
+
+ /* ipart = floor(log2(x) + 0.5) */
+ ipart = lp_build_extract_exponent(bld, x, 0);
+
+ return ipart;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 31efa9921c..c78b61decf 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -171,6 +171,12 @@ LLVMValueRef
lp_build_itrunc(struct lp_build_context *bld,
LLVMValueRef a);
+void
+lp_build_ifloor_fract(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef *out_ipart,
+ LLVMValueRef *out_fpart);
+
LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a);
@@ -209,9 +215,26 @@ lp_build_exp2(struct lp_build_context *bld,
LLVMValueRef a);
LLVMValueRef
+lp_build_extract_exponent(struct lp_build_context *bld,
+ LLVMValueRef x,
+ int bias);
+
+LLVMValueRef
+lp_build_extract_mantissa(struct lp_build_context *bld,
+ LLVMValueRef x);
+
+LLVMValueRef
lp_build_log2(struct lp_build_context *bld,
LLVMValueRef a);
+LLVMValueRef
+lp_build_fast_log2(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_ilog2(struct lp_build_context *bld,
+ LLVMValueRef x);
+
void
lp_build_exp2_approx(struct lp_build_context *bld,
LLVMValueRef x,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 8b477313d4..6967dd2622 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -63,6 +63,7 @@
#include "util/u_debug.h"
#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
@@ -96,58 +97,104 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type);
LLVMValueRef res;
unsigned mantissa;
- unsigned n;
- unsigned long long ubound;
- unsigned long long mask;
- double scale;
- double bias;
assert(src_type.floating);
+ assert(dst_width <= src_type.width);
+ src_type.sign = FALSE;
mantissa = lp_mantissa(src_type);
- /* We cannot carry more bits than the mantissa */
- n = MIN2(mantissa, dst_width);
+ if (dst_width <= mantissa) {
+ /*
+ * Apply magic coefficients that will make the desired result to appear
+ * in the lowest significant bits of the mantissa, with correct rounding.
+ *
+ * This only works if the destination width fits in the mantissa.
+ */
- /* This magic coefficients will make the desired result to appear in the
- * lowest significant bits of the mantissa.
- */
- ubound = ((unsigned long long)1 << n);
- mask = ubound - 1;
- scale = (double)mask/ubound;
- bias = (double)((unsigned long long)1 << (mantissa - n));
+ unsigned long long ubound;
+ unsigned long long mask;
+ double scale;
+ double bias;
- res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
- res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), "");
- res = LLVMBuildBitCast(builder, res, int_vec_type, "");
+ ubound = (1ULL << dst_width);
+ mask = ubound - 1;
+ scale = (double)mask/ubound;
+ bias = (double)(1ULL << (mantissa - dst_width));
- if(dst_width > n) {
- int shift = dst_width - n;
- res = LLVMBuildShl(builder, res, lp_build_const_int_vec(src_type, shift), "");
+ res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
+ res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), "");
+ res = LLVMBuildBitCast(builder, res, int_vec_type, "");
+ res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), "");
+ }
+ else if (dst_width == (mantissa + 1)) {
+ /*
+ * The destination width matches exactly what can be represented in
+ * floating point (i.e., mantissa + 1 bits). So do a straight
+ * multiplication followed by casting. No further rounding is necessary.
+ */
+
+ double scale;
- /* TODO: Fill in the empty lower bits for additional precision? */
- /* YES: this fixes progs/trivial/tri-z-eq.c.
- * Otherwise vertex Z=1.0 values get converted to something like
- * 0xfffffb00 and the test for equality with 0xffffffff fails.
+ scale = (double)((1ULL << dst_width) - 1);
+
+ res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
+ res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
+ }
+ else {
+ /*
+ * The destination exceeds what can be represented in the floating point.
+ * So multiply by the largest power two we get away with, and when
+ * subtract the most significant bit to rescale to normalized values.
+ *
+ * The largest power of two factor we can get away is
+ * (1 << (src_type.width - 1)), because we need to use signed . In theory it
+ * should be (1 << (src_type.width - 2)), but IEEE 754 rules states
+ * INT_MIN should be returned in FPToSI, which is the correct result for
+ * values near 1.0!
+ *
+ * This means we get (src_type.width - 1) correct bits for values near 0.0,
+ * and (mantissa + 1) correct bits for values near 1.0. Equally or more
+ * important, we also get exact results for 0.0 and 1.0.
*/
-#if 0
- {
- LLVMValueRef msb;
- msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), "");
- msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), "");
- msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), "");
- res = LLVMBuildOr(builder, res, msb, "");
- }
-#elif 0
- while(shift > 0) {
- res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), "");
- shift -= n;
- n *= 2;
+
+ unsigned n = MIN2(src_type.width - 1, dst_width);
+
+ double scale = (double)(1ULL << n);
+ unsigned lshift = dst_width - n;
+ unsigned rshift = n;
+ LLVMValueRef lshifted;
+ LLVMValueRef rshifted;
+
+ res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
+ res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
+
+ /*
+ * Align the most significant bit to its final place.
+ *
+ * This will cause 1.0 to overflow to 0, but the later adjustment will
+ * get it right.
+ */
+ if (lshift) {
+ lshifted = LLVMBuildShl(builder, res,
+ lp_build_const_int_vec(src_type, lshift), "");
+ } else {
+ lshifted = res;
}
-#endif
+
+ /*
+ * Align the most significant bit to the right.
+ */
+ rshifted = LLVMBuildAShr(builder, res,
+ lp_build_const_int_vec(src_type, rshift), "");
+
+ /*
+ * Subtract the MSB to the LSB, therefore re-scaling from
+ * (1 << dst_width) to ((1 << dst_width) - 1).
+ */
+
+ res = LLVMBuildSub(builder, lshifted, rshifted, "");
}
- else
- res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), "");
return res;
}
@@ -177,6 +224,16 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
assert(dst_type.floating);
+ /* Special-case int8->float, though most cases could be handled
+ * this way:
+ */
+ if (src_width == 8) {
+ scale = 1.0/255.0;
+ res = LLVMBuildSIToFP(builder, src, vec_type, "");
+ res = LLVMBuildFMul(builder, res, lp_build_const_vec(dst_type, scale), "");
+ return res;
+ }
+
mantissa = lp_mantissa(dst_type);
n = MIN2(mantissa, src_width);
@@ -241,6 +298,87 @@ lp_build_conv(LLVMBuilderRef builder,
}
num_tmps = num_srcs;
+
+ /* Special case 4x4f --> 1x16ub
+ */
+ if (src_type.floating == 1 &&
+ src_type.fixed == 0 &&
+ src_type.sign == 1 &&
+ src_type.norm == 0 &&
+ src_type.width == 32 &&
+ src_type.length == 4 &&
+
+ dst_type.floating == 0 &&
+ dst_type.fixed == 0 &&
+ dst_type.sign == 0 &&
+ dst_type.norm == 1 &&
+ dst_type.width == 8 &&
+ dst_type.length == 16 &&
+
+ util_cpu_caps.has_sse2)
+ {
+ int i;
+
+ for (i = 0; i < num_dsts; i++, src += 4) {
+ struct lp_type int16_type = dst_type;
+ struct lp_type int32_type = dst_type;
+ LLVMValueRef lo, hi;
+ LLVMValueRef src_int0;
+ LLVMValueRef src_int1;
+ LLVMValueRef src_int2;
+ LLVMValueRef src_int3;
+ LLVMTypeRef int16_vec_type;
+ LLVMTypeRef int32_vec_type;
+ LLVMTypeRef src_vec_type;
+ LLVMTypeRef dst_vec_type;
+ LLVMValueRef const_255f;
+ LLVMValueRef a, b, c, d;
+
+ int16_type.width *= 2;
+ int16_type.length /= 2;
+ int16_type.sign = 1;
+
+ int32_type.width *= 4;
+ int32_type.length /= 4;
+ int32_type.sign = 1;
+
+ src_vec_type = lp_build_vec_type(src_type);
+ dst_vec_type = lp_build_vec_type(dst_type);
+ int16_vec_type = lp_build_vec_type(int16_type);
+ int32_vec_type = lp_build_vec_type(int32_type);
+
+ const_255f = lp_build_const_vec(src_type, 255.0f);
+
+ a = LLVMBuildFMul(builder, src[0], const_255f, "");
+ b = LLVMBuildFMul(builder, src[1], const_255f, "");
+ c = LLVMBuildFMul(builder, src[2], const_255f, "");
+ d = LLVMBuildFMul(builder, src[3], const_255f, "");
+
+ {
+ struct lp_build_context bld;
+
+ bld.builder = builder;
+ bld.type = src_type;
+ bld.vec_type = src_vec_type;
+ bld.int_elem_type = lp_build_elem_type(int32_type);
+ bld.int_vec_type = int32_vec_type;
+ bld.undef = lp_build_undef(src_type);
+ bld.zero = lp_build_zero(src_type);
+ bld.one = lp_build_one(src_type);
+
+ src_int0 = lp_build_iround(&bld, a);
+ src_int1 = lp_build_iround(&bld, b);
+ src_int2 = lp_build_iround(&bld, c);
+ src_int3 = lp_build_iround(&bld, d);
+ }
+ /* relying on clamping behavior of sse2 intrinsics here */
+ lo = lp_build_pack2(builder, int32_type, int16_type, src_int0, src_int1);
+ hi = lp_build_pack2(builder, int32_type, int16_type, src_int2, src_int3);
+ dst[i] = lp_build_pack2(builder, int16_type, dst_type, lo, hi);
+ }
+ return;
+ }
+
/*
* Clamp if necessary
*/
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
index d3a5afff8c..93e56553d7 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
@@ -57,6 +57,8 @@ lp_disassemble(const void* func)
#ifdef HAVE_UDIS86
ud_t ud_obj;
uint64_t max_jmp_pc;
+ uint inst_no;
+ boolean emit_addrs = TRUE, emit_line_nos = FALSE;
ud_init(&ud_obj);
@@ -76,13 +78,18 @@ lp_disassemble(const void* func)
while (ud_disassemble(&ud_obj)) {
+ if (emit_addrs) {
#ifdef PIPE_ARCH_X86
- debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj));
+ debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj));
#endif
#ifdef PIPE_ARCH_X86_64
- debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj));
+ debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj));
#endif
-
+ }
+ else if (emit_line_nos) {
+ debug_printf("%6d:\t", inst_no);
+ inst_no++;
+ }
#if 0
debug_printf("%-16s ", ud_insn_hex(&ud_obj));
#endif
@@ -115,8 +122,10 @@ lp_disassemble(const void* func)
}
}
- if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) ||
- ud_obj.mnemonic == UD_Iinvalid)
+ if (ud_obj.mnemonic == UD_Iinvalid ||
+ (ud_insn_off(&ud_obj) >= max_jmp_pc &&
+ (ud_obj.mnemonic == UD_Iret ||
+ ud_obj.mnemonic == UD_Ijmp)))
break;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
index 369c1bbf09..eb11dcd4ef 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
@@ -36,11 +36,12 @@
#include "util/u_string.h"
-#define GALLIVM_DEBUG_TGSI 0x1
-#define GALLIVM_DEBUG_IR 0x2
-#define GALLIVM_DEBUG_ASM 0x4
-#define GALLIVM_DEBUG_NO_OPT 0x8
-#define GALLIVM_DEBUG_PERF 0x10
+#define GALLIVM_DEBUG_TGSI (1 << 0)
+#define GALLIVM_DEBUG_IR (1 << 1)
+#define GALLIVM_DEBUG_ASM (1 << 2)
+#define GALLIVM_DEBUG_NO_OPT (1 << 3)
+#define GALLIVM_DEBUG_PERF (1 << 4)
+#define GALLIVM_DEBUG_NO_BRILINEAR (1 << 5)
#ifdef DEBUG
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
index 5bc9c741a8..a2cee199a0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -38,273 +38,15 @@
#include "lp_bld_flow.h"
-#define LP_BUILD_FLOW_MAX_VARIABLES 64
-#define LP_BUILD_FLOW_MAX_DEPTH 32
-
-/**
- * Enumeration of all possible flow constructs.
- */
-enum lp_build_flow_construct_kind {
- LP_BUILD_FLOW_SCOPE,
- LP_BUILD_FLOW_SKIP,
- LP_BUILD_FLOW_IF
-};
-
-
-/**
- * Variable declaration scope.
- */
-struct lp_build_flow_scope
-{
- /** Number of variables declared in this scope */
- unsigned num_variables;
-};
-
-
-/**
- * Early exit. Useful to skip to the end of a function or block when
- * the execution mask becomes zero or when there is an error condition.
- */
-struct lp_build_flow_skip
-{
- /** Block to skip to */
- LLVMBasicBlockRef block;
-
- /** Number of variables declared at the beginning */
- unsigned num_variables;
-
- LLVMValueRef *phi; /**< array [num_variables] */
-};
-
-
-/**
- * if/else/endif.
- */
-struct lp_build_flow_if
-{
- unsigned num_variables;
-
- LLVMValueRef *phi; /**< array [num_variables] */
-
- LLVMValueRef condition;
- LLVMBasicBlockRef entry_block, true_block, false_block, merge_block;
-};
-
-
-/**
- * Union of all possible flow constructs' data
- */
-union lp_build_flow_construct_data
-{
- struct lp_build_flow_scope scope;
- struct lp_build_flow_skip skip;
- struct lp_build_flow_if ifthen;
-};
-
-
-/**
- * Element of the flow construct stack.
- */
-struct lp_build_flow_construct
-{
- enum lp_build_flow_construct_kind kind;
- union lp_build_flow_construct_data data;
-};
-
-
/**
- * All necessary data to generate LLVM control flow constructs.
+ * Insert a new block, right where builder is pointing to.
*
- * Besides keeping track of the control flow construct themselves we also
- * need to keep track of variables in order to generate SSA Phi values.
- */
-struct lp_build_flow_context
-{
- LLVMBuilderRef builder;
-
- /**
- * Control flow stack.
- */
- struct lp_build_flow_construct constructs[LP_BUILD_FLOW_MAX_DEPTH];
- unsigned num_constructs;
-
- /**
- * Variable stack
- */
- LLVMValueRef *variables[LP_BUILD_FLOW_MAX_VARIABLES];
- unsigned num_variables;
-};
-
-
-struct lp_build_flow_context *
-lp_build_flow_create(LLVMBuilderRef builder)
-{
- struct lp_build_flow_context *flow;
-
- flow = CALLOC_STRUCT(lp_build_flow_context);
- if(!flow)
- return NULL;
-
- flow->builder = builder;
-
- return flow;
-}
-
-
-void
-lp_build_flow_destroy(struct lp_build_flow_context *flow)
-{
- assert(flow->num_constructs == 0);
- assert(flow->num_variables == 0);
- FREE(flow);
-}
-
-
-/**
- * Begin/push a new flow control construct, such as a loop, skip block
- * or variable scope.
- */
-static union lp_build_flow_construct_data *
-lp_build_flow_push(struct lp_build_flow_context *flow,
- enum lp_build_flow_construct_kind kind)
-{
- assert(flow->num_constructs < LP_BUILD_FLOW_MAX_DEPTH);
- if(flow->num_constructs >= LP_BUILD_FLOW_MAX_DEPTH)
- return NULL;
-
- flow->constructs[flow->num_constructs].kind = kind;
- return &flow->constructs[flow->num_constructs++].data;
-}
-
-
-/**
- * Return the current/top flow control construct on the stack.
- * \param kind the expected type of the top-most construct
- */
-static union lp_build_flow_construct_data *
-lp_build_flow_peek(struct lp_build_flow_context *flow,
- enum lp_build_flow_construct_kind kind)
-{
- assert(flow->num_constructs);
- if(!flow->num_constructs)
- return NULL;
-
- assert(flow->constructs[flow->num_constructs - 1].kind == kind);
- if(flow->constructs[flow->num_constructs - 1].kind != kind)
- return NULL;
-
- return &flow->constructs[flow->num_constructs - 1].data;
-}
-
-
-/**
- * End/pop the current/top flow control construct on the stack.
- * \param kind the expected type of the top-most construct
- */
-static union lp_build_flow_construct_data *
-lp_build_flow_pop(struct lp_build_flow_context *flow,
- enum lp_build_flow_construct_kind kind)
-{
- assert(flow->num_constructs);
- if(!flow->num_constructs)
- return NULL;
-
- assert(flow->constructs[flow->num_constructs - 1].kind == kind);
- if(flow->constructs[flow->num_constructs - 1].kind != kind)
- return NULL;
-
- return &flow->constructs[--flow->num_constructs].data;
-}
-
-
-/**
- * Begin a variable scope.
+ * This is useful important not only for aesthetic reasons, but also for
+ * performance reasons, as frequently run blocks should be laid out next to
+ * each other and fall-throughs maximized.
*
+ * See also llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp.
*
- */
-void
-lp_build_flow_scope_begin(struct lp_build_flow_context *flow)
-{
- struct lp_build_flow_scope *scope;
-
- scope = &lp_build_flow_push(flow, LP_BUILD_FLOW_SCOPE)->scope;
- if(!scope)
- return;
-
- scope->num_variables = 0;
-}
-
-
-/**
- * Declare a variable.
- *
- * A variable is a named entity which can have different LLVMValueRef's at
- * different points of the program. This is relevant for control flow because
- * when there are multiple branches to a same location we need to replace
- * the variable's value with a Phi function as explained in
- * http://en.wikipedia.org/wiki/Static_single_assignment_form .
- *
- * We keep track of variables by keeping around a pointer to where they're
- * current.
- *
- * There are a few cautions to observe:
- *
- * - Variable's value must not be NULL. If there is no initial value then
- * LLVMGetUndef() should be used.
- *
- * - Variable's value must be kept up-to-date. If the variable is going to be
- * modified by a function then a pointer should be passed so that its value
- * is accurate. Failure to do this will cause some of the variables'
- * transient values to be lost, leading to wrong results.
- *
- * - A program should be written from top to bottom, by always appending
- * instructions to the bottom with a single LLVMBuilderRef. Inserting and/or
- * modifying existing statements will most likely lead to wrong results.
- *
- */
-void
-lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
- LLVMValueRef *variable)
-{
- struct lp_build_flow_scope *scope;
-
- scope = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SCOPE)->scope;
- if(!scope)
- return;
-
- assert(*variable);
- if(!*variable)
- return;
-
- assert(flow->num_variables < LP_BUILD_FLOW_MAX_VARIABLES);
- if(flow->num_variables >= LP_BUILD_FLOW_MAX_VARIABLES)
- return;
-
- flow->variables[flow->num_variables++] = variable;
- ++scope->num_variables;
-}
-
-
-void
-lp_build_flow_scope_end(struct lp_build_flow_context *flow)
-{
- struct lp_build_flow_scope *scope;
-
- scope = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SCOPE)->scope;
- if(!scope)
- return;
-
- assert(flow->num_variables >= scope->num_variables);
- if(flow->num_variables < scope->num_variables) {
- flow->num_variables = 0;
- return;
- }
-
- flow->num_variables -= scope->num_variables;
-}
-
-
-/**
* Note: this function has no dependencies on the flow code and could
* be used elsewhere.
*/
@@ -334,52 +76,18 @@ lp_build_insert_new_block(LLVMBuilderRef builder, const char *name)
}
-static LLVMBasicBlockRef
-lp_build_flow_insert_block(struct lp_build_flow_context *flow)
-{
- return lp_build_insert_new_block(flow->builder, "");
-}
-
-
/**
* Begin a "skip" block. Inside this block we can test a condition and
* skip to the end of the block if the condition is false.
*/
void
-lp_build_flow_skip_begin(struct lp_build_flow_context *flow)
+lp_build_flow_skip_begin(struct lp_build_skip_context *skip,
+ LLVMBuilderRef builder)
{
- struct lp_build_flow_skip *skip;
- LLVMBuilderRef builder;
- unsigned i;
-
- skip = &lp_build_flow_push(flow, LP_BUILD_FLOW_SKIP)->skip;
- if(!skip)
- return;
+ skip->builder = builder;
/* create new basic block */
- skip->block = lp_build_flow_insert_block(flow);
-
- skip->num_variables = flow->num_variables;
- if(!skip->num_variables) {
- skip->phi = NULL;
- return;
- }
-
- /* Allocate a Phi node for each variable in this skip scope */
- skip->phi = MALLOC(skip->num_variables * sizeof *skip->phi);
- if(!skip->phi) {
- skip->num_variables = 0;
- return;
- }
-
- builder = LLVMCreateBuilder();
- LLVMPositionBuilderAtEnd(builder, skip->block);
-
- /* create a Phi node for each variable */
- for(i = 0; i < skip->num_variables; ++i)
- skip->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");
-
- LLVMDisposeBuilder(builder);
+ skip->block = lp_build_insert_new_block(skip->builder, "skip");
}
@@ -388,83 +96,50 @@ lp_build_flow_skip_begin(struct lp_build_flow_context *flow)
* skip block if the condition is true.
*/
void
-lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
+lp_build_flow_skip_cond_break(struct lp_build_skip_context *skip,
LLVMValueRef cond)
{
- struct lp_build_flow_skip *skip;
- LLVMBasicBlockRef current_block;
LLVMBasicBlockRef new_block;
- unsigned i;
-
- skip = &lp_build_flow_peek(flow, LP_BUILD_FLOW_SKIP)->skip;
- if(!skip)
- return;
- current_block = LLVMGetInsertBlock(flow->builder);
-
- new_block = lp_build_flow_insert_block(flow);
-
- /* for each variable, update the Phi node with a (variable, block) pair */
- for(i = 0; i < skip->num_variables; ++i) {
- assert(*flow->variables[i]);
- assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i]));
- LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
- }
+ new_block = lp_build_insert_new_block(skip->builder, "");
/* if cond is true, goto skip->block, else goto new_block */
- LLVMBuildCondBr(flow->builder, cond, skip->block, new_block);
+ LLVMBuildCondBr(skip->builder, cond, skip->block, new_block);
- LLVMPositionBuilderAtEnd(flow->builder, new_block);
+ LLVMPositionBuilderAtEnd(skip->builder, new_block);
}
void
-lp_build_flow_skip_end(struct lp_build_flow_context *flow)
+lp_build_flow_skip_end(struct lp_build_skip_context *skip)
{
- struct lp_build_flow_skip *skip;
- LLVMBasicBlockRef current_block;
- unsigned i;
-
- skip = &lp_build_flow_pop(flow, LP_BUILD_FLOW_SKIP)->skip;
- if(!skip)
- return;
-
- current_block = LLVMGetInsertBlock(flow->builder);
-
- /* add (variable, block) tuples to the phi nodes */
- for(i = 0; i < skip->num_variables; ++i) {
- assert(*flow->variables[i]);
- assert(LLVMTypeOf(skip->phi[i]) == LLVMTypeOf(*flow->variables[i]));
- LLVMAddIncoming(skip->phi[i], flow->variables[i], &current_block, 1);
- *flow->variables[i] = skip->phi[i];
- }
-
/* goto block */
- LLVMBuildBr(flow->builder, skip->block);
- LLVMPositionBuilderAtEnd(flow->builder, skip->block);
-
- FREE(skip->phi);
+ LLVMBuildBr(skip->builder, skip->block);
+ LLVMPositionBuilderAtEnd(skip->builder, skip->block);
}
/**
* Check if the mask predicate is zero. If so, jump to the end of the block.
*/
-static void
+void
lp_build_mask_check(struct lp_build_mask_context *mask)
{
- LLVMBuilderRef builder = mask->flow->builder;
+ LLVMBuilderRef builder = mask->skip.builder;
+ LLVMValueRef value;
LLVMValueRef cond;
+ value = lp_build_mask_value(mask);
+
/* cond = (mask == 0) */
cond = LLVMBuildICmp(builder,
LLVMIntEQ,
- LLVMBuildBitCast(builder, mask->value, mask->reg_type, ""),
+ LLVMBuildBitCast(builder, value, mask->reg_type, ""),
LLVMConstNull(mask->reg_type),
"");
/* if cond, goto end of block */
- lp_build_flow_skip_cond_break(mask->flow, cond);
+ lp_build_flow_skip_cond_break(&mask->skip, cond);
}
@@ -477,21 +152,27 @@ lp_build_mask_check(struct lp_build_mask_context *mask)
*/
void
lp_build_mask_begin(struct lp_build_mask_context *mask,
- struct lp_build_flow_context *flow,
+ LLVMBuilderRef builder,
struct lp_type type,
LLVMValueRef value)
{
memset(mask, 0, sizeof *mask);
- mask->flow = flow;
mask->reg_type = LLVMIntType(type.width * type.length);
- mask->value = value;
+ mask->var = lp_build_alloca(builder,
+ lp_build_int_vec_type(type),
+ "execution_mask");
- lp_build_flow_scope_begin(flow);
- lp_build_flow_scope_declare(flow, &mask->value);
- lp_build_flow_skip_begin(flow);
+ LLVMBuildStore(builder, value, mask->var);
- lp_build_mask_check(mask);
+ lp_build_flow_skip_begin(&mask->skip, builder);
+}
+
+
+LLVMValueRef
+lp_build_mask_value(struct lp_build_mask_context *mask)
+{
+ return LLVMBuildLoad(mask->skip.builder, mask->var, "");
}
@@ -504,9 +185,10 @@ void
lp_build_mask_update(struct lp_build_mask_context *mask,
LLVMValueRef value)
{
- mask->value = LLVMBuildAnd( mask->flow->builder, mask->value, value, "");
-
- lp_build_mask_check(mask);
+ value = LLVMBuildAnd(mask->skip.builder,
+ lp_build_mask_value(mask),
+ value, "");
+ LLVMBuildStore(mask->skip.builder, value, mask->var);
}
@@ -516,9 +198,8 @@ lp_build_mask_update(struct lp_build_mask_context *mask,
LLVMValueRef
lp_build_mask_end(struct lp_build_mask_context *mask)
{
- lp_build_flow_skip_end(mask->flow);
- lp_build_flow_scope_end(mask->flow);
- return mask->value;
+ lp_build_flow_skip_end(&mask->skip);
+ return lp_build_mask_value(mask);
}
@@ -528,59 +209,27 @@ lp_build_loop_begin(LLVMBuilderRef builder,
LLVMValueRef start,
struct lp_build_loop_state *state)
{
- LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
- LLVMValueRef function = LLVMGetBasicBlockParent(block);
+ state->block = lp_build_insert_new_block(builder, "loop_begin");
- state->block = LLVMAppendBasicBlock(function, "loop");
+ state->counter_var = lp_build_alloca(builder, LLVMTypeOf(start), "loop_counter");
+
+ LLVMBuildStore(builder, start, state->counter_var);
LLVMBuildBr(builder, state->block);
LLVMPositionBuilderAtEnd(builder, state->block);
- state->counter = LLVMBuildPhi(builder, LLVMTypeOf(start), "");
-
- LLVMAddIncoming(state->counter, &start, &block, 1);
-
+ state->counter = LLVMBuildLoad(builder, state->counter_var, "");
}
void
-lp_build_loop_end(LLVMBuilderRef builder,
- LLVMValueRef end,
- LLVMValueRef step,
- struct lp_build_loop_state *state)
-{
- LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
- LLVMValueRef function = LLVMGetBasicBlockParent(block);
- LLVMValueRef next;
- LLVMValueRef cond;
- LLVMBasicBlockRef after_block;
-
- if (!step)
- step = LLVMConstInt(LLVMTypeOf(end), 1, 0);
-
- next = LLVMBuildAdd(builder, state->counter, step, "");
-
- cond = LLVMBuildICmp(builder, LLVMIntNE, next, end, "");
-
- after_block = LLVMAppendBasicBlock(function, "");
-
- LLVMBuildCondBr(builder, cond, after_block, state->block);
-
- LLVMAddIncoming(state->counter, &next, &block, 1);
-
- LLVMPositionBuilderAtEnd(builder, after_block);
-}
-
-void
lp_build_loop_end_cond(LLVMBuilderRef builder,
LLVMValueRef end,
LLVMValueRef step,
- int llvm_cond,
+ LLVMIntPredicate llvm_cond,
struct lp_build_loop_state *state)
{
- LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
- LLVMValueRef function = LLVMGetBasicBlockParent(block);
LLVMValueRef next;
LLVMValueRef cond;
LLVMBasicBlockRef after_block;
@@ -590,15 +239,27 @@ lp_build_loop_end_cond(LLVMBuilderRef builder,
next = LLVMBuildAdd(builder, state->counter, step, "");
+ LLVMBuildStore(builder, next, state->counter_var);
+
cond = LLVMBuildICmp(builder, llvm_cond, next, end, "");
- after_block = LLVMAppendBasicBlock(function, "");
+ after_block = lp_build_insert_new_block(builder, "loop_end");
LLVMBuildCondBr(builder, cond, after_block, state->block);
- LLVMAddIncoming(state->counter, &next, &block, 1);
-
LLVMPositionBuilderAtEnd(builder, after_block);
+
+ state->counter = LLVMBuildLoad(builder, state->counter_var, "");
+}
+
+
+void
+lp_build_loop_end(LLVMBuilderRef builder,
+ LLVMValueRef end,
+ LLVMValueRef step,
+ struct lp_build_loop_state *state)
+{
+ lp_build_loop_end_cond(builder, end, step, LLVMIntNE, state);
}
@@ -616,24 +277,16 @@ lp_build_loop_end_cond(LLVMBuilderRef builder,
Is built with:
- LLVMValueRef x = LLVMGetUndef(); // or something else
+ // x needs an alloca variable
+ x = lp_build_alloca(builder, type, "x");
- flow = lp_build_flow_create(builder);
- lp_build_flow_scope_begin(flow);
+ lp_build_if(ctx, builder, cond);
+ LLVMBuildStore(LLVMBuildAdd(1, 2), x);
+ lp_build_else(ctx);
+ LLVMBuildStore(LLVMBuildAdd(2, 3). x);
+ lp_build_endif(ctx);
- // x needs a phi node
- lp_build_flow_scope_declare(flow, &x);
-
- lp_build_if(ctx, flow, builder, cond);
- x = LLVMAdd(1, 2);
- lp_build_else(ctx);
- x = LLVMAdd(2, 3);
- lp_build_endif(ctx);
-
- lp_build_flow_scope_end(flow);
-
- lp_build_flow_destroy(flow);
*/
@@ -642,47 +295,19 @@ lp_build_loop_end_cond(LLVMBuilderRef builder,
* Begin an if/else/endif construct.
*/
void
-lp_build_if(struct lp_build_if_state *ctx,
- struct lp_build_flow_context *flow,
+lp_build_if(struct lp_build_if_state *ifthen,
LLVMBuilderRef builder,
LLVMValueRef condition)
{
LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
- struct lp_build_flow_if *ifthen;
- unsigned i;
-
- memset(ctx, 0, sizeof(*ctx));
- ctx->builder = builder;
- ctx->flow = flow;
- /* push/create new scope */
- ifthen = &lp_build_flow_push(flow, LP_BUILD_FLOW_IF)->ifthen;
- assert(ifthen);
-
- ifthen->num_variables = flow->num_variables;
+ memset(ifthen, 0, sizeof *ifthen);
+ ifthen->builder = builder;
ifthen->condition = condition;
ifthen->entry_block = block;
- /* create a Phi node for each variable in this flow scope */
- ifthen->phi = MALLOC(ifthen->num_variables * sizeof(*ifthen->phi));
- if (!ifthen->phi) {
- ifthen->num_variables = 0;
- return;
- }
-
/* create endif/merge basic block for the phi functions */
ifthen->merge_block = lp_build_insert_new_block(builder, "endif-block");
- LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
-
- /* create a phi node for each variable */
- for (i = 0; i < flow->num_variables; i++) {
- ifthen->phi[i] = LLVMBuildPhi(builder, LLVMTypeOf(*flow->variables[i]), "");
-
- /* add add the initial value of the var from the entry block */
- if (!LLVMIsUndef(*flow->variables[i]))
- LLVMAddIncoming(ifthen->phi[i], flow->variables[i],
- &ifthen->entry_block, 1);
- }
/* create/insert true_block before merge_block */
ifthen->true_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-true-block");
@@ -696,27 +321,16 @@ lp_build_if(struct lp_build_if_state *ctx,
* Begin else-part of a conditional
*/
void
-lp_build_else(struct lp_build_if_state *ctx)
+lp_build_else(struct lp_build_if_state *ifthen)
{
- struct lp_build_flow_context *flow = ctx->flow;
- struct lp_build_flow_if *ifthen;
- unsigned i;
-
- ifthen = &lp_build_flow_peek(flow, LP_BUILD_FLOW_IF)->ifthen;
- assert(ifthen);
-
- /* for each variable, update the Phi node with a (variable, block) pair */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
- for (i = 0; i < flow->num_variables; i++) {
- assert(*flow->variables[i]);
- LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1);
- }
+ /* Append an unconditional Br(anch) instruction on the true_block */
+ LLVMBuildBr(ifthen->builder, ifthen->merge_block);
/* create/insert false_block before the merge block */
ifthen->false_block = LLVMInsertBasicBlock(ifthen->merge_block, "if-false-block");
/* successive code goes into the else block */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->false_block);
+ LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->false_block);
}
@@ -724,75 +338,30 @@ lp_build_else(struct lp_build_if_state *ctx)
* End a conditional.
*/
void
-lp_build_endif(struct lp_build_if_state *ctx)
+lp_build_endif(struct lp_build_if_state *ifthen)
{
- struct lp_build_flow_context *flow = ctx->flow;
- struct lp_build_flow_if *ifthen;
- LLVMBasicBlockRef curBlock = LLVMGetInsertBlock(ctx->builder);
- unsigned i;
-
- ifthen = &lp_build_flow_pop(flow, LP_BUILD_FLOW_IF)->ifthen;
- assert(ifthen);
-
/* Insert branch to the merge block from current block */
- LLVMBuildBr(ctx->builder, ifthen->merge_block);
+ LLVMBuildBr(ifthen->builder, ifthen->merge_block);
- if (ifthen->false_block) {
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
- /* for each variable, update the Phi node with a (variable, block) pair */
- for (i = 0; i < flow->num_variables; i++) {
- assert(*flow->variables[i]);
- LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &curBlock, 1);
- /* replace the variable ref with the phi function */
- *flow->variables[i] = ifthen->phi[i];
- }
- }
- else {
- /* no else clause */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
- for (i = 0; i < flow->num_variables; i++) {
- assert(*flow->variables[i]);
- LLVMAddIncoming(ifthen->phi[i], flow->variables[i], &ifthen->true_block, 1);
-
- /* replace the variable ref with the phi function */
- *flow->variables[i] = ifthen->phi[i];
- }
- }
-
- FREE(ifthen->phi);
-
- /***
- *** Now patch in the various branch instructions.
- ***/
+ /*
+ * Now patch in the various branch instructions.
+ */
/* Insert the conditional branch instruction at the end of entry_block */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->entry_block);
+ LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->entry_block);
if (ifthen->false_block) {
/* we have an else clause */
- LLVMBuildCondBr(ctx->builder, ifthen->condition,
+ LLVMBuildCondBr(ifthen->builder, ifthen->condition,
ifthen->true_block, ifthen->false_block);
}
else {
/* no else clause */
- LLVMBuildCondBr(ctx->builder, ifthen->condition,
+ LLVMBuildCondBr(ifthen->builder, ifthen->condition,
ifthen->true_block, ifthen->merge_block);
}
- /* Insert branch from end of true_block to merge_block */
- if (ifthen->false_block) {
- /* Append an unconditional Br(anch) instruction on the true_block */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
- LLVMBuildBr(ctx->builder, ifthen->merge_block);
- }
- else {
- /* No else clause.
- * Note that we've already inserted the branch at the end of
- * true_block. See the very first LLVMBuildBr() call in this function.
- */
- }
-
/* Resume building code at end of the ifthen->merge_block */
- LLVMPositionBuilderAtEnd(ctx->builder, ifthen->merge_block);
+ LLVMPositionBuilderAtEnd(ifthen->builder, ifthen->merge_block);
}
@@ -830,6 +399,7 @@ lp_build_alloca(LLVMBuilderRef builder,
}
res = LLVMBuildAlloca(first_builder, type, name);
+ LLVMBuildStore(builder, LLVMConstNull(type), res);
LLVMDisposeBuilder(first_builder);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
index fffb493a93..e729ee6eaa 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
@@ -41,52 +41,49 @@
struct lp_type;
-struct lp_build_flow_context;
-
-
-struct lp_build_flow_context *
-lp_build_flow_create(LLVMBuilderRef builder);
-
-void
-lp_build_flow_destroy(struct lp_build_flow_context *flow);
-
-void
-lp_build_flow_scope_begin(struct lp_build_flow_context *flow);
-
-void
-lp_build_flow_scope_declare(struct lp_build_flow_context *flow,
- LLVMValueRef *variable);
+/**
+ * Early exit. Useful to skip to the end of a function or block when
+ * the execution mask becomes zero or when there is an error condition.
+ */
+struct lp_build_skip_context
+{
+ LLVMBuilderRef builder;
-void
-lp_build_flow_scope_end(struct lp_build_flow_context *flow);
+ /** Block to skip to */
+ LLVMBasicBlockRef block;
+};
void
-lp_build_flow_skip_begin(struct lp_build_flow_context *flow);
+lp_build_flow_skip_begin(struct lp_build_skip_context *ctx,
+ LLVMBuilderRef builder);
void
-lp_build_flow_skip_cond_break(struct lp_build_flow_context *flow,
+lp_build_flow_skip_cond_break(struct lp_build_skip_context *ctx,
LLVMValueRef cond);
void
-lp_build_flow_skip_end(struct lp_build_flow_context *flow);
+lp_build_flow_skip_end(struct lp_build_skip_context *ctx);
struct lp_build_mask_context
{
- struct lp_build_flow_context *flow;
+ struct lp_build_skip_context skip;
LLVMTypeRef reg_type;
- LLVMValueRef value;
+ LLVMValueRef var;
};
void
lp_build_mask_begin(struct lp_build_mask_context *mask,
- struct lp_build_flow_context *flow,
+ LLVMBuilderRef builder,
struct lp_type type,
LLVMValueRef value);
+LLVMValueRef
+lp_build_mask_value(struct lp_build_mask_context *mask);
+
/**
* Bitwise AND the mask with the given value, if a previous mask was set.
*/
@@ -94,6 +91,9 @@ void
lp_build_mask_update(struct lp_build_mask_context *mask,
LLVMValueRef value);
+void
+lp_build_mask_check(struct lp_build_mask_context *mask);
+
LLVMValueRef
lp_build_mask_end(struct lp_build_mask_context *mask);
@@ -108,6 +108,7 @@ lp_build_mask_end(struct lp_build_mask_context *mask);
struct lp_build_loop_state
{
LLVMBasicBlockRef block;
+ LLVMValueRef counter_var;
LLVMValueRef counter;
};
@@ -128,22 +129,28 @@ void
lp_build_loop_end_cond(LLVMBuilderRef builder,
LLVMValueRef end,
LLVMValueRef step,
- int cond, /* LLVM condition */
+ LLVMIntPredicate cond,
struct lp_build_loop_state *state);
+/**
+ * if/else/endif.
+ */
struct lp_build_if_state
{
LLVMBuilderRef builder;
- struct lp_build_flow_context *flow;
+ LLVMValueRef condition;
+ LLVMBasicBlockRef entry_block;
+ LLVMBasicBlockRef true_block;
+ LLVMBasicBlockRef false_block;
+ LLVMBasicBlockRef merge_block;
};
void
lp_build_if(struct lp_build_if_state *ctx,
- struct lp_build_flow_context *flow,
LLVMBuilderRef builder,
LLVMValueRef condition);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
index 0a5038bc98..2bce289555 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
@@ -35,6 +35,7 @@
#include "util/u_format.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_arit.h"
#include "lp_bld_type.h"
@@ -42,7 +43,7 @@
#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
-
+#include "lp_bld_logic.h"
/**
* Extract Y, U, V channels from packed UYVY.
@@ -59,7 +60,7 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder,
LLVMValueRef *v)
{
struct lp_type type;
- LLVMValueRef shift, mask;
+ LLVMValueRef mask;
memset(&type, 0, sizeof type);
type.width = 32;
@@ -69,14 +70,37 @@ uyvy_to_yuv_soa(LLVMBuilderRef builder,
assert(lp_check_value(type, i));
/*
- * y = (uyvy >> 16*i) & 0xff
+ * y = (uyvy >> (16*i + 8)) & 0xff
* u = (uyvy ) & 0xff
* v = (uyvy >> 16 ) & 0xff
*/
- shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
- shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
- *y = LLVMBuildLShr(builder, packed, shift, "");
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ /*
+ * Avoid shift with per-element count.
+ * No support on x86, gets translated to roughly 5 instructions
+ * per element. Didn't measure performance but cuts shader size
+ * by quite a bit (less difference if cpu has no sse4.1 support).
+ */
+ if (util_cpu_caps.has_sse2 && n == 4) {
+ LLVMValueRef sel, tmp, tmp2;
+ struct lp_build_context bld32;
+
+ lp_build_context_init(&bld32, builder, type);
+
+ tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
+ tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(type, 16), "");
+ sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0));
+ *y = lp_build_select(&bld32, sel, tmp, tmp2);
+ } else
+#endif
+ {
+ LLVMValueRef shift;
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
+ shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(type, 8), "");
+ *y = LLVMBuildLShr(builder, packed, shift, "");
+ }
+
*u = packed;
*v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
@@ -103,7 +127,7 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder,
LLVMValueRef *v)
{
struct lp_type type;
- LLVMValueRef shift, mask;
+ LLVMValueRef mask;
memset(&type, 0, sizeof type);
type.width = 32;
@@ -118,8 +142,30 @@ yuyv_to_yuv_soa(LLVMBuilderRef builder,
* v = (yuyv >> 24 ) & 0xff
*/
- shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
- *y = LLVMBuildLShr(builder, packed, shift, "");
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ /*
+ * Avoid shift with per-element count.
+ * No support on x86, gets translated to roughly 5 instructions
+ * per element. Didn't measure performance but cuts shader size
+ * by quite a bit (less difference if cpu has no sse4.1 support).
+ */
+ if (util_cpu_caps.has_sse2 && n == 4) {
+ LLVMValueRef sel, tmp;
+ struct lp_build_context bld32;
+
+ lp_build_context_init(&bld32, builder, type);
+
+ tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 16), "");
+ sel = lp_build_compare(builder, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(type, 0));
+ *y = lp_build_select(&bld32, sel, packed, tmp);
+ } else
+#endif
+ {
+ LLVMValueRef shift;
+ shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(type, 16), "");
+ *y = LLVMBuildLShr(builder, packed, shift, "");
+ }
+
*u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 8), "");
*v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(type, 24), "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 761f33b578..5598ca5c48 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -44,6 +44,7 @@ static const struct debug_named_value lp_bld_debug_flags[] = {
{ "asm", GALLIVM_DEBUG_ASM, NULL },
{ "nopt", GALLIVM_DEBUG_NO_OPT, NULL },
{ "perf", GALLIVM_DEBUG_PERF, NULL },
+ { "no_brilinear", GALLIVM_DEBUG_NO_BRILINEAR, NULL },
DEBUG_NAMED_VALUE_END
};
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h
index f26fdac466..0b4b1ca7d1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h
@@ -47,4 +47,10 @@ lp_build_init(void);
extern void
lp_func_delete_body(LLVMValueRef func);
+
+extern LLVMValueRef
+lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
+ const char *Name);
+
+
#endif /* !LP_BLD_INIT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index d5c62a3f73..026b60ac36 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -92,9 +92,23 @@ lp_build_compare(LLVMBuilderRef builder,
if(func == PIPE_FUNC_ALWAYS)
return ones;
- /* TODO: optimize the constant case */
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ /*
+ * There are no unsigned integer comparison instructions in SSE.
+ */
- /* XXX: It is not clear if we should use the ordered or unordered operators */
+ if (!type.floating && !type.sign &&
+ type.width * type.length == 128 &&
+ util_cpu_caps.has_sse2 &&
+ (func == PIPE_FUNC_LESS ||
+ func == PIPE_FUNC_LEQUAL ||
+ func == PIPE_FUNC_GREATER ||
+ func == PIPE_FUNC_GEQUAL) &&
+ (gallivm_debug & GALLIVM_DEBUG_PERF)) {
+ debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
+ __FUNCTION__, type.length, type.width);
+ }
+#endif
#if HAVE_LLVM < 0x0207
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
@@ -225,6 +239,8 @@ lp_build_compare(LLVMBuilderRef builder,
#endif
#endif /* HAVE_LLVM < 0x0207 */
+ /* XXX: It is not clear if we should use the ordered or unordered operators */
+
if(type.floating) {
LLVMRealPredicate op;
switch(func) {
@@ -446,10 +462,12 @@ lp_build_select(struct lp_build_context *bld,
LLVMTypeRef arg_type;
LLVMValueRef args[3];
- if (type.width == 64) {
+ if (type.floating &&
+ type.width == 64) {
intrinsic = "llvm.x86.sse41.blendvpd";
arg_type = LLVMVectorType(LLVMDoubleType(), 2);
- } else if (type.width == 32) {
+ } else if (type.floating &&
+ type.width == 32) {
intrinsic = "llvm.x86.sse41.blendvps";
arg_type = LLVMVectorType(LLVMFloatType(), 4);
} else {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 48baf7c425..f56ddee7fd 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -178,3 +178,13 @@ lp_func_delete_body(LLVMValueRef FF)
llvm::Function *func = llvm::unwrap<llvm::Function>(FF);
func->deleteBody();
}
+
+
+extern "C"
+LLVMValueRef
+lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
+ const char *Name)
+{
+ return llvm::wrap(llvm::unwrap(B)->CreateLoad(llvm::unwrap(PointerVal), true, Name));
+}
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.c b/src/gallium/auxiliary/gallivm/lp_bld_printf.c
index 153ba5b15b..f418e96aff 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_printf.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c
@@ -29,6 +29,8 @@
#include "util/u_debug.h"
#include "util/u_memory.h"
+#include "util/u_string.h"
+#include "lp_bld_const.h"
#include "lp_bld_printf.h"
@@ -119,3 +121,22 @@ lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...)
return LLVMBuildCall(builder, func_printf, params, argcount + 1, "");
}
+
+
+/**
+ * Print a float[4] vector.
+ */
+LLVMValueRef
+lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec)
+{
+ char format[1000];
+ LLVMValueRef x, y, z, w;
+
+ x = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(0), "");
+ y = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(1), "");
+ z = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(2), "");
+ w = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(3), "");
+
+ util_snprintf(format, sizeof(format), "%s %%f %%f %%f %%f\n", msg);
+ return lp_build_printf(builder, format, x, y, z, w);
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.h b/src/gallium/auxiliary/gallivm/lp_bld_printf.h
index 83bd8f1d55..b6222c62eb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_printf.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.h
@@ -35,5 +35,9 @@
LLVMValueRef lp_build_const_string_variable(LLVMModuleRef module, const char *str, int len);
LLVMValueRef lp_build_printf(LLVMBuilderRef builder, const char *fmt, ...);
+LLVMValueRef
+lp_build_print_vec4(LLVMBuilderRef builder, const char *msg, LLVMValueRef vec);
+
+
#endif
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
index 7b1088939b..c18c8b4710 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c
@@ -81,11 +81,15 @@ LLVMValueRef
lp_build_scalar_ddx(struct lp_build_context *bld,
LLVMValueRef a)
{
- LLVMValueRef idx_left = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0);
- LLVMValueRef idx_right = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_RIGHT, 0);
- LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "");
- LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "");
- return lp_build_sub(bld, a_right, a_left);
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef idx_left = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0);
+ LLVMValueRef idx_right = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_RIGHT, 0);
+ LLVMValueRef a_left = LLVMBuildExtractElement(bld->builder, a, idx_left, "left");
+ LLVMValueRef a_right = LLVMBuildExtractElement(bld->builder, a, idx_right, "right");
+ if (bld->type.floating)
+ return LLVMBuildFSub(bld->builder, a_right, a_left, "ddx");
+ else
+ return LLVMBuildSub(bld->builder, a_right, a_left, "ddx");
}
@@ -93,9 +97,13 @@ LLVMValueRef
lp_build_scalar_ddy(struct lp_build_context *bld,
LLVMValueRef a)
{
- LLVMValueRef idx_top = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_TOP_LEFT, 0);
- LLVMValueRef idx_bottom = LLVMConstInt(LLVMInt32Type(), LP_BLD_QUAD_BOTTOM_LEFT, 0);
- LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "");
- LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "");
- return lp_build_sub(bld, a_bottom, a_top);
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef idx_top = LLVMConstInt(i32t, LP_BLD_QUAD_TOP_LEFT, 0);
+ LLVMValueRef idx_bottom = LLVMConstInt(i32t, LP_BLD_QUAD_BOTTOM_LEFT, 0);
+ LLVMValueRef a_top = LLVMBuildExtractElement(bld->builder, a, idx_top, "top");
+ LLVMValueRef a_bottom = LLVMBuildExtractElement(bld->builder, a, idx_bottom, "bottom");
+ if (bld->type.floating)
+ return LLVMBuildFSub(bld->builder, a_bottom, a_top, "ddy");
+ else
+ return LLVMBuildSub(bld->builder, a_bottom, a_top, "ddy");
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index e89ee7c230..844d1d935b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -39,12 +39,52 @@
#include "lp_bld_arit.h"
#include "lp_bld_const.h"
#include "lp_bld_debug.h"
+#include "lp_bld_printf.h"
#include "lp_bld_flow.h"
#include "lp_bld_sample.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_type.h"
+/*
+ * Bri-linear factor. Should be greater than one.
+ */
+#define BRILINEAR_FACTOR 2
+
+
+/**
+ * Does the given texture wrap mode allow sampling the texture border color?
+ * XXX maybe move this into gallium util code.
+ */
+boolean
+lp_sampler_wrap_mode_uses_border_color(unsigned mode,
+ unsigned min_img_filter,
+ unsigned mag_img_filter)
+{
+ switch (mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ return FALSE;
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
+ mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
+ return FALSE;
+ } else {
+ return TRUE;
+ }
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return TRUE;
+ default:
+ assert(0 && "unexpected wrap mode");
+ return FALSE;
+ }
+}
+
+
/**
* Initialize lp_sampler_static_state object with the gallium sampler
* and texture state.
@@ -93,31 +133,40 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
state->wrap_r = sampler->wrap_r;
state->min_img_filter = sampler->min_img_filter;
state->mag_img_filter = sampler->mag_img_filter;
- if (view->last_level) {
+
+ if (view->last_level && sampler->max_lod > 0.0f) {
state->min_mip_filter = sampler->min_mip_filter;
} else {
state->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
}
+ if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ if (sampler->lod_bias != 0.0f) {
+ state->lod_bias_non_zero = 1;
+ }
+
+ /* If min_lod == max_lod we can greatly simplify mipmap selection.
+ * This is a case that occurs during automatic mipmap generation.
+ */
+ if (sampler->min_lod == sampler->max_lod) {
+ state->min_max_lod_equal = 1;
+ } else {
+ if (sampler->min_lod > 0.0f) {
+ state->apply_min_lod = 1;
+ }
+
+ if (sampler->max_lod < (float)view->last_level) {
+ state->apply_max_lod = 1;
+ }
+ }
+ }
+
state->compare_mode = sampler->compare_mode;
if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
state->compare_func = sampler->compare_func;
}
state->normalized_coords = sampler->normalized_coords;
- state->lod_bias = sampler->lod_bias;
- if (!view->last_level &&
- sampler->min_img_filter == sampler->mag_img_filter) {
- state->min_lod = 0.0f;
- state->max_lod = 0.0f;
- } else {
- state->min_lod = MAX2(sampler->min_lod, 0.0f);
- state->max_lod = sampler->max_lod;
- }
- state->border_color[0] = sampler->border_color[0];
- state->border_color[1] = sampler->border_color[1];
- state->border_color[2] = sampler->border_color[2];
- state->border_color[3] = sampler->border_color[3];
/*
* FIXME: Handle the remainder of pipe_sampler_view.
@@ -126,6 +175,220 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
/**
+ * Generate code to compute coordinate gradient (rho).
+ * \param ddx partial derivatives of (s, t, r, q) with respect to X
+ * \param ddy partial derivatives of (s, t, r, q) with respect to Y
+ *
+ * XXX: The resulting rho is scalar, so we ignore all but the first element of
+ * derivatives that are passed by the shader.
+ */
+static LLVMValueRef
+lp_build_rho(struct lp_build_sample_context *bld,
+ const LLVMValueRef ddx[4],
+ const LLVMValueRef ddy[4])
+{
+ struct lp_build_context *float_size_bld = &bld->float_size_bld;
+ struct lp_build_context *float_bld = &bld->float_bld;
+ const unsigned dims = bld->dims;
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
+ LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
+ LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
+ LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
+ LLVMValueRef rho_x, rho_y;
+ LLVMValueRef rho_vec;
+ LLVMValueRef float_size;
+ LLVMValueRef rho;
+
+ dsdx = ddx[0];
+ dsdy = ddy[0];
+
+ if (dims <= 1) {
+ rho_x = dsdx;
+ rho_y = dsdy;
+ }
+ else {
+ rho_x = float_size_bld->undef;
+ rho_y = float_size_bld->undef;
+
+ rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dsdx, index0, "");
+ rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dsdy, index0, "");
+
+ dtdx = ddx[1];
+ dtdy = ddy[1];
+
+ rho_x = LLVMBuildInsertElement(bld->builder, rho_x, dtdx, index1, "");
+ rho_y = LLVMBuildInsertElement(bld->builder, rho_y, dtdy, index1, "");
+
+ if (dims >= 3) {
+ drdx = ddx[2];
+ drdy = ddy[2];
+
+ rho_x = LLVMBuildInsertElement(bld->builder, rho_x, drdx, index2, "");
+ rho_y = LLVMBuildInsertElement(bld->builder, rho_y, drdy, index2, "");
+ }
+ }
+
+ rho_x = lp_build_abs(float_size_bld, rho_x);
+ rho_y = lp_build_abs(float_size_bld, rho_y);
+
+ rho_vec = lp_build_max(float_size_bld, rho_x, rho_y);
+
+ float_size = lp_build_int_to_float(float_size_bld, bld->int_size);
+
+ rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
+
+ if (dims <= 1) {
+ rho = rho_vec;
+ }
+ else {
+ if (dims >= 2) {
+ LLVMValueRef rho_s, rho_t, rho_r;
+
+ rho_s = LLVMBuildExtractElement(bld->builder, rho_vec, index0, "");
+ rho_t = LLVMBuildExtractElement(bld->builder, rho_vec, index1, "");
+
+ rho = lp_build_max(float_bld, rho_s, rho_t);
+
+ if (dims >= 3) {
+ rho_r = LLVMBuildExtractElement(bld->builder, rho_vec, index0, "");
+ rho = lp_build_max(float_bld, rho, rho_r);
+ }
+ }
+ }
+
+ return rho;
+}
+
+
+/*
+ * Bri-linear lod computation
+ *
+ * Use a piece-wise linear approximation of log2 such that:
+ * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
+ * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
+ * with the steepness specified in 'factor'
+ * - exact result for 0.5, 1.5, etc.
+ *
+ *
+ * 1.0 - /----*
+ * /
+ * /
+ * /
+ * 0.5 - *
+ * /
+ * /
+ * /
+ * 0.0 - *----/
+ *
+ * | |
+ * 2^0 2^1
+ *
+ * This is a technique also commonly used in hardware:
+ * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
+ *
+ * TODO: For correctness, this should only be applied when texture is known to
+ * have regular mipmaps, i.e., mipmaps derived from the base level.
+ *
+ * TODO: This could be done in fixed point, where applicable.
+ */
+static void
+lp_build_brilinear_lod(struct lp_build_context *bld,
+ LLVMValueRef lod,
+ double factor,
+ LLVMValueRef *out_lod_ipart,
+ LLVMValueRef *out_lod_fpart)
+{
+ LLVMValueRef lod_fpart;
+ double pre_offset = (factor - 0.5)/factor - 0.5;
+ double post_offset = 1 - factor;
+
+ if (0) {
+ lp_build_printf(bld->builder, "lod = %f\n", lod);
+ }
+
+ lod = lp_build_add(bld, lod,
+ lp_build_const_vec(bld->type, pre_offset));
+
+ lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);
+
+ lod_fpart = lp_build_mul(bld, lod_fpart,
+ lp_build_const_vec(bld->type, factor));
+
+ lod_fpart = lp_build_add(bld, lod_fpart,
+ lp_build_const_vec(bld->type, post_offset));
+
+ /*
+ * It's not necessary to clamp lod_fpart since:
+ * - the above expression will never produce numbers greater than one.
+ * - the mip filtering branch is only taken if lod_fpart is positive
+ */
+
+ *out_lod_fpart = lod_fpart;
+
+ if (0) {
+ lp_build_printf(bld->builder, "lod_ipart = %i\n", *out_lod_ipart);
+ lp_build_printf(bld->builder, "lod_fpart = %f\n\n", *out_lod_fpart);
+ }
+}
+
+
+/*
+ * Combined log2 and brilinear lod computation.
+ *
+ * It's in all identical to calling lp_build_fast_log2() and
+ * lp_build_brilinear_lod() above, but by combining we can compute the interger
+ * and fractional part independently.
+ */
+static void
+lp_build_brilinear_rho(struct lp_build_context *bld,
+ LLVMValueRef rho,
+ double factor,
+ LLVMValueRef *out_lod_ipart,
+ LLVMValueRef *out_lod_fpart)
+{
+ LLVMValueRef lod_ipart;
+ LLVMValueRef lod_fpart;
+
+ const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
+ const double post_offset = 1 - 2*factor;
+
+ assert(bld->type.floating);
+
+ assert(lp_check_value(bld->type, rho));
+
+ /*
+ * The pre factor will make the intersections with the exact powers of two
+ * happen precisely where we want then to be, which means that the integer
+ * part will not need any post adjustments.
+ */
+ rho = lp_build_mul(bld, rho,
+ lp_build_const_vec(bld->type, pre_factor));
+
+ /* ipart = ifloor(log2(rho)) */
+ lod_ipart = lp_build_extract_exponent(bld, rho, 0);
+
+ /* fpart = rho / 2**ipart */
+ lod_fpart = lp_build_extract_mantissa(bld, rho);
+
+ lod_fpart = lp_build_mul(bld, lod_fpart,
+ lp_build_const_vec(bld->type, factor));
+
+ lod_fpart = lp_build_add(bld, lod_fpart,
+ lp_build_const_vec(bld->type, post_offset));
+
+ /*
+ * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
+ * - the above expression will never produce numbers greater than one.
+ * - the mip filtering branch is only taken if lod_fpart is positive
+ */
+
+ *out_lod_ipart = lod_ipart;
+ *out_lod_fpart = lod_fpart;
+}
+
+
+/**
* Generate code to compute texture level of detail (lambda).
* \param ddx partial derivatives of (s, t, r, q) with respect to X
* \param ddy partial derivatives of (s, t, r, q) with respect to Y
@@ -138,83 +401,81 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
* XXX: The resulting lod is scalar, so ignore all but the first element of
* derivatives, lod_bias, etc that are passed by the shader.
*/
-LLVMValueRef
+void
lp_build_lod_selector(struct lp_build_sample_context *bld,
+ unsigned unit,
const LLVMValueRef ddx[4],
const LLVMValueRef ddy[4],
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth)
+ unsigned mip_filter,
+ LLVMValueRef *out_lod_ipart,
+ LLVMValueRef *out_lod_fpart)
{
- if (bld->static_state->min_lod == bld->static_state->max_lod) {
+ struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMValueRef lod;
+
+ *out_lod_ipart = bld->int_bld.zero;
+ *out_lod_fpart = bld->float_bld.zero;
+
+ if (bld->static_state->min_max_lod_equal) {
/* User is forcing sampling from a particular mipmap level.
* This is hit during mipmap generation.
*/
- return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
+ LLVMValueRef min_lod =
+ bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);
+
+ lod = min_lod;
}
else {
- struct lp_build_context *float_bld = &bld->float_bld;
- LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
- bld->static_state->lod_bias);
- LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
- bld->static_state->min_lod);
- LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
- bld->static_state->max_lod);
+ LLVMValueRef sampler_lod_bias =
+ bld->dynamic_state->lod_bias(bld->dynamic_state, bld->builder, unit);
LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
- LLVMValueRef lod;
if (explicit_lod) {
lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
index0, "");
}
else {
- const int dims = texture_dims(bld->static_state->target);
- LLVMValueRef dsdx, dsdy;
- LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
LLVMValueRef rho;
- dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
- dsdx = lp_build_abs(float_bld, dsdx);
- dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
- dsdy = lp_build_abs(float_bld, dsdy);
- if (dims > 1) {
- dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
- dtdx = lp_build_abs(float_bld, dtdx);
- dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
- dtdy = lp_build_abs(float_bld, dtdy);
- if (dims > 2) {
- drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
- drdx = lp_build_abs(float_bld, drdx);
- drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
- drdy = lp_build_abs(float_bld, drdy);
- }
- }
+ rho = lp_build_rho(bld, ddx, ddy);
- /* Compute rho = max of all partial derivatives scaled by texture size.
- * XXX this could be vectorized somewhat
+ /*
+ * Compute lod = log2(rho)
*/
- rho = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, dsdx, dsdy),
- lp_build_int_to_float(float_bld, width), "");
- if (dims > 1) {
- LLVMValueRef max;
- max = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, dtdx, dtdy),
- lp_build_int_to_float(float_bld, height), "");
- rho = lp_build_max(float_bld, rho, max);
- if (dims > 2) {
- max = LLVMBuildFMul(bld->builder,
- lp_build_max(float_bld, drdx, drdy),
- lp_build_int_to_float(float_bld, depth), "");
- rho = lp_build_max(float_bld, rho, max);
+
+ if (!lod_bias &&
+ !bld->static_state->lod_bias_non_zero &&
+ !bld->static_state->apply_max_lod &&
+ !bld->static_state->apply_min_lod) {
+ /*
+ * Special case when there are no post-log2 adjustments, which
+ * saves instructions but keeping the integer and fractional lod
+ * computations separate from the start.
+ */
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
+ mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
+ *out_lod_ipart = lp_build_ilog2(float_bld, rho);
+ *out_lod_fpart = bld->float_bld.zero;
+ return;
+ }
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
+ !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
+ lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR,
+ out_lod_ipart, out_lod_fpart);
+ return;
}
}
- /* compute lod = log2(rho) */
- lod = lp_build_log2(float_bld, rho);
+ if (0) {
+ lod = lp_build_log2(float_bld, rho);
+ }
+ else {
+ lod = lp_build_fast_log2(float_bld, rho);
+ }
/* add shader lod bias */
if (lod_bias) {
@@ -225,13 +486,43 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
}
/* add sampler lod bias */
- lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
+ if (bld->static_state->lod_bias_non_zero)
+ lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
+
/* clamp lod */
- lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
+ if (bld->static_state->apply_max_lod) {
+ LLVMValueRef max_lod =
+ bld->dynamic_state->max_lod(bld->dynamic_state, bld->builder, unit);
- return lod;
+ lod = lp_build_min(float_bld, lod, max_lod);
+ }
+ if (bld->static_state->apply_min_lod) {
+ LLVMValueRef min_lod =
+ bld->dynamic_state->min_lod(bld->dynamic_state, bld->builder, unit);
+
+ lod = lp_build_max(float_bld, lod, min_lod);
+ }
+ }
+
+ if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
+ if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
+ lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR,
+ out_lod_ipart, out_lod_fpart);
+ }
+ else {
+ lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart);
+ }
+
+ lp_build_name(*out_lod_fpart, "lod_fpart");
+ }
+ else {
+ *out_lod_ipart = lp_build_iround(float_bld, lod);
}
+
+ lp_build_name(*out_lod_ipart, "lod_ipart");
+
+ return;
}
@@ -245,10 +536,9 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
unsigned unit,
- LLVMValueRef lod,
+ LLVMValueRef lod_ipart,
LLVMValueRef *level_out)
{
- struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *int_bld = &bld->int_bld;
LLVMValueRef last_level, level;
@@ -258,7 +548,7 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
bld->builder, unit);
/* convert float lod to integer */
- level = lp_build_iround(float_bld, lod);
+ level = lod_ipart;
/* clamp level to legal range of levels */
*level_out = lp_build_clamp(int_bld, level, zero, last_level);
@@ -273,43 +563,77 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
unsigned unit,
- LLVMValueRef lod,
+ LLVMValueRef lod_ipart,
+ LLVMValueRef *lod_fpart_inout,
LLVMValueRef *level0_out,
- LLVMValueRef *level1_out,
- LLVMValueRef *weight_out)
+ LLVMValueRef *level1_out)
{
- struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMBuilderRef builder = bld->builder;
struct lp_build_context *int_bld = &bld->int_bld;
- LLVMValueRef last_level, level;
+ struct lp_build_context *float_bld = &bld->float_bld;
+ LLVMValueRef last_level;
+ LLVMValueRef clamp_min;
+ LLVMValueRef clamp_max;
+
+ *level0_out = lod_ipart;
+ *level1_out = lp_build_add(int_bld, lod_ipart, int_bld->one);
last_level = bld->dynamic_state->last_level(bld->dynamic_state,
bld->builder, unit);
- /* convert float lod to integer */
- level = lp_build_ifloor(float_bld, lod);
-
- /* compute level 0 and clamp to legal range of levels */
- *level0_out = lp_build_clamp(int_bld, level,
- int_bld->zero,
- last_level);
- /* compute level 1 and clamp to legal range of levels */
- level = lp_build_add(int_bld, level, int_bld->one);
- *level1_out = lp_build_clamp(int_bld, level,
- int_bld->zero,
- last_level);
-
- *weight_out = lp_build_fract(float_bld, lod);
+ /*
+ * Clamp both lod_ipart and lod_ipart + 1 to [0, last_level], with the
+ * minimum number of comparisons, and zeroing lod_fpart in the extreme
+ * ends in the process.
+ */
+
+ /* lod_ipart < 0 */
+ clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
+ lod_ipart, int_bld->zero,
+ "clamp_lod_to_zero");
+
+ *level0_out = LLVMBuildSelect(builder, clamp_min,
+ int_bld->zero, *level0_out, "");
+
+ *level1_out = LLVMBuildSelect(builder, clamp_min,
+ int_bld->zero, *level1_out, "");
+
+ *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
+ float_bld->zero, *lod_fpart_inout, "");
+
+ /* lod_ipart >= last_level */
+ clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
+ lod_ipart, last_level,
+ "clamp_lod_to_last");
+
+ *level0_out = LLVMBuildSelect(builder, clamp_max,
+ last_level, *level0_out, "");
+
+ *level1_out = LLVMBuildSelect(builder, clamp_max,
+ last_level, *level1_out, "");
+
+ *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
+ float_bld->zero, *lod_fpart_inout, "");
+
+ lp_build_name(*level0_out, "sampler%u_miplevel0", unit);
+ lp_build_name(*level1_out, "sampler%u_miplevel1", unit);
+ lp_build_name(*lod_fpart_inout, "sampler%u_mipweight", unit);
}
+/**
+ * Return pointer to a single mipmap level.
+ * \param data_array array of pointers to mipmap levels
+ * \param level integer mipmap level
+ */
LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
- LLVMValueRef data_array, LLVMValueRef level)
+ LLVMValueRef level)
{
LLVMValueRef indexes[2], data_ptr;
indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
indexes[1] = level;
- data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
+ data_ptr = LLVMBuildGEP(bld->builder, bld->data_array, indexes, 2, "");
data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
return data_ptr;
}
@@ -317,10 +641,10 @@ lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
LLVMValueRef
lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
- LLVMValueRef data_array, int level)
+ int level)
{
LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
- return lp_build_get_mipmap_level(bld, data_array, lvl);
+ return lp_build_get_mipmap_level(bld, lvl);
}
@@ -329,13 +653,24 @@ lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
* Return max(1, base_size >> level);
*/
static LLVMValueRef
-lp_build_minify(struct lp_build_sample_context *bld,
+lp_build_minify(struct lp_build_context *bld,
LLVMValueRef base_size,
LLVMValueRef level)
{
- LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
- size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
- return size;
+ assert(lp_check_value(bld->type, base_size));
+ assert(lp_check_value(bld->type, level));
+
+ if (level == bld->zero) {
+ /* if we're using mipmap level zero, no minification is needed */
+ return base_size;
+ }
+ else {
+ LLVMValueRef size =
+ LLVMBuildLShr(bld->builder, base_size, level, "minify");
+ assert(bld->type.sign);
+ size = lp_build_max(bld, size, bld->one);
+ return size;
+ }
}
@@ -364,71 +699,113 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
*/
void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
- unsigned dims,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
- LLVMValueRef ilevel0,
- LLVMValueRef ilevel1,
- LLVMValueRef row_stride_array,
- LLVMValueRef img_stride_array,
- LLVMValueRef *width0_vec,
- LLVMValueRef *width1_vec,
- LLVMValueRef *height0_vec,
- LLVMValueRef *height1_vec,
- LLVMValueRef *depth0_vec,
- LLVMValueRef *depth1_vec,
- LLVMValueRef *row_stride0_vec,
- LLVMValueRef *row_stride1_vec,
- LLVMValueRef *img_stride0_vec,
- LLVMValueRef *img_stride1_vec)
+ LLVMValueRef ilevel,
+ LLVMValueRef *out_size,
+ LLVMValueRef *row_stride_vec,
+ LLVMValueRef *img_stride_vec)
{
- const unsigned mip_filter = bld->static_state->min_mip_filter;
- LLVMValueRef ilevel0_vec, ilevel1_vec;
+ const unsigned dims = bld->dims;
+ LLVMValueRef ilevel_vec;
- ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
- ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
+ ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
/*
- * Compute width, height, depth at mipmap level 'ilevel0'
+ * Compute width, height, depth at mipmap level 'ilevel'
*/
- *width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
+ *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
+
if (dims >= 2) {
- *height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
- *row_stride0_vec = lp_build_get_level_stride_vec(bld,
- row_stride_array,
- ilevel0);
+ *row_stride_vec = lp_build_get_level_stride_vec(bld,
+ bld->row_stride_array,
+ ilevel);
if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
- *img_stride0_vec = lp_build_get_level_stride_vec(bld,
- img_stride_array,
- ilevel0);
- if (dims == 3) {
- *depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
- }
+ *img_stride_vec = lp_build_get_level_stride_vec(bld,
+ bld->img_stride_array,
+ ilevel);
}
}
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* compute width, height, depth for second mipmap level at 'ilevel1' */
- *width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
- if (dims >= 2) {
- *height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
- *row_stride1_vec = lp_build_get_level_stride_vec(bld,
- row_stride_array,
- ilevel1);
- if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
- *img_stride1_vec = lp_build_get_level_stride_vec(bld,
- img_stride_array,
- ilevel1);
- if (dims == 3) {
- *depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
- }
- }
+}
+
+
+/**
+ * Extract and broadcast texture size.
+ *
+ * @param size_type type of the texture size vector (either
+ * bld->int_size_type or bld->float_size_type)
+ * @param coord_type type of the texture size vector (either
+ * bld->int_coord_type or bld->coord_type)
+ * @param int_size vector with the integer texture size (width, height,
+ * depth)
+ */
+void
+lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
+ struct lp_type size_type,
+ struct lp_type coord_type,
+ LLVMValueRef size,
+ LLVMValueRef *out_width,
+ LLVMValueRef *out_height,
+ LLVMValueRef *out_depth)
+{
+ const unsigned dims = bld->dims;
+ LLVMTypeRef i32t = LLVMInt32Type();
+
+ *out_width = lp_build_extract_broadcast(bld->builder,
+ size_type,
+ coord_type,
+ size,
+ LLVMConstInt(i32t, 0, 0));
+ if (dims >= 2) {
+ *out_height = lp_build_extract_broadcast(bld->builder,
+ size_type,
+ coord_type,
+ size,
+ LLVMConstInt(i32t, 1, 0));
+ if (dims == 3) {
+ *out_depth = lp_build_extract_broadcast(bld->builder,
+ size_type,
+ coord_type,
+ size,
+ LLVMConstInt(i32t, 2, 0));
}
}
}
+/**
+ * Unnormalize coords.
+ *
+ * @param int_size vector with the integer texture size (width, height, depth)
+ */
+void
+lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
+ LLVMValueRef flt_size,
+ LLVMValueRef *s,
+ LLVMValueRef *t,
+ LLVMValueRef *r)
+{
+ const unsigned dims = bld->dims;
+ LLVMValueRef width;
+ LLVMValueRef height;
+ LLVMValueRef depth;
+
+ lp_build_extract_image_sizes(bld,
+ bld->float_size_type,
+ bld->coord_type,
+ flt_size,
+ &width,
+ &height,
+ &depth);
+
+ /* s = s * width, t = t * height */
+ *s = lp_build_mul(&bld->coord_bld, *s, width);
+ if (dims >= 2) {
+ *t = lp_build_mul(&bld->coord_bld, *t, height);
+ if (dims >= 3) {
+ *r = lp_build_mul(&bld->coord_bld, *r, depth);
+ }
+ }
+}
+
/** Helper used by lp_build_cube_lookup() */
static LLVMValueRef
@@ -547,25 +924,16 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
{
- struct lp_build_flow_context *flow_ctx;
struct lp_build_if_state if_ctx;
+ LLVMValueRef face_s_var;
+ LLVMValueRef face_t_var;
+ LLVMValueRef face_var;
- flow_ctx = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx);
-
- *face_s = bld->coord_bld.undef;
- *face_t = bld->coord_bld.undef;
- *face = bld->int_bld.undef;
-
- lp_build_name(*face_s, "face_s");
- lp_build_name(*face_t, "face_t");
- lp_build_name(*face, "face");
+ face_s_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_s_var");
+ face_t_var = lp_build_alloca(bld->builder, bld->coord_bld.vec_type, "face_t_var");
+ face_var = lp_build_alloca(bld->builder, bld->int_bld.vec_type, "face_var");
- lp_build_flow_scope_declare(flow_ctx, face_s);
- lp_build_flow_scope_declare(flow_ctx, face_t);
- lp_build_flow_scope_declare(flow_ctx, face);
-
- lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
+ lp_build_if(&if_ctx, bld->builder, arx_ge_ary_arz);
{
/* +/- X face */
LLVMValueRef sign = lp_build_sgn(float_bld, rx);
@@ -575,57 +943,52 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld,
*face = lp_build_cube_face(bld, rx,
PIPE_TEX_FACE_POS_X,
PIPE_TEX_FACE_NEG_X);
+ LLVMBuildStore(bld->builder, *face_s, face_s_var);
+ LLVMBuildStore(bld->builder, *face_t, face_t_var);
+ LLVMBuildStore(bld->builder, *face, face_var);
}
lp_build_else(&if_ctx);
{
- struct lp_build_flow_context *flow_ctx2;
struct lp_build_if_state if_ctx2;
- LLVMValueRef face_s2 = bld->coord_bld.undef;
- LLVMValueRef face_t2 = bld->coord_bld.undef;
- LLVMValueRef face2 = bld->int_bld.undef;
-
- flow_ctx2 = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx2);
- lp_build_flow_scope_declare(flow_ctx2, &face_s2);
- lp_build_flow_scope_declare(flow_ctx2, &face_t2);
- lp_build_flow_scope_declare(flow_ctx2, &face2);
-
ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
- lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
+ lp_build_if(&if_ctx2, bld->builder, ary_ge_arx_arz);
{
/* +/- Y face */
LLVMValueRef sign = lp_build_sgn(float_bld, ry);
LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
- face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
- face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
- face2 = lp_build_cube_face(bld, ry,
+ *face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
+ *face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
+ *face = lp_build_cube_face(bld, ry,
PIPE_TEX_FACE_POS_Y,
PIPE_TEX_FACE_NEG_Y);
+ LLVMBuildStore(bld->builder, *face_s, face_s_var);
+ LLVMBuildStore(bld->builder, *face_t, face_t_var);
+ LLVMBuildStore(bld->builder, *face, face_var);
}
lp_build_else(&if_ctx2);
{
/* +/- Z face */
LLVMValueRef sign = lp_build_sgn(float_bld, rz);
LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
- face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
- face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
- face2 = lp_build_cube_face(bld, rz,
+ *face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
+ *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
+ *face = lp_build_cube_face(bld, rz,
PIPE_TEX_FACE_POS_Z,
PIPE_TEX_FACE_NEG_Z);
+ LLVMBuildStore(bld->builder, *face_s, face_s_var);
+ LLVMBuildStore(bld->builder, *face_t, face_t_var);
+ LLVMBuildStore(bld->builder, *face, face_var);
}
lp_build_endif(&if_ctx2);
- lp_build_flow_scope_end(flow_ctx2);
- lp_build_flow_destroy(flow_ctx2);
- *face_s = face_s2;
- *face_t = face_t2;
- *face = face2;
}
lp_build_endif(&if_ctx);
- lp_build_flow_scope_end(flow_ctx);
- lp_build_flow_destroy(flow_ctx);
+
+ *face_s = LLVMBuildLoad(bld->builder, face_s_var, "face_s");
+ *face_t = LLVMBuildLoad(bld->builder, face_t_var, "face_t");
+ *face = LLVMBuildLoad(bld->builder, face_var, "face");
}
}
@@ -659,11 +1022,21 @@ lp_build_sample_partial_offset(struct lp_build_context *bld,
* Pixel blocks have power of two dimensions. LLVM should convert the
* rem/div to bit arithmetic.
* TODO: Verify this.
+ * It does indeed BUT it does transform it to scalar (and back) when doing so
+ * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
+ * The generated code looks seriously unfunny and is quite expensive.
*/
-
+#if 0
LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
subcoord = LLVMBuildURem(bld->builder, coord, block_width, "");
coord = LLVMBuildUDiv(bld->builder, coord, block_width, "");
+#else
+ unsigned logbase2 = util_unsigned_logbase2(block_length);
+ LLVMValueRef block_shift = lp_build_const_int_vec(bld->type, logbase2);
+ LLVMValueRef block_mask = lp_build_const_int_vec(bld->type, block_length - 1);
+ subcoord = LLVMBuildAnd(bld->builder, coord, block_mask, "");
+ coord = LLVMBuildLShr(bld->builder, coord, block_shift, "");
+#endif
}
offset = lp_build_mul(bld, coord, stride);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 8b042d5242..ffed27cee8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -82,12 +82,10 @@ struct lp_sampler_static_state
unsigned compare_mode:1;
unsigned compare_func:3;
unsigned normalized_coords:1;
- float lod_bias, min_lod, max_lod;
- float border_color[4];
-
- /* Aero hacks */
- unsigned force_nearest_s:1;
- unsigned force_nearest_t:1;
+ unsigned min_max_lod_equal:1; /**< min_lod == max_lod ? */
+ unsigned lod_bias_non_zero:1;
+ unsigned apply_min_lod:1; /**< min_lod > 0 ? */
+ unsigned apply_max_lod:1; /**< max_lod < last_level ? */
};
@@ -104,45 +102,67 @@ struct lp_sampler_static_state
struct lp_sampler_dynamic_state
{
- /** Obtain the base texture width. */
+ /** Obtain the base texture width (returns int32) */
LLVMValueRef
(*width)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
- /** Obtain the base texture height. */
+ /** Obtain the base texture height (returns int32) */
LLVMValueRef
(*height)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
- /** Obtain the base texture depth. */
+ /** Obtain the base texture depth (returns int32) */
LLVMValueRef
(*depth)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
- /** Obtain the number of mipmap levels (minus one). */
+ /** Obtain the number of mipmap levels minus one (returns int32) */
LLVMValueRef
(*last_level)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
+ /** Obtain stride in bytes between image rows/blocks (returns int32) */
LLVMValueRef
(*row_stride)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
+ /** Obtain stride in bytes between image slices (returns int32) */
LLVMValueRef
(*img_stride)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
+ /** Obtain pointer to array of pointers to mimpap levels */
LLVMValueRef
(*data_ptr)( const struct lp_sampler_dynamic_state *state,
LLVMBuilderRef builder,
unsigned unit);
+ /** Obtain texture min lod (returns float) */
+ LLVMValueRef
+ (*min_lod)(const struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder, unsigned unit);
+
+ /** Obtain texture max lod (returns float) */
+ LLVMValueRef
+ (*max_lod)(const struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder, unsigned unit);
+
+ /** Obtain texture lod bias (returns float) */
+ LLVMValueRef
+ (*lod_bias)(const struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder, unsigned unit);
+
+ /** Obtain texture border color (returns ptr to float[4]) */
+ LLVMValueRef
+ (*border_color)(const struct lp_sampler_dynamic_state *state,
+ LLVMBuilderRef builder, unsigned unit);
};
@@ -159,10 +179,16 @@ struct lp_build_sample_context
const struct util_format_description *format_desc;
+ /* See texture_dims() */
+ unsigned dims;
+
/** regular scalar float type */
struct lp_type float_type;
struct lp_build_context float_bld;
+ /** float vector type */
+ struct lp_build_context float_vec_bld;
+
/** regular scalar float type */
struct lp_type int_type;
struct lp_build_context int_bld;
@@ -171,17 +197,32 @@ struct lp_build_sample_context
struct lp_type coord_type;
struct lp_build_context coord_bld;
- /** Unsigned integer coordinates */
- struct lp_type uint_coord_type;
- struct lp_build_context uint_coord_bld;
-
/** Signed integer coordinates */
struct lp_type int_coord_type;
struct lp_build_context int_coord_bld;
+ /** Unsigned integer texture size */
+ struct lp_type int_size_type;
+ struct lp_build_context int_size_bld;
+
+ /** Unsigned integer texture size */
+ struct lp_type float_size_type;
+ struct lp_build_context float_size_bld;
+
/** Output texels type and build context */
struct lp_type texel_type;
struct lp_build_context texel_bld;
+
+ /* Common dynamic state values */
+ LLVMValueRef width;
+ LLVMValueRef height;
+ LLVMValueRef depth;
+ LLVMValueRef row_stride_array;
+ LLVMValueRef img_stride_array;
+ LLVMValueRef data_array;
+
+ /** Integer vector with texture width, height, depth */
+ LLVMValueRef int_size;
};
@@ -218,7 +259,7 @@ apply_sampler_swizzle(struct lp_build_sample_context *bld,
}
-static INLINE int
+static INLINE unsigned
texture_dims(enum pipe_texture_target tex)
{
switch (tex) {
@@ -237,6 +278,11 @@ texture_dims(enum pipe_texture_target tex)
}
+boolean
+lp_sampler_wrap_mode_uses_border_color(unsigned mode,
+ unsigned min_img_filter,
+ unsigned mag_img_filter);
+
/**
* Derive the sampler static state.
*/
@@ -246,15 +292,16 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_sampler_state *sampler);
-LLVMValueRef
+void
lp_build_lod_selector(struct lp_build_sample_context *bld,
+ unsigned unit,
const LLVMValueRef ddx[4],
const LLVMValueRef ddy[4],
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth);
+ unsigned mip_filter,
+ LLVMValueRef *out_lod_ipart,
+ LLVMValueRef *out_lod_fpart);
void
lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
@@ -265,40 +312,44 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
void
lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
unsigned unit,
- LLVMValueRef lod,
+ LLVMValueRef lod_ipart,
+ LLVMValueRef *lod_fpart_inout,
LLVMValueRef *level0_out,
- LLVMValueRef *level1_out,
- LLVMValueRef *weight_out);
+ LLVMValueRef *level1_out);
LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
- LLVMValueRef data_array, LLVMValueRef level);
+ LLVMValueRef level);
LLVMValueRef
lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
- LLVMValueRef data_array, int level);
+ int level);
void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
- unsigned dims,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
- LLVMValueRef ilevel0,
- LLVMValueRef ilevel1,
- LLVMValueRef row_stride_array,
- LLVMValueRef img_stride_array,
- LLVMValueRef *width0_vec,
- LLVMValueRef *width1_vec,
- LLVMValueRef *height0_vec,
- LLVMValueRef *height1_vec,
- LLVMValueRef *depth0_vec,
- LLVMValueRef *depth1_vec,
- LLVMValueRef *row_stride0_vec,
- LLVMValueRef *row_stride1_vec,
- LLVMValueRef *img_stride0_vec,
- LLVMValueRef *img_stride1_vec);
+ LLVMValueRef ilevel,
+ LLVMValueRef *out_size_vec,
+ LLVMValueRef *row_stride_vec,
+ LLVMValueRef *img_stride_vec);
+
+
+void
+lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
+ struct lp_type size_type,
+ struct lp_type coord_type,
+ LLVMValueRef size,
+ LLVMValueRef *out_width,
+ LLVMValueRef *out_height,
+ LLVMValueRef *out_depth);
+
+
+void
+lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
+ LLVMValueRef flt_size,
+ LLVMValueRef *s,
+ LLVMValueRef *t,
+ LLVMValueRef *r);
void
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index 7e064900e7..d3e3b242af 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -45,6 +45,7 @@
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
+#include "lp_bld_bitarit.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_pack.h"
@@ -80,20 +81,21 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
LLVMValueRef *out_offset,
LLVMValueRef *out_i)
{
- struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef length_minus_one;
- length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+ length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
switch(wrap_mode) {
case PIPE_TEX_WRAP_REPEAT:
if(is_pot)
coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
- else
- /* Signed remainder won't give the right results for negative
- * dividends but unsigned remainder does.*/
+ else {
+ /* Add a bias to the texcoord to handle negative coords */
+ LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
+ coord = LLVMBuildAdd(bld->builder, coord, bias, "");
coord = LLVMBuildURem(bld->builder, coord, length, "");
+ }
break;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
@@ -111,7 +113,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
assert(0);
}
- lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
+ lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
out_offset, out_i);
}
@@ -144,7 +146,6 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
LLVMValueRef *i0,
LLVMValueRef *i1)
{
- struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef length_minus_one;
LLVMValueRef lmask, umask, mask;
@@ -186,8 +187,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
* multiplication.
*/
- *i0 = uint_coord_bld->zero;
- *i1 = uint_coord_bld->zero;
+ *i0 = int_coord_bld->zero;
+ *i1 = int_coord_bld->zero;
length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
@@ -197,17 +198,18 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
}
else {
- /* Signed remainder won't give the right results for negative
- * dividends but unsigned remainder does.*/
+ /* Add a bias to the texcoord to handle negative coords */
+ LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
+ coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
}
mask = lp_build_compare(bld->builder, int_coord_bld->type,
PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
- *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+ *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
*offset1 = LLVMBuildAnd(bld->builder,
- lp_build_add(uint_coord_bld, *offset0, stride),
+ lp_build_add(int_coord_bld, *offset0, stride),
mask, "");
break;
@@ -222,8 +224,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
- *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
- *offset1 = lp_build_add(uint_coord_bld,
+ *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
+ *offset1 = lp_build_add(int_coord_bld,
*offset0,
LLVMBuildAnd(bld->builder, stride, mask, ""));
break;
@@ -236,8 +238,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
default:
assert(0);
- *offset0 = uint_coord_bld->zero;
- *offset1 = uint_coord_bld->zero;
+ *offset0 = int_coord_bld->zero;
+ *offset1 = int_coord_bld->zero;
break;
}
}
@@ -250,9 +252,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
*/
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
+ LLVMValueRef int_size,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
@@ -262,11 +262,12 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
LLVMValueRef *colors_lo,
LLVMValueRef *colors_hi)
{
- const int dims = texture_dims(bld->static_state->target);
+ const unsigned dims = bld->dims;
LLVMBuilderRef builder = bld->builder;
struct lp_build_context i32, h16, u8n;
LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
LLVMValueRef i32_c8;
+ LLVMValueRef width_vec, height_vec, depth_vec;
LLVMValueRef s_ipart, t_ipart, r_ipart;
LLVMValueRef x_stride;
LLVMValueRef x_offset, offset;
@@ -280,30 +281,33 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
h16_vec_type = lp_build_vec_type(h16.type);
u8n_vec_type = lp_build_vec_type(u8n.type);
+ lp_build_extract_image_sizes(bld,
+ bld->int_size_type,
+ bld->int_coord_type,
+ int_size,
+ &width_vec,
+ &height_vec,
+ &depth_vec);
+
if (bld->static_state->normalized_coords) {
- /* s = s * width, t = t * height */
- LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
- LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
- coord_vec_type, "");
- s = lp_build_mul(&bld->coord_bld, s, fp_width);
- if (dims >= 2) {
- LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
- coord_vec_type, "");
- t = lp_build_mul(&bld->coord_bld, t, fp_height);
- if (dims >= 3) {
- LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
- coord_vec_type, "");
- r = lp_build_mul(&bld->coord_bld, r, fp_depth);
- }
- }
- }
+ LLVMValueRef scaled_size;
+ LLVMValueRef flt_size;
- /* scale coords by 256 (8 fractional bits) */
- s = lp_build_mul_imm(&bld->coord_bld, s, 256);
- if (dims >= 2)
- t = lp_build_mul_imm(&bld->coord_bld, t, 256);
- if (dims >= 3)
- r = lp_build_mul_imm(&bld->coord_bld, r, 256);
+ /* scale size by 256 (8 fractional bits) */
+ scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
+
+ flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
+
+ lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
+ }
+ else {
+ /* scale coords by 256 (8 fractional bits) */
+ s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+ if (dims >= 2)
+ t = lp_build_mul_imm(&bld->coord_bld, t, 256);
+ if (dims >= 3)
+ r = lp_build_mul_imm(&bld->coord_bld, r, 256);
+ }
/* convert float to int */
s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
@@ -321,7 +325,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
/* get pixel, row, image strides */
- x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+ x_stride = lp_build_const_vec(bld->int_coord_bld.type,
bld->format_desc->block.bits/8);
/* Do texcoord wrapping, compute texel offset */
@@ -340,7 +344,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
bld->static_state->pot_height,
bld->static_state->wrap_t,
&y_offset, &y_subcoord);
- offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
+ offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
if (dims >= 3) {
LLVMValueRef z_offset;
lp_build_sample_wrap_nearest_int(bld,
@@ -349,13 +353,13 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
bld->static_state->pot_height,
bld->static_state->wrap_r,
&z_offset, &z_subcoord);
- offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
+ offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
}
else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
LLVMValueRef z_offset;
/* The r coord is the cube face in [0,5] */
- z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
- offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
+ z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
+ offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
}
}
@@ -414,9 +418,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
*/
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
+ LLVMValueRef int_size,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
@@ -426,11 +428,12 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef *colors_lo,
LLVMValueRef *colors_hi)
{
- const int dims = texture_dims(bld->static_state->target);
+ const unsigned dims = bld->dims;
LLVMBuilderRef builder = bld->builder;
struct lp_build_context i32, h16, u8n;
LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
LLVMValueRef i32_c8, i32_c128, i32_c255;
+ LLVMValueRef width_vec, height_vec, depth_vec;
LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi;
@@ -455,30 +458,33 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
h16_vec_type = lp_build_vec_type(h16.type);
u8n_vec_type = lp_build_vec_type(u8n.type);
+ lp_build_extract_image_sizes(bld,
+ bld->int_size_type,
+ bld->int_coord_type,
+ int_size,
+ &width_vec,
+ &height_vec,
+ &depth_vec);
+
if (bld->static_state->normalized_coords) {
- /* s = s * width, t = t * height */
- LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
- LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
- coord_vec_type, "");
- s = lp_build_mul(&bld->coord_bld, s, fp_width);
- if (dims >= 2) {
- LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
- coord_vec_type, "");
- t = lp_build_mul(&bld->coord_bld, t, fp_height);
- }
- if (dims >= 3) {
- LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
- coord_vec_type, "");
- r = lp_build_mul(&bld->coord_bld, r, fp_depth);
- }
- }
+ LLVMValueRef scaled_size;
+ LLVMValueRef flt_size;
- /* scale coords by 256 (8 fractional bits) */
- s = lp_build_mul_imm(&bld->coord_bld, s, 256);
- if (dims >= 2)
- t = lp_build_mul_imm(&bld->coord_bld, t, 256);
- if (dims >= 3)
- r = lp_build_mul_imm(&bld->coord_bld, r, 256);
+ /* scale size by 256 (8 fractional bits) */
+ scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
+
+ flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
+
+ lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
+ }
+ else {
+ /* scale coords by 256 (8 fractional bits) */
+ s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+ if (dims >= 2)
+ t = lp_build_mul_imm(&bld->coord_bld, t, 256);
+ if (dims >= 3)
+ r = lp_build_mul_imm(&bld->coord_bld, r, 256);
+ }
/* convert float to int */
s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
@@ -489,10 +495,8 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
/* subtract 0.5 (add -128) */
i32_c128 = lp_build_const_int_vec(i32.type, -128);
- if (!bld->static_state->force_nearest_s) {
- s = LLVMBuildAdd(builder, s, i32_c128, "");
- }
- if (dims >= 2 && !bld->static_state->force_nearest_t) {
+ s = LLVMBuildAdd(builder, s, i32_c128, "");
+ if (dims >= 2) {
t = LLVMBuildAdd(builder, t, i32_c128, "");
}
if (dims >= 3) {
@@ -516,7 +520,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
/* get pixel, row and image strides */
- x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+ x_stride = lp_build_const_vec(bld->int_coord_bld.type,
bld->format_desc->block.bits/8);
y_stride = row_stride_vec;
z_stride = img_stride_vec;
@@ -547,9 +551,9 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
for (z = 0; z < 2; z++) {
for (x = 0; x < 2; x++) {
- offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
offset[z][0][x], y_offset0);
- offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
offset[z][1][x], y_offset1);
}
}
@@ -565,20 +569,20 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
&z_subcoord[0], &z_subcoord[1]);
for (y = 0; y < 2; y++) {
for (x = 0; x < 2; x++) {
- offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
offset[0][y][x], z_offset0);
- offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
offset[1][y][x], z_offset1);
}
}
}
else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
LLVMValueRef z_offset;
- z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
+ z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
for (y = 0; y < 2; y++) {
for (x = 0; x < 2; x++) {
/* The r coord is the cube face in [0,5] */
- offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
+ offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
offset[0][y][x], z_offset);
}
}
@@ -709,82 +713,56 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
/*
* Linear interpolation with 8.8 fixed point.
*/
- if (bld->static_state->force_nearest_s) {
- /* special case 1-D lerp */
- packed_lo = lp_build_lerp(&h16,
- t_fpart_lo,
- neighbors_lo[0][0][0],
- neighbors_lo[0][0][1]);
-
- packed_hi = lp_build_lerp(&h16,
- t_fpart_hi,
- neighbors_hi[0][1][0],
- neighbors_hi[0][1][0]);
- }
- else if (bld->static_state->force_nearest_t) {
- /* special case 1-D lerp */
+ if (dims == 1) {
+ /* 1-D lerp */
packed_lo = lp_build_lerp(&h16,
- s_fpart_lo,
- neighbors_lo[0][0][0],
- neighbors_lo[0][0][1]);
+ s_fpart_lo,
+ neighbors_lo[0][0][0],
+ neighbors_lo[0][0][1]);
packed_hi = lp_build_lerp(&h16,
- s_fpart_hi,
- neighbors_hi[0][0][0],
- neighbors_hi[0][0][1]);
+ s_fpart_hi,
+ neighbors_hi[0][0][0],
+ neighbors_hi[0][0][1]);
}
else {
- /* general 1/2/3-D lerping */
- if (dims == 1) {
- packed_lo = lp_build_lerp(&h16,
- s_fpart_lo,
- neighbors_lo[0][0][0],
- neighbors_lo[0][0][1]);
-
- packed_hi = lp_build_lerp(&h16,
- s_fpart_hi,
- neighbors_hi[0][0][0],
- neighbors_hi[0][0][1]);
- }
- else {
- /* 2-D lerp */
- packed_lo = lp_build_lerp_2d(&h16,
- s_fpart_lo, t_fpart_lo,
- neighbors_lo[0][0][0],
- neighbors_lo[0][0][1],
- neighbors_lo[0][1][0],
- neighbors_lo[0][1][1]);
-
- packed_hi = lp_build_lerp_2d(&h16,
- s_fpart_hi, t_fpart_hi,
- neighbors_hi[0][0][0],
- neighbors_hi[0][0][1],
- neighbors_hi[0][1][0],
- neighbors_hi[0][1][1]);
-
- if (dims >= 3) {
- LLVMValueRef packed_lo2, packed_hi2;
-
- /* lerp in the second z slice */
- packed_lo2 = lp_build_lerp_2d(&h16,
- s_fpart_lo, t_fpart_lo,
- neighbors_lo[1][0][0],
- neighbors_lo[1][0][1],
- neighbors_lo[1][1][0],
- neighbors_lo[1][1][1]);
-
- packed_hi2 = lp_build_lerp_2d(&h16,
- s_fpart_hi, t_fpart_hi,
- neighbors_hi[1][0][0],
- neighbors_hi[1][0][1],
- neighbors_hi[1][1][0],
- neighbors_hi[1][1][1]);
- /* interp between two z slices */
- packed_lo = lp_build_lerp(&h16, r_fpart_lo,
- packed_lo, packed_lo2);
- packed_hi = lp_build_lerp(&h16, r_fpart_hi,
- packed_hi, packed_hi2);
- }
+ /* 2-D lerp */
+ packed_lo = lp_build_lerp_2d(&h16,
+ s_fpart_lo, t_fpart_lo,
+ neighbors_lo[0][0][0],
+ neighbors_lo[0][0][1],
+ neighbors_lo[0][1][0],
+ neighbors_lo[0][1][1]);
+
+ packed_hi = lp_build_lerp_2d(&h16,
+ s_fpart_hi, t_fpart_hi,
+ neighbors_hi[0][0][0],
+ neighbors_hi[0][0][1],
+ neighbors_hi[0][1][0],
+ neighbors_hi[0][1][1]);
+
+ if (dims >= 3) {
+ LLVMValueRef packed_lo2, packed_hi2;
+
+ /* lerp in the second z slice */
+ packed_lo2 = lp_build_lerp_2d(&h16,
+ s_fpart_lo, t_fpart_lo,
+ neighbors_lo[1][0][0],
+ neighbors_lo[1][0][1],
+ neighbors_lo[1][1][0],
+ neighbors_lo[1][1][1]);
+
+ packed_hi2 = lp_build_lerp_2d(&h16,
+ s_fpart_hi, t_fpart_hi,
+ neighbors_hi[1][0][0],
+ neighbors_hi[1][0][1],
+ neighbors_hi[1][1][0],
+ neighbors_hi[1][1][1]);
+ /* interp between two z slices */
+ packed_lo = lp_build_lerp(&h16, r_fpart_lo,
+ packed_lo, packed_lo2);
+ packed_hi = lp_build_lerp(&h16, r_fpart_hi,
+ packed_hi, packed_hi2);
}
}
@@ -806,76 +784,124 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
+ LLVMValueRef ilevel0,
+ LLVMValueRef ilevel1,
LLVMValueRef lod_fpart,
- LLVMValueRef width0_vec,
- LLVMValueRef width1_vec,
- LLVMValueRef height0_vec,
- LLVMValueRef height1_vec,
- LLVMValueRef depth0_vec,
- LLVMValueRef depth1_vec,
- LLVMValueRef row_stride0_vec,
- LLVMValueRef row_stride1_vec,
- LLVMValueRef img_stride0_vec,
- LLVMValueRef img_stride1_vec,
- LLVMValueRef data_ptr0,
- LLVMValueRef data_ptr1,
- LLVMValueRef *colors_lo,
- LLVMValueRef *colors_hi)
+ LLVMValueRef colors_lo_var,
+ LLVMValueRef colors_hi_var)
{
+ LLVMBuilderRef builder = bld->builder;
+ LLVMValueRef size0;
+ LLVMValueRef size1;
+ LLVMValueRef row_stride0_vec;
+ LLVMValueRef row_stride1_vec;
+ LLVMValueRef img_stride0_vec;
+ LLVMValueRef img_stride1_vec;
+ LLVMValueRef data_ptr0;
+ LLVMValueRef data_ptr1;
LLVMValueRef colors0_lo, colors0_hi;
LLVMValueRef colors1_lo, colors1_hi;
+
+ /* sample the first mipmap level */
+ lp_build_mipmap_level_sizes(bld, ilevel0,
+ &size0,
+ &row_stride0_vec, &img_stride0_vec);
+ data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
- /* sample the first mipmap level */
lp_build_sample_image_nearest(bld,
- width0_vec, height0_vec, depth0_vec,
+ size0,
row_stride0_vec, img_stride0_vec,
data_ptr0, s, t, r,
&colors0_lo, &colors0_hi);
-
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level */
- lp_build_sample_image_nearest(bld,
- width1_vec, height1_vec, depth1_vec,
- row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r,
- &colors1_lo, &colors1_hi);
- }
}
else {
assert(img_filter == PIPE_TEX_FILTER_LINEAR);
-
- /* sample the first mipmap level */
lp_build_sample_image_linear(bld,
- width0_vec, height0_vec, depth0_vec,
+ size0,
row_stride0_vec, img_stride0_vec,
data_ptr0, s, t, r,
&colors0_lo, &colors0_hi);
-
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level */
- lp_build_sample_image_linear(bld,
- width1_vec, height1_vec, depth1_vec,
- row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r,
- &colors1_lo, &colors1_hi);
- }
}
+ /* Store the first level's colors in the output variables */
+ LLVMBuildStore(builder, colors0_lo, colors_lo_var);
+ LLVMBuildStore(builder, colors0_hi, colors_hi_var);
+
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* interpolate samples from the two mipmap levels */
- struct lp_build_context h16;
- lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16));
-
- *colors_lo = lp_build_lerp(&h16, lod_fpart,
- colors0_lo, colors1_lo);
- *colors_hi = lp_build_lerp(&h16, lod_fpart,
- colors0_hi, colors1_hi);
- }
- else {
- /* use first/only level's colors */
- *colors_lo = colors0_lo;
- *colors_hi = colors0_hi;
+ LLVMValueRef h16_scale = LLVMConstReal(LLVMFloatType(), 256.0);
+ LLVMTypeRef i32_type = LLVMIntType(32);
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef need_lerp;
+
+ lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
+ lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16");
+
+ /* need_lerp = lod_fpart > 0 */
+ need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
+ lod_fpart, LLVMConstNull(i32_type),
+ "need_lerp");
+
+ lp_build_if(&if_ctx, builder, need_lerp);
+ {
+ struct lp_build_context h16_bld;
+
+ lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));
+
+ /* sample the second mipmap level */
+ lp_build_mipmap_level_sizes(bld, ilevel1,
+ &size1,
+ &row_stride1_vec, &img_stride1_vec);
+ data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
+ if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+ lp_build_sample_image_nearest(bld,
+ size1,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r,
+ &colors1_lo, &colors1_hi);
+ }
+ else {
+ lp_build_sample_image_linear(bld,
+ size1,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r,
+ &colors1_lo, &colors1_hi);
+ }
+
+ /* interpolate samples from the two mipmap levels */
+
+ lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
+ lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);
+
+#if HAVE_LLVM == 0x208
+ /* This is a work-around for a bug in LLVM 2.8.
+ * Evidently, something goes wrong in the construction of the
+ * lod_fpart short[8] vector. Adding this no-effect shuffle seems
+ * to force the vector to be properly constructed.
+ * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
+ */
+ {
+ LLVMValueRef shuffles[8], shuffle;
+ int i;
+ assert(h16_bld.type.length <= Elements(shuffles));
+ for (i = 0; i < h16_bld.type.length; i++)
+ shuffles[i] = lp_build_const_int32(2 * (i & 1));
+ shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
+ lod_fpart = LLVMBuildShuffleVector(builder,
+ lod_fpart, lod_fpart,
+ shuffle, "");
+ }
+#endif
+
+ colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
+ colors0_lo, colors1_lo);
+ colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
+ colors0_hi, colors1_hi);
+
+ LLVMBuildStore(builder, colors0_lo, colors_lo_var);
+ LLVMBuildStore(builder, colors0_hi, colors_hi_var);
+ }
+ lp_build_endif(&if_ctx);
}
}
@@ -896,35 +922,22 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
const LLVMValueRef *ddy,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
- LLVMValueRef row_stride_array,
- LLVMValueRef img_stride_array,
- LLVMValueRef data_array,
LLVMValueRef texel_out[4])
{
- struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *int_bld = &bld->int_bld;
LLVMBuilderRef builder = bld->builder;
const unsigned mip_filter = bld->static_state->min_mip_filter;
const unsigned min_filter = bld->static_state->min_img_filter;
const unsigned mag_filter = bld->static_state->mag_img_filter;
- const int dims = texture_dims(bld->static_state->target);
- LLVMValueRef lod = NULL, lod_fpart = NULL;
+ const unsigned dims = bld->dims;
+ LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
LLVMValueRef ilevel0, ilevel1 = NULL;
- LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
- LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
- LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
- LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
- LLVMValueRef data_ptr0, data_ptr1 = NULL;
LLVMValueRef packed, packed_lo, packed_hi;
LLVMValueRef unswizzled[4];
LLVMValueRef face_ddx[4], face_ddy[4];
- struct lp_build_context h16;
- LLVMTypeRef h16_vec_type;
+ struct lp_build_context h16_bld;
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0);
/* we only support the common/simple wrap modes at this time */
assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
@@ -935,9 +948,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
/* make 16-bit fixed-pt builder context */
- lp_build_context_init(&h16, builder, lp_type_ufixed(16));
- h16_vec_type = lp_build_vec_type(h16.type);
-
+ lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));
/* cube face selection, compute pre-face coords, etc. */
if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
@@ -949,19 +960,18 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
/* recompute ddx, ddy using the new (s,t) face texcoords */
- face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
- face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
+ face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s);
+ face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t);
face_ddx[2] = NULL;
face_ddx[3] = NULL;
- face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
- face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
+ face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s);
+ face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t);
face_ddy[2] = NULL;
face_ddy[3] = NULL;
ddx = face_ddx;
ddy = face_ddy;
}
-
/*
* Compute the level of detail (float).
*/
@@ -970,15 +980,16 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
- lod = lp_build_lod_selector(bld, ddx, ddy,
- lod_bias, explicit_lod,
- width, height, depth);
+ lp_build_lod_selector(bld, unit, ddx, ddy,
+ lod_bias, explicit_lod,
+ mip_filter,
+ &lod_ipart, &lod_fpart);
+ } else {
+ lod_ipart = i32t_zero;
}
/*
* Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
- * If mipfilter=linear, also compute the weight between the two
- * mipmap levels: lod_fpart
*/
switch (mip_filter) {
default:
@@ -991,135 +1002,81 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
* We should be able to set ilevel0 = const(0) but that causes
* bad x86 code to be emitted.
*/
- lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
- lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
}
else {
- ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ ilevel0 = i32t_zero;
}
break;
case PIPE_TEX_MIPFILTER_NEAREST:
- assert(lod);
- lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
break;
case PIPE_TEX_MIPFILTER_LINEAR:
- {
- LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0);
- LLVMValueRef i255 = lp_build_const_int32(255);
- LLVMTypeRef i16_type = LLVMIntType(16);
-
- assert(lod);
-
- lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
- &lod_fpart);
- lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, "");
- lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart);
- lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, "");
- lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, "");
- lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart);
-
- /* the lod_fpart values will be fixed pt values in [0,1) */
- }
+ assert(lod_ipart);
+ assert(lod_fpart);
+ lp_build_linear_mip_levels(bld, unit,
+ lod_ipart, &lod_fpart,
+ &ilevel0, &ilevel1);
break;
}
- /* compute image size(s) of source mipmap level(s) */
- lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec,
- ilevel0, ilevel1,
- row_stride_array, img_stride_array,
- &width0_vec, &width1_vec,
- &height0_vec, &height1_vec,
- &depth0_vec, &depth1_vec,
- &row_stride0_vec, &row_stride1_vec,
- &img_stride0_vec, &img_stride1_vec);
-
/*
- * Get pointer(s) to image data for mipmap level(s).
+ * Get/interpolate texture colors.
*/
- data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
- }
+ packed_lo = lp_build_alloca(builder, h16_bld.vec_type, "packed_lo");
+ packed_hi = lp_build_alloca(builder, h16_bld.vec_type, "packed_hi");
- /*
- * Get/interpolate texture colors.
- */
if (min_filter == mag_filter) {
/* no need to distinquish between minification and magnification */
- lp_build_sample_mipmap(bld, min_filter, mip_filter,
- s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- &packed_lo, &packed_hi);
+ lp_build_sample_mipmap(bld,
+ min_filter, mip_filter,
+ s, t, r,
+ ilevel0, ilevel1, lod_fpart,
+ packed_lo, packed_hi);
}
else {
/* Emit conditional to choose min image filter or mag image filter
* depending on the lod being > 0 or <= 0, respectively.
*/
- struct lp_build_flow_context *flow_ctx;
struct lp_build_if_state if_ctx;
LLVMValueRef minify;
- flow_ctx = lp_build_flow_create(builder);
- lp_build_flow_scope_begin(flow_ctx);
-
- packed_lo = LLVMGetUndef(h16_vec_type);
- packed_hi = LLVMGetUndef(h16_vec_type);
+ /* minify = lod >= 0.0 */
+ minify = LLVMBuildICmp(builder, LLVMIntSGE,
+ lod_ipart, int_bld->zero, "");
- lp_build_flow_scope_declare(flow_ctx, &packed_lo);
- lp_build_flow_scope_declare(flow_ctx, &packed_hi);
-
- /* minify = lod > 0.0 */
- minify = LLVMBuildFCmp(builder, LLVMRealUGE,
- lod, float_bld->zero, "");
-
- lp_build_if(&if_ctx, flow_ctx, builder, minify);
+ lp_build_if(&if_ctx, builder, minify);
{
/* Use the minification filter */
- lp_build_sample_mipmap(bld, min_filter, mip_filter,
- s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- &packed_lo, &packed_hi);
+ lp_build_sample_mipmap(bld,
+ min_filter, mip_filter,
+ s, t, r,
+ ilevel0, ilevel1, lod_fpart,
+ packed_lo, packed_hi);
}
lp_build_else(&if_ctx);
{
/* Use the magnification filter */
- lp_build_sample_mipmap(bld, mag_filter, mip_filter,
- s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- &packed_lo, &packed_hi);
+ lp_build_sample_mipmap(bld,
+ mag_filter, PIPE_TEX_MIPFILTER_NONE,
+ s, t, r,
+ i32t_zero, NULL, NULL,
+ packed_lo, packed_hi);
}
lp_build_endif(&if_ctx);
-
- lp_build_flow_scope_end(flow_ctx);
- lp_build_flow_destroy(flow_ctx);
}
- /* combine 'packed_lo', 'packed_hi' into 'packed' */
- {
- struct lp_build_context h16, u8n;
-
- lp_build_context_init(&h16, builder, lp_type_ufixed(16));
- lp_build_context_init(&u8n, builder, lp_type_unorm(8));
-
- packed = lp_build_pack2(builder, h16.type, u8n.type,
- packed_lo, packed_hi);
- }
+ /*
+ * combine the values stored in 'packed_lo' and 'packed_hi' variables
+ * into 'packed'
+ */
+ packed = lp_build_pack2(builder,
+ h16_bld.type, lp_type_unorm(8),
+ LLVMBuildLoad(builder, packed_lo, ""),
+ LLVMBuildLoad(builder, packed_hi, ""));
/*
* Convert to SoA and swizzle.
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
index e1045bbbc2..5d9ecac4d5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h
@@ -50,15 +50,6 @@ lp_build_sample_aos(struct lp_build_sample_context *bld,
const LLVMValueRef *ddy,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
- LLVMValueRef row_stride_array,
- LLVMValueRef img_stride_array,
- LLVMValueRef data_array,
LLVMValueRef texel_out[4]);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 2d80db6dc9..53cc0c5f34 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -47,41 +47,18 @@
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_logic.h"
+#include "lp_bld_printf.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
#include "lp_bld_sample_aos.h"
+#include "lp_bld_struct.h"
#include "lp_bld_quad.h"
/**
- * Does the given texture wrap mode allow sampling the texture border color?
- * XXX maybe move this into gallium util code.
- */
-static boolean
-wrap_mode_uses_border_color(unsigned mode)
-{
- switch (mode) {
- case PIPE_TEX_WRAP_REPEAT:
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- return FALSE;
- case PIPE_TEX_WRAP_CLAMP:
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- return TRUE;
- default:
- assert(0 && "unexpected wrap mode");
- return FALSE;
- }
-}
-
-
-/**
* Generate code to fetch a texel from a texture at int coords (x, y, z).
* The computation depends on whether the texture is 1D, 2D or 3D.
* The result, texel, will be float vectors:
@@ -92,6 +69,7 @@ wrap_mode_uses_border_color(unsigned mode)
*/
static void
lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
+ unsigned unit,
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth,
@@ -103,21 +81,27 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
LLVMValueRef data_ptr,
LLVMValueRef texel_out[4])
{
- const int dims = texture_dims(bld->static_state->target);
+ const struct lp_sampler_static_state *static_state = bld->static_state;
+ const unsigned dims = bld->dims;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef offset;
LLVMValueRef i, j;
LLVMValueRef use_border = NULL;
/* use_border = x < 0 || x >= width || y < 0 || y >= height */
- if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
+ if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
+ static_state->min_img_filter,
+ static_state->mag_img_filter)) {
LLVMValueRef b1, b2;
b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
}
- if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
+ if (dims >= 2 &&
+ lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
+ static_state->min_img_filter,
+ static_state->mag_img_filter)) {
LLVMValueRef b1, b2;
b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
@@ -130,7 +114,10 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
}
}
- if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
+ if (dims == 3 &&
+ lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
+ static_state->min_img_filter,
+ static_state->mag_img_filter)) {
LLVMValueRef b1, b2;
b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
@@ -144,7 +131,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
}
/* convert x,y,z coords to linear offset from start of texture, in bytes */
- lp_build_sample_offset(&bld->uint_coord_bld,
+ lp_build_sample_offset(&bld->int_coord_bld,
bld->format_desc,
x, y, z, y_stride, z_stride,
&offset, &i, &j);
@@ -158,7 +145,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
* coords which are out of bounds to become zero. Zero's guaranteed
* to be inside the texture image.
*/
- offset = lp_build_andnot(&bld->uint_coord_bld, offset, use_border);
+ offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
}
lp_build_fetch_rgba_soa(bld->builder,
@@ -168,8 +155,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
i, j,
texel_out);
- apply_sampler_swizzle(bld, texel_out);
-
/*
* Note: if we find an app which frequently samples the texture border
* we might want to implement a true conditional here to avoid sampling
@@ -187,15 +172,22 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
if (use_border) {
/* select texel color or border color depending on use_border */
+ LLVMValueRef border_color_ptr =
+ bld->dynamic_state->border_color(bld->dynamic_state,
+ bld->builder, unit);
int chan;
for (chan = 0; chan < 4; chan++) {
LLVMValueRef border_chan =
- lp_build_const_vec(bld->texel_type,
- bld->static_state->border_color[chan]);
+ lp_build_array_get(bld->builder, border_color_ptr,
+ lp_build_const_int32(chan));
+ LLVMValueRef border_chan_vec =
+ lp_build_broadcast_scalar(&bld->float_vec_bld, border_chan);
texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
- border_chan, texel_out[chan]);
+ border_chan_vec, texel_out[chan]);
}
}
+
+ apply_sampler_swizzle(bld, texel_out);
}
@@ -210,11 +202,7 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld,
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
LLVMValueRef fract, flr, isOdd;
- /* fract = coord - floor(coord) */
- fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
-
- /* flr = ifloor(coord); */
- flr = lp_build_ifloor(coord_bld, coord);
+ lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
/* isOdd = flr & 1 */
isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
@@ -242,6 +230,7 @@ static void
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
LLVMValueRef coord,
LLVMValueRef length,
+ LLVMValueRef length_f,
boolean is_pot,
unsigned wrap_mode,
LLVMValueRef *x0_out,
@@ -250,10 +239,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
{
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
- LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
- LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+ LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
LLVMValueRef coord0, coord1, weight;
switch(wrap_mode) {
@@ -261,21 +248,25 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
/* mul by size and subtract 0.5 */
coord = lp_build_mul(coord_bld, coord, length_f);
coord = lp_build_sub(coord_bld, coord, half);
- /* convert to int */
- coord0 = lp_build_ifloor(coord_bld, coord);
- coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
/* repeat wrap */
if (is_pot) {
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
}
else {
- /* Signed remainder won't give the right results for negative
- * dividends but unsigned remainder does.*/
+ /* Add a bias to the texcoord to handle negative coords */
+ LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
+ LLVMValueRef mask;
+ coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
- coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
+ mask = lp_build_compare(bld->builder, int_coord_bld->type,
+ PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
+ coord1 = LLVMBuildAnd(bld->builder,
+ lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
+ mask, "");
}
break;
@@ -290,53 +281,47 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
break;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- if (bld->static_state->normalized_coords) {
- /* clamp to [0,1] */
- coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
- /* mul by tex size and subtract 0.5 */
- coord = lp_build_mul(coord_bld, coord, length_f);
+ {
+ struct lp_build_context abs_coord_bld = bld->coord_bld;
+ abs_coord_bld.type.sign = FALSE;
+
+ if (bld->static_state->normalized_coords) {
+ /* mul by tex size */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ }
+ /* clamp to length max */
+ coord = lp_build_min(coord_bld, coord, length_f);
+ /* subtract 0.5 */
coord = lp_build_sub(coord_bld, coord, half);
+ /* clamp to [0, length - 0.5] */
+ coord = lp_build_max(coord_bld, coord, coord_bld->zero);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ /* coord1 = min(coord1, length-1) */
+ coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
+ break;
}
- else {
- LLVMValueRef min, max;
- /* clamp to [0.5, length - 0.5] */
- min = half;
- max = lp_build_sub(coord_bld, length_f, min);
- coord = lp_build_clamp(coord_bld, coord, min, max);
- }
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
- /* coord0 = floor(coord); */
- coord0 = lp_build_ifloor(coord_bld, coord);
- coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
- /* coord0 = max(coord0, 0) */
- coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
- /* coord1 = min(coord1, length-1) */
- coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
- break;
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
{
- LLVMValueRef min, max;
+ LLVMValueRef min;
if (bld->static_state->normalized_coords) {
/* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
}
- /* clamp to [-0.5, length + 0.5] */
- min = lp_build_const_vec(coord_bld->type, -0.5F);
- max = lp_build_sub(coord_bld, length_f, min);
- coord = lp_build_clamp(coord_bld, coord, min, max);
+ /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
coord = lp_build_sub(coord_bld, coord, half);
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
- /* convert to int */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ min = lp_build_const_vec(coord_bld->type, -1.0F);
+ coord = lp_build_clamp(coord_bld, coord, min, length_f);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;
@@ -349,11 +334,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_mul(coord_bld, coord, length_f);
coord = lp_build_sub(coord_bld, coord, half);
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
-
- /* convert to int coords */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
/* coord0 = max(coord0, 0) */
@@ -375,15 +357,16 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
{
LLVMValueRef min, max;
-
+ struct lp_build_context abs_coord_bld = bld->coord_bld;
+ abs_coord_bld.type.sign = FALSE;
coord = lp_build_abs(coord_bld, coord);
if (bld->static_state->normalized_coords) {
@@ -398,16 +381,14 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
{
- LLVMValueRef min, max;
-
coord = lp_build_abs(coord_bld, coord);
if (bld->static_state->normalized_coords) {
@@ -415,15 +396,13 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
coord = lp_build_mul(coord_bld, coord, length_f);
}
- /* clamp to [-0.5, length + 0.5] */
- min = lp_build_negate(coord_bld, half);
- max = lp_build_sub(coord_bld, length_f, min);
- coord = lp_build_clamp(coord_bld, coord, min, max);
-
+ /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
+ /* skip -0.5 clamp (always positive), do sub first */
coord = lp_build_sub(coord_bld, coord, half);
+ coord = lp_build_min(coord_bld, coord, length_f);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;
@@ -452,14 +431,13 @@ static LLVMValueRef
lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
LLVMValueRef coord,
LLVMValueRef length,
+ LLVMValueRef length_f,
boolean is_pot,
unsigned wrap_mode)
{
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
- LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
- LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+ LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
LLVMValueRef icoord;
switch(wrap_mode) {
@@ -468,10 +446,12 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
icoord = lp_build_ifloor(coord_bld, coord);
if (is_pot)
icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
- else
- /* Signed remainder won't give the right results for negative
- * dividends but unsigned remainder does.*/
+ else {
+ /* Add a bias to the texcoord to handle negative coords */
+ LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
+ icoord = LLVMBuildAdd(bld->builder, icoord, bias, "");
icoord = LLVMBuildURem(bld->builder, icoord, length, "");
+ }
break;
case PIPE_TEX_WRAP_CLAMP:
@@ -482,7 +462,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
}
/* floor */
- icoord = lp_build_ifloor(coord_bld, coord);
+ /* use itrunc instead since we clamp to 0 anyway */
+ icoord = lp_build_itrunc(coord_bld, coord);
/* clamp to [0, length - 1]. */
icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
@@ -516,7 +497,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
assert(bld->static_state->normalized_coords);
coord = lp_build_mul(coord_bld, coord, length_f);
- icoord = lp_build_ifloor(coord_bld, coord);
+ /* itrunc == ifloor here */
+ icoord = lp_build_itrunc(coord_bld, coord);
/* clamp to [0, length - 1] */
icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
@@ -531,7 +513,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
coord = lp_build_mul(coord_bld, coord, length_f);
}
- icoord = lp_build_ifloor(coord_bld, coord);
+ /* itrunc == ifloor here */
+ icoord = lp_build_itrunc(coord_bld, coord);
/* clamp to [0, length - 1] */
icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
@@ -545,7 +528,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
coord = lp_build_mul(coord_bld, coord, length_f);
}
- icoord = lp_build_ifloor(coord_bld, coord);
+ /* itrunc == ifloor here */
+ icoord = lp_build_itrunc(coord_bld, coord);
/* clamp to [0, length] */
icoord = lp_build_min(int_coord_bld, icoord, length);
@@ -566,9 +550,8 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
*/
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
+ unsigned unit,
+ LLVMValueRef size,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
@@ -577,26 +560,47 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
LLVMValueRef r,
LLVMValueRef colors_out[4])
{
- const int dims = texture_dims(bld->static_state->target);
+ const unsigned dims = bld->dims;
+ LLVMValueRef width_vec;
+ LLVMValueRef height_vec;
+ LLVMValueRef depth_vec;
+ LLVMValueRef flt_size;
+ LLVMValueRef flt_width_vec;
+ LLVMValueRef flt_height_vec;
+ LLVMValueRef flt_depth_vec;
LLVMValueRef x, y, z;
+ lp_build_extract_image_sizes(bld,
+ bld->int_size_type,
+ bld->int_coord_type,
+ size,
+ &width_vec, &height_vec, &depth_vec);
+
+ flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
+
+ lp_build_extract_image_sizes(bld,
+ bld->float_size_type,
+ bld->coord_type,
+ flt_size,
+ &flt_width_vec, &flt_height_vec, &flt_depth_vec);
+
/*
* Compute integer texcoords.
*/
- x = lp_build_sample_wrap_nearest(bld, s, width_vec,
+ x = lp_build_sample_wrap_nearest(bld, s, width_vec, flt_width_vec,
bld->static_state->pot_width,
bld->static_state->wrap_s);
lp_build_name(x, "tex.x.wrapped");
if (dims >= 2) {
- y = lp_build_sample_wrap_nearest(bld, t, height_vec,
+ y = lp_build_sample_wrap_nearest(bld, t, height_vec, flt_height_vec,
bld->static_state->pot_height,
bld->static_state->wrap_t);
lp_build_name(y, "tex.y.wrapped");
if (dims == 3) {
- z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
- bld->static_state->pot_height,
+ z = lp_build_sample_wrap_nearest(bld, r, depth_vec, flt_depth_vec,
+ bld->static_state->pot_depth,
bld->static_state->wrap_r);
lp_build_name(z, "tex.z.wrapped");
}
@@ -614,7 +618,8 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
/*
* Get texture colors.
*/
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x, y, z,
row_stride_vec, img_stride_vec,
data_ptr, colors_out);
@@ -627,9 +632,8 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
*/
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
+ unsigned unit,
+ LLVMValueRef size,
LLVMValueRef row_stride_vec,
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
@@ -638,16 +642,37 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef r,
LLVMValueRef colors_out[4])
{
- const int dims = texture_dims(bld->static_state->target);
+ const unsigned dims = bld->dims;
+ LLVMValueRef width_vec;
+ LLVMValueRef height_vec;
+ LLVMValueRef depth_vec;
+ LLVMValueRef flt_size;
+ LLVMValueRef flt_width_vec;
+ LLVMValueRef flt_height_vec;
+ LLVMValueRef flt_depth_vec;
LLVMValueRef x0, y0, z0, x1, y1, z1;
LLVMValueRef s_fpart, t_fpart, r_fpart;
LLVMValueRef neighbors[2][2][4];
int chan;
+ lp_build_extract_image_sizes(bld,
+ bld->int_size_type,
+ bld->int_coord_type,
+ size,
+ &width_vec, &height_vec, &depth_vec);
+
+ flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
+
+ lp_build_extract_image_sizes(bld,
+ bld->float_size_type,
+ bld->coord_type,
+ flt_size,
+ &flt_width_vec, &flt_height_vec, &flt_depth_vec);
+
/*
* Compute integer texcoords.
*/
- lp_build_sample_wrap_linear(bld, s, width_vec,
+ lp_build_sample_wrap_linear(bld, s, width_vec, flt_width_vec,
bld->static_state->pot_width,
bld->static_state->wrap_s,
&x0, &x1, &s_fpart);
@@ -655,7 +680,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
lp_build_name(x1, "tex.x1.wrapped");
if (dims >= 2) {
- lp_build_sample_wrap_linear(bld, t, height_vec,
+ lp_build_sample_wrap_linear(bld, t, height_vec, flt_height_vec,
bld->static_state->pot_height,
bld->static_state->wrap_t,
&y0, &y1, &t_fpart);
@@ -663,7 +688,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
lp_build_name(y1, "tex.y1.wrapped");
if (dims == 3) {
- lp_build_sample_wrap_linear(bld, r, depth_vec,
+ lp_build_sample_wrap_linear(bld, r, depth_vec, flt_depth_vec,
bld->static_state->pot_depth,
bld->static_state->wrap_r,
&z0, &z1, &r_fpart);
@@ -688,11 +713,13 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
* Get texture colors.
*/
/* get x0/x1 texels */
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x0, y0, z0,
row_stride_vec, img_stride_vec,
data_ptr, neighbors[0][0]);
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x1, y0, z0,
row_stride_vec, img_stride_vec,
data_ptr, neighbors[0][1]);
@@ -710,11 +737,13 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef colors0[4];
/* get x0/x1 texels at y1 */
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x0, y1, z0,
row_stride_vec, img_stride_vec,
data_ptr, neighbors[1][0]);
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x1, y1, z0,
row_stride_vec, img_stride_vec,
data_ptr, neighbors[1][1]);
@@ -734,19 +763,23 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
LLVMValueRef colors1[4];
/* get x0/x1/y0/y1 texels at z1 */
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x0, y0, z1,
row_stride_vec, img_stride_vec,
data_ptr, neighbors1[0][0]);
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x1, y0, z1,
row_stride_vec, img_stride_vec,
data_ptr, neighbors1[0][1]);
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x0, y1, z1,
row_stride_vec, img_stride_vec,
data_ptr, neighbors1[1][0]);
- lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
+ lp_build_sample_texel_soa(bld, unit,
+ width_vec, height_vec, depth_vec,
x1, y1, z1,
row_stride_vec, img_stride_vec,
data_ptr, neighbors1[1][1]);
@@ -786,74 +819,98 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
*/
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
+ unsigned unit,
unsigned img_filter,
unsigned mip_filter,
LLVMValueRef s,
LLVMValueRef t,
LLVMValueRef r,
+ LLVMValueRef ilevel0,
+ LLVMValueRef ilevel1,
LLVMValueRef lod_fpart,
- LLVMValueRef width0_vec,
- LLVMValueRef width1_vec,
- LLVMValueRef height0_vec,
- LLVMValueRef height1_vec,
- LLVMValueRef depth0_vec,
- LLVMValueRef depth1_vec,
- LLVMValueRef row_stride0_vec,
- LLVMValueRef row_stride1_vec,
- LLVMValueRef img_stride0_vec,
- LLVMValueRef img_stride1_vec,
- LLVMValueRef data_ptr0,
- LLVMValueRef data_ptr1,
LLVMValueRef *colors_out)
{
+ LLVMBuilderRef builder = bld->builder;
+ LLVMValueRef size0;
+ LLVMValueRef size1;
+ LLVMValueRef row_stride0_vec;
+ LLVMValueRef row_stride1_vec;
+ LLVMValueRef img_stride0_vec;
+ LLVMValueRef img_stride1_vec;
+ LLVMValueRef data_ptr0;
+ LLVMValueRef data_ptr1;
LLVMValueRef colors0[4], colors1[4];
- int chan;
+ unsigned chan;
+ /* sample the first mipmap level */
+ lp_build_mipmap_level_sizes(bld, ilevel0,
+ &size0,
+ &row_stride0_vec, &img_stride0_vec);
+ data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
- /* sample the first mipmap level */
- lp_build_sample_image_nearest(bld,
- width0_vec, height0_vec, depth0_vec,
+ lp_build_sample_image_nearest(bld, unit,
+ size0,
row_stride0_vec, img_stride0_vec,
- data_ptr0, s, t, r, colors0);
-
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level */
- lp_build_sample_image_nearest(bld,
- width1_vec, height1_vec, depth1_vec,
- row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r, colors1);
- }
+ data_ptr0, s, t, r,
+ colors0);
}
else {
assert(img_filter == PIPE_TEX_FILTER_LINEAR);
-
- /* sample the first mipmap level */
- lp_build_sample_image_linear(bld,
- width0_vec, height0_vec, depth0_vec,
+ lp_build_sample_image_linear(bld, unit,
+ size0,
row_stride0_vec, img_stride0_vec,
- data_ptr0, s, t, r, colors0);
+ data_ptr0, s, t, r,
+ colors0);
+ }
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level */
- lp_build_sample_image_linear(bld,
- width1_vec, height1_vec, depth1_vec,
- row_stride1_vec, img_stride1_vec,
- data_ptr1, s, t, r, colors1);
- }
+ /* Store the first level's colors in the output variables */
+ for (chan = 0; chan < 4; chan++) {
+ LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
}
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* interpolate samples from the two mipmap levels */
- for (chan = 0; chan < 4; chan++) {
- colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef need_lerp;
+
+ /* need_lerp = lod_fpart > 0 */
+ need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
+ lod_fpart,
+ bld->float_bld.zero,
+ "need_lerp");
+
+ lp_build_if(&if_ctx, builder, need_lerp);
+ {
+ /* sample the second mipmap level */
+ lp_build_mipmap_level_sizes(bld, ilevel1,
+ &size1,
+ &row_stride1_vec, &img_stride1_vec);
+ data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
+ if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+ lp_build_sample_image_nearest(bld, unit,
+ size1,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r,
+ colors1);
+ }
+ else {
+ lp_build_sample_image_linear(bld, unit,
+ size1,
+ row_stride1_vec, img_stride1_vec,
+ data_ptr1, s, t, r,
+ colors1);
+ }
+
+ /* interpolate samples from the two mipmap levels */
+
+ lod_fpart = lp_build_broadcast_scalar(&bld->texel_bld, lod_fpart);
+
+ for (chan = 0; chan < 4; chan++) {
+ colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
colors0[chan], colors1[chan]);
+ LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
+ }
}
- }
- else {
- /* use first/only level's colors */
- for (chan = 0; chan < 4; chan++) {
- colors_out[chan] = colors0[chan];
- }
+ lp_build_endif(&if_ctx);
}
}
@@ -874,30 +931,20 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
const LLVMValueRef *ddy,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
- LLVMValueRef width,
- LLVMValueRef height,
- LLVMValueRef depth,
- LLVMValueRef width_vec,
- LLVMValueRef height_vec,
- LLVMValueRef depth_vec,
- LLVMValueRef row_stride_array,
- LLVMValueRef img_stride_array,
- LLVMValueRef data_array,
LLVMValueRef *colors_out)
{
- struct lp_build_context *float_bld = &bld->float_bld;
+ struct lp_build_context *int_bld = &bld->int_bld;
+ LLVMBuilderRef builder = bld->builder;
const unsigned mip_filter = bld->static_state->min_mip_filter;
const unsigned min_filter = bld->static_state->min_img_filter;
const unsigned mag_filter = bld->static_state->mag_img_filter;
- const int dims = texture_dims(bld->static_state->target);
- LLVMValueRef lod = NULL, lod_fpart = NULL;
+ LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
LLVMValueRef ilevel0, ilevel1 = NULL;
- LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
- LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
- LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
- LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
- LLVMValueRef data_ptr0, data_ptr1 = NULL;
LLVMValueRef face_ddx[4], face_ddy[4];
+ LLVMValueRef texels[4];
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0);
+ unsigned chan;
/*
printf("%s mip %d min %d mag %d\n", __FUNCTION__,
@@ -916,12 +963,12 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
/* recompute ddx, ddy using the new (s,t) face texcoords */
- face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
- face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
+ face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s);
+ face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t);
face_ddx[2] = NULL;
face_ddx[3] = NULL;
- face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
- face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
+ face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s);
+ face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t);
face_ddy[2] = NULL;
face_ddy[3] = NULL;
ddx = face_ddx;
@@ -936,127 +983,109 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
- lod = lp_build_lod_selector(bld, ddx, ddy,
- lod_bias, explicit_lod,
- width, height, depth);
+ lp_build_lod_selector(bld, unit, ddx, ddy,
+ lod_bias, explicit_lod,
+ mip_filter,
+ &lod_ipart, &lod_fpart);
+ } else {
+ lod_ipart = i32t_zero;
}
/*
- * Compute integer mipmap level(s) to fetch texels from.
+ * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
*/
- if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
+ switch (mip_filter) {
+ default:
+ assert(0 && "bad mip_filter value in lp_build_sample_soa()");
+ /* fall-through */
+ case PIPE_TEX_MIPFILTER_NONE:
/* always use mip level 0 */
if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
/* XXX this is a work-around for an apparent bug in LLVM 2.7.
* We should be able to set ilevel0 = const(0) but that causes
* bad x86 code to be emitted.
*/
- lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
- lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
- }
- else {
- ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
- }
- }
- else {
- assert(lod);
- if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
- lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
}
else {
- assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
- lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
- &lod_fpart);
- lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
+ ilevel0 = i32t_zero;
}
+ break;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ assert(lod_ipart);
+ lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ assert(lod_ipart);
+ assert(lod_fpart);
+ lp_build_linear_mip_levels(bld, unit,
+ lod_ipart, &lod_fpart,
+ &ilevel0, &ilevel1);
+ break;
}
- /* compute image size(s) of source mipmap level(s) */
- lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec,
- ilevel0, ilevel1,
- row_stride_array, img_stride_array,
- &width0_vec, &width1_vec,
- &height0_vec, &height1_vec,
- &depth0_vec, &depth1_vec,
- &row_stride0_vec, &row_stride1_vec,
- &img_stride0_vec, &img_stride1_vec);
-
/*
- * Get pointer(s) to image data for mipmap level(s).
+ * Get/interpolate texture colors.
*/
- data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
- if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
+
+ for (chan = 0; chan < 4; ++chan) {
+ texels[chan] = lp_build_alloca(builder, bld->texel_bld.vec_type, "");
+ lp_build_name(texels[chan], "sampler%u_texel_%c_var", unit, "xyzw"[chan]);
}
- /*
- * Get/interpolate texture colors.
- */
if (min_filter == mag_filter) {
/* no need to distinquish between minification and magnification */
- lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- colors_out);
+ lp_build_sample_mipmap(bld, unit,
+ min_filter, mip_filter,
+ s, t, r,
+ ilevel0, ilevel1, lod_fpart,
+ texels);
}
else {
/* Emit conditional to choose min image filter or mag image filter
- * depending on the lod being >0 or <= 0, respectively.
+ * depending on the lod being > 0 or <= 0, respectively.
*/
- struct lp_build_flow_context *flow_ctx;
struct lp_build_if_state if_ctx;
LLVMValueRef minify;
- flow_ctx = lp_build_flow_create(bld->builder);
- lp_build_flow_scope_begin(flow_ctx);
-
- lp_build_flow_scope_declare(flow_ctx, &colors_out[0]);
- lp_build_flow_scope_declare(flow_ctx, &colors_out[1]);
- lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
- lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
+ /* minify = lod >= 0.0 */
+ minify = LLVMBuildICmp(builder, LLVMIntSGE,
+ lod_ipart, int_bld->zero, "");
- /* minify = lod > 0.0 */
- minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
- lod, float_bld->zero, "");
-
- lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
+ lp_build_if(&if_ctx, builder, minify);
{
/* Use the minification filter */
- lp_build_sample_mipmap(bld, min_filter, mip_filter,
- s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- colors_out);
+ lp_build_sample_mipmap(bld, unit,
+ min_filter, mip_filter,
+ s, t, r,
+ ilevel0, ilevel1, lod_fpart,
+ texels);
}
lp_build_else(&if_ctx);
{
/* Use the magnification filter */
- lp_build_sample_mipmap(bld, mag_filter, mip_filter,
- s, t, r, lod_fpart,
- width0_vec, width1_vec,
- height0_vec, height1_vec,
- depth0_vec, depth1_vec,
- row_stride0_vec, row_stride1_vec,
- img_stride0_vec, img_stride1_vec,
- data_ptr0, data_ptr1,
- colors_out);
+ lp_build_sample_mipmap(bld, unit,
+ mag_filter, PIPE_TEX_MIPFILTER_NONE,
+ s, t, r,
+ i32t_zero, NULL, NULL,
+ texels);
}
lp_build_endif(&if_ctx);
+ }
- lp_build_flow_scope_end(flow_ctx);
- lp_build_flow_destroy(flow_ctx);
+ for (chan = 0; chan < 4; ++chan) {
+ colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
+ lp_build_name(colors_out[chan], "sampler%u_texel_%c", unit, "xyzw"[chan]);
}
}
+/**
+ * Do shadow test/comparison.
+ * \param p the texcoord Z (aka R, aka P) component
+ * \param texel the texel to compare against (use the X channel)
+ */
static void
lp_build_sample_compare(struct lp_build_sample_context *bld,
LLVMValueRef p,
@@ -1064,30 +1093,30 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
{
struct lp_build_context *texel_bld = &bld->texel_bld;
LLVMValueRef res;
- unsigned chan;
+ const unsigned chan = 0;
- if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
+ if (bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
return;
- /* TODO: Compare before swizzling, to avoid redundant computations */
- res = NULL;
- for(chan = 0; chan < 4; ++chan) {
- LLVMValueRef cmp;
- cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
- cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
-
- if(res)
- res = lp_build_add(texel_bld, res, cmp);
- else
- res = cmp;
+ /* debug code */
+ if (0) {
+ LLVMValueRef indx = lp_build_const_int32(0);
+ LLVMValueRef coord = LLVMBuildExtractElement(bld->builder, p, indx, "");
+ LLVMValueRef tex = LLVMBuildExtractElement(bld->builder,
+ texel[chan], indx, "");
+ lp_build_printf(bld->builder, "shadow compare coord %f to texture %f\n",
+ coord, tex);
}
- assert(res);
- res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25));
+ /* result = (p FUNC texel) ? 1 : 0 */
+ res = lp_build_cmp(texel_bld, bld->static_state->compare_func,
+ p, texel[chan]);
+ res = lp_build_select(texel_bld, res, texel_bld->one, texel_bld->zero);
/* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
- for(chan = 0; chan < 3; ++chan)
- texel[chan] = res;
+ texel[0] =
+ texel[1] =
+ texel[2] = res;
texel[3] = texel_bld->one;
}
@@ -1131,15 +1160,14 @@ lp_build_sample_soa(LLVMBuilderRef builder,
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef texel_out[4])
{
+ unsigned dims = texture_dims(static_state->target);
struct lp_build_sample_context bld;
- LLVMValueRef width, width_vec;
- LLVMValueRef height, height_vec;
- LLVMValueRef depth, depth_vec;
- LLVMValueRef row_stride_array, img_stride_array;
- LLVMValueRef data_array;
+ LLVMTypeRef i32t = LLVMInt32Type();
+
LLVMValueRef s;
LLVMValueRef t;
LLVMValueRef r;
+ struct lp_type float_vec_type;
if (0) {
enum pipe_format fmt = static_state->format;
@@ -1154,38 +1182,57 @@ lp_build_sample_soa(LLVMBuilderRef builder,
bld.static_state = static_state;
bld.dynamic_state = dynamic_state;
bld.format_desc = util_format_description(static_state->format);
+ bld.dims = dims;
bld.float_type = lp_type_float(32);
bld.int_type = lp_type_int(32);
bld.coord_type = type;
- bld.uint_coord_type = lp_uint_type(type);
bld.int_coord_type = lp_int_type(type);
+ bld.float_size_type = lp_type_float(32);
+ bld.float_size_type.length = dims > 1 ? 4 : 1;
+ bld.int_size_type = lp_int_type(bld.float_size_type);
bld.texel_type = type;
+ float_vec_type = lp_type_float_vec(32);
+
lp_build_context_init(&bld.float_bld, builder, bld.float_type);
+ lp_build_context_init(&bld.float_vec_bld, builder, float_vec_type);
lp_build_context_init(&bld.int_bld, builder, bld.int_type);
lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
- lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
+ lp_build_context_init(&bld.int_size_bld, builder, bld.int_size_type);
+ lp_build_context_init(&bld.float_size_bld, builder, bld.float_size_type);
lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
/* Get the dynamic state */
- width = dynamic_state->width(dynamic_state, builder, unit);
- height = dynamic_state->height(dynamic_state, builder, unit);
- depth = dynamic_state->depth(dynamic_state, builder, unit);
- row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
- img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
- data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
+ bld.width = dynamic_state->width(dynamic_state, builder, unit);
+ bld.height = dynamic_state->height(dynamic_state, builder, unit);
+ bld.depth = dynamic_state->depth(dynamic_state, builder, unit);
+ bld.row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
+ bld.img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
+ bld.data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
/* Note that data_array is an array[level] of pointers to texture images */
s = coords[0];
t = coords[1];
r = coords[2];
- /* width, height, depth as uint vectors */
- width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
- height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
- depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
+ /* width, height, depth as single int vector */
+ if (dims <= 1) {
+ bld.int_size = bld.width;
+ }
+ else {
+ bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_bld.undef,
+ bld.width, LLVMConstInt(i32t, 0, 0), "");
+ if (dims >= 2) {
+ bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
+ bld.height, LLVMConstInt(i32t, 1, 0), "");
+ if (dims >= 3) {
+ bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
+ bld.depth, LLVMConstInt(i32t, 2, 0), "");
+ }
+ }
+ }
if (0) {
/* For debug: no-op texture sampling */
@@ -1195,13 +1242,9 @@ lp_build_sample_soa(LLVMBuilderRef builder,
lp_is_simple_wrap_mode(static_state->wrap_s) &&
lp_is_simple_wrap_mode(static_state->wrap_t)) {
/* do sampling/filtering with fixed pt arithmetic */
- printf("new sample\n");
lp_build_sample_aos(&bld, unit, s, t, r, ddx, ddy,
lod_bias, explicit_lod,
- width, height, depth,
- width_vec, height_vec, depth_vec,
- row_stride_array, img_stride_array,
- data_array, texel_out);
+ texel_out);
}
else {
@@ -1217,13 +1260,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
static_state->wrap_t);
}
- printf("old sample\n");
lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
lod_bias, explicit_lod,
- width, height, depth,
- width_vec, height_vec, depth_vec,
- row_stride_array, img_stride_array,
- data_array,
texel_out);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
index 2e9e8386de..4685a90e41 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
@@ -101,6 +101,83 @@ lp_build_broadcast_scalar(struct lp_build_context *bld,
/**
+ * Combined extract and broadcast (or a mere shuffle when the two types match)
+ */
+LLVMValueRef
+lp_build_extract_broadcast(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef vector,
+ LLVMValueRef index)
+{
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef res;
+
+ assert(src_type.floating == dst_type.floating);
+ assert(src_type.width == dst_type.width);
+
+ assert(lp_check_value(src_type, vector));
+ assert(LLVMTypeOf(index) == i32t);
+
+ if (src_type.length == 1) {
+ if (dst_type.length == 1) {
+ /*
+ * Trivial scalar -> scalar.
+ */
+
+ res = vector;
+ }
+ else {
+ /*
+ * Broadcast scalar -> vector.
+ */
+
+ res = lp_build_broadcast(builder,
+ lp_build_vec_type(dst_type),
+ vector);
+ }
+ }
+ else {
+ if (dst_type.length == src_type.length) {
+ /*
+ * Special shuffle of the same size.
+ */
+
+ LLVMValueRef shuffle;
+ shuffle = lp_build_broadcast(builder,
+ LLVMVectorType(i32t, dst_type.length),
+ index);
+ res = LLVMBuildShuffleVector(builder, vector,
+ LLVMGetUndef(lp_build_vec_type(dst_type)),
+ shuffle, "");
+ }
+ else {
+ LLVMValueRef scalar;
+ scalar = LLVMBuildExtractElement(builder, vector, index, "");
+ if (dst_type.length == 1) {
+ /*
+ * Trivial extract scalar from vector.
+ */
+
+ res = scalar;
+ }
+ else {
+ /*
+ * General case of different sized vectors.
+ */
+
+ res = lp_build_broadcast(builder,
+ lp_build_vec_type(dst_type),
+ vector);
+ }
+ }
+ }
+
+ return res;
+}
+
+
+/**
* Swizzle one channel into all other three channels.
*/
LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
index f9b6a5e725..fdea8442ae 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h
@@ -55,6 +55,14 @@ lp_build_broadcast_scalar(struct lp_build_context *bld,
LLVMValueRef scalar);
+LLVMValueRef
+lp_build_extract_broadcast(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef vector,
+ LLVMValueRef index);
+
+
/**
* Broadcast one channel of a vector composed of arrays of XYZW structures into
* all four channel.
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 97318b3456..a4d3b750c3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -36,6 +36,9 @@
#define LP_BLD_TGSI_H
#include "gallivm/lp_bld.h"
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
struct tgsi_token;
@@ -55,6 +58,75 @@ enum lp_build_tex_modifier {
/**
+ * Describe a channel of a register.
+ *
+ * The value can be a:
+ * - immediate value (i.e. derived from a IMM register)
+ * - CONST[n].x/y/z/w
+ * - IN[n].x/y/z/w
+ * - undetermined (when .file == TGSI_FILE_NULL)
+ *
+ * This is one of the analysis results, and is used to described
+ * the output color in terms of inputs.
+ */
+struct lp_tgsi_channel_info
+{
+ unsigned file:4; /* TGSI_FILE_* */
+ unsigned swizzle:3; /* PIPE_SWIZZLE_x */
+ union {
+ uint32_t index;
+ float value; /* for TGSI_FILE_IMMEDIATE */
+ } u;
+};
+
+
+/**
+ * Describe a texture sampler interpolator.
+ *
+ * The interpolation is described in terms of regular inputs.
+ */
+struct lp_tgsi_texture_info
+{
+ struct lp_tgsi_channel_info coord[4];
+ unsigned target:8; /* TGSI_TEXTURE_* */
+ unsigned unit:8; /* Sampler unit */
+ unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */
+};
+
+
+struct lp_tgsi_info
+{
+ struct tgsi_shader_info base;
+
+ /*
+ * Whether any of the texture opcodes access a register file other than
+ * TGSI_FILE_INPUT.
+ *
+ * We could also handle TGSI_FILE_CONST/IMMEDIATE here, but there is little
+ * benefit.
+ */
+ unsigned indirect_textures:1;
+
+ /*
+ * Texture opcode description. Aimed at detecting and described direct
+ * texture opcodes.
+ */
+ unsigned num_texs;
+ struct lp_tgsi_texture_info tex[PIPE_MAX_SAMPLERS];
+
+ /*
+ * Output description. Aimed at detecting and describing simple blit
+ * shaders.
+ */
+ struct lp_tgsi_channel_info output[PIPE_MAX_SHADER_OUTPUTS][4];
+
+ /*
+ * Shortcut pointers into the above (for fragment shaders).
+ */
+ const struct lp_tgsi_channel_info *cbuf[PIPE_MAX_COLOR_BUFS];
+};
+
+/**
* Sampler code generation interface.
*
* Although texture sampling is a requirement for TGSI translation, it is
@@ -97,6 +169,11 @@ struct lp_build_sampler_aos
void
+lp_build_tgsi_info(const struct tgsi_token *tokens,
+ struct lp_tgsi_info *info);
+
+
+void
lp_build_tgsi_soa(LLVMBuilderRef builder,
const struct tgsi_token *tokens,
struct lp_type type,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
new file mode 100644
index 0000000000..ad514463de
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
@@ -0,0 +1,479 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_dump.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_tgsi.h"
+
+
+/**
+ * Analysis context.
+ *
+ * This is where we keep store the value of each channel of the IMM/TEMP/OUT
+ * register values, as we walk the shader.
+ */
+struct analysis_context
+{
+ struct lp_tgsi_info *info;
+
+ unsigned num_imms;
+ float imm[32][4];
+
+ struct lp_tgsi_channel_info temp[32][4];
+};
+
+
+/**
+ * Describe the specified channel of the src register.
+ */
+static void
+analyse_src(struct analysis_context *ctx,
+ struct lp_tgsi_channel_info *chan_info,
+ const struct tgsi_src_register *src,
+ unsigned chan)
+{
+ chan_info->file = TGSI_FILE_NULL;
+ if (!src->Indirect && !src->Absolute && !src->Negate) {
+ unsigned swizzle = tgsi_util_get_src_register_swizzle(src, chan);
+ if (src->File == TGSI_FILE_TEMPORARY) {
+ if (src->Index < Elements(ctx->temp)) {
+ *chan_info = ctx->temp[src->Index][swizzle];
+ }
+ } else {
+ chan_info->file = src->File;
+ if (src->File == TGSI_FILE_IMMEDIATE) {
+ assert(src->Index < Elements(ctx->imm));
+ if (src->Index < Elements(ctx->imm)) {
+ chan_info->u.value = ctx->imm[src->Index][swizzle];
+ }
+ } else {
+ chan_info->u.index = src->Index;
+ chan_info->swizzle = swizzle;
+ }
+ }
+ }
+}
+
+
+/**
+ * Whether this register channel refers to a specific immediate value.
+ */
+static boolean
+is_immediate(const struct lp_tgsi_channel_info *chan_info, float value)
+{
+ return chan_info->file == TGSI_FILE_IMMEDIATE &&
+ chan_info->u.value == value;
+}
+
+
+static void
+analyse_tex(struct analysis_context *ctx,
+ const struct tgsi_full_instruction *inst,
+ enum lp_build_tex_modifier modifier)
+{
+ struct lp_tgsi_info *info = ctx->info;
+ unsigned chan;
+
+ if (info->num_texs < Elements(info->tex)) {
+ struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs];
+ bool indirect = FALSE;
+ unsigned readmask = 0;
+
+ tex_info->target = inst->Texture.Texture;
+ switch (inst->Texture.Texture) {
+ case TGSI_TEXTURE_1D:
+ readmask = TGSI_WRITEMASK_X;
+ break;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ readmask = TGSI_WRITEMASK_XY;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ readmask = TGSI_WRITEMASK_XYZ;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
+ /* We don't track explicit derivatives, although we could */
+ indirect = TRUE;
+ tex_info->unit = inst->Src[3].Register.Index;
+ } else {
+ if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED ||
+ modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
+ modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
+ readmask |= TGSI_WRITEMASK_W;
+ }
+ tex_info->unit = inst->Src[1].Register.Index;
+ }
+
+ for (chan = 0; chan < 4; ++chan) {
+ struct lp_tgsi_channel_info *chan_info = &tex_info->coord[chan];
+ if (readmask & (1 << chan)) {
+ analyse_src(ctx, chan_info, &inst->Src[0].Register, chan);
+ if (chan_info->file != TGSI_FILE_INPUT) {
+ indirect = TRUE;
+ }
+ } else {
+ memset(chan_info, 0, sizeof *chan_info);
+ }
+ }
+
+ if (indirect) {
+ info->indirect_textures = TRUE;
+ }
+
+ ++info->num_texs;
+ } else {
+ info->indirect_textures = TRUE;
+ }
+}
+
+
+/**
+ * Process an instruction, and update the register values accordingly.
+ */
+static void
+analyse_instruction(struct analysis_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct lp_tgsi_info *info = ctx->info;
+ struct lp_tgsi_channel_info (*regs)[4];
+ unsigned max_regs;
+ unsigned i;
+ unsigned index;
+ unsigned chan;
+
+ for (i = 0; i < inst->Instruction.NumDstRegs; ++i) {
+ const struct tgsi_dst_register *dst = &inst->Dst[i].Register;
+
+ /*
+ * Get the lp_tgsi_channel_info array corresponding to the destination
+ * register file.
+ */
+
+ if (dst->File == TGSI_FILE_TEMPORARY) {
+ regs = ctx->temp;
+ max_regs = Elements(ctx->temp);
+ } else if (dst->File == TGSI_FILE_OUTPUT) {
+ regs = info->output;
+ max_regs = Elements(info->output);
+ } else if (dst->File == TGSI_FILE_ADDRESS ||
+ dst->File == TGSI_FILE_PREDICATE) {
+ continue;
+ } else {
+ assert(0);
+ continue;
+ }
+
+ /*
+ * Detect direct TEX instructions
+ */
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_TEX:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_NONE);
+ break;
+ case TGSI_OPCODE_TXD:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
+ break;
+ case TGSI_OPCODE_TXB:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
+ break;
+ case TGSI_OPCODE_TXL:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
+ break;
+ case TGSI_OPCODE_TXP:
+ analyse_tex(ctx, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * Keep track of assignments and writes
+ */
+
+ if (dst->Indirect) {
+ /*
+ * It could be any register index so clear all register indices.
+ */
+
+ for (chan = 0; chan < 4; ++chan) {
+ if (dst->WriteMask & (1 << chan)) {
+ for (index = 0; index < max_regs; ++index) {
+ regs[index][chan].file = TGSI_FILE_NULL;
+ }
+ }
+ }
+ } else if (dst->Index < max_regs) {
+ /*
+ * Update this destination register value.
+ */
+
+ struct lp_tgsi_channel_info res[4];
+
+ memset(res, 0, sizeof res);
+
+ if (!inst->Instruction.Predicate &&
+ !inst->Instruction.Saturate) {
+ for (chan = 0; chan < 4; ++chan) {
+ if (dst->WriteMask & (1 << chan)) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
+ analyse_src(ctx, &res[chan],
+ &inst->Src[0].Register, chan);
+ } else if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) {
+ /*
+ * Propagate values across 1.0 and 0.0 multiplications.
+ */
+
+ struct lp_tgsi_channel_info src0;
+ struct lp_tgsi_channel_info src1;
+
+ analyse_src(ctx, &src0, &inst->Src[0].Register, chan);
+ analyse_src(ctx, &src1, &inst->Src[1].Register, chan);
+
+ if (is_immediate(&src0, 0.0f)) {
+ res[chan] = src0;
+ } else if (is_immediate(&src1, 0.0f)) {
+ res[chan] = src1;
+ } else if (is_immediate(&src0, 1.0f)) {
+ res[chan] = src1;
+ } else if (is_immediate(&src1, 1.0f)) {
+ res[chan] = src0;
+ }
+ }
+ }
+ }
+ }
+
+ for (chan = 0; chan < 4; ++chan) {
+ if (dst->WriteMask & (1 << chan)) {
+ regs[dst->Index][chan] = res[chan];
+ }
+ }
+ }
+ }
+
+ /*
+ * Clear all temporaries information in presence of a control flow opcode.
+ */
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_IFC:
+ case TGSI_OPCODE_ELSE:
+ case TGSI_OPCODE_ENDIF:
+ case TGSI_OPCODE_BGNLOOP:
+ case TGSI_OPCODE_BRK:
+ case TGSI_OPCODE_BREAKC:
+ case TGSI_OPCODE_CONT:
+ case TGSI_OPCODE_ENDLOOP:
+ case TGSI_OPCODE_CALLNZ:
+ case TGSI_OPCODE_CAL:
+ case TGSI_OPCODE_BGNSUB:
+ case TGSI_OPCODE_ENDSUB:
+ case TGSI_OPCODE_SWITCH:
+ case TGSI_OPCODE_CASE:
+ case TGSI_OPCODE_DEFAULT:
+ case TGSI_OPCODE_ENDSWITCH:
+ case TGSI_OPCODE_RET:
+ case TGSI_OPCODE_END:
+ /* XXX: Are there more cases? */
+ memset(&ctx->temp, 0, sizeof ctx->temp);
+ memset(&info->output, 0, sizeof info->output);
+ default:
+ break;
+ }
+}
+
+
+static INLINE void
+dump_info(const struct tgsi_token *tokens,
+ struct lp_tgsi_info *info)
+{
+ unsigned index;
+ unsigned chan;
+
+ tgsi_dump(tokens, 0);
+
+ for (index = 0; index < info->num_texs; ++index) {
+ const struct lp_tgsi_texture_info *tex_info = &info->tex[index];
+ debug_printf("TEX[%u] =", index);
+ for (chan = 0; chan < 4; ++chan) {
+ const struct lp_tgsi_channel_info *chan_info =
+ &tex_info->coord[chan];
+ if (chan_info->file != TGSI_FILE_NULL) {
+ debug_printf(" %s[%u].%c",
+ tgsi_file_names[chan_info->file],
+ chan_info->u.index,
+ "xyzw01"[chan_info->swizzle]);
+ } else {
+ debug_printf(" _");
+ }
+ }
+ debug_printf(", SAMP[%u], %s\n",
+ tex_info->unit,
+ tgsi_texture_names[tex_info->target]);
+ }
+
+ for (index = 0; index < PIPE_MAX_SHADER_OUTPUTS; ++index) {
+ for (chan = 0; chan < 4; ++chan) {
+ const struct lp_tgsi_channel_info *chan_info =
+ &info->output[index][chan];
+ if (chan_info->file != TGSI_FILE_NULL) {
+ debug_printf("OUT[%u].%c = ", index, "xyzw"[chan]);
+ if (chan_info->file == TGSI_FILE_IMMEDIATE) {
+ debug_printf("%f", chan_info->u.value);
+ } else {
+ const char *file_name;
+ switch (chan_info->file) {
+ case TGSI_FILE_CONSTANT:
+ file_name = "CONST";
+ break;
+ case TGSI_FILE_INPUT:
+ file_name = "IN";
+ break;
+ default:
+ file_name = "???";
+ break;
+ }
+ debug_printf("%s[%u].%c",
+ file_name,
+ chan_info->u.index,
+ "xyzw01"[chan_info->swizzle]);
+ }
+ debug_printf("\n");
+ }
+ }
+ }
+}
+
+
+/**
+ * Detect any direct relationship between the output color
+ */
+void
+lp_build_tgsi_info(const struct tgsi_token *tokens,
+ struct lp_tgsi_info *info)
+{
+ struct tgsi_parse_context parse;
+ struct analysis_context ctx;
+ unsigned index;
+ unsigned chan;
+
+ memset(info, 0, sizeof *info);
+
+ tgsi_scan_shader(tokens, &info->base);
+
+ memset(&ctx, 0, sizeof ctx);
+ ctx.info = info;
+
+ tgsi_parse_init(&parse, tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ struct tgsi_full_instruction *inst =
+ &parse.FullToken.FullInstruction;
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_END ||
+ inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
+ /* We reached the end of main function body. */
+ goto finished;
+ }
+
+ analyse_instruction(&ctx, inst);
+ }
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ const unsigned size =
+ parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+ assert(size <= 4);
+ if (ctx.num_imms < Elements(ctx.imm)) {
+ for (chan = 0; chan < size; ++chan) {
+ ctx.imm[ctx.num_imms][chan] =
+ parse.FullToken.FullImmediate.u[chan].Float;
+ }
+ ++ctx.num_imms;
+ }
+ }
+ break;
+
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+finished:
+
+ tgsi_parse_free(&parse);
+
+
+ /*
+ * Link the output color values.
+ */
+
+ for (index = 0; index < PIPE_MAX_COLOR_BUFS; ++index) {
+ const struct lp_tgsi_channel_info null_output[4];
+ info->cbuf[index] = null_output;
+ }
+
+ for (index = 0; index < info->base.num_outputs; ++index) {
+ unsigned semantic_name = info->base.output_semantic_name[index];
+ unsigned semantic_index = info->base.output_semantic_index[index];
+ if (semantic_name == TGSI_SEMANTIC_COLOR &&
+ semantic_index < PIPE_MAX_COLOR_BUFS) {
+ info->cbuf[semantic_index] = info->output[index];
+ }
+ }
+
+ if (gallivm_debug & GALLIVM_DEBUG_TGSI) {
+ dump_info(tokens, info);
+ }
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 441aebae29..2bc90579a2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -887,21 +887,25 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
}
if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
+ LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
for (i = 0; i < num_coords; i++) {
- ddx[i] = emit_fetch( bld, inst, 1, i );
- ddy[i] = emit_fetch( bld, inst, 2, i );
+ LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
+ LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
+ ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, "");
+ ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, "");
}
unit = inst->Src[3].Register.Index;
} else {
for (i = 0; i < num_coords; i++) {
- ddx[i] = lp_build_ddx( &bld->base, coords[i] );
- ddy[i] = lp_build_ddy( &bld->base, coords[i] );
+ ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
+ ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
}
unit = inst->Src[1].Register.Index;
}
for (i = num_coords; i < 3; i++) {
- ddx[i] = bld->base.undef;
- ddy[i] = bld->base.undef;
+ ddx[i] = LLVMGetUndef(bld->base.elem_type);
+ ddy[i] = LLVMGetUndef(bld->base.elem_type);
}
bld->sampler->emit_fetch_texel(bld->sampler,
@@ -913,6 +917,43 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
texel);
}
+static boolean
+near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
+ int pc)
+{
+ int i;
+
+ for (i = 0; i < 5; i++) {
+ unsigned opcode;
+
+ if (pc + i >= bld->info->num_instructions)
+ return TRUE;
+
+ opcode = bld->instructions[pc + i].Instruction.Opcode;
+
+ if (opcode == TGSI_OPCODE_END)
+ return TRUE;
+
+ if (opcode == TGSI_OPCODE_TEX ||
+ opcode == TGSI_OPCODE_TXP ||
+ opcode == TGSI_OPCODE_TXD ||
+ opcode == TGSI_OPCODE_TXB ||
+ opcode == TGSI_OPCODE_TXL ||
+ opcode == TGSI_OPCODE_TXF ||
+ opcode == TGSI_OPCODE_TXQ ||
+ opcode == TGSI_OPCODE_CAL ||
+ opcode == TGSI_OPCODE_CALLNZ ||
+ opcode == TGSI_OPCODE_IF ||
+ opcode == TGSI_OPCODE_IFC ||
+ opcode == TGSI_OPCODE_BGNLOOP ||
+ opcode == TGSI_OPCODE_SWITCH)
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
/**
* Kill fragment if any of the src register values are negative.
@@ -920,7 +961,8 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
static void
emit_kil(
struct lp_build_tgsi_soa_context *bld,
- const struct tgsi_full_instruction *inst )
+ const struct tgsi_full_instruction *inst,
+ int pc)
{
const struct tgsi_full_src_register *reg = &inst->Src[0];
LLVMValueRef terms[NUM_CHANNELS];
@@ -959,8 +1001,12 @@ emit_kil(
}
}
- if(mask)
+ if(mask) {
lp_build_mask_update(bld->mask, mask);
+
+ if (!near_end_of_shader(bld, pc))
+ lp_build_mask_check(bld->mask);
+ }
}
@@ -972,7 +1018,8 @@ emit_kil(
*/
static void
emit_kilp(struct lp_build_tgsi_soa_context *bld,
- const struct tgsi_full_instruction *inst)
+ const struct tgsi_full_instruction *inst,
+ int pc)
{
LLVMValueRef mask;
@@ -987,6 +1034,9 @@ emit_kilp(struct lp_build_tgsi_soa_context *bld,
}
lp_build_mask_update(bld->mask, mask);
+
+ if (!near_end_of_shader(bld, pc))
+ lp_build_mask_check(bld->mask);
}
static void
@@ -1535,12 +1585,12 @@ emit_instruction(
case TGSI_OPCODE_KILP:
/* predicated kill */
- emit_kilp( bld, inst );
+ emit_kilp( bld, inst, (*pc)-1 );
break;
case TGSI_OPCODE_KIL:
/* conditional kill */
- emit_kil( bld, inst );
+ emit_kil( bld, inst, (*pc)-1 );
break;
case TGSI_OPCODE_PK2H:
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index b4d8107372..a6eb403962 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -222,7 +222,7 @@ pb_cache_buffer_vtbl = {
};
-static INLINE boolean
+static INLINE int
pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
pb_size size,
const struct pb_desc *desc)
@@ -230,26 +230,26 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
void *map;
if(buf->base.base.size < size)
- return FALSE;
+ return 0;
/* be lenient with size */
if(buf->base.base.size >= 2*size)
- return FALSE;
+ return 0;
if(!pb_check_alignment(desc->alignment, buf->base.base.alignment))
- return FALSE;
+ return 0;
if(!pb_check_usage(desc->usage, buf->base.base.usage))
- return FALSE;
+ return 0;
map = pb_map(buf->buffer, PB_USAGE_DONTBLOCK, NULL);
if (!map) {
- return FALSE;
+ return -1;
}
pb_unmap(buf->buffer);
- return TRUE;
+ return 1;
}
@@ -263,7 +263,8 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
struct pb_cache_buffer *curr_buf;
struct list_head *curr, *next;
int64_t now;
-
+ int ret = 0;
+
pipe_mutex_lock(mgr->mutex);
buf = NULL;
@@ -274,25 +275,30 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
now = os_time_get();
while(curr != &mgr->delayed) {
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
- if(!buf && pb_cache_is_buffer_compat(curr_buf, size, desc))
- buf = curr_buf;
+ if(!buf && (ret = pb_cache_is_buffer_compat(curr_buf, size, desc) > 0))
+ buf = curr_buf;
else if(os_time_timeout(curr_buf->start, curr_buf->end, now))
- _pb_cache_buffer_destroy(curr_buf);
+ _pb_cache_buffer_destroy(curr_buf);
else
/* This buffer (and all hereafter) are still hot in cache */
break;
+ if (ret == -1)
+ break;
curr = next;
next = curr->next;
}
/* keep searching in the hot buffers */
- if(!buf) {
+ if(!buf && ret != -1) {
while(curr != &mgr->delayed) {
curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
- if(pb_cache_is_buffer_compat(curr_buf, size, desc)) {
+ ret = pb_cache_is_buffer_compat(curr_buf, size, desc);
+ if (ret > 0) {
buf = curr_buf;
break;
}
+ if (ret == -1)
+ break;
/* no need to check the timeout here */
curr = next;
next = curr->next;
@@ -301,6 +307,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
if(buf) {
LIST_DEL(&buf->head);
+ --mgr->numDelayed;
pipe_mutex_unlock(mgr->mutex);
/* Increase refcount */
pipe_reference_init(&buf->base.base.reference, 1);
diff --git a/src/gallium/auxiliary/rbug/rbug_context.c b/src/gallium/auxiliary/rbug/rbug_context.c
index 1832425658..a3fd7e8430 100644
--- a/src/gallium/auxiliary/rbug/rbug_context.c
+++ b/src/gallium/auxiliary/rbug/rbug_context.c
@@ -480,7 +480,7 @@ struct rbug_proto_context_list * rbug_demarshal_context_list(struct rbug_proto_h
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_LIST)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_LIST)
return NULL;
pos = 0;
@@ -506,7 +506,7 @@ struct rbug_proto_context_info * rbug_demarshal_context_info(struct rbug_proto_h
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_INFO)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_INFO)
return NULL;
pos = 0;
@@ -533,7 +533,7 @@ struct rbug_proto_context_draw_block * rbug_demarshal_context_draw_block(struct
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_BLOCK)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_BLOCK)
return NULL;
pos = 0;
@@ -561,7 +561,7 @@ struct rbug_proto_context_draw_step * rbug_demarshal_context_draw_step(struct rb
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_STEP)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_STEP)
return NULL;
pos = 0;
@@ -589,7 +589,7 @@ struct rbug_proto_context_draw_unblock * rbug_demarshal_context_draw_unblock(str
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_UNBLOCK)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_UNBLOCK)
return NULL;
pos = 0;
@@ -617,7 +617,7 @@ struct rbug_proto_context_draw_rule * rbug_demarshal_context_draw_rule(struct rb
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_RULE)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_RULE)
return NULL;
pos = 0;
@@ -649,7 +649,7 @@ struct rbug_proto_context_flush * rbug_demarshal_context_flush(struct rbug_proto
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_FLUSH)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_FLUSH)
return NULL;
pos = 0;
@@ -677,7 +677,7 @@ struct rbug_proto_context_list_reply * rbug_demarshal_context_list_reply(struct
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_LIST_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_LIST_REPLY)
return NULL;
pos = 0;
@@ -705,7 +705,7 @@ struct rbug_proto_context_info_reply * rbug_demarshal_context_info_reply(struct
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_INFO_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_INFO_REPLY)
return NULL;
pos = 0;
@@ -739,7 +739,7 @@ struct rbug_proto_context_draw_blocked * rbug_demarshal_context_draw_blocked(str
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_CONTEXT_DRAW_BLOCKED)
+ if (header->opcode != (int32_t)RBUG_OP_CONTEXT_DRAW_BLOCKED)
return NULL;
pos = 0;
diff --git a/src/gallium/auxiliary/rbug/rbug_core.c b/src/gallium/auxiliary/rbug/rbug_core.c
index 876ae5a0ce..1d47d13c9f 100644
--- a/src/gallium/auxiliary/rbug/rbug_core.c
+++ b/src/gallium/auxiliary/rbug/rbug_core.c
@@ -233,7 +233,7 @@ struct rbug_proto_noop * rbug_demarshal_noop(struct rbug_proto_header *header)
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_NOOP)
+ if (header->opcode != (int32_t)RBUG_OP_NOOP)
return NULL;
pos = 0;
@@ -259,7 +259,7 @@ struct rbug_proto_ping * rbug_demarshal_ping(struct rbug_proto_header *header)
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_PING)
+ if (header->opcode != (int32_t)RBUG_OP_PING)
return NULL;
pos = 0;
@@ -285,7 +285,7 @@ struct rbug_proto_error * rbug_demarshal_error(struct rbug_proto_header *header)
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_ERROR)
+ if (header->opcode != (int32_t)RBUG_OP_ERROR)
return NULL;
pos = 0;
@@ -312,7 +312,7 @@ struct rbug_proto_ping_reply * rbug_demarshal_ping_reply(struct rbug_proto_heade
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_PING_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_PING_REPLY)
return NULL;
pos = 0;
@@ -339,7 +339,7 @@ struct rbug_proto_error_reply * rbug_demarshal_error_reply(struct rbug_proto_hea
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_ERROR_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_ERROR_REPLY)
return NULL;
pos = 0;
diff --git a/src/gallium/auxiliary/rbug/rbug_demarshal.c b/src/gallium/auxiliary/rbug/rbug_demarshal.c
index 47390fbcee..06caa45469 100644
--- a/src/gallium/auxiliary/rbug/rbug_demarshal.c
+++ b/src/gallium/auxiliary/rbug/rbug_demarshal.c
@@ -91,3 +91,67 @@ struct rbug_header * rbug_demarshal(struct rbug_proto_header *header)
return NULL;
}
}
+
+const char* rbug_proto_get_name(enum rbug_opcode opcode)
+{
+ switch(opcode) {
+ case RBUG_OP_NOOP:
+ return "RBUG_OP_NOOP";
+ case RBUG_OP_PING:
+ return "RBUG_OP_PING";
+ case RBUG_OP_ERROR:
+ return "RBUG_OP_ERROR";
+ case RBUG_OP_PING_REPLY:
+ return "RBUG_OP_PING_REPLY";
+ case RBUG_OP_ERROR_REPLY:
+ return "RBUG_OP_ERROR_REPLY";
+ case RBUG_OP_TEXTURE_LIST:
+ return "RBUG_OP_TEXTURE_LIST";
+ case RBUG_OP_TEXTURE_INFO:
+ return "RBUG_OP_TEXTURE_INFO";
+ case RBUG_OP_TEXTURE_WRITE:
+ return "RBUG_OP_TEXTURE_WRITE";
+ case RBUG_OP_TEXTURE_READ:
+ return "RBUG_OP_TEXTURE_READ";
+ case RBUG_OP_TEXTURE_LIST_REPLY:
+ return "RBUG_OP_TEXTURE_LIST_REPLY";
+ case RBUG_OP_TEXTURE_INFO_REPLY:
+ return "RBUG_OP_TEXTURE_INFO_REPLY";
+ case RBUG_OP_TEXTURE_READ_REPLY:
+ return "RBUG_OP_TEXTURE_READ_REPLY";
+ case RBUG_OP_CONTEXT_LIST:
+ return "RBUG_OP_CONTEXT_LIST";
+ case RBUG_OP_CONTEXT_INFO:
+ return "RBUG_OP_CONTEXT_INFO";
+ case RBUG_OP_CONTEXT_DRAW_BLOCK:
+ return "RBUG_OP_CONTEXT_DRAW_BLOCK";
+ case RBUG_OP_CONTEXT_DRAW_STEP:
+ return "RBUG_OP_CONTEXT_DRAW_STEP";
+ case RBUG_OP_CONTEXT_DRAW_UNBLOCK:
+ return "RBUG_OP_CONTEXT_DRAW_UNBLOCK";
+ case RBUG_OP_CONTEXT_DRAW_RULE:
+ return "RBUG_OP_CONTEXT_DRAW_RULE";
+ case RBUG_OP_CONTEXT_FLUSH:
+ return "RBUG_OP_CONTEXT_FLUSH";
+ case RBUG_OP_CONTEXT_LIST_REPLY:
+ return "RBUG_OP_CONTEXT_LIST_REPLY";
+ case RBUG_OP_CONTEXT_INFO_REPLY:
+ return "RBUG_OP_CONTEXT_INFO_REPLY";
+ case RBUG_OP_CONTEXT_DRAW_BLOCKED:
+ return "RBUG_OP_CONTEXT_DRAW_BLOCKED";
+ case RBUG_OP_SHADER_LIST:
+ return "RBUG_OP_SHADER_LIST";
+ case RBUG_OP_SHADER_INFO:
+ return "RBUG_OP_SHADER_INFO";
+ case RBUG_OP_SHADER_DISABLE:
+ return "RBUG_OP_SHADER_DISABLE";
+ case RBUG_OP_SHADER_REPLACE:
+ return "RBUG_OP_SHADER_REPLACE";
+ case RBUG_OP_SHADER_LIST_REPLY:
+ return "RBUG_OP_SHADER_LIST_REPLY";
+ case RBUG_OP_SHADER_INFO_REPLY:
+ return "RBUG_OP_SHADER_INFO_REPLY";
+ default:
+ return NULL;
+ }
+}
diff --git a/src/gallium/auxiliary/rbug/rbug_proto.h b/src/gallium/auxiliary/rbug/rbug_proto.h
index 4f3eb75dc4..2fce725bc9 100644
--- a/src/gallium/auxiliary/rbug/rbug_proto.h
+++ b/src/gallium/auxiliary/rbug/rbug_proto.h
@@ -91,4 +91,9 @@ struct rbug_proto_header
*/
struct rbug_connection;
+/**
+ * Get printable string for opcode.
+ */
+const char* rbug_proto_get_name(enum rbug_opcode opcode);
+
#endif
diff --git a/src/gallium/auxiliary/rbug/rbug_shader.c b/src/gallium/auxiliary/rbug/rbug_shader.c
index fccd2f55ef..1742941cc1 100644
--- a/src/gallium/auxiliary/rbug/rbug_shader.c
+++ b/src/gallium/auxiliary/rbug/rbug_shader.c
@@ -305,7 +305,7 @@ struct rbug_proto_shader_list * rbug_demarshal_shader_list(struct rbug_proto_hea
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_SHADER_LIST)
+ if (header->opcode != (int32_t)RBUG_OP_SHADER_LIST)
return NULL;
pos = 0;
@@ -332,7 +332,7 @@ struct rbug_proto_shader_info * rbug_demarshal_shader_info(struct rbug_proto_hea
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_SHADER_INFO)
+ if (header->opcode != (int32_t)RBUG_OP_SHADER_INFO)
return NULL;
pos = 0;
@@ -360,7 +360,7 @@ struct rbug_proto_shader_disable * rbug_demarshal_shader_disable(struct rbug_pro
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_SHADER_DISABLE)
+ if (header->opcode != (int32_t)RBUG_OP_SHADER_DISABLE)
return NULL;
pos = 0;
@@ -389,7 +389,7 @@ struct rbug_proto_shader_replace * rbug_demarshal_shader_replace(struct rbug_pro
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_SHADER_REPLACE)
+ if (header->opcode != (int32_t)RBUG_OP_SHADER_REPLACE)
return NULL;
pos = 0;
@@ -418,7 +418,7 @@ struct rbug_proto_shader_list_reply * rbug_demarshal_shader_list_reply(struct rb
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_SHADER_LIST_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_SHADER_LIST_REPLY)
return NULL;
pos = 0;
@@ -446,7 +446,7 @@ struct rbug_proto_shader_info_reply * rbug_demarshal_shader_info_reply(struct rb
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_SHADER_INFO_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_SHADER_INFO_REPLY)
return NULL;
pos = 0;
diff --git a/src/gallium/auxiliary/rbug/rbug_texture.c b/src/gallium/auxiliary/rbug/rbug_texture.c
index 5a918fe6bc..2ad577915e 100644
--- a/src/gallium/auxiliary/rbug/rbug_texture.c
+++ b/src/gallium/auxiliary/rbug/rbug_texture.c
@@ -417,7 +417,7 @@ struct rbug_proto_texture_list * rbug_demarshal_texture_list(struct rbug_proto_h
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_TEXTURE_LIST)
+ if (header->opcode != (int32_t)RBUG_OP_TEXTURE_LIST)
return NULL;
pos = 0;
@@ -443,7 +443,7 @@ struct rbug_proto_texture_info * rbug_demarshal_texture_info(struct rbug_proto_h
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_TEXTURE_INFO)
+ if (header->opcode != (int32_t)RBUG_OP_TEXTURE_INFO)
return NULL;
pos = 0;
@@ -470,7 +470,7 @@ struct rbug_proto_texture_write * rbug_demarshal_texture_write(struct rbug_proto
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_TEXTURE_WRITE)
+ if (header->opcode != (int32_t)RBUG_OP_TEXTURE_WRITE)
return NULL;
pos = 0;
@@ -506,7 +506,7 @@ struct rbug_proto_texture_read * rbug_demarshal_texture_read(struct rbug_proto_h
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_TEXTURE_READ)
+ if (header->opcode != (int32_t)RBUG_OP_TEXTURE_READ)
return NULL;
pos = 0;
@@ -540,7 +540,7 @@ struct rbug_proto_texture_list_reply * rbug_demarshal_texture_list_reply(struct
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_TEXTURE_LIST_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_TEXTURE_LIST_REPLY)
return NULL;
pos = 0;
@@ -568,7 +568,7 @@ struct rbug_proto_texture_info_reply * rbug_demarshal_texture_info_reply(struct
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_TEXTURE_INFO_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_TEXTURE_INFO_REPLY)
return NULL;
pos = 0;
@@ -606,7 +606,7 @@ struct rbug_proto_texture_read_reply * rbug_demarshal_texture_read_reply(struct
if (!header)
return NULL;
- if (header->opcode != (int16_t)RBUG_OP_TEXTURE_READ_REPLY)
+ if (header->opcode != (int32_t)RBUG_OP_TEXTURE_READ_REPLY)
return NULL;
pos = 0;
diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
index 65d5ce795b..fbde1d191a 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c
@@ -58,7 +58,6 @@
#include <unistd.h>
#include <sys/mman.h>
-#include "os/os_thread.h"
#include "util/u_mm.h"
#define EXEC_HEAP_SIZE (10*1024*1024)
diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
index 036c1ee48a..34bfa527db 100644
--- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -23,26 +23,13 @@
#include "cell/ppu/cell_public.h"
#endif
+
static INLINE struct pipe_screen *
-sw_screen_create(struct sw_winsys *winsys)
+sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
{
- const char *default_driver;
- const char *driver;
struct pipe_screen *screen = NULL;
#if defined(GALLIUM_CELL)
- default_driver = "cell";
-#elif defined(GALLIUM_LLVMPIPE)
- default_driver = "llvmpipe";
-#elif defined(GALLIUM_SOFTPIPE)
- default_driver = "softpipe";
-#else
- default_driver = "";
-#endif
-
- driver = debug_get_option("GALLIUM_DRIVER", default_driver);
-
-#if defined(GALLIUM_CELL)
if (screen == NULL && strcmp(driver, "cell") == 0)
screen = cell_create_screen(winsys);
#endif
@@ -60,4 +47,26 @@ sw_screen_create(struct sw_winsys *winsys)
return screen;
}
+
+static INLINE struct pipe_screen *
+sw_screen_create(struct sw_winsys *winsys)
+{
+ const char *default_driver;
+ const char *driver;
+
+#if defined(GALLIUM_CELL)
+ default_driver = "cell";
+#elif defined(GALLIUM_LLVMPIPE)
+ default_driver = "llvmpipe";
+#elif defined(GALLIUM_SOFTPIPE)
+ default_driver = "softpipe";
+#else
+ default_driver = "";
+#endif
+
+ driver = debug_get_option("GALLIUM_DRIVER", default_driver);
+ return sw_screen_create_named(winsys, driver);
+}
+
+
#endif
diff --git a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
index 0b4e740403..e4effa713e 100644
--- a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
@@ -13,22 +13,28 @@ static INLINE struct pipe_screen *
sw_screen_wrap(struct pipe_screen *screen)
{
struct sw_winsys *sws;
- struct pipe_screen *sw_screen;
+ struct pipe_screen *sw_screen = NULL;
+ const char *driver;
- sws = wrapper_sw_winsys_warp_pipe_screen(screen);
+ driver = debug_get_option("GALLIUM_DRIVER", "native");
+ if (strcmp(driver, "native") == 0)
+ return screen;
+
+ sws = wrapper_sw_winsys_wrap_pipe_screen(screen);
if (!sws)
goto err;
- sw_screen = sw_screen_create(sws);
- if (sw_screen == screen)
+ sw_screen = sw_screen_create_named(sws, driver);
+
+ if (!sw_screen)
goto err_winsys;
return sw_screen;
err_winsys:
- sws->destroy(sws);
+ return wrapper_sw_winsys_dewrap_pipe_screen(sws);
err:
- return screen;
+ return screen;
}
#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index f71ffb7030..77bde86684 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -90,7 +90,8 @@ static const char *processor_type_names[] =
"GEOM"
};
-static const char *file_names[TGSI_FILE_COUNT] =
+const char *
+tgsi_file_names[TGSI_FILE_COUNT] =
{
"NULL",
"CONST",
@@ -125,7 +126,8 @@ static const char *semantic_names[] =
"FACE",
"EDGEFLAG",
"PRIM_ID",
- "INSTANCEID"
+ "INSTANCEID",
+ "STENCIL"
};
static const char *immediate_type_names[] =
@@ -135,7 +137,8 @@ static const char *immediate_type_names[] =
"INT32"
};
-static const char *swizzle_names[] =
+const char *
+tgsi_swizzle_names[] =
{
"x",
"y",
@@ -143,7 +146,8 @@ static const char *swizzle_names[] =
"w"
};
-static const char *texture_names[] =
+const char *
+tgsi_texture_names[] =
{
"UNKNOWN",
"1D",
@@ -201,15 +205,15 @@ _dump_register_src(
struct dump_ctx *ctx,
const struct tgsi_full_src_register *src )
{
- ENM(src->Register.File, file_names);
+ ENM(src->Register.File, tgsi_file_names);
if (src->Register.Dimension) {
if (src->Dimension.Indirect) {
CHR( '[' );
- ENM( src->DimIndirect.File, file_names );
+ ENM( src->DimIndirect.File, tgsi_file_names );
CHR( '[' );
SID( src->DimIndirect.Index );
TXT( "]." );
- ENM( src->DimIndirect.SwizzleX, swizzle_names );
+ ENM( src->DimIndirect.SwizzleX, tgsi_swizzle_names );
if (src->Dimension.Index != 0) {
if (src->Dimension.Index > 0)
CHR( '+' );
@@ -224,11 +228,11 @@ _dump_register_src(
}
if (src->Register.Indirect) {
CHR( '[' );
- ENM( src->Indirect.File, file_names );
+ ENM( src->Indirect.File, tgsi_file_names );
CHR( '[' );
SID( src->Indirect.Index );
TXT( "]." );
- ENM( src->Indirect.SwizzleX, swizzle_names );
+ ENM( src->Indirect.SwizzleX, tgsi_swizzle_names );
if (src->Register.Index != 0) {
if (src->Register.Index > 0)
CHR( '+' );
@@ -248,15 +252,15 @@ _dump_register_dst(
struct dump_ctx *ctx,
const struct tgsi_full_dst_register *dst )
{
- ENM(dst->Register.File, file_names);
+ ENM(dst->Register.File, tgsi_file_names);
if (dst->Register.Dimension) {
if (dst->Dimension.Indirect) {
CHR( '[' );
- ENM( dst->DimIndirect.File, file_names );
+ ENM( dst->DimIndirect.File, tgsi_file_names );
CHR( '[' );
SID( dst->DimIndirect.Index );
TXT( "]." );
- ENM( dst->DimIndirect.SwizzleX, swizzle_names );
+ ENM( dst->DimIndirect.SwizzleX, tgsi_swizzle_names );
if (dst->Dimension.Index != 0) {
if (dst->Dimension.Index > 0)
CHR( '+' );
@@ -271,11 +275,11 @@ _dump_register_dst(
}
if (dst->Register.Indirect) {
CHR( '[' );
- ENM( dst->Indirect.File, file_names );
+ ENM( dst->Indirect.File, tgsi_file_names );
CHR( '[' );
SID( dst->Indirect.Index );
TXT( "]." );
- ENM( dst->Indirect.SwizzleX, swizzle_names );
+ ENM( dst->Indirect.SwizzleX, tgsi_swizzle_names );
if (dst->Register.Index != 0) {
if (dst->Register.Index > 0)
CHR( '+' );
@@ -351,7 +355,7 @@ iter_declaration(
TXT( "DCL " );
- ENM(decl->Declaration.File, file_names);
+ ENM(decl->Declaration.File, tgsi_file_names);
/* all geometry shader inputs are two dimensional */
if (decl->Declaration.File == TGSI_FILE_INPUT &&
@@ -585,10 +589,10 @@ iter_instruction(
inst->Predicate.SwizzleZ != TGSI_SWIZZLE_Z ||
inst->Predicate.SwizzleW != TGSI_SWIZZLE_W) {
CHR( '.' );
- ENM( inst->Predicate.SwizzleX, swizzle_names );
- ENM( inst->Predicate.SwizzleY, swizzle_names );
- ENM( inst->Predicate.SwizzleZ, swizzle_names );
- ENM( inst->Predicate.SwizzleW, swizzle_names );
+ ENM( inst->Predicate.SwizzleX, tgsi_swizzle_names );
+ ENM( inst->Predicate.SwizzleY, tgsi_swizzle_names );
+ ENM( inst->Predicate.SwizzleZ, tgsi_swizzle_names );
+ ENM( inst->Predicate.SwizzleW, tgsi_swizzle_names );
}
TXT( ") " );
@@ -641,10 +645,10 @@ iter_instruction(
src->Register.SwizzleZ != TGSI_SWIZZLE_Z ||
src->Register.SwizzleW != TGSI_SWIZZLE_W) {
CHR( '.' );
- ENM( src->Register.SwizzleX, swizzle_names );
- ENM( src->Register.SwizzleY, swizzle_names );
- ENM( src->Register.SwizzleZ, swizzle_names );
- ENM( src->Register.SwizzleW, swizzle_names );
+ ENM( src->Register.SwizzleX, tgsi_swizzle_names );
+ ENM( src->Register.SwizzleY, tgsi_swizzle_names );
+ ENM( src->Register.SwizzleZ, tgsi_swizzle_names );
+ ENM( src->Register.SwizzleW, tgsi_swizzle_names );
}
if (src->Register.Absolute)
@@ -655,7 +659,7 @@ iter_instruction(
if (inst->Instruction.Texture) {
TXT( ", " );
- ENM( inst->Texture.Texture, texture_names );
+ ENM( inst->Texture.Texture, tgsi_texture_names );
}
switch (inst->Instruction.Opcode) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
index dd78b36100..fc0429ad8d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -35,6 +35,15 @@
extern "C" {
#endif
+extern const char *
+tgsi_file_names[TGSI_FILE_COUNT];
+
+extern const char *
+tgsi_swizzle_names[];
+
+extern const char *
+tgsi_texture_names[];
+
void
tgsi_dump_str(
const struct tgsi_token *tokens,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 0757f05dfa..3a71540506 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -605,8 +605,10 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
if ((inst->Src[i].Register.File ==
inst->Dst[0].Register.File) &&
- (inst->Src[i].Register.Index ==
- inst->Dst[0].Register.Index)) {
+ ((inst->Src[i].Register.Index ==
+ inst->Dst[0].Register.Index) ||
+ inst->Src[i].Register.Indirect ||
+ inst->Dst[0].Register.Indirect)) {
/* loop over dest channels */
uint channelsWritten = 0x0;
FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index e472947507..b3123ed016 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -163,6 +163,10 @@ OP12(USGE)
OP12(USHR)
OP12(USLT)
OP12(USNE)
+OP01(SWITCH)
+OP01(CASE)
+OP00(DEFAULT)
+OP00(ENDSWITCH)
#undef OP00
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 90198a4f60..6585da3e83 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -147,6 +147,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name;
info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index;
info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate;
+ info->input_centroid[reg] = (ubyte)fulldecl->Declaration.Centroid;
info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Declaration.CylindricalWrap;
info->num_inputs++;
}
@@ -157,9 +158,11 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
/* extra info for special outputs */
if (procType == TGSI_PROCESSOR_FRAGMENT &&
- fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
- info->writes_z = TRUE;
- }
+ fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION)
+ info->writes_z = TRUE;
+ if (procType == TGSI_PROCESSOR_FRAGMENT &&
+ fulldecl->Semantic.Name == TGSI_SEMANTIC_STENCIL)
+ info->writes_stencil = TRUE;
if (procType == TGSI_PROCESSOR_VERTEX &&
fulldecl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG) {
info->writes_edgeflag = TRUE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index f8aa90cf06..104097fbc0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -45,6 +45,7 @@ struct tgsi_shader_info
ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */
ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
+ ubyte input_centroid[PIPE_MAX_SHADER_INPUTS];
ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS];
ubyte input_cylindrical_wrap[PIPE_MAX_SHADER_INPUTS];
ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */
@@ -60,6 +61,7 @@ struct tgsi_shader_info
uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */
boolean writes_z; /**< does fragment shader write Z value? */
+ boolean writes_stencil; /**< does fragment shader write stencil value? */
boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
boolean uses_kill; /**< KIL or KILP instruction used? */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 13e2e8eb99..086d983a73 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2830,31 +2830,52 @@ static void soa_to_aos( struct x86_function *func,
* Check if the instructions dst register is the same as any src
* register and warn if there's a posible SOA dependency.
*/
-static void
+static boolean
check_soa_dependencies(const struct tgsi_full_instruction *inst)
{
- switch (inst->Instruction.Opcode) {
+ uint opcode = inst->Instruction.Opcode;
+
+ /* XXX: we only handle src/dst aliasing in a few opcodes currently.
+ * Need to use an additional temporay to hold the result in the
+ * cases where the code is too opaque to fix.
+ */
+
+ switch (opcode) {
case TGSI_OPCODE_ADD:
case TGSI_OPCODE_MOV:
case TGSI_OPCODE_MUL:
+ case TGSI_OPCODE_RCP:
+ case TGSI_OPCODE_RSQ:
+ case TGSI_OPCODE_EXP:
+ case TGSI_OPCODE_LOG:
+ case TGSI_OPCODE_DP3:
+ case TGSI_OPCODE_DP4:
+ case TGSI_OPCODE_DP2A:
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
+ case TGSI_OPCODE_POW:
case TGSI_OPCODE_XPD:
+ case TGSI_OPCODE_DPH:
+ case TGSI_OPCODE_COS:
+ case TGSI_OPCODE_SIN:
+ case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TXB:
+ case TGSI_OPCODE_TXP:
+ case TGSI_OPCODE_NRM:
+ case TGSI_OPCODE_NRM4:
+ case TGSI_OPCODE_DP2:
/* OK - these opcodes correctly handle SOA dependencies */
- break;
+ return TRUE;
default:
- if (tgsi_check_soa_dependencies(inst)) {
- uint opcode = inst->Instruction.Opcode;
+ if (!tgsi_check_soa_dependencies(inst))
+ return TRUE;
- /* XXX: we only handle src/dst aliasing in a few opcodes
- * currently. Need to use an additional temporay to hold
- * the result in the cases where the code is too opaque to
- * fix.
- */
- if (opcode != TGSI_OPCODE_MOV) {
- debug_printf("Warning: src/dst aliasing in instruction"
- " is not handled:\n");
- tgsi_dump_instruction(inst, 1);
- }
- }
+ debug_printf("Warning: src/dst aliasing in instruction"
+ " is not handled:\n");
+ debug_printf("Warning: ");
+ tgsi_dump_instruction(inst, 1);
+
+ return FALSE;
}
}
@@ -2954,7 +2975,8 @@ tgsi_emit_sse2(
tgsi_get_processor_name(proc));
}
- check_soa_dependencies(&parse.FullToken.FullInstruction);
+ if (ok)
+ ok = check_soa_dependencies(&parse.FullToken.FullInstruction);
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 3cf6893a9b..7d13a17bdb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -96,7 +96,8 @@ struct ureg_program
unsigned semantic_name;
unsigned semantic_index;
unsigned interp;
- unsigned cylindrical_wrap;
+ unsigned char cylindrical_wrap;
+ unsigned char centroid;
} fs_input[UREG_MAX_INPUT];
unsigned nr_fs_inputs;
@@ -286,11 +287,12 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg,
struct ureg_src
-ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
+ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg,
unsigned semantic_name,
unsigned semantic_index,
unsigned interp_mode,
- unsigned cylindrical_wrap)
+ unsigned cylindrical_wrap,
+ unsigned centroid)
{
unsigned i;
@@ -306,6 +308,7 @@ ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
ureg->fs_input[i].semantic_index = semantic_index;
ureg->fs_input[i].interp = interp_mode;
ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap;
+ ureg->fs_input[i].centroid = centroid;
ureg->nr_fs_inputs++;
} else {
set_bad(ureg);
@@ -1126,7 +1129,8 @@ emit_decl_fs(struct ureg_program *ureg,
unsigned semantic_name,
unsigned semantic_index,
unsigned interpolate,
- unsigned cylindrical_wrap)
+ unsigned cylindrical_wrap,
+ unsigned centroid)
{
union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
@@ -1138,6 +1142,7 @@ emit_decl_fs(struct ureg_program *ureg,
out[0].decl.Interpolate = interpolate;
out[0].decl.Semantic = 1;
out[0].decl.CylindricalWrap = cylindrical_wrap;
+ out[0].decl.Centroid = centroid;
out[1].value = 0;
out[1].decl_range.First = index;
@@ -1287,7 +1292,8 @@ static void emit_decls( struct ureg_program *ureg )
ureg->fs_input[i].semantic_name,
ureg->fs_input[i].semantic_index,
ureg->fs_input[i].interp,
- ureg->fs_input[i].cylindrical_wrap);
+ ureg->fs_input[i].cylindrical_wrap,
+ ureg->fs_input[i].centroid);
}
} else {
for (i = 0; i < ureg->nr_gs_inputs; i++) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index 07fb01ab7b..acc463200a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -158,11 +158,27 @@ ureg_property_fs_coord_pixel_center(struct ureg_program *ureg,
*/
struct ureg_src
-ureg_DECL_fs_input_cyl(struct ureg_program *,
+ureg_DECL_fs_input_cyl_centroid(struct ureg_program *,
unsigned semantic_name,
unsigned semantic_index,
unsigned interp_mode,
- unsigned cylindrical_wrap);
+ unsigned cylindrical_wrap,
+ unsigned centroid);
+
+static INLINE struct ureg_src
+ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
+ unsigned semantic_name,
+ unsigned semantic_index,
+ unsigned interp_mode,
+ unsigned cylindrical_wrap)
+{
+ return ureg_DECL_fs_input_cyl_centroid(ureg,
+ semantic_name,
+ semantic_index,
+ interp_mode,
+ cylindrical_wrap,
+ 0);
+}
static INLINE struct ureg_src
ureg_DECL_fs_input(struct ureg_program *ureg,
@@ -170,11 +186,11 @@ ureg_DECL_fs_input(struct ureg_program *ureg,
unsigned semantic_index,
unsigned interp_mode)
{
- return ureg_DECL_fs_input_cyl(ureg,
+ return ureg_DECL_fs_input_cyl_centroid(ureg,
semantic_name,
semantic_index,
interp_mode,
- 0);
+ 0, 0);
}
struct ureg_src
diff --git a/src/gallium/auxiliary/util/u_atomic.h b/src/gallium/auxiliary/util/u_atomic.h
index a156823390..8434491a42 100644
--- a/src/gallium/auxiliary/util/u_atomic.h
+++ b/src/gallium/auxiliary/util/u_atomic.h
@@ -29,6 +29,8 @@
#define PIPE_ATOMIC_ASM_MSVC_X86
#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86))
#define PIPE_ATOMIC_ASM_GCC_X86
+#elif (defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86_64))
+#define PIPE_ATOMIC_ASM_GCC_X86_64
#elif defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 401)
#define PIPE_ATOMIC_GCC_INTRINSIC
#else
@@ -36,6 +38,51 @@
#endif
+#if defined(PIPE_ATOMIC_ASM_GCC_X86_64)
+#define PIPE_ATOMIC "GCC x86_64 assembly"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define p_atomic_set(_v, _i) (*(_v) = (_i))
+#define p_atomic_read(_v) (*(_v))
+
+static INLINE boolean
+p_atomic_dec_zero(int32_t *v)
+{
+ unsigned char c;
+
+ __asm__ __volatile__("lock; decl %0; sete %1":"+m"(*v), "=qm"(c)
+ ::"memory");
+
+ return c != 0;
+}
+
+static INLINE void
+p_atomic_inc(int32_t *v)
+{
+ __asm__ __volatile__("lock; incl %0":"+m"(*v));
+}
+
+static INLINE void
+p_atomic_dec(int32_t *v)
+{
+ __asm__ __volatile__("lock; decl %0":"+m"(*v));
+}
+
+static INLINE int32_t
+p_atomic_cmpxchg(int32_t *v, int32_t old, int32_t _new)
+{
+ return __sync_val_compare_and_swap(v, old, _new);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PIPE_ATOMIC_ASM_GCC_X86_64 */
+
#if defined(PIPE_ATOMIC_ASM_GCC_X86)
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index f93ef26ae7..a163f93cb8 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -268,7 +268,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
pipe->delete_fs_state(pipe, ctx->fs_texfetch_depth[i]);
}
- for (i = 0; i <= PIPE_MAX_COLOR_BUFS && ctx->fs_col[i]; i++)
+ for (i = 0; i <= PIPE_MAX_COLOR_BUFS; i++)
if (ctx->fs_col[i])
pipe->delete_fs_state(pipe, ctx->fs_col[i]);
@@ -964,16 +964,18 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
blitter_restore_CSOs(ctx);
}
-/* Clear a region of a depth stencil surface. */
-void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
- struct pipe_surface *dstsurf)
+/* draw a rectangle across a region using a custom dsa stage - for r600g */
+void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
+ struct pipe_surface *zsurf,
+ struct pipe_surface *cbsurf,
+ void *dsa_stage, float depth)
{
struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
struct pipe_context *pipe = ctx->base.pipe;
struct pipe_framebuffer_state fb_state;
- assert(dstsurf->texture);
- if (!dstsurf->texture)
+ assert(zsurf->texture);
+ if (!zsurf->texture)
return;
/* check the saved state */
@@ -981,8 +983,8 @@ void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
assert(blitter->saved_fb_state.nr_cbufs != ~0);
/* bind CSOs */
- pipe->bind_blend_state(pipe, ctx->blend_keep_color);
- pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil);
+ pipe->bind_blend_state(pipe, ctx->blend_write_color);
+ pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage);
pipe->bind_rasterizer_state(pipe, ctx->rs_state);
pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, 0));
@@ -990,15 +992,30 @@ void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
/* set a framebuffer state */
- fb_state.width = dstsurf->width;
- fb_state.height = dstsurf->height;
- fb_state.nr_cbufs = 0;
- fb_state.cbufs[0] = 0;
- fb_state.zsbuf = dstsurf;
+ fb_state.width = zsurf->width;
+ fb_state.height = zsurf->height;
+ fb_state.nr_cbufs = 1;
+ if (cbsurf) {
+ fb_state.cbufs[0] = cbsurf;
+ fb_state.nr_cbufs = 1;
+ } else {
+ fb_state.cbufs[0] = NULL;
+ fb_state.nr_cbufs = 0;
+ }
+ fb_state.zsbuf = zsurf;
pipe->set_framebuffer_state(pipe, &fb_state);
- blitter_set_dst_dimensions(ctx, dstsurf->width, dstsurf->height);
- blitter->draw_rectangle(blitter, 0, 0, dstsurf->width, dstsurf->height, 0,
+ blitter_set_dst_dimensions(ctx, zsurf->width, zsurf->height);
+ blitter->draw_rectangle(blitter, 0, 0, zsurf->width, zsurf->height, depth,
UTIL_BLITTER_ATTRIB_NONE, NULL);
blitter_restore_CSOs(ctx);
}
+
+/* flush a region of a depth stencil surface for r300g */
+void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
+ struct pipe_surface *dstsurf)
+{
+ struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+ util_blitter_custom_depth_stencil(blitter, dstsurf, NULL,
+ ctx->dsa_flush_depth_stencil, 0.0f);
+}
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index e33d2e283f..f9f96f25c7 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -203,6 +203,12 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
void util_blitter_flush_depth_stencil(struct blitter_context *blitter,
struct pipe_surface *dstsurf);
+
+void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
+ struct pipe_surface *zsurf,
+ struct pipe_surface *cbsurf,
+ void *dsa_stage, float depth);
+
/* The functions below should be used to save currently bound constant state
* objects inside a driver. The objects are automatically restored at the end
* of the util_blitter_{clear, copy_region, fill_region} functions and then
diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index 016e73c4a1..1fbd83841c 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -109,9 +109,12 @@ PIPE_FORMAT_Z32_UNORM , plain, 1, 1, un32, , , , x___,
PIPE_FORMAT_Z32_FLOAT , plain, 1, 1, f32 , , , , x___, zs
PIPE_FORMAT_Z24_UNORM_S8_USCALED , plain, 1, 1, un24, u8 , , , xy__, zs
PIPE_FORMAT_S8_USCALED_Z24_UNORM , plain, 1, 1, u8 , un24, , , yx__, zs
+PIPE_FORMAT_X24S8_USCALED , plain, 1, 1, x24, u8 , , , _y__, zs
+PIPE_FORMAT_S8X24_USCALED , plain, 1, 1, u8 , x24 , , , _x__, zs
PIPE_FORMAT_Z24X8_UNORM , plain, 1, 1, un24, x8 , , , x___, zs
PIPE_FORMAT_X8Z24_UNORM , plain, 1, 1, x8 , un24, , , y___, zs
PIPE_FORMAT_Z32_FLOAT_S8X24_USCALED , plain, 1, 1, f32, u8 , x24 , , xy__, zs
+PIPE_FORMAT_X32_S8X24_USCALED , plain, 1, 1, x32, u8 , x24 , , _y__, zs
# YUV formats
# http://www.fourcc.org/yuv.php#UYVY
diff --git a/src/gallium/auxiliary/util/u_format_zs.c b/src/gallium/auxiliary/util/u_format_zs.c
index 792d69c214..80081e22f7 100644
--- a/src/gallium/auxiliary/util/u_format_zs.c
+++ b/src/gallium/auxiliary/util/u_format_zs.c
@@ -918,3 +918,56 @@ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned d
}
}
+
+void
+util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_z24_unorm_s8_uscaled_unpack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
+
+void
+util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_z24_unorm_s8_uscaled_pack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
+
+void
+util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_s8_uscaled_z24_unorm_unpack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
+
+void
+util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)
+{
+ util_format_s8_uscaled_z24_unorm_pack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
+
+void
+util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+
+}
+
+void
+util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride,
+ const uint8_t *src_row, unsigned src_stride,
+ unsigned width, unsigned height)
+{
+ util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(dst_row, dst_stride,
+ src_row, src_stride,
+ width, height);
+}
diff --git a/src/gallium/auxiliary/util/u_format_zs.h b/src/gallium/auxiliary/util/u_format_zs.h
index 650db4b95f..1604cc3eee 100644
--- a/src/gallium/auxiliary/util/u_format_zs.h
+++ b/src/gallium/auxiliary/util/u_format_zs.h
@@ -192,5 +192,21 @@ util_format_z32_float_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned
void
util_format_z32_float_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+void
+util_format_x24s8_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_x24s8_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+void
+util_format_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_x32_s8x24_uscaled_unpack_s_8uscaled(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
+
+void
+util_format_x32_s8x24_uscaled_pack_s_8uscaled(uint8_t *dst_row, unsigned dst_sride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
#endif /* U_FORMAT_ZS_H_ */
diff --git a/src/gallium/auxiliary/util/u_index_modify.c b/src/gallium/auxiliary/util/u_index_modify.c
new file mode 100644
index 0000000000..65b079ed53
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_index_modify.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "pipe/p_context.h"
+#include "util/u_index_modify.h"
+#include "util/u_inlines.h"
+
+void util_shorten_ubyte_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start,
+ unsigned count)
+{
+ struct pipe_screen* screen = context->screen;
+ struct pipe_resource* new_elts;
+ unsigned char *in_map;
+ unsigned short *out_map;
+ struct pipe_transfer *src_transfer, *dst_transfer;
+ unsigned i;
+
+ new_elts = pipe_buffer_create(screen,
+ PIPE_BIND_INDEX_BUFFER,
+ 2 * count);
+
+ in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer);
+ out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer);
+
+ in_map += start;
+
+ for (i = 0; i < count; i++) {
+ *out_map = (unsigned short)(*in_map + index_bias);
+ in_map++;
+ out_map++;
+ }
+
+ pipe_buffer_unmap(context, *elts, src_transfer);
+ pipe_buffer_unmap(context, new_elts, dst_transfer);
+
+ *elts = new_elts;
+}
+
+void util_rebuild_ushort_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start, unsigned count)
+{
+ struct pipe_transfer *in_transfer = NULL;
+ struct pipe_transfer *out_transfer = NULL;
+ struct pipe_resource *new_elts;
+ unsigned short *in_map;
+ unsigned short *out_map;
+ unsigned i;
+
+ new_elts = pipe_buffer_create(context->screen,
+ PIPE_BIND_INDEX_BUFFER,
+ 2 * count);
+
+ in_map = pipe_buffer_map(context, *elts,
+ PIPE_TRANSFER_READ, &in_transfer);
+ out_map = pipe_buffer_map(context, new_elts,
+ PIPE_TRANSFER_WRITE, &out_transfer);
+
+ in_map += start;
+ for (i = 0; i < count; i++) {
+ *out_map = (unsigned short)(*in_map + index_bias);
+ in_map++;
+ out_map++;
+ }
+
+ pipe_buffer_unmap(context, *elts, in_transfer);
+ pipe_buffer_unmap(context, new_elts, out_transfer);
+
+ *elts = new_elts;
+}
+
+void util_rebuild_uint_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start, unsigned count)
+{
+ struct pipe_transfer *in_transfer = NULL;
+ struct pipe_transfer *out_transfer = NULL;
+ struct pipe_resource *new_elts;
+ unsigned int *in_map;
+ unsigned int *out_map;
+ unsigned i;
+
+ new_elts = pipe_buffer_create(context->screen,
+ PIPE_BIND_INDEX_BUFFER,
+ 2 * count);
+
+ in_map = pipe_buffer_map(context, *elts,
+ PIPE_TRANSFER_READ, &in_transfer);
+ out_map = pipe_buffer_map(context, new_elts,
+ PIPE_TRANSFER_WRITE, &out_transfer);
+
+ in_map += start;
+ for (i = 0; i < count; i++) {
+ *out_map = (unsigned int)(*in_map + index_bias);
+ in_map++;
+ out_map++;
+ }
+
+ pipe_buffer_unmap(context, *elts, in_transfer);
+ pipe_buffer_unmap(context, new_elts, out_transfer);
+
+ *elts = new_elts;
+}
diff --git a/src/gallium/auxiliary/util/u_index_modify.h b/src/gallium/auxiliary/util/u_index_modify.h
new file mode 100644
index 0000000000..01a6cae94f
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_index_modify.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef UTIL_INDEX_MODIFY_H
+#define UTIL_INDEX_MODIFY_H
+
+void util_shorten_ubyte_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start,
+ unsigned count);
+
+void util_rebuild_ushort_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start, unsigned count);
+
+void util_rebuild_uint_elts(struct pipe_context *context,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start, unsigned count);
+#endif
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 69a7681494..37294b7203 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -118,6 +118,11 @@ __inline double __cdecl atan2(double val)
#endif
+#ifndef M_SQRT2
+#define M_SQRT2 1.41421356237309504880
+#endif
+
+
#if defined(_MSC_VER)
#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index aae8b8bdf1..5378f2d782 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -394,7 +394,7 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color *
return;
case PIPE_FORMAT_B4G4R4A4_UNORM:
{
- uc->ub = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
+ uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
}
return;
case PIPE_FORMAT_A8_UNORM:
@@ -434,8 +434,8 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color *
/* Integer versions of util_pack_z and util_pack_z_stencil - useful for
* constructing clear masks.
*/
-static INLINE uint
-util_pack_uint_z(enum pipe_format format, unsigned z)
+static INLINE uint32_t
+util_pack_mask_z(enum pipe_format format, uint32_t z)
{
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
@@ -452,29 +452,32 @@ util_pack_uint_z(enum pipe_format format, unsigned z)
case PIPE_FORMAT_S8_USCALED:
return 0;
default:
- debug_print_format("gallium: unhandled format in util_pack_z()", format);
+ debug_print_format("gallium: unhandled format in util_pack_mask_z()", format);
assert(0);
return 0;
}
}
-static INLINE uint
-util_pack_uint_z_stencil(enum pipe_format format, double z, uint s)
+static INLINE uint32_t
+util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s)
{
- unsigned packed = util_pack_uint_z(format, z);
-
- s &= 0xff;
+ uint32_t packed = util_pack_mask_z(format, z);
switch (format) {
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- return packed | (s << 24);
+ packed |= (uint32_t)s << 24;
+ break;
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- return packed | s;
+ packed |= s;
+ break;
case PIPE_FORMAT_S8_USCALED:
- return packed | s;
+ packed |= s;
+ break;
default:
- return packed;
+ break;
}
+
+ return packed;
}
@@ -482,9 +485,11 @@ util_pack_uint_z_stencil(enum pipe_format format, double z, uint s)
/**
* Note: it's assumed that z is in [0,1]
*/
-static INLINE uint
+static INLINE uint32_t
util_pack_z(enum pipe_format format, double z)
{
+ union fi fui;
+
if (z == 0.0)
return 0;
@@ -492,24 +497,25 @@ util_pack_z(enum pipe_format format, double z)
case PIPE_FORMAT_Z16_UNORM:
if (z == 1.0)
return 0xffff;
- return (uint) (z * 0xffff);
+ return (uint32_t) (z * 0xffff);
case PIPE_FORMAT_Z32_UNORM:
/* special-case to avoid overflow */
if (z == 1.0)
return 0xffffffff;
- return (uint) (z * 0xffffffff);
+ return (uint32_t) (z * 0xffffffff);
case PIPE_FORMAT_Z32_FLOAT:
- return (uint)z;
+ fui.f = (float)z;
+ return fui.ui;
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
case PIPE_FORMAT_Z24X8_UNORM:
if (z == 1.0)
return 0xffffff;
- return (uint) (z * 0xffffff);
+ return (uint32_t) (z * 0xffffff);
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
if (z == 1.0)
return 0xffffff00;
- return ((uint) (z * 0xffffff)) << 8;
+ return ((uint32_t) (z * 0xffffff)) << 8;
case PIPE_FORMAT_S8_USCALED:
/* this case can get it via util_pack_z_stencil() */
return 0;
@@ -525,14 +531,14 @@ util_pack_z(enum pipe_format format, double z)
* Pack Z and/or stencil values into a 32-bit value described by format.
* Note: it's assumed that z is in [0,1] and s in [0,255]
*/
-static INLINE uint
-util_pack_z_stencil(enum pipe_format format, double z, uint s)
+static INLINE uint32_t
+util_pack_z_stencil(enum pipe_format format, double z, uint8_t s)
{
- unsigned packed = util_pack_z(format, z);
+ uint32_t packed = util_pack_z(format, z);
switch (format) {
case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- packed |= s << 24;
+ packed |= (uint32_t)s << 24;
break;
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
packed |= s;
diff --git a/src/gallium/auxiliary/util/u_simple_list.h b/src/gallium/auxiliary/util/u_simple_list.h
index f5f43b0faa..fe59771371 100644
--- a/src/gallium/auxiliary/util/u_simple_list.h
+++ b/src/gallium/auxiliary/util/u_simple_list.h
@@ -46,6 +46,8 @@
do { \
(elem)->next->prev = (elem)->prev; \
(elem)->prev->next = (elem)->next; \
+ (elem)->next = elem; \
+ (elem)->prev = elem; \
} while (0)
/**
diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h
index 87959ab0aa..1df6c87267 100644
--- a/src/gallium/auxiliary/util/u_sse.h
+++ b/src/gallium/auxiliary/util/u_sse.h
@@ -71,6 +71,96 @@ _mm_castps_si128(__m128 a)
#endif /* defined(_MSC_VER) && _MSC_VER < 1500 */
+union m128i {
+ __m128i m;
+ ubyte ub[16];
+ ushort us[8];
+ uint ui[4];
+};
+
+static INLINE void u_print_epi8(const char *name, __m128i r)
+{
+ union { __m128i m; ubyte ub[16]; } u;
+ u.m = r;
+
+ debug_printf("%s: "
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x/"
+ "%02x\n",
+ name,
+ u.ub[0], u.ub[1], u.ub[2], u.ub[3],
+ u.ub[4], u.ub[5], u.ub[6], u.ub[7],
+ u.ub[8], u.ub[9], u.ub[10], u.ub[11],
+ u.ub[12], u.ub[13], u.ub[14], u.ub[15]);
+}
+
+static INLINE void u_print_epi16(const char *name, __m128i r)
+{
+ union { __m128i m; ushort us[8]; } u;
+ u.m = r;
+
+ debug_printf("%s: "
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x/"
+ "%04x\n",
+ name,
+ u.us[0], u.us[1], u.us[2], u.us[3],
+ u.us[4], u.us[5], u.us[6], u.us[7]);
+}
+
+static INLINE void u_print_epi32(const char *name, __m128i r)
+{
+ union { __m128i m; uint ui[4]; } u;
+ u.m = r;
+
+ debug_printf("%s: "
+ "%08x/"
+ "%08x/"
+ "%08x/"
+ "%08x\n",
+ name,
+ u.ui[0], u.ui[1], u.ui[2], u.ui[3]);
+}
+
+static INLINE void u_print_ps(const char *name, __m128 r)
+{
+ union { __m128 m; float f[4]; } u;
+ u.m = r;
+
+ debug_printf("%s: "
+ "%f/"
+ "%f/"
+ "%f/"
+ "%f\n",
+ name,
+ u.f[0], u.f[1], u.f[2], u.f[3]);
+}
+
+
+#define U_DUMP_EPI32(a) u_print_epi32(#a, a)
+#define U_DUMP_EPI16(a) u_print_epi16(#a, a)
+#define U_DUMP_EPI8(a) u_print_epi8(#a, a)
+#define U_DUMP_PS(a) u_print_ps(#a, a)
+
+
#if defined(PIPE_ARCH_SSSE3)
@@ -78,8 +168,6 @@ _mm_castps_si128(__m128 a)
#else /* !PIPE_ARCH_SSSE3 */
-#include <emmintrin.h>
-
/**
* Describe _mm_shuffle_epi8() with gcc extended inline assembly, for cases
* where -mssse3 is not supported/enabled.
@@ -100,6 +188,68 @@ _mm_shuffle_epi8(__m128i a, __m128i mask)
#endif /* !PIPE_ARCH_SSSE3 */
-#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */
+
+
+/* Provide an SSE2 implementation of _mm_mullo_epi32() in terms of
+ * _mm_mul_epu32().
+ *
+ * I suspect this works fine for us because one of our operands is
+ * always positive, but not sure that this can be used for general
+ * signed integer multiplication.
+ *
+ * This seems close enough to the speed of SSE4 and the real
+ * _mm_mullo_epi32() intrinsic as to not justify adding an sse4
+ * dependency at this point.
+ */
+static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b)
+{
+ __m128i a4 = _mm_srli_epi64(a, 32); /* shift by one dword */
+ __m128i b4 = _mm_srli_epi64(b, 32); /* shift by one dword */
+ __m128i ba = _mm_mul_epu32(b, a); /* multply dwords 0, 2 */
+ __m128i b4a4 = _mm_mul_epu32(b4, a4); /* multiply dwords 1, 3 */
+
+ /* Interleave the results, either with shuffles or (slightly
+ * faster) direct bit operations:
+ */
+#if 0
+ __m128i ba8 = _mm_shuffle_epi32(ba, 8);
+ __m128i b4a48 = _mm_shuffle_epi32(b4a4, 8);
+ __m128i result = _mm_unpacklo_epi32(ba8, b4a48);
+#else
+ __m128i mask = _mm_setr_epi32(~0,0,~0,0);
+ __m128i ba_mask = _mm_and_si128(ba, mask);
+ __m128i b4a4_mask_shift = _mm_slli_epi64(b4a4, 32);
+ __m128i result = _mm_or_si128(ba_mask, b4a4_mask_shift);
+#endif
+
+ return result;
+}
+
+
+static INLINE void
+transpose4_epi32(const __m128i * restrict a,
+ const __m128i * restrict b,
+ const __m128i * restrict c,
+ const __m128i * restrict d,
+ __m128i * restrict o,
+ __m128i * restrict p,
+ __m128i * restrict q,
+ __m128i * restrict r)
+{
+ __m128i t0 = _mm_unpacklo_epi32(*a, *b);
+ __m128i t1 = _mm_unpacklo_epi32(*c, *d);
+ __m128i t2 = _mm_unpackhi_epi32(*a, *b);
+ __m128i t3 = _mm_unpackhi_epi32(*c, *d);
+
+ *o = _mm_unpacklo_epi64(t0, t1);
+ *p = _mm_unpackhi_epi64(t0, t1);
+ *q = _mm_unpacklo_epi64(t2, t3);
+ *r = _mm_unpackhi_epi64(t2, t3);
+}
+
+#define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i))
+
+
+#endif /* PIPE_ARCH_SSE */
#endif /* U_SSE_H_ */
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index af99163b2e..f78b6838a7 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -332,7 +332,7 @@ util_clear_depth_stencil(struct pipe_context *pipe,
uint32_t *row = (uint32_t *)dst_map;
for (j = 0; j < width; j++) {
uint32_t tmp = *row & dst_mask;
- *row++ = tmp & (zstencil & ~dst_mask);
+ *row++ = tmp | (zstencil & ~dst_mask);
}
dst_map += dst_stride;
}
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index f7aa1403d0..44cadbfcdd 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -217,6 +217,81 @@ z24s8_get_tile_rgba(const unsigned *src,
}
}
+/*** PIPE_FORMAT_S8X24_USCALED ***/
+
+/**
+ * Return S component as four uint32_t in [0..255]. Z part ignored.
+ */
+static void
+s8x24_get_tile_rgba(const unsigned *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = (float)((*src++ >> 24) & 0xff);
+ }
+
+ p += dst_stride;
+ }
+}
+
+/*** PIPE_FORMAT_X24S8_USCALED ***/
+
+/**
+ * Return S component as four uint32_t in [0..255]. Z part ignored.
+ */
+static void
+x24s8_get_tile_rgba(const unsigned *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = (float)(*src++ & 0xff);
+ }
+ p += dst_stride;
+ }
+}
+
+
+/**
+ * Return S component as four uint32_t in [0..255]. Z part ignored.
+ */
+static void
+s8_get_tile_rgba(const unsigned char *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = (float)(*src++ & 0xff);
+ }
+ p += dst_stride;
+ }
+}
/*** PIPE_FORMAT_Z32_FLOAT ***/
@@ -261,10 +336,19 @@ pipe_tile_raw_to_rgba(enum pipe_format format,
case PIPE_FORMAT_Z24X8_UNORM:
s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
break;
+ case PIPE_FORMAT_S8_USCALED:
+ s8_get_tile_rgba((unsigned char *) src, w, h, dst, dst_stride);
+ break;
+ case PIPE_FORMAT_X24S8_USCALED:
+ s8x24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
+ break;
case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
case PIPE_FORMAT_X8Z24_UNORM:
z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
break;
+ case PIPE_FORMAT_S8X24_USCALED:
+ x24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride);
+ break;
case PIPE_FORMAT_Z32_FLOAT:
z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride);
break;