summary | refs | log | tree | commit | diff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- src/gallium/auxiliary/Makefile | 36
-rw-r--r-- src/gallium/auxiliary/SConscript | 37
-rw-r--r-- src/gallium/auxiliary/draw/draw_cliptest_tmp.h | 114
-rw-r--r-- src/gallium/auxiliary/draw/draw_context.c | 113
-rw-r--r-- src/gallium/auxiliary/draw/draw_context.h | 23
-rw-r--r-- src/gallium/auxiliary/draw/draw_decompose_tmp.h | 26
-rw-r--r-- src/gallium/auxiliary/draw/draw_gs.c | 3
-rw-r--r-- src/gallium/auxiliary/draw/draw_gs_tmp.h | 6
-rw-r--r-- src/gallium/auxiliary/draw/draw_llvm.c | 74
-rw-r--r-- src/gallium/auxiliary/draw/draw_llvm.h | 54
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe.c | 32
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_validate.c | 2
-rw-r--r-- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 3
-rw-r--r-- src/gallium/auxiliary/draw/draw_private.h | 44
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt.c | 139
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt.h | 51
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_elts.c | 89
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_emit.c | 11
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 29
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 29
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 27
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c | 35
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_post_vs.c | 288
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_so_emit.c | 2
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_util.c | 7
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_varray.c | 200
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 238
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h | 103
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_vcache.c | 610
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h | 19
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_vsplit.c | 208
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h | 309
-rw-r--r-- src/gallium/auxiliary/draw/draw_so_emit_tmp.h | 6
-rw-r--r-- src/gallium/auxiliary/draw/draw_split_tmp.h | 176
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_llvm.c | 6
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 48
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_debug.c | 2
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_format_aos.c | 4
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 14
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_pack.h | 2
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample.c | 103
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample.h | 11
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 307
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 8
-rw-r--r-- src/gallium/auxiliary/gallivm/lp_bld_type.h | 8
-rw-r--r-- src/gallium/auxiliary/os/os_stream.c | 58
-rw-r--r-- src/gallium/auxiliary/os/os_stream.h | 25
-rw-r--r-- src/gallium/auxiliary/os/os_stream_log.c | 3
-rw-r--r-- src/gallium/auxiliary/os/os_stream_null.c | 8
-rw-r--r-- src/gallium/auxiliary/os/os_stream_stdc.c | 9
-rw-r--r-- src/gallium/auxiliary/os/os_stream_str.c | 1
-rw-r--r-- src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 3
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_cpu.c | 6
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 499
-rw-r--r-- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 100
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_dump.h | 1
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_exec.c | 5
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_info.h | 1
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_parse.c | 27
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_parse.h | 12
-rw-r--r-- src/gallium/auxiliary/tgsi/tgsi_sse2.h | 5
-rw-r--r-- src/gallium/auxiliary/translate/translate.c | 2
-rw-r--r-- src/gallium/auxiliary/translate/translate.h | 12
-rw-r--r-- src/gallium/auxiliary/translate/translate_generic.c | 213
-rw-r--r-- src/gallium/auxiliary/translate/translate_sse.c | 1378
-rw-r--r-- src/gallium/auxiliary/util/u_bitmask.h | 3
-rw-r--r-- src/gallium/auxiliary/util/u_blit.c | 130
-rw-r--r-- src/gallium/auxiliary/util/u_blit.h | 12
-rw-r--r-- src/gallium/auxiliary/util/u_blitter.c | 54
-rw-r--r-- src/gallium/auxiliary/util/u_blitter.h | 36
-rw-r--r-- src/gallium/auxiliary/util/u_cpu_detect.c | 177
-rw-r--r-- src/gallium/auxiliary/util/u_debug_describe.c | 81
-rw-r--r-- src/gallium/auxiliary/util/u_debug_describe.h | 49
-rw-r--r-- src/gallium/auxiliary/util/u_debug_refcnt.c | 181
-rw-r--r-- src/gallium/auxiliary/util/u_debug_refcnt.h | 63
-rw-r--r-- src/gallium/auxiliary/util/u_debug_symbol.c | 97
-rw-r--r-- src/gallium/auxiliary/util/u_debug_symbol.h | 7
-rw-r--r-- src/gallium/auxiliary/util/u_dirty_surfaces.h | 30
-rw-r--r-- src/gallium/auxiliary/util/u_draw.h | 1
-rw-r--r-- src/gallium/auxiliary/util/u_dynarray.h | 3
-rw-r--r-- src/gallium/auxiliary/util/u_gen_mipmap.c | 1
-rw-r--r-- src/gallium/auxiliary/util/u_inlines.h | 25
-rw-r--r-- src/gallium/auxiliary/util/u_linkage.c | 149
-rw-r--r-- src/gallium/auxiliary/util/u_linkage.h | 66
-rw-r--r-- src/gallium/auxiliary/util/u_math.h | 13
-rw-r--r-- src/gallium/auxiliary/util/u_pack_color.h | 8
-rw-r--r-- src/gallium/auxiliary/util/u_rect.c | 51
-rw-r--r-- src/gallium/auxiliary/util/u_rect.h | 60
-rw-r--r-- src/gallium/auxiliary/util/u_simple_shaders.c | 1
-rw-r--r-- src/gallium/auxiliary/util/u_split_prim.h | 13
-rw-r--r-- src/gallium/auxiliary/util/u_staging.c | 38
-rw-r--r-- src/gallium/auxiliary/util/u_staging.h | 34
-rw-r--r-- src/gallium/auxiliary/util/u_surface.c | 46
-rw-r--r-- src/gallium/auxiliary/util/u_surfaces.c | 26
-rw-r--r-- src/gallium/auxiliary/util/u_surfaces.h | 30
-rw-r--r-- src/gallium/auxiliary/util/u_tile.h | 3
-rw-r--r-- src/gallium/auxiliary/util/u_transfer.h | 1
-rw-r--r-- src/gallium/auxiliary/util/u_upload_mgr.h | 5
98 files changed, 4680 insertions, 2838 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 9544e90a96..eb86d83d2a 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -4,8 +4,8 @@ include $(TOP)/configs/current
LIBNAME = gallium
C_SOURCES = \
- cso_cache/cso_context.c \
cso_cache/cso_cache.c \
+ cso_cache/cso_context.c \
cso_cache/cso_hash.c \
draw/draw_context.c \
draw/draw_gs.c \
@@ -26,7 +26,6 @@ C_SOURCES = \
draw/draw_pipe_wide_line.c \
draw/draw_pipe_wide_point.c \
draw/draw_pt.c \
- draw/draw_pt_elts.c \
draw/draw_pt_emit.c \
draw/draw_pt_fetch.c \
draw/draw_pt_fetch_emit.c \
@@ -35,24 +34,24 @@ C_SOURCES = \
draw/draw_pt_post_vs.c \
draw/draw_pt_so_emit.c \
draw/draw_pt_util.c \
- draw/draw_pt_varray.c \
- draw/draw_pt_vcache.c \
+ draw/draw_pt_vsplit.c \
draw/draw_vertex.c \
draw/draw_vs.c \
- draw/draw_vs_varient.c \
draw/draw_vs_aos.c \
draw/draw_vs_aos_io.c \
draw/draw_vs_aos_machine.c \
draw/draw_vs_exec.c \
draw/draw_vs_ppc.c \
draw/draw_vs_sse.c \
+ draw/draw_vs_varient.c \
indices/u_indices_gen.c \
indices/u_unfilled_gen.c \
os/os_misc.c \
+ os/os_stream.c \
os/os_stream_log.c \
+ os/os_stream_null.c \
os/os_stream_stdc.c \
os/os_stream_str.c \
- os/os_stream_null.c \
os/os_time.c \
pipebuffer/pb_buffer_fenced.c \
pipebuffer/pb_buffer_malloc.c \
@@ -65,17 +64,16 @@ C_SOURCES = \
pipebuffer/pb_bufmgr_slab.c \
pipebuffer/pb_validate.c \
rbug/rbug_connection.c \
+ rbug/rbug_context.c \
rbug/rbug_core.c \
+ rbug/rbug_demarshal.c \
rbug/rbug_texture.c \
- rbug/rbug_context.c \
rbug/rbug_shader.c \
- rbug/rbug_demarshal.c \
rtasm/rtasm_cpu.c \
rtasm/rtasm_execmem.c \
- rtasm/rtasm_x86sse.c \
rtasm/rtasm_ppc.c \
rtasm/rtasm_ppc_spe.c \
- tgsi/tgsi_sanity.c \
+ rtasm/rtasm_x86sse.c \
tgsi/tgsi_build.c \
tgsi/tgsi_dump.c \
tgsi/tgsi_exec.c \
@@ -83,19 +81,22 @@ C_SOURCES = \
tgsi/tgsi_iterate.c \
tgsi/tgsi_parse.c \
tgsi/tgsi_ppc.c \
+ tgsi/tgsi_sanity.c \
tgsi/tgsi_scan.c \
tgsi/tgsi_sse2.c \
tgsi/tgsi_text.c \
tgsi/tgsi_transform.c \
tgsi/tgsi_ureg.c \
tgsi/tgsi_util.c \
- translate/translate_generic.c \
- translate/translate_sse.c \
translate/translate.c \
translate/translate_cache.c \
+ translate/translate_generic.c \
+ translate/translate_sse.c \
util/u_debug.c \
- util/u_debug_symbol.c \
+ util/u_debug_describe.c \
+ util/u_debug_refcnt.c \
util/u_debug_stack.c \
+ util/u_debug_symbol.c \
util/u_dump_defines.c \
util/u_dump_state.c \
util/u_bitmask.c \
@@ -118,10 +119,11 @@ C_SOURCES = \
util/u_gen_mipmap.c \
util/u_half.c \
util/u_handle_table.c \
- util/u_hash_table.c \
util/u_hash.c \
+ util/u_hash_table.c \
util/u_keymap.c \
util/u_linear.c \
+ util/u_linkage.c \
util/u_network.c \
util/u_math.c \
util/u_mempool.c \
@@ -172,10 +174,10 @@ GALLIVM_SOURCES = \
gallivm/lp_bld_tgsi_soa.c \
gallivm/lp_bld_type.c \
draw/draw_llvm.c \
- draw/draw_vs_llvm.c \
- draw/draw_pt_fetch_shade_pipeline_llvm.c \
+ draw/draw_llvm_sample.c \
draw/draw_llvm_translate.c \
- draw/draw_llvm_sample.c
+ draw/draw_vs_llvm.c \
+ draw/draw_pt_fetch_shade_pipeline_llvm.c
GALLIVM_CPP_SOURCES = \
gallivm/lp_bld_misc.cpp
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 3124e20ce8..6210ada990 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -50,10 +50,11 @@ env.Depends('util/u_format_table.c', [
])
source = [
- 'cso_cache/cso_context.c',
'cso_cache/cso_cache.c',
+ 'cso_cache/cso_context.c',
'cso_cache/cso_hash.c',
'draw/draw_context.c',
+ 'draw/draw_gs.c',
'draw/draw_pipe.c',
'draw/draw_pipe_aaline.c',
'draw/draw_pipe_aapoint.c',
@@ -71,7 +72,6 @@ source = [
'draw/draw_pipe_wide_line.c',
'draw/draw_pipe_wide_point.c',
'draw/draw_pt.c',
- 'draw/draw_pt_elts.c',
'draw/draw_pt_emit.c',
'draw/draw_pt_fetch.c',
'draw/draw_pt_fetch_emit.c',
@@ -80,8 +80,7 @@ source = [
'draw/draw_pt_post_vs.c',
'draw/draw_pt_so_emit.c',
'draw/draw_pt_util.c',
- 'draw/draw_pt_varray.c',
- 'draw/draw_pt_vcache.c',
+ 'draw/draw_pt_vsplit.c',
'draw/draw_vertex.c',
'draw/draw_vs.c',
'draw/draw_vs_aos.c',
@@ -91,16 +90,16 @@ source = [
'draw/draw_vs_ppc.c',
'draw/draw_vs_sse.c',
'draw/draw_vs_varient.c',
- 'draw/draw_gs.c',
#'indices/u_indices.c',
#'indices/u_unfilled_indices.c',
'indices/u_indices_gen.c',
'indices/u_unfilled_gen.c',
'os/os_misc.c',
+ 'os/os_stream.c',
'os/os_stream_log.c',
+ 'os/os_stream_null.c',
'os/os_stream_stdc.c',
'os/os_stream_str.c',
- 'os/os_stream_null.c',
'os/os_time.c',
'pipebuffer/pb_buffer_fenced.c',
'pipebuffer/pb_buffer_malloc.c',
@@ -112,35 +111,35 @@ source = [
'pipebuffer/pb_bufmgr_pool.c',
'pipebuffer/pb_bufmgr_slab.c',
'pipebuffer/pb_validate.c',
+ 'rbug/rbug_connection.c',
+ 'rbug/rbug_context.c',
'rbug/rbug_core.c',
+ 'rbug/rbug_demarshal.c',
'rbug/rbug_shader.c',
- 'rbug/rbug_context.c',
'rbug/rbug_texture.c',
- 'rbug/rbug_demarshal.c',
- 'rbug/rbug_connection.c',
'rtasm/rtasm_cpu.c',
'rtasm/rtasm_execmem.c',
- 'rtasm/rtasm_x86sse.c',
'rtasm/rtasm_ppc.c',
'rtasm/rtasm_ppc_spe.c',
+ 'rtasm/rtasm_x86sse.c',
'tgsi/tgsi_build.c',
'tgsi/tgsi_dump.c',
'tgsi/tgsi_exec.c',
'tgsi/tgsi_info.c',
'tgsi/tgsi_iterate.c',
'tgsi/tgsi_parse.c',
+ 'tgsi/tgsi_ppc.c',
'tgsi/tgsi_sanity.c',
'tgsi/tgsi_scan.c',
- 'tgsi/tgsi_ppc.c',
'tgsi/tgsi_sse2.c',
'tgsi/tgsi_text.c',
'tgsi/tgsi_transform.c',
'tgsi/tgsi_ureg.c',
'tgsi/tgsi_util.c',
- 'translate/translate_generic.c',
- 'translate/translate_sse.c',
'translate/translate.c',
'translate/translate_cache.c',
+ 'translate/translate_generic.c',
+ 'translate/translate_sse.c',
'util/u_bitmask.c',
'util/u_blit.c',
'util/u_blitter.c',
@@ -148,7 +147,9 @@ source = [
'util/u_caps.c',
'util/u_cpu_detect.c',
'util/u_debug.c',
+ 'util/u_debug_describe.c',
'util/u_debug_memory.c',
+ 'util/u_debug_refcnt.c',
'util/u_debug_stack.c',
'util/u_debug_symbol.c',
'util/u_dump_defines.c',
@@ -170,6 +171,8 @@ source = [
'util/u_hash.c',
'util/u_hash_table.c',
'util/u_keymap.c',
+ 'util/u_linear.c',
+ 'util/u_linkage.c',
'util/u_network.c',
'util/u_math.c',
'util/u_mempool.c',
@@ -208,9 +211,9 @@ if env['llvm']:
'gallivm/lp_bld_format_soa.c',
'gallivm/lp_bld_format_yuv.c',
'gallivm/lp_bld_gather.c',
+ 'gallivm/lp_bld_init.c',
'gallivm/lp_bld_intr.c',
'gallivm/lp_bld_logic.c',
- 'gallivm/lp_bld_init.c',
'gallivm/lp_bld_misc.cpp',
'gallivm/lp_bld_pack.c',
'gallivm/lp_bld_printf.c',
@@ -222,10 +225,10 @@ if env['llvm']:
'gallivm/lp_bld_tgsi_soa.c',
'gallivm/lp_bld_type.c',
'draw/draw_llvm.c',
- 'draw/draw_pt_fetch_shade_pipeline_llvm.c',
+ 'draw/draw_llvm_sample.c',
'draw/draw_llvm_translate.c',
- 'draw/draw_vs_llvm.c',
- 'draw/draw_llvm_sample.c'
+ 'draw/draw_pt_fetch_shade_pipeline_llvm.c',
+ 'draw/draw_vs_llvm.c'
]
gallium = env.ConvenienceLibrary(
diff --git a/src/gallium/auxiliary/draw/draw_cliptest_tmp.h b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
new file mode 100644
index 0000000000..958ed20dc8
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
@@ -0,0 +1,114 @@
+/**************************************************************************
+ *
+ * Copyright 2010, VMware, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/* Clip-test every vertex in info->verts against the planes selected by FLAGS
+ * and, with DO_VIEWPORT, map unclipped vertices to the viewport. Uses TAG and FLAGS
+ * (both #undef'd at the end). Returns TRUE if any clip bit was set or any edgeflag cleared. */
+static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
+ struct draw_vertex_info *info )
+{
+ struct vertex_header *out = info->verts;
+ const float *scale = pvs->draw->viewport.scale;
+ const float *trans = pvs->draw->viewport.translate;
+ /* const */ float (*plane)[4] = pvs->draw->plane;
+ const unsigned pos = draw_current_shader_position_output(pvs->draw);
+ const unsigned ef = pvs->draw->vs.edgeflag_output;
+ const unsigned nr = pvs->draw->nr_planes;
+ const unsigned flags = (FLAGS);
+ unsigned need_pipeline = 0;
+ unsigned j;
+
+ for (j = 0; j < info->count; j++) {
+ float *position = out->data[pos];
+ unsigned mask = 0x0;
+
+ initialize_vertex_header(out);
+
+ if (flags & (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_HALF_Z | DO_CLIP_USER)) {
+ out->clip[0] = position[0];
+ out->clip[1] = position[1];
+ out->clip[2] = position[2];
+ out->clip[3] = position[3];
+
+ /* Do the hardwired planes first:
+ */
+ if (flags & DO_CLIP_XY) {
+ if (-position[0] + position[3] < 0) mask |= (1<<0);
+ if ( position[0] + position[3] < 0) mask |= (1<<1);
+ if (-position[1] + position[3] < 0) mask |= (1<<2);
+ if ( position[1] + position[3] < 0) mask |= (1<<3);
+ }
+
+ /* Clip Z planes according to full cube, half cube or none.
+ */
+ if (flags & DO_CLIP_FULL_Z) {
+ if ( position[2] + position[3] < 0) mask |= (1<<4);
+ if (-position[2] + position[3] < 0) mask |= (1<<5);
+ }
+ else if (flags & DO_CLIP_HALF_Z) {
+ if ( position[2] < 0) mask |= (1<<4);
+ if (-position[2] + position[3] < 0) mask |= (1<<5);
+ }
+ /* User planes start at index 6; mask bits 0..5 belong to the fixed tests above. */
+ if (flags & DO_CLIP_USER) {
+ unsigned i;
+ for (i = 6; i < nr; i++) {
+ if (dot4(position, plane[i]) < 0)
+ mask |= (1<<i);
+ }
+ }
+
+ out->clipmask = mask;
+ need_pipeline |= out->clipmask;
+ }
+ /* Vertices with any clip bit set skip the viewport transform below. */
+ if ((flags & DO_VIEWPORT) && mask == 0)
+ {
+ /* divide by w: w below holds the reciprocal 1/position[3] */
+ float w = 1.0f / position[3];
+
+ /* Viewport mapping */
+ position[0] = position[0] * w * scale[0] + trans[0];
+ position[1] = position[1] * w * scale[1] + trans[1];
+ position[2] = position[2] * w * scale[2] + trans[2];
+ position[3] = w;
+ }
+ /* edgeflag is set only for an output of exactly 1.0f. NOTE(review): ef == 0 presumably means no edgeflag output - confirm. */
+ if ((flags & DO_EDGEFLAG) && ef) {
+ const float *edgeflag = out->data[ef];
+ out->edgeflag = !(edgeflag[0] != 1.0f);
+ need_pipeline |= !out->edgeflag;
+ }
+ /* advance to the next vertex, info->stride bytes further on */
+ out = (struct vertex_header *)( (char *)out + info->stride );
+ }
+
+ return need_pipeline != 0;
+}
+
+
+#undef FLAGS
+#undef TAG
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 995b675b9a..937b093479 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -34,6 +34,7 @@
#include "pipe/p_context.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
#include "draw_context.h"
#include "draw_vs.h"
#include "draw_gs.h"
@@ -41,6 +42,25 @@
#if HAVE_LLVM
#include "gallivm/lp_bld_init.h"
#include "draw_llvm.h"
+/* Whether draw should use LLVM: the DRAW_USE_LLVM bool option (default TRUE), evaluated once and cached in a static. */
+static boolean
+draw_get_option_use_llvm(void)
+{
+ static boolean first = TRUE;
+ static boolean value;
+ if (first) {
+ first = FALSE;
+ value = debug_get_bool_option("DRAW_USE_LLVM", TRUE);
+ /* On x86 the CPU check below overrides the option when SSE2 is missing. */
+#ifdef PIPE_ARCH_X86
+ util_cpu_detect();
+ /* require SSE2 due to LLVM PR6960. */
+ if (!util_cpu_caps.has_sse2)
+ value = FALSE;
+#endif
+ }
+ return value;
+}
#endif
struct draw_context *draw_create( struct pipe_context *pipe )
@@ -50,10 +70,13 @@ struct draw_context *draw_create( struct pipe_context *pipe )
goto fail;
#if HAVE_LLVM
- lp_build_init();
- assert(lp_build_engine);
- draw->engine = lp_build_engine;
- draw->llvm = draw_llvm_create(draw);
+ if(draw_get_option_use_llvm())
+ {
+ lp_build_init();
+ assert(lp_build_engine);
+ draw->engine = lp_build_engine;
+ draw->llvm = draw_llvm_create(draw);
+ }
#endif
if (!draw_init(draw))
@@ -83,6 +106,8 @@ boolean draw_init(struct draw_context *draw)
ASSIGN_4V( draw->plane[4], 0, 0, 1, 1 ); /* yes these are correct */
ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */
draw->nr_planes = 6;
+ draw->clip_xy = 1;
+ draw->clip_z = 1;
draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
@@ -135,7 +160,8 @@ void draw_destroy( struct draw_context *draw )
draw_vs_destroy( draw );
draw_gs_destroy( draw );
#ifdef HAVE_LLVM
- draw_llvm_destroy( draw->llvm );
+ if(draw->llvm)
+ draw_llvm_destroy( draw->llvm );
#endif
FREE( draw );
@@ -162,6 +188,14 @@ void draw_set_mrd(struct draw_context *draw, double mrd)
}
+static void update_clip_flags( struct draw_context *draw )
+{ /* recompute the derived clip_xy / clip_z / clip_user flags */
+ draw->clip_xy = !draw->driver.bypass_clip_xy; /* x/y clipping unless the driver bypasses it */
+ draw->clip_z = (!draw->driver.bypass_clip_z &&
+ !draw->depth_clamp); /* z clipping is also off while depth_clamp is set */
+ draw->clip_user = (draw->nr_planes > 6); /* anything beyond the first 6 planes is a user plane */
+}
+
/**
* Register new primitive rasterization/rendering state.
* This causes the drawing pipeline to be rebuilt.
@@ -176,18 +210,25 @@ void draw_set_rasterizer_state( struct draw_context *draw,
draw->rasterizer = raster;
draw->rast_handle = rast_handle;
- draw->bypass_clipping = draw->driver.bypass_clipping;
- }
+ }
}
-
+/* With a little more work, llvmpipe will be able to turn this off and
+ * do its own x/y clipping.
+ *
+ * Some hardware can turn off clipping altogether - in particular any
+ * hardware with a TNL unit can do its own clipping, even if it is
+ * relying on the draw module for some other reason.
+ */
void draw_set_driver_clipping( struct draw_context *draw,
- boolean bypass_clipping )
+ boolean bypass_clip_xy,
+ boolean bypass_clip_z )
{
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->driver.bypass_clipping = bypass_clipping;
- draw->bypass_clipping = draw->driver.bypass_clipping;
+ draw->driver.bypass_clip_xy = bypass_clip_xy;
+ draw->driver.bypass_clip_z = bypass_clip_z;
+ update_clip_flags(draw);
}
@@ -217,6 +258,8 @@ void draw_set_clip_state( struct draw_context *draw,
memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0]));
draw->nr_planes = 6 + clip->nr;
draw->depth_clamp = clip->depth_clamp;
+
+ update_clip_flags(draw);
}
@@ -472,47 +515,28 @@ void draw_set_render( struct draw_context *draw,
}
-
-/**
- * Tell the drawing context about the index/element buffer to use
- * (ala glDrawElements)
- * If no element buffer is to be used (i.e. glDrawArrays) then this
- * should be called with eltSize=0 and elements=NULL.
- *
- * \param draw the drawing context
- * \param eltSize size of each element (1, 2 or 4 bytes)
- * \param elements the element buffer ptr
- */
void
-draw_set_mapped_element_buffer_range( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- unsigned min_index,
- unsigned max_index,
- const void *elements )
+draw_set_index_buffer(struct draw_context *draw,
+ const struct pipe_index_buffer *ib)
{
- draw->pt.user.elts = elements;
- draw->pt.user.eltSize = eltSize;
- draw->pt.user.eltBias = eltBias;
- draw->pt.user.min_index = min_index;
- draw->pt.user.max_index = max_index;
+ if (ib)
+ memcpy(&draw->pt.index_buffer, ib, sizeof(draw->pt.index_buffer));
+ else
+ memset(&draw->pt.index_buffer, 0, sizeof(draw->pt.index_buffer));
}
+/**
+ * Tell drawing context where to find mapped index/element buffer.
+ */
void
-draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- const void *elements )
+draw_set_mapped_index_buffer(struct draw_context *draw,
+ const void *elements)
{
- draw->pt.user.elts = elements;
- draw->pt.user.eltSize = eltSize;
- draw->pt.user.eltBias = eltBias;
- draw->pt.user.min_index = 0;
- draw->pt.user.max_index = 0xffffffff;
+ draw->pt.user.elts = elements;
}
-
+
/* Revamp me please:
*/
void draw_do_flush( struct draw_context *draw, unsigned flags )
@@ -659,7 +683,8 @@ draw_set_mapped_texture(struct draw_context *draw,
const void *data[DRAW_MAX_TEXTURE_LEVELS])
{
#ifdef HAVE_LLVM
- draw_llvm_set_mapped_texture(draw,
+ if(draw->llvm)
+ draw_llvm_set_mapped_texture(draw,
sampler_idx,
width, height, depth, last_level,
row_stride, img_stride, data);
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index 116716af6f..4c780e4dcb 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -160,18 +160,11 @@ void draw_set_vertex_elements(struct draw_context *draw,
unsigned count,
const struct pipe_vertex_element *elements);
-void
-draw_set_mapped_element_buffer_range( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- unsigned min_index,
- unsigned max_index,
- const void *elements );
-
-void draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize,
- int eltBias,
- const void *elements );
+void draw_set_index_buffer(struct draw_context *draw,
+ const struct pipe_index_buffer *ib);
+
+void draw_set_mapped_index_buffer(struct draw_context *draw,
+ const void *elements);
void draw_set_mapped_vertex_buffer(struct draw_context *draw,
unsigned attr, const void *buffer);
@@ -196,6 +189,9 @@ draw_set_so_state(struct draw_context *draw,
* draw_pt.c
*/
+void draw_vbo(struct draw_context *draw,
+ const struct pipe_draw_info *info);
+
void draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count);
@@ -216,7 +212,8 @@ void draw_set_render( struct draw_context *draw,
struct vbuf_render *render );
void draw_set_driver_clipping( struct draw_context *draw,
- boolean bypass_clipping );
+ boolean bypass_clip_xy,
+ boolean bypass_clip_z );
void draw_set_force_passthrough( struct draw_context *draw,
boolean enable );
diff --git a/src/gallium/auxiliary/draw/draw_decompose_tmp.h b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
index a52d2b5058..a142563af9 100644
--- a/src/gallium/auxiliary/draw/draw_decompose_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_decompose_tmp.h
@@ -54,10 +54,10 @@ FUNC(FUNC_VARS)
FUNC_ENTER;
- /* prim, count, and last_vertex_last should have been defined */
+ /* prim, prim_flags, count, and last_vertex_last should have been defined */
if (0) {
- debug_printf("%s: prim 0x%x, count %d, last_vertex_last %d\n",
- __FUNCTION__, prim, count, last_vertex_last);
+ debug_printf("%s: prim 0x%x, prim_flags 0x%x, count %d, last_vertex_last %d\n",
+ __FUNCTION__, prim, prim_flags, count, last_vertex_last);
}
switch (prim) {
@@ -80,7 +80,7 @@ FUNC(FUNC_VARS)
case PIPE_PRIM_LINE_LOOP:
case PIPE_PRIM_LINE_STRIP:
if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
+ flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE;
idx[1] = GET_ELT(0);
idx[2] = idx[1];
@@ -90,7 +90,7 @@ FUNC(FUNC_VARS)
LINE(flags, idx[0], idx[1]);
}
/* close the loop */
- if (prim == PIPE_PRIM_LINE_LOOP)
+ if (prim == PIPE_PRIM_LINE_LOOP && !prim_flags)
LINE(flags, idx[1], idx[2]);
}
break;
@@ -255,17 +255,23 @@ FUNC(FUNC_VARS)
if (last_vertex_last) {
flags = (DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_2 |
DRAW_PIPE_EDGE_FLAG_0);
+ if (!(prim_flags & DRAW_SPLIT_BEFORE))
+ flags |= DRAW_PIPE_EDGE_FLAG_2;
+
edge_next = DRAW_PIPE_EDGE_FLAG_0;
- edge_finish = DRAW_PIPE_EDGE_FLAG_1;
+ edge_finish =
+ (prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_1;
}
else {
flags = (DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_0 |
DRAW_PIPE_EDGE_FLAG_1);
+ if (!(prim_flags & DRAW_SPLIT_BEFORE))
+ flags |= DRAW_PIPE_EDGE_FLAG_0;
+
edge_next = DRAW_PIPE_EDGE_FLAG_1;
- edge_finish = DRAW_PIPE_EDGE_FLAG_2;
+ edge_finish =
+ (prim_flags & DRAW_SPLIT_AFTER) ? 0 : DRAW_PIPE_EDGE_FLAG_2;
}
idx[0] = GET_ELT(0);
@@ -300,7 +306,7 @@ FUNC(FUNC_VARS)
case PIPE_PRIM_LINE_STRIP_ADJACENCY:
if (count >= 4) {
- flags = DRAW_PIPE_RESET_STIPPLE;
+ flags = (prim_flags & DRAW_SPLIT_BEFORE) ? 0 : DRAW_PIPE_RESET_STIPPLE;
idx[1] = GET_ELT(0);
idx[2] = GET_ELT(1);
idx[3] = GET_ELT(2);
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index 4a1013e79a..50a03ac95a 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -380,7 +380,7 @@ static void gs_tri_adj(struct draw_geometry_shader *shader,
#define FUNC gs_run_elts
#define LOCAL_VARS const ushort *elts = input_prims->elts;
-#define GET_ELT(idx) (elts[idx] & ~DRAW_PIPE_FLAG_MASK)
+#define GET_ELT(idx) (elts[idx])
#include "draw_gs_tmp.h"
@@ -457,6 +457,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
output_prims->start = 0;
output_prims->count = shader->emitted_vertices;
output_prims->prim = shader->output_primitive;
+ output_prims->flags = 0x0;
output_prims->primitive_lengths = shader->primitive_lengths;
output_prims->primitive_count = shader->emitted_primitives;
output_verts->count = shader->emitted_vertices;
diff --git a/src/gallium/auxiliary/draw/draw_gs_tmp.h b/src/gallium/auxiliary/draw/draw_gs_tmp.h
index 4a17af0dea..de7b02655a 100644
--- a/src/gallium/auxiliary/draw/draw_gs_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_gs_tmp.h
@@ -6,12 +6,10 @@
#define FUNC_ENTER \
/* declare more local vars */ \
- struct draw_context *draw = gs->draw; \
const unsigned prim = input_prims->prim; \
+ const unsigned prim_flags = input_prims->flags; \
const unsigned count = input_prims->count; \
- const boolean last_vertex_last = \
- !(draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first); \
+ const boolean last_vertex_last = TRUE; \
do { \
debug_assert(input_prims->primitive_count == 1); \
switch (prim) { \
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 8d53601d19..8759c38cab 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -210,13 +210,6 @@ draw_llvm_create(struct draw_context *draw)
{
struct draw_llvm *llvm;
-#ifdef PIPE_ARCH_X86
- util_cpu_detect();
- /* require SSE2 due to LLVM PR6960. */
- if (!util_cpu_caps.has_sse2)
- return NULL;
-#endif
-
llvm = CALLOC_STRUCT( draw_llvm );
if (!llvm)
return NULL;
@@ -292,15 +285,23 @@ draw_llvm_destroy(struct draw_llvm *llvm)
}
struct draw_llvm_variant *
-draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs)
+draw_llvm_create_variant(struct draw_llvm *llvm,
+ unsigned num_inputs,
+ const struct draw_llvm_variant_key *key)
{
- struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant));
+ struct draw_llvm_variant *variant;
struct llvm_vertex_shader *shader =
llvm_vertex_shader(llvm->draw->vs.vertex_shader);
+ variant = MALLOC(sizeof *variant +
+ shader->variant_key_size -
+ sizeof variant->key);
+ if (variant == NULL)
+ return NULL;
+
variant->llvm = llvm;
- draw_llvm_make_variant_key(llvm, &variant->key);
+ memcpy(&variant->key, key, shader->variant_key_size);
llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
@@ -738,8 +739,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
/* code generated texture sampling */
- sampler = draw_llvm_sampler_soa_create(variant->key.sampler,
- context_ptr);
+ sampler = draw_llvm_sampler_soa_create(
+ draw_llvm_variant_key_samplers(&variant->key),
+ context_ptr);
#if DEBUG_STORE
lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
@@ -901,8 +903,9 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
/* code generated texture sampling */
- sampler = draw_llvm_sampler_soa_create(variant->key.sampler,
- context_ptr);
+ sampler = draw_llvm_sampler_soa_create(
+ draw_llvm_variant_key_samplers(&variant->key),
+ context_ptr);
fetch_max = LLVMBuildSub(builder, fetch_count,
LLVMConstInt(LLVMInt32Type(), 1, 0),
@@ -1002,35 +1005,42 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian
lp_func_delete_body(variant->function_elts);
}
-void
-draw_llvm_make_variant_key(struct draw_llvm *llvm,
- struct draw_llvm_variant_key *key)
+
+struct draw_llvm_variant_key *
+draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
{
unsigned i;
+ struct draw_llvm_variant_key *key;
+ struct lp_sampler_static_state *sampler;
- memset(key, 0, sizeof(struct draw_llvm_variant_key));
+ key = (struct draw_llvm_variant_key *)store;
+ /* Presumably all variants of the shader should have the same
+ * number of vertex elements - ie the number of shader inputs.
+ */
key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
+ /* All variants of this shader will have the same value for
+ * nr_samplers. Not yet trying to compact away holes in the
+ * sampler array.
+ */
+ key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
+
+ sampler = draw_llvm_variant_key_samplers(key);
+
memcpy(key->vertex_element,
llvm->draw->pt.vertex_element,
sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
+
+ memset(sampler, 0, key->nr_samplers * sizeof *sampler);
- memcpy(&key->vs,
- &llvm->draw->vs.vertex_shader->state,
- sizeof(struct pipe_shader_state));
-
- /* if the driver implemented the sampling hooks then
- * setup our sampling state */
- if (llvm->draw->num_sampler_views && llvm->draw->num_samplers) {
- for(i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; ++i) {
- struct draw_vertex_shader *shader = llvm->draw->vs.vertex_shader;
- if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
- lp_sampler_static_state(&key->sampler[i],
- llvm->draw->sampler_views[i],
- llvm->draw->samplers[i]);
- }
+ for (i = 0 ; i < key->nr_samplers; i++) {
+ lp_sampler_static_state(&sampler[i],
+ llvm->draw->sampler_views[i],
+ llvm->draw->samplers[i]);
}
+
+ return key;
}
void
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index 4addb47d2d..6196b2f983 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -151,12 +151,43 @@ typedef void
struct draw_llvm_variant_key
{
- struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
- unsigned nr_vertex_elements;
- struct pipe_shader_state vs;
- struct lp_sampler_static_state sampler[PIPE_MAX_VERTEX_SAMPLERS];
+ unsigned nr_vertex_elements:16;
+ unsigned nr_samplers:16;
+
+ /* Variable number of vertex elements:
+ */
+ struct pipe_vertex_element vertex_element[1];
+
+ /* Followed by variable number of samplers:
+ */
+/* struct lp_sampler_static_state sampler; */
};
+/*
+ * Upper bound, in bytes, on the size of a variant key: the key struct itself
+ * (which already embeds one vertex element), the remaining
+ * PIPE_MAX_ATTRIBS-1 vertex elements, and PIPE_MAX_VERTEX_SAMPLERS sampler
+ * states.  The terms are listed in the order the data is laid out.
+ */
+#define DRAW_LLVM_MAX_VARIANT_KEY_SIZE \
+   (sizeof(struct draw_llvm_variant_key) + \
+    (PIPE_MAX_ATTRIBS-1) * sizeof(struct pipe_vertex_element) + \
+    PIPE_MAX_VERTEX_SAMPLERS * sizeof(struct lp_sampler_static_state))
+
+
+/**
+ * Return the number of bytes occupied by a variant key that holds
+ * nr_vertex_elements vertex elements followed by nr_samplers sampler states.
+ *
+ * struct draw_llvm_variant_key already embeds one vertex element, so that
+ * element is subtracted once, in size_t arithmetic.  Writing the term as
+ * (nr_vertex_elements - 1) evaluates it as an unsigned int, which wraps to
+ * UINT_MAX when nr_vertex_elements is 0 and produces a huge size wherever
+ * size_t is wider than unsigned.  For nr_vertex_elements >= 1 the result is
+ * the same as with that form.
+ */
+static INLINE size_t
+draw_llvm_variant_key_size(unsigned nr_vertex_elements,
+                           unsigned nr_samplers)
+{
+   return (sizeof(struct draw_llvm_variant_key) -
+           sizeof(struct pipe_vertex_element) +
+           nr_vertex_elements * sizeof(struct pipe_vertex_element) +
+           nr_samplers * sizeof(struct lp_sampler_static_state));
+}
+
+
+/**
+ * Locate the sampler state array stored in a variant key.
+ *
+ * The samplers are not a declared member of the struct; they start at the
+ * address one past the last of the key's nr_vertex_elements vertex elements.
+ */
+static INLINE struct lp_sampler_static_state *
+draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key)
+{
+   struct pipe_vertex_element *past_elements =
+      key->vertex_element + key->nr_vertex_elements;
+
+   return (struct lp_sampler_static_state *) past_elements;
+}
+
+
+
struct draw_llvm_variant_list_item
{
struct draw_llvm_variant *base;
@@ -165,7 +196,6 @@ struct draw_llvm_variant_list_item
struct draw_llvm_variant
{
- struct draw_llvm_variant_key key;
LLVMValueRef function;
LLVMValueRef function_elts;
draw_jit_vert_func jit_func;
@@ -176,11 +206,16 @@ struct draw_llvm_variant
struct draw_llvm *llvm;
struct draw_llvm_variant_list_item list_item_global;
struct draw_llvm_variant_list_item list_item_local;
+
+ /* key is variable-sized, must be last */
+ struct draw_llvm_variant_key key;
+ /* key is variable-sized, must be last */
};
struct llvm_vertex_shader {
struct draw_vertex_shader base;
+ unsigned variant_key_size;
struct draw_llvm_variant_list_item variants;
unsigned variants_created;
unsigned variants_cached;
@@ -220,14 +255,15 @@ void
draw_llvm_destroy(struct draw_llvm *llvm);
struct draw_llvm_variant *
-draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs);
+draw_llvm_create_variant(struct draw_llvm *llvm,
+ unsigned num_vertex_header_attribs,
+ const struct draw_llvm_variant_key *key);
void
draw_llvm_destroy_variant(struct draw_llvm_variant *variant);
-void
-draw_llvm_make_variant_key(struct draw_llvm *llvm,
- struct draw_llvm_variant_key *key);
+struct draw_llvm_variant_key *
+draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store);
LLVMValueRef
draw_llvm_translate_from(LLVMBuilderRef builder,
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 58995e0724..6206197dae 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -169,35 +169,27 @@ static void do_triangle( struct draw_context *draw,
/*
* Set up macros for draw_pt_decompose.h template code.
* This code uses vertex indexes / elements.
- *
- * Flags are needed by the stipple and unfilled stages. When the two stages
- * are active, vcache_run_extras is called and the flags are stored in the
- * higher bits of i0. Otherwise, flags do not matter.
*/
#define TRIANGLE(flags,i0,i1,i2) \
do { \
- assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \
- assert(!((i2) & DRAW_PIPE_FLAG_MASK)); \
do_triangle( draw, \
- i0, /* flags */ \
- verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \
+ flags, \
+ verts + stride * (i0), \
verts + stride * (i1), \
verts + stride * (i2) ); \
} while (0)
#define LINE(flags,i0,i1) \
do { \
- assert(!((i1) & DRAW_PIPE_FLAG_MASK)); \
do_line( draw, \
- i0, /* flags */ \
- verts + stride * (i0 & ~DRAW_PIPE_FLAG_MASK), \
+ flags, \
+ verts + stride * (i0), \
verts + stride * (i1) ); \
} while (0)
#define POINT(i0) \
do { \
- assert(!((i0) & DRAW_PIPE_FLAG_MASK)); \
do_point( draw, verts + stride * (i0) ); \
} while (0)
@@ -207,6 +199,7 @@ static void do_triangle( struct draw_context *draw,
#define FUNC_VARS \
struct draw_context *draw, \
unsigned prim, \
+ unsigned prim_flags, \
struct vertex_header *vertices, \
unsigned stride, \
const ushort *elts, \
@@ -245,22 +238,27 @@ void draw_pipeline_run( struct draw_context *draw,
const unsigned count = prim_info->primitive_lengths[i];
#if DEBUG
- /* make sure none of the element indexes go outside the vertex buffer */
+ /* Warn if any of the element indexes goes outside the vertex buffer */
{
unsigned max_index = 0x0, i;
/* find the largest element index */
for (i = 0; i < count; i++) {
- unsigned int index = (prim_info->elts[start + i]
- & ~DRAW_PIPE_FLAG_MASK);
+ unsigned int index = prim_info->elts[start + i];
if (index > max_index)
max_index = index;
}
- assert(max_index <= vert_info->count);
+ if (max_index >= vert_info->count) {
+ debug_printf("%s: max_index (%u) outside vertex buffer (%u)\n",
+ __FUNCTION__,
+ max_index,
+ vert_info->count);
+ }
}
#endif
pipe_run_elts(draw,
prim_info->prim,
+ prim_info->flags,
vert_info->verts,
vert_info->stride,
prim_info->elts + start,
@@ -298,6 +296,7 @@ void draw_pipeline_run( struct draw_context *draw,
#define FUNC_VARS \
struct draw_context *draw, \
unsigned prim, \
+ unsigned prim_flags, \
struct vertex_header *vertices, \
unsigned stride, \
unsigned count
@@ -330,6 +329,7 @@ void draw_pipeline_run_linear( struct draw_context *draw,
pipe_run_linear(draw,
prim_info->prim,
+ prim_info->flags,
(struct vertex_header*)verts,
vert_info->stride,
count);
diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c
index eafa29276f..8b92543987 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_validate.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -265,7 +265,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
/* Clip stage
*/
- if (!draw->bypass_clipping)
+ if (draw->clip_xy || draw->clip_z || draw->clip_user)
{
draw->pipeline.clip->next = next;
next = draw->pipeline.clip;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index 3c93c9014a..58c5858734 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -353,9 +353,6 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf )
/* Allocate a new vertex buffer */
vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
- /* even number */
- vbuf->max_vertices = vbuf->max_vertices & ~1;
-
if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID)
vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1;
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 397d4bf653..362f563ba6 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -140,8 +140,7 @@ struct draw_context
} middle;
struct {
- struct draw_pt_front_end *vcache;
- struct draw_pt_front_end *varray;
+ struct draw_pt_front_end *vsplit;
} front;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
@@ -150,6 +149,8 @@ struct draw_context
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
unsigned nr_vertex_elements;
+ struct pipe_index_buffer index_buffer;
+
/* user-space vertex data, buffers */
struct {
/** vertex element/index buffer (ex: glDrawElements) */
@@ -175,13 +176,19 @@ struct draw_context
} pt;
struct {
- boolean bypass_clipping;
- boolean bypass_vs;
+ boolean bypass_clip_xy;
+ boolean bypass_clip_z;
} driver;
boolean flushing; /**< debugging/sanity */
boolean suspend_flushing; /**< internally set */
- boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */
+
+ /* Flags set if API requires clipping in these planes and the
+ * driver doesn't indicate that it can do it for us.
+ */
+ boolean clip_xy;
+ boolean clip_z;
+ boolean clip_user;
boolean force_passthrough; /**< never clip or shade */
@@ -296,6 +303,10 @@ struct draw_vertex_info {
unsigned count;
};
+/* these flags are set if the primitive is a segment of a larger one */
+#define DRAW_SPLIT_BEFORE 0x1
+#define DRAW_SPLIT_AFTER 0x2
+
struct draw_prim_info {
boolean linear;
unsigned start;
@@ -304,6 +315,7 @@ struct draw_prim_info {
unsigned count;
unsigned prim;
+ unsigned flags;
unsigned *primitive_lengths;
unsigned primitive_count;
};
@@ -369,21 +381,15 @@ void draw_pipeline_destroy( struct draw_context *draw );
-/* We use the top few bits in the elts[] parameter to convey a little
- * API information. This limits the number of vertices we can address
- * to only 4096 -- if that becomes a problem, we can switch to 32-bit
- * draw indices.
- *
- * These flags expected at first vertex of lines & triangles when
- * unfilled and/or line stipple modes are operational.
+/*
+ * These flags are used by the pipeline when unfilled and/or line stipple modes
+ * are operational.
*/
-#define DRAW_PIPE_MAX_VERTICES (0x1<<12)
-#define DRAW_PIPE_EDGE_FLAG_0 (0x1<<12)
-#define DRAW_PIPE_EDGE_FLAG_1 (0x2<<12)
-#define DRAW_PIPE_EDGE_FLAG_2 (0x4<<12)
-#define DRAW_PIPE_EDGE_FLAG_ALL (0x7<<12)
-#define DRAW_PIPE_RESET_STIPPLE (0x8<<12)
-#define DRAW_PIPE_FLAG_MASK (0xf<<12)
+#define DRAW_PIPE_EDGE_FLAG_0 0x1
+#define DRAW_PIPE_EDGE_FLAG_1 0x2
+#define DRAW_PIPE_EDGE_FLAG_2 0x4
+#define DRAW_PIPE_EDGE_FLAG_ALL 0x7
+#define DRAW_PIPE_RESET_STIPPLE 0x8
void draw_pipeline_run( struct draw_context *draw,
const struct draw_vertex_info *vert,
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 248927505d..f44bf2507c 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -39,25 +39,14 @@
#include "util/u_math.h"
#include "util/u_prim.h"
#include "util/u_format.h"
+#include "util/u_draw.h"
DEBUG_GET_ONCE_BOOL_OPTION(draw_fse, "DRAW_FSE", FALSE)
DEBUG_GET_ONCE_BOOL_OPTION(draw_no_fse, "DRAW_NO_FSE", FALSE)
-#ifdef HAVE_LLVM
-DEBUG_GET_ONCE_BOOL_OPTION(draw_use_llvm, "DRAW_USE_LLVM", TRUE)
-#endif
-
-static unsigned trim( unsigned count, unsigned first, unsigned incr )
-{
- if (count < first)
- return 0;
- return count - (count - first) % incr;
-}
-
-
/* Overall we split things into:
- * - frontend -- prepare fetch_elts, draw_elts - eg vcache
+ * - frontend -- prepare fetch_elts, draw_elts - eg vsplit
* - middle -- fetch, shade, cliptest, viewport
* - pipeline -- the prim pipeline: clipping, wide lines, etc
* - backend -- the vbuf_render provided by the driver.
@@ -77,7 +66,7 @@ draw_pt_arrays(struct draw_context *draw,
{
unsigned first, incr;
draw_pt_split_prim(prim, &first, &incr);
- count = trim(count, first, incr);
+ count = draw_pt_trim_count(count, first, incr);
if (count < first)
return TRUE;
}
@@ -97,7 +86,9 @@ draw_pt_arrays(struct draw_context *draw,
opt |= PT_PIPELINE;
}
- if (!draw->bypass_clipping && !draw->pt.test_fse) {
+ if ((draw->clip_xy ||
+ draw->clip_z ||
+ draw->clip_user) && !draw->pt.test_fse) {
opt |= PT_CLIPTEST;
}
@@ -115,22 +106,11 @@ draw_pt_arrays(struct draw_context *draw,
middle = draw->pt.middle.general;
}
-
- /* Pick the right frontend
- */
- if (draw->pt.user.elts || (opt & PT_PIPELINE)) {
- frontend = draw->pt.front.vcache;
- } else {
- frontend = draw->pt.front.varray;
- }
+ frontend = draw->pt.front.vsplit;
frontend->prepare( frontend, prim, middle, opt );
- frontend->run(frontend,
- draw_pt_elt_func(draw),
- draw_pt_elt_ptr(draw, start),
- draw->pt.user.eltBias,
- count);
+ frontend->run(frontend, start, count);
frontend->finish( frontend );
@@ -143,12 +123,8 @@ boolean draw_pt_init( struct draw_context *draw )
draw->pt.test_fse = debug_get_option_draw_fse();
draw->pt.no_fse = debug_get_option_draw_no_fse();
- draw->pt.front.vcache = draw_pt_vcache( draw );
- if (!draw->pt.front.vcache)
- return FALSE;
-
- draw->pt.front.varray = draw_pt_varray(draw);
- if (!draw->pt.front.varray)
+ draw->pt.front.vsplit = draw_pt_vsplit(draw);
+ if (!draw->pt.front.vsplit)
return FALSE;
draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw );
@@ -164,7 +140,7 @@ boolean draw_pt_init( struct draw_context *draw )
return FALSE;
#if HAVE_LLVM
- if (debug_get_option_draw_use_llvm())
+ if (draw->llvm)
draw->pt.middle.llvm = draw_pt_fetch_pipeline_or_emit_llvm( draw );
#endif
@@ -194,14 +170,9 @@ void draw_pt_destroy( struct draw_context *draw )
draw->pt.middle.fetch_shade_emit = NULL;
}
- if (draw->pt.front.vcache) {
- draw->pt.front.vcache->destroy( draw->pt.front.vcache );
- draw->pt.front.vcache = NULL;
- }
-
- if (draw->pt.front.varray) {
- draw->pt.front.varray->destroy( draw->pt.front.varray );
- draw->pt.front.varray = NULL;
+ if (draw->pt.front.vsplit) {
+ draw->pt.front.vsplit->destroy( draw->pt.front.vsplit );
+ draw->pt.front.vsplit = NULL;
}
}
@@ -221,24 +192,29 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
uint ii = 0;
uint j;
- if (draw->pt.user.elts) {
+ if (draw->pt.user.eltSize) {
+ const char *elts;
+
/* indexed arrays */
+ elts = (const char *) draw->pt.user.elts;
+ elts += draw->pt.index_buffer.offset;
+
switch (draw->pt.user.eltSize) {
case 1:
{
- const ubyte *elem = (const ubyte *) draw->pt.user.elts;
+ const ubyte *elem = (const ubyte *) elts;
ii = elem[start + i];
}
break;
case 2:
{
- const ushort *elem = (const ushort *) draw->pt.user.elts;
+ const ushort *elem = (const ushort *) elts;
ii = elem[start + i];
}
break;
case 4:
{
- const uint *elem = (const uint *) draw->pt.user.elts;
+ const uint *elem = (const uint *) elts;
ii = elem[start + i];
}
break;
@@ -324,17 +300,8 @@ draw_arrays(struct draw_context *draw, unsigned prim,
/**
- * Draw vertex arrays.
- * This is the main entrypoint into the drawing module.
- * If drawing an indexed primitive, the draw_set_mapped_element_buffer_range()
- * function should have already been called to specify the element/index buffer
- * information.
- *
- * \param prim one of PIPE_PRIM_x
- * \param start index of first vertex to draw
- * \param count number of vertices to draw
- * \param startInstance number for the first primitive instance (usually 0).
- * \param instanceCount number of instances to draw (1=non-instanced)
+ * Instanced drawing.
+ * \sa draw_vbo
*/
void
draw_arrays_instanced(struct draw_context *draw,
@@ -344,10 +311,50 @@ draw_arrays_instanced(struct draw_context *draw,
unsigned startInstance,
unsigned instanceCount)
{
- unsigned reduced_prim = u_reduced_prim(mode);
+ struct pipe_draw_info info;
+
+ util_draw_init_info(&info);
+
+ info.mode = mode;
+ info.start = start;
+ info.count = count;
+ info.start_instance = startInstance;
+ info.instance_count = instanceCount;
+
+ info.indexed = (draw->pt.user.elts != NULL);
+ if (!info.indexed) {
+ info.min_index = start;
+ info.max_index = start + count - 1;
+ }
+
+ draw_vbo(draw, &info);
+}
+
+
+/**
+ * Draw vertex arrays.
+ * This is the main entrypoint into the drawing module. If drawing an indexed
+ * primitive, the draw_set_index_buffer() and draw_set_mapped_index_buffer()
+ * functions should have already been called to specify the element/index
+ * buffer information.
+ */
+void
+draw_vbo(struct draw_context *draw,
+ const struct pipe_draw_info *info)
+{
+ unsigned reduced_prim = u_reduced_prim(info->mode);
unsigned instance;
- assert(instanceCount > 0);
+ assert(info->instance_count > 0);
+ if (info->indexed)
+ assert(draw->pt.user.elts);
+
+ draw->pt.user.eltSize =
+ (info->indexed) ? draw->pt.index_buffer.index_size : 0;
+
+ draw->pt.user.eltBias = info->index_bias;
+ draw->pt.user.min_index = info->min_index;
+ draw->pt.user.max_index = info->max_index;
if (reduced_prim != draw->reduced_prim) {
draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
@@ -355,8 +362,8 @@ draw_arrays_instanced(struct draw_context *draw,
}
if (0)
- debug_printf("draw_arrays(mode=%u start=%u count=%u):\n",
- mode, start, count);
+ debug_printf("draw_vbo(mode=%u start=%u count=%u):\n",
+ info->mode, info->start, info->count);
if (0)
tgsi_dump(draw->vs.vertex_shader->state.tokens, 0);
@@ -384,10 +391,10 @@ draw_arrays_instanced(struct draw_context *draw,
}
if (0)
- draw_print_arrays(draw, mode, start, MIN2(count, 20));
+ draw_print_arrays(draw, info->mode, info->start, MIN2(info->count, 20));
- for (instance = 0; instance < instanceCount; instance++) {
- draw->instance_id = instance + startInstance;
- draw_pt_arrays(draw, mode, start, count);
+ for (instance = 0; instance < info->instance_count; instance++) {
+ draw->instance_id = instance + info->start_instance;
+ draw_pt_arrays(draw, info->mode, info->start, info->count);
}
}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 44356fba4c..5fbb424291 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -35,8 +35,6 @@
#include "pipe/p_compiler.h"
-typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx );
-
struct draw_pt_middle_end;
struct draw_context;
struct draw_prim_info;
@@ -52,13 +50,18 @@ struct draw_vertex_info;
/* The "front end" - prepare sets of fetch, draw elements for the
* middle end.
*
- * Currenly one version of this:
- * - vcache - catchall implementation, decomposes to TRI/LINE/POINT prims
- * Later:
- * - varray, varray_split
- * - velement, velement_split
+ * The fetch elements are indices to the vertices. The draw elements are
+ * indices to the fetched vertices. When both arrays of elements are
+ * linear, middle->run_linear is called; when only the fetch elements are
+ * linear, middle->run_linear_elts is called; otherwise, middle->run is
+ * called.
+ *
+ * When the number of the draw elements exceeds max_vertex of the middle end,
+ * the draw elements (as well as the fetch elements) are split and the
+ * middle end is called multiple times.
*
- * Currenly only using the vcache version.
+ * Currently there is:
+ * - vsplit - catchall implementation, splits big prims
*/
struct draw_pt_front_end {
void (*prepare)( struct draw_pt_front_end *,
@@ -67,9 +70,7 @@ struct draw_pt_front_end {
unsigned opt );
void (*run)( struct draw_pt_front_end *,
- pt_elt_func elt_func,
- const void *elt_ptr,
- int elt_bias,
+ unsigned start,
unsigned count );
void (*finish)( struct draw_pt_front_end * );
@@ -80,6 +81,8 @@ struct draw_pt_front_end {
/* The "middle end" - prepares actual hardware vertices for the
* hardware backend.
*
+ * prim_flags is as defined by draw_prim_info::flags.
+ *
* Currently two versions of this:
* - fetch, vertex shade, cliptest, prim-pipeline
* - fetch, emit (ie passthrough)
@@ -94,11 +97,13 @@ struct draw_pt_middle_end {
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count );
+ unsigned draw_count,
+ unsigned prim_flags );
void (*run_linear)(struct draw_pt_middle_end *,
unsigned start,
- unsigned count);
+ unsigned count,
+ unsigned prim_flags );
/* Transform all vertices in a linear range and then draw them with
* the supplied element list. May fail and return FALSE.
@@ -107,7 +112,8 @@ struct draw_pt_middle_end {
unsigned fetch_start,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count );
+ unsigned draw_count,
+ unsigned prim_flags );
int (*get_max_vertex_count)( struct draw_pt_middle_end * );
@@ -122,19 +128,11 @@ struct vbuf_render;
struct vertex_header;
-/* Helper functions.
- */
-pt_elt_func draw_pt_elt_func( struct draw_context *draw );
-const void *draw_pt_elt_ptr( struct draw_context *draw,
- unsigned start );
-
/* Frontends:
*
- * Currently only the general-purpose vcache implementation, could add
- * a special case for tiny vertex buffers.
+ * Currently only the general-purpose vsplit implementation.
*/
-struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
-struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);
+struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw);
/* Middle-ends:
@@ -223,7 +221,9 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
struct draw_vertex_info *info );
void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
- boolean bypass_clipping,
+ boolean clip_xy,
+ boolean clip_z,
+ boolean clip_user,
boolean bypass_viewport,
boolean opengl,
boolean need_edgeflags );
@@ -237,6 +237,7 @@ void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
* Utils:
*/
void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr);
+unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr);
#endif
diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c
deleted file mode 100644
index 88f4d9f495..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_elts.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "draw/draw_pt.h"
-#include "draw/draw_private.h"
-
-/* Neat get_elt func that also works for varrays drawing by encoding
- * the start value into a pointer.
- */
-
-static unsigned elt_uint( const void *elts, unsigned idx )
-{
- return *(((const uint *)elts) + idx);
-}
-
-static unsigned elt_ushort( const void *elts, unsigned idx )
-{
- return *(((const ushort *)elts) + idx);
-}
-
-static unsigned elt_ubyte( const void *elts, unsigned idx )
-{
- return *(((const ubyte *)elts) + idx);
-}
-
-static unsigned elt_vert( const void *elts, unsigned idx )
-{
- /* unsigned index is packed in the pointer */
- return (unsigned)(uintptr_t)elts + idx;
-}
-
-pt_elt_func draw_pt_elt_func( struct draw_context *draw )
-{
- switch (draw->pt.user.eltSize) {
- case 0: return &elt_vert;
- case 1: return &elt_ubyte;
- case 2: return &elt_ushort;
- case 4: return &elt_uint;
- default: return NULL;
- }
-}
-
-const void *draw_pt_elt_ptr( struct draw_context *draw,
- unsigned start )
-{
- const char *elts = draw->pt.user.elts;
-
- switch (draw->pt.user.eltSize) {
- case 0:
- return (const void *)(((const ubyte *)NULL) + start);
- case 1:
- return (const void *)(((const ubyte *)elts) + start);
- case 2:
- return (const void *)(((const ushort *)elts) + start);
- case 4:
- return (const void *)(((const uint *)elts) + start);
- default:
- return NULL;
- }
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index 5568fbb9f8..c8dfc16911 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -120,9 +120,6 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
-
- /* even number */
- *max_vertices = *max_vertices & ~1;
}
@@ -147,11 +144,6 @@ void draw_pt_emit( struct pt_emit *emit,
if (vertex_count == 0)
return;
- if (vertex_count >= UNDEFINED_VERTEX_ID) {
- assert(0);
- return;
- }
-
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
@@ -226,9 +218,6 @@ void draw_pt_emit_linear(struct pt_emit *emit,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
*/
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
index 5c8af17c8e..e706b7796f 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -191,15 +191,6 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
-
- /* Return an even number of verts.
- * This prevents "parity" errors when splitting long triangle strips which
- * can lead to front/back culling mix-ups.
- * Every other triangle in a strip has an alternate front/back orientation
- * so splitting at an odd position can cause the orientation of subsequent
- * triangles to get reversed.
- */
- *max_vertices = *max_vertices & ~1;
}
@@ -210,7 +201,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -220,11 +212,6 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (fetch_count >= UNDEFINED_VERTEX_ID) {
- assert(0);
- return;
- }
-
draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)fetch_count );
@@ -273,7 +260,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count )
+ unsigned count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -283,9 +271,6 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)count ))
@@ -334,7 +319,8 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle;
struct draw_context *draw = feme->draw;
@@ -344,9 +330,6 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- return FALSE;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)feme->translate->key.output_stride,
(ushort)count ))
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
index b8270280b6..7c198c6026 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c
@@ -102,7 +102,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
fse->key.nr_inputs); /* inputs - fetch from api format */
fse->key.viewport = !draw->identity_viewport;
- fse->key.clip = !draw->bypass_clipping;
+ fse->key.clip = draw->clip_xy || draw->clip_z || draw->clip_user;
fse->key.const_vbuffers = 0;
memset(fse->key.element, 0,
@@ -175,15 +175,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
*max_vertices = (draw->render->max_vertex_buffer_bytes /
(vinfo->size * 4));
- /* Return an even number of verts.
- * This prevents "parity" errors when splitting long triangle strips which
- * can lead to front/back culling mix-ups.
- * Every other triangle in a strip has an alternate front/back orientation
- * so splitting at an odd position can cause the orientation of subsequent
- * triangles to get reversed.
- */
- *max_vertices = *max_vertices & ~1;
-
/* Probably need to do this somewhere (or fix exec shader not to
* need it):
*/
@@ -197,7 +188,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle,
static void fse_run_linear( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count )
+ unsigned count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -207,9 +199,6 @@ static void fse_run_linear( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)count ))
@@ -265,7 +254,8 @@ fse_run(struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -275,9 +265,6 @@ fse_run(struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (fetch_count >= UNDEFINED_VERTEX_ID)
- goto fail;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)fetch_count ))
@@ -327,7 +314,8 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle;
struct draw_context *draw = fse->draw;
@@ -337,9 +325,6 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle,
*/
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
- if (count >= UNDEFINED_VERTEX_ID)
- return FALSE;
-
if (!draw->render->allocate_vertices( draw->render,
(ushort)fse->key.output_stride,
(ushort)count ))
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 5b16c3788e..b72fd61245 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -100,8 +100,10 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
* but gl vs dx9 clip spaces.
*/
draw_pt_post_vs_prepare( fpme->post_vs,
- (boolean)draw->bypass_clipping,
- (boolean)draw->identity_viewport,
+ draw->clip_xy,
+ draw->clip_z,
+ draw->clip_user,
+ draw->identity_viewport,
(boolean)draw->rasterizer->gl_rasterization_rules,
(draw->vs.edgeflag_output ? TRUE : FALSE) );
@@ -112,16 +114,13 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
gs_out_prim,
max_vertices );
- *max_vertices = MAX2( *max_vertices,
- DRAW_PIPE_MAX_VERTICES );
+ *max_vertices = MAX2( *max_vertices, 4096 );
}
else {
- *max_vertices = DRAW_PIPE_MAX_VERTICES;
+ /* limit max fetches by limiting max_vertices */
+ *max_vertices = 4096;
}
- /* return even number */
- *max_vertices = *max_vertices & ~1;
-
/* No need to prepare the shader.
*/
vs->prepare(vs, draw);
@@ -295,7 +294,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -311,6 +311,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
@@ -320,7 +321,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count)
+ unsigned count,
+ unsigned prim_flags)
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -336,6 +338,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
prim_info.count = count;
prim_info.elts = NULL;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &count;
@@ -348,7 +351,8 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -364,6 +368,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 4b99bee86a..77291e304e 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -66,7 +66,8 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
struct draw_context *draw = fpme->draw;
struct llvm_vertex_shader *shader =
llvm_vertex_shader(draw->vs.vertex_shader);
- struct draw_llvm_variant_key key;
+ char store[DRAW_LLVM_MAX_VARIANT_KEY_SIZE];
+ struct draw_llvm_variant_key *key;
struct draw_llvm_variant *variant = NULL;
struct draw_llvm_variant_list_item *li;
unsigned i;
@@ -106,8 +107,10 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
* but gl vs dx9 clip spaces.
*/
draw_pt_post_vs_prepare( fpme->post_vs,
- (boolean)draw->bypass_clipping,
- (boolean)(draw->identity_viewport),
+ draw->clip_xy,
+ draw->clip_z,
+ draw->clip_user,
+ draw->identity_viewport,
(boolean)draw->rasterizer->gl_rasterization_rules,
(draw->vs.edgeflag_output ? TRUE : FALSE) );
@@ -118,21 +121,21 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
out_prim,
max_vertices );
- *max_vertices = MAX2( *max_vertices,
- DRAW_PIPE_MAX_VERTICES );
+ *max_vertices = MAX2( *max_vertices, 4096 );
}
else {
- *max_vertices = DRAW_PIPE_MAX_VERTICES;
+ /* limit max fetches by limiting max_vertices */
+ *max_vertices = 4096;
}
/* return even number */
*max_vertices = *max_vertices & ~1;
-
- draw_llvm_make_variant_key(fpme->llvm, &key);
+
+ key = draw_llvm_make_variant_key(fpme->llvm, store);
li = first_elem(&shader->variants);
while(!at_end(&shader->variants, li)) {
- if(memcmp(&li->base->key, &key, sizeof key) == 0) {
+ if(memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
variant = li->base;
break;
}
@@ -155,7 +158,7 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
}
}
- variant = draw_llvm_create_variant(fpme->llvm, nr);
+ variant = draw_llvm_create_variant(fpme->llvm, nr, key);
if (variant) {
insert_at_head(&shader->variants, &variant->list_item_local);
@@ -294,7 +297,8 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
const unsigned *fetch_elts,
unsigned fetch_count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -310,6 +314,7 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
@@ -319,7 +324,8 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle,
static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
unsigned start,
- unsigned count)
+ unsigned count,
+ unsigned prim_flags)
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -335,6 +341,7 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle,
prim_info.count = count;
prim_info.elts = NULL;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &count;
@@ -348,7 +355,8 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
unsigned start,
unsigned count,
const ushort *draw_elts,
- unsigned draw_count )
+ unsigned draw_count,
+ unsigned prim_flags )
{
struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle;
struct draw_fetch_info fetch_info;
@@ -364,6 +372,7 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle,
prim_info.count = draw_count;
prim_info.elts = draw_elts;
prim_info.prim = fpme->input_prim;
+ prim_info.flags = prim_flags;
prim_info.primitive_count = 1;
prim_info.primitive_lengths = &draw_count;
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index 308f927b77..769409cfd6 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -26,14 +26,26 @@
**************************************************************************/
#include "util/u_memory.h"
+#include "util/u_math.h"
#include "pipe/p_context.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_pt.h"
+
+#define DO_CLIP_XY 0x1
+#define DO_CLIP_FULL_Z 0x2
+#define DO_CLIP_HALF_Z 0x4
+#define DO_CLIP_USER 0x8
+#define DO_VIEWPORT 0x10
+#define DO_EDGEFLAG 0x20
+
+
struct pt_post_vs {
struct draw_context *draw;
+ unsigned flags;
+
boolean (*run)( struct pt_post_vs *pvs,
struct draw_vertex_info *info );
};
@@ -56,186 +68,47 @@ dot4(const float *a, const float *b)
a[3]*b[3]);
}
-static INLINE unsigned
-compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr,
- boolean clip_depth)
-{
- unsigned mask = 0x0;
- unsigned i;
+#define FLAGS (0)
+#define TAG(x) x##_none
+#include "draw_cliptest_tmp.h"
-#if 0
- debug_printf("compute clipmask %f %f %f %f\n",
- clip[0], clip[1], clip[2], clip[3]);
- assert(clip[3] != 0.0);
-#endif
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_VIEWPORT)
+#define TAG(x) x##_xy_fullz_viewport
+#include "draw_cliptest_tmp.h"
- /* Do the hardwired planes first:
- */
- if (-clip[0] + clip[3] < 0) mask |= (1<<0);
- if ( clip[0] + clip[3] < 0) mask |= (1<<1);
- if (-clip[1] + clip[3] < 0) mask |= (1<<2);
- if ( clip[1] + clip[3] < 0) mask |= (1<<3);
- if (clip_depth) {
- if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */
- if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */
- }
+#define FLAGS (DO_CLIP_XY | DO_CLIP_HALF_Z | DO_VIEWPORT)
+#define TAG(x) x##_xy_halfz_viewport
+#include "draw_cliptest_tmp.h"
- /* Followed by any remaining ones:
- */
- for (i = 6; i < nr; i++) {
- if (dot4(clip, plane[i]) < 0)
- mask |= (1<<i);
- }
+#define FLAGS (DO_CLIP_FULL_Z | DO_VIEWPORT)
+#define TAG(x) x##_fullz_viewport
+#include "draw_cliptest_tmp.h"
- return mask;
-}
+#define FLAGS (DO_CLIP_HALF_Z | DO_VIEWPORT)
+#define TAG(x) x##_halfz_viewport
+#include "draw_cliptest_tmp.h"
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT)
+#define TAG(x) x##_xy_fullz_user_viewport
+#include "draw_cliptest_tmp.h"
-/* The normal case - cliptest, rhw divide, viewport transform.
- *
- * Also handle identity viewport here at the expense of a few wasted
- * instructions
- */
-static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
- struct draw_vertex_info *info )
-{
- struct vertex_header *out = info->verts;
- const float *scale = pvs->draw->viewport.scale;
- const float *trans = pvs->draw->viewport.translate;
- const unsigned pos = draw_current_shader_position_output(pvs->draw);
- unsigned clipped = 0;
- unsigned j;
-
- if (0) debug_printf("%s count, %d\n", __FUNCTION__, info->count);
-
- for (j = 0; j < info->count; j++) {
- float *position = out->data[pos];
-
- initialize_vertex_header(out);
-#if 0
- debug_printf("%d) io = %p, data = %p = [%f, %f, %f, %f]\n",
- j, out, position, position[0], position[1], position[2], position[3]);
-#endif
-
- out->clip[0] = position[0];
- out->clip[1] = position[1];
- out->clip[2] = position[2];
- out->clip[3] = position[3];
-
- out->vertex_id = 0xffff;
- /* Disable depth clipping if depth clamping is enabled. */
- out->clipmask = compute_clipmask_gl(out->clip,
- pvs->draw->plane,
- pvs->draw->nr_planes,
- !pvs->draw->depth_clamp);
- clipped += out->clipmask;
-
- if (out->clipmask == 0)
- {
- /* divide by w */
- float w = 1.0f / position[3];
-
- /* Viewport mapping */
- position[0] = position[0] * w * scale[0] + trans[0];
- position[1] = position[1] * w * scale[1] + trans[1];
- position[2] = position[2] * w * scale[2] + trans[2];
- position[3] = w;
-#if 0
- debug_printf("post viewport: %f %f %f %f\n",
- position[0],
- position[1],
- position[2],
- position[3]);
-#endif
- }
-
- out = (struct vertex_header *)( (char *)out + info->stride );
- }
-
- return clipped != 0;
-}
+#define FLAGS (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT | DO_EDGEFLAG)
+#define TAG(x) x##_xy_fullz_user_viewport_edgeflag
+#include "draw_cliptest_tmp.h"
-/* As above plus edgeflags
+/* Don't want to create 64 versions of this function, so catch the
+ * less common ones here. This is looking like something which should
+ * be code-generated, perhaps appended to the end of the vertex
+ * shader.
*/
-static boolean
-post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs,
- struct draw_vertex_info *info)
-{
- unsigned j;
- boolean needpipe;
-
- needpipe = post_vs_cliptest_viewport_gl(pvs, info);
-
- /* If present, copy edgeflag VS output into vertex header.
- * Otherwise, leave header as is.
- */
- if (pvs->draw->vs.edgeflag_output) {
- struct vertex_header *out = info->verts;
- int ef = pvs->draw->vs.edgeflag_output;
-
- for (j = 0; j < info->count; j++) {
- const float *edgeflag = out->data[ef];
- out->edgeflag = !(edgeflag[0] != 1.0f);
- needpipe |= !out->edgeflag;
- out = (struct vertex_header *)( (char *)out + info->stride );
- }
- }
- return needpipe;
-}
-
+#define FLAGS (pvs->flags)
+#define TAG(x) x##_generic
+#include "draw_cliptest_tmp.h"
-/* If bypass_clipping is set, skip cliptest and rhw divide.
- */
-static boolean post_vs_viewport( struct pt_post_vs *pvs,
- struct draw_vertex_info *info )
-{
- struct vertex_header *out = info->verts;
- const float *scale = pvs->draw->viewport.scale;
- const float *trans = pvs->draw->viewport.translate;
- const unsigned pos = draw_current_shader_position_output(pvs->draw);
- unsigned j;
-
- if (0) debug_printf("%s\n", __FUNCTION__);
- for (j = 0; j < info->count; j++) {
- float *position = out->data[pos];
-
- initialize_vertex_header(out);
- /* Viewport mapping only, no cliptest/rhw divide
- */
- position[0] = position[0] * scale[0] + trans[0];
- position[1] = position[1] * scale[1] + trans[1];
- position[2] = position[2] * scale[2] + trans[2];
-
- out = (struct vertex_header *)((char *)out + info->stride);
- }
-
- return FALSE;
-}
-
-
-/* If bypass_clipping is set and we have an identity viewport, nothing
- * to do.
- */
-static boolean post_vs_none( struct pt_post_vs *pvs,
- struct draw_vertex_info *info )
-{
- struct vertex_header *out = info->verts;
- unsigned j;
-
- if (0) debug_printf("%s\n", __FUNCTION__);
- /* just initialize the vertex_id in all headers */
- for (j = 0; j < info->count; j++) {
- initialize_vertex_header(out);
-
- out = (struct vertex_header *)((char *)out + info->stride);
- }
- return FALSE;
-}
-
boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
struct draw_vertex_info *info )
{
@@ -244,31 +117,72 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
- boolean bypass_clipping,
+ boolean clip_xy,
+ boolean clip_z,
+ boolean clip_user,
boolean bypass_viewport,
boolean opengl,
boolean need_edgeflags )
{
- if (!need_edgeflags) {
- if (bypass_clipping) {
- if (bypass_viewport)
- pvs->run = post_vs_none;
- else
- pvs->run = post_vs_viewport;
- }
- else {
- /* if (opengl) */
- pvs->run = post_vs_cliptest_viewport_gl;
- }
+ pvs->flags = 0;
+
+ if (clip_xy)
+ pvs->flags |= DO_CLIP_XY;
+
+ if (clip_z && opengl) {
+ pvs->flags |= DO_CLIP_FULL_Z;
+ ASSIGN_4V( pvs->draw->plane[4], 0, 0, 1, 1 );
+ }
+
+ if (clip_z && !opengl) {
+ pvs->flags |= DO_CLIP_HALF_Z;
+ ASSIGN_4V( pvs->draw->plane[4], 0, 0, 1, 0 );
}
- else {
- /* If we need to copy edgeflags to the vertex header, it should
- * mean we're running the primitive pipeline. Hence the bypass
- * flags should be false.
- */
- assert(!bypass_clipping);
- assert(!bypass_viewport);
- pvs->run = post_vs_cliptest_viewport_gl_edgeflag;
+
+ if (clip_user)
+ pvs->flags |= DO_CLIP_USER;
+
+ if (!bypass_viewport)
+ pvs->flags |= DO_VIEWPORT;
+
+ if (need_edgeflags)
+ pvs->flags |= DO_EDGEFLAG;
+
+ /* Now select the relevant function:
+ */
+ switch (pvs->flags) {
+ case 0:
+ pvs->run = do_cliptest_none;
+ break;
+
+ case DO_CLIP_XY | DO_CLIP_FULL_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_xy_fullz_viewport;
+ break;
+
+ case DO_CLIP_XY | DO_CLIP_HALF_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_xy_halfz_viewport;
+ break;
+
+ case DO_CLIP_FULL_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_fullz_viewport;
+ break;
+
+ case DO_CLIP_HALF_Z | DO_VIEWPORT:
+ pvs->run = do_cliptest_halfz_viewport;
+ break;
+
+ case DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER | DO_VIEWPORT:
+ pvs->run = do_cliptest_xy_fullz_user_viewport;
+ break;
+
+ case (DO_CLIP_XY | DO_CLIP_FULL_Z | DO_CLIP_USER |
+ DO_VIEWPORT | DO_EDGEFLAG):
+ pvs->run = do_cliptest_xy_fullz_user_viewport_edgeflag;
+ break;
+
+ default:
+ pvs->run = do_cliptest_generic;
+ break;
}
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
index f7f4f24d35..c86bdd99a3 100644
--- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -225,7 +225,7 @@ static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2)
#define FUNC so_run_elts
#define LOCAL_VARS const ushort *elts = input_prims->elts;
-#define GET_ELT(idx) (elts[start + (idx)] & ~DRAW_PIPE_FLAG_MASK)
+#define GET_ELT(idx) (elts[start + (idx)])
#include "draw_so_emit_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c
index 182a597cca..513bbbed21 100644
--- a/src/gallium/auxiliary/draw/draw_pt_util.c
+++ b/src/gallium/auxiliary/draw/draw_pt_util.c
@@ -92,3 +92,10 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
}
}
+
+unsigned draw_pt_trim_count(unsigned count, unsigned first, unsigned incr)
+{
+ if (count < first)
+ return 0;
+ return count - (count - first) % incr;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c
deleted file mode 100644
index cd7bb7bf25..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_pt.h"
-
-#define FETCH_MAX 256
-#define DRAW_MAX (FETCH_MAX+8)
-
-struct varray_frontend {
- struct draw_pt_front_end base;
- struct draw_context *draw;
-
- ushort draw_elts[DRAW_MAX];
- unsigned fetch_elts[FETCH_MAX];
-
- unsigned driver_fetch_max;
- unsigned fetch_max;
-
- struct draw_pt_middle_end *middle;
-
- unsigned input_prim;
- unsigned output_prim;
-};
-
-
-static void varray_flush_linear(struct varray_frontend *varray,
- unsigned start, unsigned count)
-{
- if (count) {
- assert(varray->middle->run_linear);
- varray->middle->run_linear(varray->middle, start, count);
- }
-}
-
-static void varray_line_loop_segment(struct varray_frontend *varray,
- unsigned start,
- unsigned segment_start,
- unsigned segment_count,
- boolean end )
-{
- assert(segment_count < varray->fetch_max);
- if (segment_count >= 1) {
- unsigned nr = 0, i;
-
- for (i = 0; i < segment_count; i++)
- varray->fetch_elts[nr++] = start + segment_start + i;
-
- if (end)
- varray->fetch_elts[nr++] = start;
-
- assert(nr <= FETCH_MAX);
-
- varray->middle->run(varray->middle,
- varray->fetch_elts,
- nr,
- varray->draw_elts, /* ie. linear */
- nr);
- }
-}
-
-
-
-static void varray_fan_segment(struct varray_frontend *varray,
- unsigned start,
- unsigned segment_start,
- unsigned segment_count )
-{
- assert(segment_count < varray->fetch_max);
- if (segment_count >= 2) {
- unsigned nr = 0, i;
-
- if (segment_start != 0)
- varray->fetch_elts[nr++] = start;
-
- for (i = 0 ; i < segment_count; i++)
- varray->fetch_elts[nr++] = start + segment_start + i;
-
- assert(nr <= FETCH_MAX);
-
- varray->middle->run(varray->middle,
- varray->fetch_elts,
- nr,
- varray->draw_elts, /* ie. linear */
- nr);
- }
-}
-
-
-
-
-#define FUNC varray_run
-#include "draw_pt_varray_tmp_linear.h"
-
-static unsigned decompose_prim[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY + 1] = {
- PIPE_PRIM_POINTS,
- PIPE_PRIM_LINES,
- PIPE_PRIM_LINE_STRIP, /* decomposed LINELOOP */
- PIPE_PRIM_LINE_STRIP,
- PIPE_PRIM_TRIANGLES,
- PIPE_PRIM_TRIANGLE_STRIP,
- PIPE_PRIM_TRIANGLE_FAN,
- PIPE_PRIM_QUADS,
- PIPE_PRIM_QUAD_STRIP,
- PIPE_PRIM_POLYGON,
- PIPE_PRIM_LINES_ADJACENCY,
- PIPE_PRIM_LINE_STRIP_ADJACENCY,
- PIPE_PRIM_TRIANGLES_ADJACENCY,
- PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY
-};
-
-
-
-static void varray_prepare(struct draw_pt_front_end *frontend,
- unsigned in_prim,
- struct draw_pt_middle_end *middle,
- unsigned opt)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
-
- varray->base.run = varray_run;
-
- varray->input_prim = in_prim;
- assert(in_prim < Elements(decompose_prim));
- varray->output_prim = decompose_prim[in_prim];
-
- varray->middle = middle;
- middle->prepare(middle,
- varray->output_prim,
- opt, &varray->driver_fetch_max );
-
- /* check that the max is even */
- assert((varray->driver_fetch_max & 1) == 0);
-
- varray->fetch_max = MIN2(FETCH_MAX, varray->driver_fetch_max);
-}
-
-
-
-
-static void varray_finish(struct draw_pt_front_end *frontend)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- varray->middle->finish(varray->middle);
- varray->middle = NULL;
-}
-
-static void varray_destroy(struct draw_pt_front_end *frontend)
-{
- FREE(frontend);
-}
-
-
-struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw)
-{
- ushort i;
- struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend);
- if (varray == NULL)
- return NULL;
-
- varray->base.prepare = varray_prepare;
- varray->base.run = NULL;
- varray->base.finish = varray_finish;
- varray->base.destroy = varray_destroy;
- varray->draw = draw;
-
- for (i = 0; i < DRAW_MAX; i++) {
- varray->draw_elts[i] = i;
- }
-
- return &varray->base;
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
deleted file mode 100644
index 7c722457c3..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h
+++ /dev/null
@@ -1,238 +0,0 @@
-
-static void FUNC(struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- unsigned count)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- struct draw_context *draw = varray->draw;
- unsigned start = (unsigned)elts;
-
- boolean flatfirst = (draw->rasterizer->flatshade &&
- draw->rasterizer->flatshade_first);
- unsigned i, j;
- ushort flags;
- unsigned first, incr;
-
- varray->fetch_start = start;
-
- draw_pt_split_prim(varray->input_prim, &first, &incr);
-
-#if 0
- debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
- varray->input_prim,
- start, count);
-#endif
-
- switch (varray->input_prim) {
- case PIPE_PRIM_POINTS:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i < end; i++) {
- POINT(varray, i + 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+1 < end; i += 2) {
- LINE(varray, DRAW_PIPE_RESET_STIPPLE,
- i + 0, i + 1);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- flags = DRAW_PIPE_RESET_STIPPLE;
-
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 1; i < end; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
- }
- LINE(varray, flags, i - 1, 0);
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- flags = DRAW_PIPE_RESET_STIPPLE;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 1; i < end; i++, flags = 0) {
- LINE(varray, flags, i - 1, i);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i += 3) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1, i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- if (flatfirst) {
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0, i + 1 + (i&1), i + 2 - (i&1));
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- }
- else {
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i + 2 < end; i++) {
- TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- i + 0 + (i&1), i + 1 - (i&1), i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- if (flatfirst) {
- flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- else {
- flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++) {
- TRIANGLE(varray, flags, 0, i + 1, i + 2);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- }
- break;
-
- case PIPE_PRIM_QUADS:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+3 < end; i += 4) {
- QUAD(varray, i + 0, i + 1, i + 2, i + 3);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+3 < end; i += 2) {
- QUAD(varray, i + 2, i + 0, i + 1, i + 3);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- if (j + first + i <= count) {
- varray->fetch_start -= 2;
- i -= 2;
- }
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- {
- /* These bitflags look a little odd because we submit the
- * vertices as (1,2,0) to satisfy flatshade requirements.
- */
- const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
- const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
- const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
-
- flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
- for (j = 0; j + first <= count; j += i) {
- unsigned end = MIN2(FETCH_MAX, count - j);
- end -= (end % incr);
- for (i = 0; i+2 < end; i++, flags = edge_middle) {
-
- if (i + 3 == count)
- flags |= edge_last;
-
- TRIANGLE(varray, flags, i + 1, i + 2, 0);
- }
- i = end;
- fetch_init(varray, end);
- varray_flush(varray);
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-
- varray_flush(varray);
-}
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
deleted file mode 100644
index 55e43b2a71..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ /dev/null
@@ -1,103 +0,0 @@
-static unsigned trim( unsigned count, unsigned first, unsigned incr )
-{
- /*
- * count either has been trimmed in draw_pt_arrays or is set to
- * (driver)_fetch_max which is hopefully always larger than first.
- */
- assert(count >= first);
- return count - (count - first) % incr;
-}
-
-static void FUNC(struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- int elt_bias,
- unsigned count)
-{
- struct varray_frontend *varray = (struct varray_frontend *)frontend;
- unsigned start = (unsigned) ((char *) elts - (char *) NULL);
-
- unsigned j;
- unsigned first, incr;
-
- assert(elt_bias == 0);
-
- draw_pt_split_prim(varray->input_prim, &first, &incr);
-
- /* Sanitize primitive length:
- */
- count = trim(count, first, incr);
- if (count < first)
- return;
-
-#if 0
- debug_printf("%s (%d) %d/%d\n", __FUNCTION__,
- varray->input_prim,
- start, count);
-#endif
-
- switch (varray->input_prim) {
- case PIPE_PRIM_POINTS:
- case PIPE_PRIM_LINES:
- case PIPE_PRIM_TRIANGLES:
- case PIPE_PRIM_LINE_STRIP:
- case PIPE_PRIM_TRIANGLE_STRIP:
- case PIPE_PRIM_QUADS:
- case PIPE_PRIM_QUAD_STRIP:
- case PIPE_PRIM_LINES_ADJACENCY:
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- for (j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr );
- varray_flush_linear(varray, start + j, nr);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- /* Always have to decompose as we've stated that this will be
- * emitted as a line-strip.
- */
- for (j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
- varray_line_loop_segment(varray, start, j, nr, nr == remaining);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- break;
-
-
- case PIPE_PRIM_POLYGON:
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count < varray->driver_fetch_max) {
- varray_flush_linear(varray, start, count);
- }
- else {
- for ( j = 0; j < count;) {
- unsigned remaining = count - j;
- unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr );
- varray_fan_segment(varray, start, j, nr);
- j += nr;
- if (nr != remaining)
- j -= (first - incr);
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-}
-
-#undef TRIANGLE
-#undef QUAD
-#undef POINT
-#undef LINE
-#undef FUNC
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
deleted file mode 100644
index a848b54f7d..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ /dev/null
@@ -1,610 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "util/u_memory.h"
-#include "util/u_prim.h"
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-#include "draw/draw_pt.h"
-
-
-#define CACHE_MAX 256
-#define FETCH_MAX 256
-#define DRAW_MAX (16*1024)
-
-
-struct vcache_frontend {
- struct draw_pt_front_end base;
- struct draw_context *draw;
-
- unsigned in[CACHE_MAX];
- ushort out[CACHE_MAX];
-
- ushort draw_elts[DRAW_MAX];
- unsigned fetch_elts[FETCH_MAX];
-
- unsigned draw_count;
- unsigned fetch_count;
- unsigned fetch_max;
-
- struct draw_pt_middle_end *middle;
-
- unsigned input_prim;
- unsigned output_prim;
-
- unsigned middle_prim;
- unsigned opt;
-};
-
-
-static INLINE void
-vcache_flush( struct vcache_frontend *vcache )
-{
- if (vcache->middle_prim != vcache->output_prim) {
- vcache->middle_prim = vcache->output_prim;
- vcache->middle->prepare( vcache->middle,
- vcache->middle_prim,
- vcache->opt,
- &vcache->fetch_max );
- }
-
- if (vcache->draw_count) {
- vcache->middle->run( vcache->middle,
- vcache->fetch_elts,
- vcache->fetch_count,
- vcache->draw_elts,
- vcache->draw_count );
- }
-
- memset(vcache->in, ~0, sizeof(vcache->in));
- vcache->fetch_count = 0;
- vcache->draw_count = 0;
-}
-
-
-static INLINE void
-vcache_check_flush( struct vcache_frontend *vcache )
-{
- if (vcache->draw_count + 6 >= DRAW_MAX ||
- vcache->fetch_count + 6 >= FETCH_MAX) {
- vcache_flush( vcache );
- }
-}
-
-
-static INLINE void
-vcache_elt( struct vcache_frontend *vcache,
- unsigned felt,
- ushort flags )
-{
- unsigned idx = felt % CACHE_MAX;
-
- if (vcache->in[idx] != felt) {
- assert(vcache->fetch_count < FETCH_MAX);
-
- vcache->in[idx] = felt;
- vcache->out[idx] = (ushort)vcache->fetch_count;
- vcache->fetch_elts[vcache->fetch_count++] = felt;
- }
-
- vcache->draw_elts[vcache->draw_count++] = vcache->out[idx] | flags;
-}
-
-
-
-static INLINE void
-vcache_triangle( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_triangle_flags( struct vcache_frontend *vcache,
- ushort flags,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line( struct vcache_frontend *vcache,
- unsigned i0,
- unsigned i1 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line_flags( struct vcache_frontend *vcache,
- ushort flags,
- unsigned i0,
- unsigned i1 )
-{
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, i1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_point( struct vcache_frontend *vcache,
- unsigned i0 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line_adj_flags( struct vcache_frontend *vcache,
- unsigned flags,
- unsigned a0, unsigned i0, unsigned i1, unsigned a1 )
-{
- vcache_elt(vcache, a0, 0);
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, a1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_line_adj( struct vcache_frontend *vcache,
- unsigned a0, unsigned i0, unsigned i1, unsigned a1 )
-{
- vcache_elt(vcache, a0, 0);
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, a1, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_triangle_adj_flags( struct vcache_frontend *vcache,
- unsigned flags,
- unsigned i0, unsigned a0,
- unsigned i1, unsigned a1,
- unsigned i2, unsigned a2 )
-{
- vcache_elt(vcache, i0, flags);
- vcache_elt(vcache, a0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, a1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_elt(vcache, a2, 0);
- vcache_check_flush(vcache);
-}
-
-
-static INLINE void
-vcache_triangle_adj( struct vcache_frontend *vcache,
- unsigned i0, unsigned a0,
- unsigned i1, unsigned a1,
- unsigned i2, unsigned a2 )
-{
- vcache_elt(vcache, i0, 0);
- vcache_elt(vcache, a0, 0);
- vcache_elt(vcache, i1, 0);
- vcache_elt(vcache, a1, 0);
- vcache_elt(vcache, i2, 0);
- vcache_elt(vcache, a2, 0);
- vcache_check_flush(vcache);
-}
-
-
-/* At least for now, we're back to using a template include file for
- * this. The two paths aren't too different though - it may be
- * possible to reunify them.
- */
-#define TRIANGLE(flags,i0,i1,i2) vcache_triangle_flags(vcache,flags,i0,i1,i2)
-#define LINE(flags,i0,i1) vcache_line_flags(vcache,flags,i0,i1)
-#define POINT(i0) vcache_point(vcache,i0)
-#define LINE_ADJ(flags,a0,i0,i1,a1) \
- vcache_line_adj_flags(vcache,flags,a0,i0,i1,a1)
-#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \
- vcache_triangle_adj_flags(vcache,flags,i0,a0,i1,a1,i2,a2)
-#define FUNC vcache_run_extras
-#include "draw_pt_vcache_tmp.h"
-
-#define TRIANGLE(flags,i0,i1,i2) vcache_triangle(vcache,i0,i1,i2)
-#define LINE(flags,i0,i1) vcache_line(vcache,i0,i1)
-#define POINT(i0) vcache_point(vcache,i0)
-#define LINE_ADJ(flags,a0,i0,i1,a1) \
- vcache_line_adj(vcache,a0,i0,i1,a1)
-#define TRIANGLE_ADJ(flags,i0,a0,i1,a1,i2,a2) \
- vcache_triangle_adj(vcache,i0,a0,i1,a1,i2,a2)
-#define FUNC vcache_run
-#include "draw_pt_vcache_tmp.h"
-
-static INLINE void
-rebase_uint_elts( const unsigned *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-
-static INLINE void
-rebase_ushort_elts( const ushort *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-
-static INLINE void
-rebase_ubyte_elts( const ubyte *src,
- unsigned count,
- int delta,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i] + delta);
-}
-
-
-static INLINE void
-translate_uint_elts( const unsigned *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-
-static INLINE void
-translate_ushort_elts( const ushort *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-
-static INLINE void
-translate_ubyte_elts( const ubyte *src,
- unsigned count,
- ushort *dest )
-{
- unsigned i;
- for (i = 0; i < count; i++)
- dest[i] = (ushort)(src[i]);
-}
-
-
-
-
-#if 0
-static INLINE enum pipe_format
-format_from_get_elt( pt_elt_func get_elt )
-{
- switch (draw->pt.user.eltSize) {
- case 1: return PIPE_FORMAT_R8_UNORM;
- case 2: return PIPE_FORMAT_R16_UNORM;
- case 4: return PIPE_FORMAT_R32_UNORM;
- default: return PIPE_FORMAT_NONE;
- }
-}
-#endif
-
-
-/**
- * Check if any vertex attributes use instance divisors.
- * Note that instance divisors complicate vertex fetching so we need
- * to take the vcache path when they're in use.
- */
-static boolean
-any_instance_divisors(const struct draw_context *draw)
-{
- uint i;
-
- for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
- uint div = draw->pt.vertex_element[i].instance_divisor;
- if (div)
- return TRUE;
- }
- return FALSE;
-}
-
-
-static INLINE void
-vcache_check_run( struct draw_pt_front_end *frontend,
- pt_elt_func get_elt,
- const void *elts,
- int elt_bias,
- unsigned draw_count )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- struct draw_context *draw = vcache->draw;
- const unsigned min_index = draw->pt.user.min_index;
- const unsigned max_index = draw->pt.user.max_index;
- const unsigned index_size = draw->pt.user.eltSize;
- unsigned fetch_count;
- const ushort *transformed_elts;
- ushort *storage = NULL;
- boolean ok = FALSE;
-
- /* debug: verify indexes are in range [min_index, max_index] */
- if (0) {
- unsigned i;
- for (i = 0; i < draw_count; i++) {
- if (index_size == 1) {
- assert( ((const ubyte *) elts)[i] >= min_index);
- assert( ((const ubyte *) elts)[i] <= max_index);
- }
- else if (index_size == 2) {
- assert( ((const ushort *) elts)[i] >= min_index);
- assert( ((const ushort *) elts)[i] <= max_index);
- }
- else {
- assert(index_size == 4);
- assert( ((const uint *) elts)[i] >= min_index);
- assert( ((const uint *) elts)[i] <= max_index);
- }
- }
- }
-
- /* Note: max_index is frequently 0xffffffff so we have to be sure
- * that any arithmetic involving max_index doesn't overflow!
- */
- if (max_index >= (unsigned) DRAW_PIPE_MAX_VERTICES)
- goto fail;
-
- if (any_instance_divisors(draw))
- goto fail;
-
- fetch_count = max_index + 1 - min_index;
-
- if (0)
- debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count,
- vcache->fetch_max,
- draw_count);
-
- if (elt_bias + max_index >= DRAW_PIPE_MAX_VERTICES ||
- fetch_count >= UNDEFINED_VERTEX_ID ||
- fetch_count > draw_count) {
- if (0) debug_printf("fail\n");
- goto fail;
- }
-
- if (vcache->middle_prim != vcache->input_prim) {
- vcache->middle_prim = vcache->input_prim;
- vcache->middle->prepare( vcache->middle,
- vcache->middle_prim,
- vcache->opt,
- &vcache->fetch_max );
- }
-
- assert((elt_bias >= 0 && min_index + elt_bias >= min_index) ||
- (elt_bias < 0 && min_index + elt_bias < min_index));
-
- if (min_index == 0 &&
- index_size == 2) {
- transformed_elts = (const ushort *)elts;
- }
- else {
- storage = MALLOC( draw_count * sizeof(ushort) );
- if (!storage)
- goto fail;
-
- if (min_index == 0) {
- switch(index_size) {
- case 1:
- translate_ubyte_elts( (const ubyte *)elts,
- draw_count,
- storage );
- break;
-
- case 2:
- translate_ushort_elts( (const ushort *)elts,
- draw_count,
- storage );
- break;
-
- case 4:
- translate_uint_elts( (const uint *)elts,
- draw_count,
- storage );
- break;
-
- default:
- assert(0);
- FREE(storage);
- return;
- }
- }
- else {
- switch(index_size) {
- case 1:
- rebase_ubyte_elts( (const ubyte *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- case 2:
- rebase_ushort_elts( (const ushort *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- case 4:
- rebase_uint_elts( (const uint *)elts,
- draw_count,
- 0 - (int)min_index,
- storage );
- break;
-
- default:
- assert(0);
- FREE(storage);
- return;
- }
- }
- transformed_elts = storage;
- }
-
- if (fetch_count < UNDEFINED_VERTEX_ID)
- ok = vcache->middle->run_linear_elts( vcache->middle,
- min_index + elt_bias, /* start */
- fetch_count,
- transformed_elts,
- draw_count );
-
- FREE(storage);
-
- if (ok)
- return;
-
- debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n",
- fetch_count, draw_count);
-
-fail:
- vcache_run( frontend, get_elt, elts, elt_bias, draw_count );
-}
-
-
-
-
-static void
-vcache_prepare( struct draw_pt_front_end *frontend,
- unsigned in_prim,
- struct draw_pt_middle_end *middle,
- unsigned opt )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
-
- if (opt & PT_PIPELINE) {
- vcache->base.run = vcache_run_extras;
- }
- else {
- vcache->base.run = vcache_check_run;
- }
-
- /* VCache will always emit the reduced version of its input
- * primitive, ie STRIP/FANS become TRIS, etc.
- *
- * This is not to be confused with what the GS might be up to,
- * which is a separate issue.
- */
- vcache->input_prim = in_prim;
- switch (in_prim) {
- case PIPE_PRIM_LINES_ADJACENCY:
- case PIPE_PRIM_LINE_STRIP_ADJACENCY:
- vcache->output_prim = PIPE_PRIM_LINES_ADJACENCY;
- break;
- case PIPE_PRIM_TRIANGLES_ADJACENCY:
- case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
- vcache->output_prim = PIPE_PRIM_TRIANGLES_ADJACENCY;
- break;
- default:
- vcache->output_prim = u_reduced_prim(in_prim);
- }
-
- vcache->middle = middle;
- vcache->opt = opt;
-
- /* Have to run prepare here, but try and guess a good prim for
- * doing so:
- */
- vcache->middle_prim = (opt & PT_PIPELINE)
- ? vcache->output_prim : vcache->input_prim;
-
- middle->prepare( middle,
- vcache->middle_prim,
- opt, &vcache->fetch_max );
-}
-
-
-static void
-vcache_finish( struct draw_pt_front_end *frontend )
-{
- struct vcache_frontend *vcache = (struct vcache_frontend *)frontend;
- vcache->middle->finish( vcache->middle );
- vcache->middle = NULL;
-}
-
-
-static void
-vcache_destroy( struct draw_pt_front_end *frontend )
-{
- FREE(frontend);
-}
-
-
-struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw )
-{
- struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend );
- if (vcache == NULL)
- return NULL;
-
- vcache->base.prepare = vcache_prepare;
- vcache->base.run = NULL;
- vcache->base.finish = vcache_finish;
- vcache->base.destroy = vcache_destroy;
- vcache->draw = draw;
-
- memset(vcache->in, ~0, sizeof(vcache->in));
-
- return &vcache->base;
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
deleted file mode 100644
index 1a3748d5f0..0000000000
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#define FUNC_VARS \
- struct draw_pt_front_end *frontend, \
- pt_elt_func get_elt, \
- const void *elts, \
- int elt_bias, \
- unsigned count
-
-#define LOCAL_VARS \
- struct vcache_frontend *vcache = (struct vcache_frontend *) frontend; \
- struct draw_context *draw = vcache->draw; \
- const unsigned prim = vcache->input_prim; \
- const boolean last_vertex_last = !(draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first);
-
-#define GET_ELT(idx) (get_elt(elts, idx) + elt_bias)
-
-#define FUNC_EXIT do { vcache_flush(vcache); } while (0)
-
-#include "draw_decompose_tmp.h"
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
new file mode 100644
index 0000000000..a687525309
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
@@ -0,0 +1,208 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_pt.h"
+
+#define SEGMENT_SIZE 1024 /* capacity of fetch_elts/draw_elts/identity_draw_elts */
+#define MAP_SIZE 256 /* number of slots in the fetch -> draw cache */
+
+struct vsplit_frontend {
+ struct draw_pt_front_end base;
+ struct draw_context *draw;
+
+ unsigned prim; /* primitive type handed to vsplit_prepare() */
+
+ struct draw_pt_middle_end *middle; /* set in prepare, reset to NULL in finish */
+
+ unsigned max_vertices; /* filled in by middle->prepare() */
+ ushort segment_size; /* MIN2(SEGMENT_SIZE, max_vertices) */
+
+ /* buffers for splitting */
+ unsigned fetch_elts[SEGMENT_SIZE];
+ ushort draw_elts[SEGMENT_SIZE];
+ ushort identity_draw_elts[SEGMENT_SIZE]; /* identity_draw_elts[i] == i */
+
+ struct {
+ /* map a fetch element to a draw element; slot is fetch % MAP_SIZE */
+ unsigned fetches[MAP_SIZE];
+ ushort draws[MAP_SIZE];
+ boolean has_max_fetch; /* TRUE once 0xffffffff was really added */
+
+ ushort num_fetch_elts; /* entries used in fetch_elts */
+ ushort num_draw_elts; /* entries used in draw_elts */
+ } cache;
+};
+
+
+static void
+vsplit_clear_cache(struct vsplit_frontend *vsplit)
+{ /* Reset the fetch -> draw cache and both element counters. */
+ memset(vsplit->cache.fetches, 0xff, sizeof(vsplit->cache.fetches)); /* every slot reads back as 0xffffffff */
+ vsplit->cache.has_max_fetch = FALSE; /* so a real 0xffffffff is not mistaken for a hit */
+ vsplit->cache.num_fetch_elts = 0;
+ vsplit->cache.num_draw_elts = 0;
+}
+
+static void
+vsplit_flush_cache(struct vsplit_frontend *vsplit, unsigned flags)
+{ /* Hand the accumulated fetch and draw elements to the middle end. */
+ vsplit->middle->run(vsplit->middle,
+ vsplit->fetch_elts, vsplit->cache.num_fetch_elts,
+ vsplit->draw_elts, vsplit->cache.num_draw_elts, flags); /* flags are passed through unchanged */
+}
+
+/**
+ * Add a fetch element and add it to the draw elements.
+ *
+ * The cache slot is fetch % MAP_SIZE.  On a miss the slot is overwritten,
+ * fetch is appended to fetch_elts, and its position there becomes the draw
+ * element.  On a hit the previously recorded position is reused.  Either
+ * way exactly one entry is appended to draw_elts.
+ */
+static INLINE void
+vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch)
+{
+ unsigned hash = fetch % MAP_SIZE;
+
+ if (vsplit->cache.fetches[hash] != fetch) {
+ /* update cache */
+ vsplit->cache.fetches[hash] = fetch;
+ vsplit->cache.draws[hash] = vsplit->cache.num_fetch_elts;
+
+ /* add fetch */
+ assert(vsplit->cache.num_fetch_elts < vsplit->segment_size);
+ vsplit->fetch_elts[vsplit->cache.num_fetch_elts++] = fetch;
+ }
+
+ vsplit->draw_elts[vsplit->cache.num_draw_elts++] = vsplit->cache.draws[hash]; /* NOTE(review): no bound assert here; relies on the caller's segment-size check */
+}
+
+
+/**
+ * Add a fetch element and add it to the draw elements. The fetch element is
+ * in full range (uint).
+ *
+ * vsplit_clear_cache() fills the slots with 0xff bytes, so a cleared slot
+ * already compares equal to 0xffffffff.  The first time that value is seen
+ * after a clear, its slot is overwritten with a different value so that
+ * vsplit_add_cache() takes the miss path and really records the fetch.
+ */
+static INLINE void
+vsplit_add_cache_uint(struct vsplit_frontend *vsplit, unsigned fetch)
+{
+ /* special care for 0xffffffff */
+ if (fetch == 0xffffffff && !vsplit->cache.has_max_fetch) {
+ unsigned hash = fetch % MAP_SIZE;
+ vsplit->cache.fetches[hash] = fetch - 1; /* force update */
+ vsplit->cache.has_max_fetch = TRUE;
+ }
+
+ vsplit_add_cache(vsplit, fetch);
+}
+
+
+#define FUNC vsplit_run_linear /* ELT_TYPE left undefined: the template takes its non-indexed branch */
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_ubyte /* selected by vsplit_prepare() for eltSize 1 */
+#define ELT_TYPE ubyte
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_ushort /* selected by vsplit_prepare() for eltSize 2 */
+#define ELT_TYPE ushort
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
+#include "draw_pt_vsplit_tmp.h"
+
+#define FUNC vsplit_run_uint /* selected by vsplit_prepare() for eltSize 4 */
+#define ELT_TYPE uint
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache_uint(vsplit, fetch) /* uint indices can be 0xffffffff, hence the special variant */
+#include "draw_pt_vsplit_tmp.h"
+
+
+static void vsplit_prepare(struct draw_pt_front_end *frontend,
+ unsigned in_prim,
+ struct draw_pt_middle_end *middle,
+ unsigned opt)
+{ /* Pick the run function for the current index size, then prepare the middle end. */
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
+
+ switch (vsplit->draw->pt.user.eltSize) {
+ case 0: /* eltSize 0 selects the non-indexed (linear) path */
+ vsplit->base.run = vsplit_run_linear;
+ break;
+ case 1:
+ vsplit->base.run = vsplit_run_ubyte;
+ break;
+ case 2:
+ vsplit->base.run = vsplit_run_ushort;
+ break;
+ case 4:
+ vsplit->base.run = vsplit_run_uint;
+ break;
+ default:
+ assert(0); /* NOTE(review): with asserts disabled, base.run keeps whatever an earlier prepare set */
+ break;
+ }
+
+ /* split only */
+ vsplit->prim = in_prim;
+
+ vsplit->middle = middle;
+ middle->prepare(middle, vsplit->prim, opt, &vsplit->max_vertices); /* max_vertices is an out parameter */
+
+ vsplit->segment_size = MIN2(SEGMENT_SIZE, vsplit->max_vertices); /* never larger than the local buffers */
+}
+
+
+static void vsplit_finish(struct draw_pt_front_end *frontend)
+{ /* Finish the middle end and drop the reference to it. */
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend;
+ vsplit->middle->finish(vsplit->middle);
+ vsplit->middle = NULL; /* a new vsplit_prepare() is needed before the next run */
+}
+
+
+static void vsplit_destroy(struct draw_pt_front_end *frontend)
+{ /* Release the front end allocated by draw_pt_vsplit(). */
+ FREE(frontend); /* base is the first member of vsplit_frontend, so this is the CALLOC_STRUCT pointer */
+}
+
+
+struct draw_pt_front_end *draw_pt_vsplit(struct draw_context *draw)
+{ /* Allocate a vsplit front end for draw; returns NULL when the allocation fails. */
+ struct vsplit_frontend *vsplit = CALLOC_STRUCT(vsplit_frontend);
+ ushort i;
+
+ if (!vsplit)
+ return NULL;
+
+ vsplit->base.prepare = vsplit_prepare;
+ vsplit->base.run = NULL; /* chosen per draw in vsplit_prepare() */
+ vsplit->base.finish = vsplit_finish;
+ vsplit->base.destroy = vsplit_destroy;
+ vsplit->draw = draw;
+
+ for (i = 0; i < SEGMENT_SIZE; i++)
+ vsplit->identity_draw_elts[i] = i; /* used by the linear loop/fan segments as draw elements */
+
+ return &vsplit->base;
+}
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
new file mode 100644
index 0000000000..3f66f962e1
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -0,0 +1,309 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#define CONCAT2(name, elt_type) name ## elt_type
+#define CONCAT(name, elt_type) CONCAT2(name, elt_type) /* extra level so ELT_TYPE is expanded before pasting */
+
+#ifdef ELT_TYPE /* indexed variants; the #else branch holds the linear ones */
+
+/**
+ * Try to flush the whole primitive with a single run_linear_elts() call:
+ * fetch all elements in [min_index, max_index] with bias, and use the
+ * (rebased) index buffer as the draw elements.
+ *
+ * \param vsplit  the front end; its draw context supplies the index buffer,
+ *                min_index/max_index and the element bias
+ * \param istart  position in the index buffer of the primitive's first index
+ * \param icount  number of indices in the primitive
+ * \return FALSE when this path does not apply (the caller then splits the
+ *         primitive), otherwise the result of run_linear_elts()
+ */
+static boolean
+CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+                                    unsigned istart, unsigned icount)
+{
+   struct draw_context *draw = vsplit->draw;
+   const ELT_TYPE *ib = (const ELT_TYPE *)
+      ((const char *) draw->pt.user.elts + draw->pt.index_buffer.offset);
+   const unsigned min_index = draw->pt.user.min_index;
+   const unsigned max_index = draw->pt.user.max_index;
+   const int elt_bias = draw->pt.user.eltBias;
+   unsigned fetch_start, fetch_count;
+   const ushort *draw_elts = NULL;
+   unsigned i;
+
+   /* use the ib directly */
+   if (min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) {
+      if (icount > vsplit->max_vertices)
+         return FALSE;
+
+      for (i = 0; i < icount; i++) {
+         ELT_TYPE idx = ib[istart + i];
+         assert(idx >= min_index && idx <= max_index);
+      }
+      /* The primitive begins at istart, not at the top of the index
+       * buffer; the draw elements must start there too.
+       */
+      draw_elts = (const ushort *) (ib + istart);
+   }
+   else {
+      /* have to go through vsplit->draw_elts */
+      if (icount > vsplit->segment_size)
+         return FALSE;
+   }
+
+   /* this is faster only when we fetch less elements than the normal path */
+   if (max_index - min_index > icount - 1)
+      return FALSE;
+
+   /* min_index + elt_bias would be negative */
+   if (elt_bias < 0 && min_index < -elt_bias)
+      return FALSE;
+
+   /* The removed vcache front end carried the same restriction and noted
+    * that instance divisors complicate vertex fetching, so draws using
+    * them do not take this single-run path.
+    */
+   for (i = 0; i < draw->pt.nr_vertex_elements; i++) {
+      if (draw->pt.vertex_element[i].instance_divisor)
+         return FALSE;
+   }
+
+   fetch_start = min_index + elt_bias;
+   fetch_count = max_index - min_index + 1;
+
+   if (!draw_elts) {
+      if (min_index == 0) {
+         /* only the element width differs: copy into ushort storage */
+         for (i = 0; i < icount; i++) {
+            ELT_TYPE idx = ib[istart + i];
+
+            assert(idx >= min_index && idx <= max_index);
+            vsplit->draw_elts[i] = (ushort) idx;
+         }
+      }
+      else {
+         /* rebase so that min_index maps to draw element 0 */
+         for (i = 0; i < icount; i++) {
+            ELT_TYPE idx = ib[istart + i];
+
+            assert(idx >= min_index && idx <= max_index);
+            vsplit->draw_elts[i] = (ushort) (idx - min_index);
+         }
+      }
+
+      draw_elts = vsplit->draw_elts;
+   }
+
+   return vsplit->middle->run_linear_elts(vsplit->middle,
+                                          fetch_start, fetch_count,
+                                          draw_elts, icount, 0x0);
+}
+
+/**
+ * Use the cache to prepare the fetch and draw elements, and flush.
+ *
+ * When spoken is TRUE, ispoken replaces istart; When close is TRUE, iclose is
+ * appended.
+ *
+ * The element bias from the draw context is added to every index.  The three
+ * branches below differ only in how that addition is made safe: none for a
+ * zero bias, a widening cast for a positive bias, and an explicit range
+ * check for a negative bias.  In the negative case the function returns
+ * without flushing as soon as one index is smaller than -bias.
+ */
+static INLINE void
+CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart, unsigned icount,
+ boolean spoken, unsigned ispoken,
+ boolean close, unsigned iclose)
+{
+ struct draw_context *draw = vsplit->draw;
+ const ELT_TYPE *ib = (const ELT_TYPE *)
+ ((const char *) draw->pt.user.elts + draw->pt.index_buffer.offset);
+ const int ibias = draw->pt.user.eltBias;
+ unsigned i;
+
+ assert(icount + !!close <= vsplit->segment_size); /* the closing element takes one extra slot */
+
+ vsplit_clear_cache(vsplit);
+
+ spoken = !!spoken; /* normalise to 0/1: it is used as the loop start index below */
+ if (ibias == 0) {
+ if (spoken)
+ ADD_CACHE(vsplit, ib[ispoken]);
+
+ for (i = spoken; i < icount; i++)
+ ADD_CACHE(vsplit, ib[istart + i]);
+
+ if (close)
+ ADD_CACHE(vsplit, ib[iclose]);
+ }
+ else if (ibias > 0) {
+ if (spoken)
+ ADD_CACHE(vsplit, (uint) ib[ispoken] + ibias);
+
+ for (i = spoken; i < icount; i++)
+ ADD_CACHE(vsplit, (uint) ib[istart + i] + ibias);
+
+ if (close)
+ ADD_CACHE(vsplit, (uint) ib[iclose] + ibias);
+ }
+ else {
+ if (spoken) {
+ if (ib[ispoken] < -ibias)
+ return; /* NOTE(review): the whole segment is dropped, nothing is flushed */
+ ADD_CACHE(vsplit, ib[ispoken] + ibias);
+ }
+
+ for (i = spoken; i < icount; i++) {
+ if (ib[istart + i] < -ibias)
+ return;
+ ADD_CACHE(vsplit, ib[istart + i] + ibias);
+ }
+
+ if (close) {
+ if (ib[iclose] < -ibias)
+ return;
+ ADD_CACHE(vsplit, ib[iclose] + ibias);
+ }
+ }
+
+ vsplit_flush_cache(vsplit, flags);
+}
+
+static void
+CONCAT(vsplit_segment_simple_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount)
+{ /* Plain segment: no replaced first element and no appended closing element. */
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, FALSE, 0, FALSE, 0);
+}
+
+static void
+CONCAT(vsplit_segment_loop_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount,
+ unsigned i0)
+{ /* Line-loop segment: the element at i0 is appended when the loop has to be closed here. */
+ const boolean close_loop = ((flags) == DRAW_SPLIT_BEFORE); /* exact match, not a bit test; presumably the last segment of a split loop - confirm in draw_split_tmp.h */
+
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, FALSE, 0, close_loop, i0);
+}
+
+static void
+CONCAT(vsplit_segment_fan_, ELT_TYPE)(struct vsplit_frontend *vsplit,
+ unsigned flags,
+ unsigned istart,
+ unsigned icount,
+ unsigned i0)
+{ /* Fan segment: when split before, the element at i0 stands in for the segment's first element. */
+ const boolean use_spoken = (((flags) & DRAW_SPLIT_BEFORE) != 0); /* bit test, unlike the loop variant */
+
+ CONCAT(vsplit_segment_cache_, ELT_TYPE)(vsplit,
+ flags, istart, icount, use_spoken, i0, FALSE, 0);
+}
+
+#define LOCAL_VARS \
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend; \
+ const unsigned prim = vsplit->prim; \
+ const unsigned max_count_simple = vsplit->segment_size; \
+ const unsigned max_count_loop = vsplit->segment_size - 1; /* one slot kept for the closing element */ \
+ const unsigned max_count_fan = vsplit->segment_size; /* indexed draws always go through the SEGMENT_SIZE-bounded buffers */
+
+#define PRIMITIVE(istart, icount) \
+ CONCAT(vsplit_primitive_, ELT_TYPE)(vsplit, istart, icount) /* whole-primitive fast path; FALSE means split */
+
+#else /* ELT_TYPE */
+
+static void
+vsplit_segment_simple_linear(struct vsplit_frontend *vsplit, unsigned flags,
+ unsigned istart, unsigned icount)
+{ /* Contiguous vertices: no element buffers are needed, only a start and a count. */
+ assert(icount <= vsplit->max_vertices); /* bounded by the middle end's limit, not by SEGMENT_SIZE */
+ vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+}
+
+static void
+vsplit_segment_loop_linear(struct vsplit_frontend *vsplit, unsigned flags,
+ unsigned istart, unsigned icount, unsigned i0)
+{ /* Line-loop segment of a non-indexed draw; appends vertex i0 when the loop is closed here. */
+ boolean close_loop = (flags == DRAW_SPLIT_BEFORE); /* exact match, not a bit test */
+ unsigned nr;
+
+ assert(icount + !!close_loop <= vsplit->segment_size); /* the closing vertex takes one extra slot */
+
+ if (close_loop) {
+ for (nr = 0; nr < icount; nr++)
+ vsplit->fetch_elts[nr] = istart + nr;
+ vsplit->fetch_elts[nr++] = i0; /* close the loop; the vertices are no longer contiguous */
+
+ vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, nr,
+ vsplit->identity_draw_elts, nr, flags); /* draw element k is simply fetch element k */
+ }
+ else {
+ vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+ }
+}
+
+/**
+ * Flush one segment of a non-indexed triangle fan or polygon.
+ *
+ * When DRAW_SPLIT_BEFORE is set in flags, vertex i0 takes the place of the
+ * segment's first vertex, so the vertices are no longer contiguous and are
+ * sent through fetch_elts with identity draw elements.  Otherwise the
+ * segment is contiguous and goes straight to run_linear().
+ *
+ * \param vsplit  the front end
+ * \param flags   DRAW_SPLIT_* flags, passed on to the middle end
+ * \param istart  first vertex of the segment
+ * \param icount  number of vertices in the segment
+ * \param i0      vertex used as the first vertex when split before
+ */
+static void
+vsplit_segment_fan_linear(struct vsplit_frontend *vsplit, unsigned flags,
+                          unsigned istart, unsigned icount, unsigned i0)
+{
+   boolean use_spoken = ((flags & DRAW_SPLIT_BEFORE) != 0);
+   unsigned nr = 0, i;
+
+   /* i0 replaces the first vertex instead of being added to the segment,
+    * so exactly icount elements are written and a segment of the full
+    * max_count_fan (== segment_size) size is legal.
+    */
+   assert(icount <= vsplit->segment_size);
+
+   if (use_spoken) {
+      vsplit->fetch_elts[nr++] = i0;
+      for (i = 1 ; i < icount; i++)
+         vsplit->fetch_elts[nr++] = istart + i;
+
+      vsplit->middle->run(vsplit->middle, vsplit->fetch_elts, nr,
+                          vsplit->identity_draw_elts, nr, flags);
+   }
+   else {
+      vsplit->middle->run_linear(vsplit->middle, istart, icount, flags);
+   }
+}
+
+#define LOCAL_VARS \
+ struct vsplit_frontend *vsplit = (struct vsplit_frontend *) frontend; \
+ const unsigned prim = vsplit->prim; \
+ const unsigned max_count_simple = vsplit->max_vertices; /* run_linear() needs no local buffer */ \
+ const unsigned max_count_loop = vsplit->segment_size - 1; /* one slot kept for the closing vertex */ \
+ const unsigned max_count_fan = vsplit->segment_size; /* fan segments may go through fetch_elts */
+
+#define PRIMITIVE(istart, icount) FALSE /* no whole-primitive fast path for non-indexed draws */
+
+#define ELT_TYPE linear /* only a name suffix here: yields vsplit_segment_*_linear */
+
+#endif /* ELT_TYPE */
+
+#define FUNC_VARS \
+ struct draw_pt_front_end *frontend, \
+ unsigned start, \
+ unsigned count /* parameter list of the generated FUNC */
+
+#define SEGMENT_SIMPLE(flags, istart, icount) \
+ CONCAT(vsplit_segment_simple_, ELT_TYPE)(vsplit, flags, istart, icount)
+
+#define SEGMENT_LOOP(flags, istart, icount, i0) \
+ CONCAT(vsplit_segment_loop_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
+
+#define SEGMENT_FAN(flags, istart, icount, i0) \
+ CONCAT(vsplit_segment_fan_, ELT_TYPE)(vsplit, flags, istart, icount, i0)
+
+#include "draw_split_tmp.h" /* defines FUNC(FUNC_VARS) in terms of the macros above */
+
+#undef CONCAT2
+#undef CONCAT
+
+#undef ELT_TYPE /* reset so the next inclusion of this template starts clean */
+#undef ADD_CACHE
diff --git a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
index 6d8937a0b4..7fafde9d5e 100644
--- a/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_so_emit_tmp.h
@@ -7,11 +7,9 @@
#define FUNC_ENTER \
/* declare more local vars */ \
- struct draw_context *draw = so->draw; \
const unsigned prim = input_prims->prim; \
- const boolean last_vertex_last = \
- !(draw->rasterizer->flatshade && \
- draw->rasterizer->flatshade_first); \
+ const unsigned prim_flags = input_prims->flags; \
+ const boolean last_vertex_last = TRUE; \
do { \
debug_assert(input_prims->primitive_count == 1); \
switch (prim) { \
diff --git a/src/gallium/auxiliary/draw/draw_split_tmp.h b/src/gallium/auxiliary/draw/draw_split_tmp.h
new file mode 100644
index 0000000000..47defc62b9
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_split_tmp.h
@@ -0,0 +1,176 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (C) 2010 LunarG Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+static void
+FUNC(FUNC_VARS)
+{
+ unsigned first, incr;
+ LOCAL_VARS
+
+ /*
+ * prim, start, count, and max_count_{simple,loop,fan} should have been
+ * defined
+ */
+ if (0) {
+ debug_printf("%s: prim 0x%x, start %d, count %d, max_count_simple %d, "
+ "max_count_loop %d, max_count_fan %d\n",
+ __FUNCTION__, prim, start, count, max_count_simple,
+ max_count_loop, max_count_fan);
+ }
+
+ draw_pt_split_prim(prim, &first, &incr);
+ /* sanitize primitive length */
+ count = draw_pt_trim_count(count, first, incr);
+ if (count < first)
+ return;
+
+ /* try flushing the entire primitive */
+ if (PRIMITIVE(start, count))
+ return;
+
+ /* must be able to at least flush two complete primitives */
+ assert(max_count_simple >= first + incr &&
+ max_count_loop >= first + incr &&
+ max_count_fan >= first + incr);
+
+ /* no splitting required */
+ if (count <= max_count_simple) {
+ SEGMENT_SIMPLE(0x0, start, count);
+ }
+ else {
+ const unsigned rollback = first - incr;
+ unsigned flags = DRAW_SPLIT_AFTER, seg_start = 0, seg_max;
+
+ /*
+ * Both count and seg_max below are explicitly trimmed. Because
+ *
+ * seg_start = N * (seg_max - rollback) = N' * incr,
+ *
+ * we have
+ *
+ * remaining = count - seg_start = first + N'' * incr.
+ *
+ * That is, remaining is implicitly trimmed.
+ */
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ case PIPE_PRIM_LINES:
+ case PIPE_PRIM_LINE_STRIP:
+ case PIPE_PRIM_TRIANGLES:
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ case PIPE_PRIM_QUADS:
+ case PIPE_PRIM_QUAD_STRIP:
+ case PIPE_PRIM_LINES_ADJACENCY:
+ case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+ case PIPE_PRIM_TRIANGLES_ADJACENCY:
+ case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_simple, count), first, incr);
+ if (prim == PIPE_PRIM_TRIANGLE_STRIP ||
+ prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY) {
+ /* make sure we flush an even number of triangles at a time */
+ if (seg_max < count && !(((seg_max - first) / incr) & 1))
+ seg_max -= incr;
+ }
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_SIMPLE(flags, start + seg_start, seg_max);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_SIMPLE(flags, start + seg_start, remaining);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_loop, count), first, incr);
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_LOOP(flags, start + seg_start, seg_max, start);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_LOOP(flags, start + seg_start, remaining, start);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ case PIPE_PRIM_POLYGON:
+ seg_max =
+ draw_pt_trim_count(MIN2(max_count_fan, count), first, incr);
+
+ do {
+ const unsigned remaining = count - seg_start;
+
+ if (remaining > seg_max) {
+ SEGMENT_FAN(flags, start + seg_start, seg_max, start);
+ seg_start += seg_max - rollback;
+
+ flags |= DRAW_SPLIT_BEFORE;
+ }
+ else {
+ flags &= ~DRAW_SPLIT_AFTER;
+
+ SEGMENT_FAN(flags, start + seg_start, remaining, start);
+ seg_start += remaining;
+ }
+ } while (seg_start < count);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+ }
+}
+
+#undef FUNC
+#undef FUNC_VARS
+#undef LOCAL_VARS
+
+#undef PRIMITIVE
+#undef SEGMENT_SIMPLE
+#undef SEGMENT_LOOP
+#undef SEGMENT_FAN
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index d13ad24fff..fa9992db78 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -28,6 +28,7 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
+#include "pipe/p_screen.h"
#include "draw_private.h"
#include "draw_context.h"
@@ -109,6 +110,11 @@ draw_create_vs_llvm(struct draw_context *draw,
tgsi_scan_shader(state->tokens, &vs->base.info);
+ vs->variant_key_size =
+ draw_llvm_variant_key_size(
+ vs->base.info.file_max[TGSI_FILE_INPUT]+1,
+ vs->base.info.file_max[TGSI_FILE_SAMPLER]+1);
+
vs->base.draw = draw;
vs->base.prepare = vs_llvm_prepare;
vs->base.run_linear = vs_llvm_run_linear;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 7b35dd4bb4..e0d30be98d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -59,14 +59,6 @@
#include "lp_bld_arit.h"
-/*
- * XXX: Increasing eliminates some artifacts, but adds others, most
- * noticeably corruption in the Earth halo in Google Earth.
- */
-#define RCP_NEWTON_STEPS 0
-
-#define RSQRT_NEWTON_STEPS 0
-
#define EXP_POLY_DEGREE 3
#define LOG_POLY_DEGREE 5
@@ -267,7 +259,7 @@ lp_build_add(struct lp_build_context *bld,
}
-/** Return the sum of the elements of a */
+/** Return the scalar sum of the elements of a */
LLVMValueRef
lp_build_sum_vector(struct lp_build_context *bld,
LLVMValueRef a)
@@ -278,11 +270,9 @@ lp_build_sum_vector(struct lp_build_context *bld,
assert(lp_check_value(type, a));
- if (a == bld->zero)
- return bld->zero;
- if (a == bld->undef)
- return bld->undef;
- assert(type.length > 1);
+ if (type.length == 1) {
+ return a;
+ }
assert(!bld->type.norm);
@@ -546,7 +536,7 @@ lp_build_mul_imm(struct lp_build_context *bld,
if(b == 2 && bld->type.floating)
return lp_build_add(bld, a, a);
- if(util_is_pot(b)) {
+ if(util_is_power_of_two(b)) {
unsigned shift = ffs(b) - 1;
if(bld->type.floating) {
@@ -1266,6 +1256,11 @@ lp_build_sqrt(struct lp_build_context *bld,
*
* x_{i+1} = x_i * (2 - a * x_i)
*
+ * XXX: Unfortunately this won't give IEEE-754 conformant results for 0 or
+ * +/-Inf, giving NaN instead. Certain applications rely on conformant results,
+ * such as Google Earth, which does RCP(RSQRT(0.0)) when drawing the Earth's
+ * halo. It would be necessary to clamp the argument to prevent this.
+ *
* See also:
* - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division
* - http://softwarecommunity.intel.com/articles/eng/1818.htm
@@ -1306,13 +1301,27 @@ lp_build_rcp(struct lp_build_context *bld,
if(LLVMIsConstant(a))
return LLVMConstFDiv(bld->one, a);
- if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ /*
+ * We don't use RCPPS because:
+ * - it only has 10 bits of precision
+ * - it doesn't even get the reciprocal of 1.0 exactly
+ * - doing Newton-Raphson steps yields wrong (NaN) values for 0.0 or Inf
+ * - for recent processors the benefit over DIVPS is marginal, and case
+ * dependent
+ *
+ * We could still use it on certain processors if benchmarks show that the
+ * RCPPS plus necessary workarounds are still preferable to DIVPS; or for
+ * particular uses that require fewer workarounds.
+ */
+
+ if (FALSE && util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ const unsigned num_iterations = 0;
LLVMValueRef res;
unsigned i;
res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", bld->vec_type, a);
- for (i = 0; i < RCP_NEWTON_STEPS; ++i) {
+ for (i = 0; i < num_iterations; ++i) {
res = lp_build_rcp_refine(bld, a, res);
}
@@ -1363,13 +1372,14 @@ lp_build_rsqrt(struct lp_build_context *bld,
assert(type.floating);
- if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ if (util_cpu_caps.has_sse && type.width == 32 && type.length == 4) {
+ const unsigned num_iterations = 0;
LLVMValueRef res;
unsigned i;
res = lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", bld->vec_type, a);
- for (i = 0; i < RSQRT_NEWTON_STEPS; ++i) {
+ for (i = 0; i < num_iterations; ++i) {
res = lp_build_rsqrt_refine(bld, a, res);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.c b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
index 39dfc51e50..d3a5afff8c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.c
@@ -46,7 +46,7 @@
boolean
lp_check_alignment(const void *ptr, unsigned alignment)
{
- assert(util_is_pot(alignment));
+ assert(util_is_power_of_two(alignment));
return ((uintptr_t)ptr & (alignment - 1)) == 0;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index 247cb83ce6..92123e09d3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -388,7 +388,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
if (format_matches_type(format_desc, type) &&
format_desc->block.bits <= type.width * 4 &&
- util_is_pot(format_desc->block.bits)) {
+ util_is_power_of_two(format_desc->block.bits)) {
LLVMValueRef packed;
/*
@@ -416,7 +416,7 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
format_desc->block.width == 1 &&
format_desc->block.height == 1 &&
- util_is_pot(format_desc->block.bits) &&
+ util_is_power_of_two(format_desc->block.bits) &&
format_desc->block.bits <= 32 &&
format_desc->is_bitmask &&
!format_desc->is_mixed &&
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 6d5410d970..48baf7c425 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -40,6 +40,7 @@
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/JITEventListener.h>
#include <llvm/Support/CommandLine.h>
+#include <llvm/Support/PrettyStackTrace.h>
#include "pipe/p_config.h"
#include "util/u_debug.h"
@@ -143,7 +144,6 @@ lp_set_target_options(void)
llvm::UnsafeFPMath = true;
#endif
-#if 0
/*
* LLVM will generate MMX instructions for vectors <= 64 bits, leading to
* innefficient code, and in 32bit systems, to the corruption of the FPU
@@ -152,10 +152,8 @@ lp_set_target_options(void)
* See also:
* - http://llvm.org/bugs/show_bug.cgi?id=3287
* - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/
- *
- * XXX: Unfortunately this is not working.
*/
- static boolean first = FALSE;
+ static boolean first = TRUE;
if (first) {
static const char* options[] = {
"prog",
@@ -164,7 +162,13 @@ lp_set_target_options(void)
llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options));
first = FALSE;
}
-#endif
+
+ /*
+ * By default LLVM adds a signal handler to output a pretty stack trace.
+ * This signal handler is never removed, causing problems when unloading the
+ * shared object where the gallium driver resides.
+ */
+ llvm::DisablePrettyStackTrace = true;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
index e470082b97..e947b90d16 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -37,6 +37,8 @@
#define LP_BLD_PACK_H
+#include "pipe/p_compiler.h"
+
#include "gallivm/lp_bld.h"
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index 0fd014ab9b..259b1142e3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -82,9 +82,9 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
state->swizzle_a = view->swizzle_a;
state->target = texture->target;
- state->pot_width = util_is_pot(texture->width0);
- state->pot_height = util_is_pot(texture->height0);
- state->pot_depth = util_is_pot(texture->depth0);
+ state->pot_width = util_is_power_of_two(texture->width0);
+ state->pot_height = util_is_power_of_two(texture->height0);
+ state->pot_depth = util_is_power_of_two(texture->depth0);
state->wrap_s = sampler->wrap_s;
state->wrap_t = sampler->wrap_t;
@@ -124,6 +124,52 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
/**
+ * Compute the partial offset of a pixel block along an arbitrary axis.
+ *
+ * @param coord coordinate in pixels
+ * @param stride number of bytes between rows of successive pixel blocks
+ * @param block_length number of pixels in a pixel block along the coordinate
+ * axis
+ * @param out_offset resulting relative offset of the pixel block in bytes
+ * @param out_subcoord resulting sub-block pixel coordinate
+ */
+void
+lp_build_sample_partial_offset(struct lp_build_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord,
+ LLVMValueRef stride,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_subcoord)
+{
+ LLVMValueRef offset;
+ LLVMValueRef subcoord;
+
+ if (block_length == 1) {
+ subcoord = bld->zero;
+ }
+ else {
+ /*
+ * Pixel blocks have power of two dimensions. LLVM should convert the
+ * rem/div to bit arithmetic.
+ * TODO: Verify this.
+ */
+
+ LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
+ subcoord = LLVMBuildURem(bld->builder, coord, block_width, "");
+ coord = LLVMBuildUDiv(bld->builder, coord, block_width, "");
+ }
+
+ offset = lp_build_mul(bld, coord, stride);
+
+ assert(out_offset);
+ assert(out_subcoord);
+
+ *out_offset = offset;
+ *out_subcoord = subcoord;
+}
+
+
+/**
* Compute the offset of a pixel block.
*
* x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
@@ -144,48 +190,35 @@ lp_build_sample_offset(struct lp_build_context *bld,
{
LLVMValueRef x_stride;
LLVMValueRef offset;
- LLVMValueRef i;
- LLVMValueRef j;
-
- /*
- * Describe the coordinates in terms of pixel blocks.
- *
- * TODO: pixel blocks are power of two. LLVM should convert rem/div to
- * bit arithmetic. Verify this.
- */
-
- if (format_desc->block.width == 1) {
- i = bld->zero;
- }
- else {
- LLVMValueRef block_width = lp_build_const_int_vec(bld->type, format_desc->block.width);
- i = LLVMBuildURem(bld->builder, x, block_width, "");
- x = LLVMBuildUDiv(bld->builder, x, block_width, "");
- }
-
- if (format_desc->block.height == 1) {
- j = bld->zero;
- }
- else {
- LLVMValueRef block_height = lp_build_const_int_vec(bld->type, format_desc->block.height);
- j = LLVMBuildURem(bld->builder, y, block_height, "");
- y = LLVMBuildUDiv(bld->builder, y, block_height, "");
- }
x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8);
- offset = lp_build_mul(bld, x, x_stride);
+
+ lp_build_sample_partial_offset(bld,
+ format_desc->block.width,
+ x, x_stride,
+ &offset, out_i);
if (y && y_stride) {
- LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride);
+ LLVMValueRef y_offset;
+ lp_build_sample_partial_offset(bld,
+ format_desc->block.height,
+ y, y_stride,
+ &y_offset, out_j);
offset = lp_build_add(bld, offset, y_offset);
}
+ else {
+ *out_j = bld->zero;
+ }
if (z && z_stride) {
- LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride);
+ LLVMValueRef z_offset;
+ LLVMValueRef k;
+ lp_build_sample_partial_offset(bld,
+ 1, /* pixel blocks are always 2D */
+ z, z_stride,
+ &z_offset, &k);
offset = lp_build_add(bld, offset, z_offset);
}
*out_offset = offset;
- *out_i = i;
- *out_j = j;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 5b8f478094..caafc4eca0 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -36,6 +36,8 @@
#define LP_BLD_SAMPLE_H
+#include "pipe/p_format.h"
+
#include "gallivm/lp_bld.h"
struct pipe_resource;
@@ -147,6 +149,15 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
void
+lp_build_sample_partial_offset(struct lp_build_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord,
+ LLVMValueRef stride,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i);
+
+
+void
lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,
LLVMValueRef x,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 806c7d56a8..1f39d9c98b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -176,6 +176,7 @@ texture_dims(enum pipe_texture_target tex)
case PIPE_TEXTURE_1D:
return 1;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_CUBE:
return 2;
case PIPE_TEXTURE_3D:
@@ -322,59 +323,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
/**
- * Fetch the texels as <4n x i8> in AoS form.
- */
-static LLVMValueRef
-lp_build_sample_packed(struct lp_build_sample_context *bld,
- LLVMValueRef x,
- LLVMValueRef y,
- LLVMValueRef y_stride,
- LLVMValueRef data_array)
-{
- LLVMValueRef offset, i, j;
- LLVMValueRef data_ptr;
- LLVMValueRef res;
-
- /* convert x,y,z coords to linear offset from start of texture, in bytes */
- lp_build_sample_offset(&bld->uint_coord_bld,
- bld->format_desc,
- x, y, NULL, y_stride, NULL,
- &offset, &i, &j);
-
- /* get pointer to mipmap level 0 data */
- data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
-
- if (util_format_is_rgba8_variant(bld->format_desc)) {
- /* Just fetch the data directly without swizzling */
- assert(bld->format_desc->block.width == 1);
- assert(bld->format_desc->block.height == 1);
- assert(bld->format_desc->block.bits <= bld->texel_type.width);
-
- res = lp_build_gather(bld->builder,
- bld->texel_type.length,
- bld->format_desc->block.bits,
- bld->texel_type.width,
- data_ptr, offset);
- }
- else {
- struct lp_type type;
-
- assert(bld->texel_type.width == 32);
-
- memset(&type, 0, sizeof type);
- type.width = 8;
- type.length = bld->texel_type.length*4;
- type.norm = TRUE;
-
- res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type,
- data_ptr, offset, i, j);
- }
-
- return res;
-}
-
-
-/**
* Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
*/
static LLVMValueRef
@@ -408,7 +356,7 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld,
/**
- * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
+ * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at this time.
* Return whether the given mode is supported by that function.
*/
static boolean
@@ -430,13 +378,18 @@ is_simple_wrap_mode(unsigned mode)
* \param length the texture size along one dimension
* \param is_pot if TRUE, length is a power of two
* \param wrap_mode one of PIPE_TEX_WRAP_x
+ * \param out_i resulting sub-block pixel coordinate for coord
*/
-static LLVMValueRef
-lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
- LLVMValueRef coord,
- LLVMValueRef length,
- boolean is_pot,
- unsigned wrap_mode)
+static void
+lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord,
+ LLVMValueRef length,
+ LLVMValueRef stride,
+ boolean is_pot,
+ unsigned wrap_mode,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i)
{
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
@@ -469,7 +422,134 @@ lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
assert(0);
}
- return coord;
+ lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
+ out_offset, out_i);
+}
+
+
+/**
+ * Build LLVM code for texture wrap mode, for scaled integer texcoords.
+ * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
+ * \param length the texture size along one dimension
+ * \param stride stride in bytes between pixel blocks along the coordinate axis
+ * \param block_length is the length of the pixel block along the
+ * coordinate axis
+ * \param is_pot if TRUE, length is a power of two
+ * \param wrap_mode one of PIPE_TEX_WRAP_x
+ * \param offset0 resulting relative offset for coord0
+ * \param offset1 resulting relative offset for coord0 + 1
+ * \param i0 resulting sub-block pixel coordinate for coord0
+ * \param i1 resulting sub-block pixel coordinate for coord0 + 1
+ */
+static void
+lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord0,
+ LLVMValueRef length,
+ LLVMValueRef stride,
+ boolean is_pot,
+ unsigned wrap_mode,
+ LLVMValueRef *offset0,
+ LLVMValueRef *offset1,
+ LLVMValueRef *i0,
+ LLVMValueRef *i1)
+{
+ struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef length_minus_one;
+ LLVMValueRef lmask, umask, mask;
+
+ if (block_length != 1) {
+ /*
+ * If the pixel block covers more than one pixel then there is no easy
+ * way to calculate offset1 relative to offset0. Instead, compute them
+ * independently.
+ */
+
+ LLVMValueRef coord1;
+
+ lp_build_sample_wrap_nearest_int(bld,
+ block_length,
+ coord0,
+ length,
+ stride,
+ is_pot,
+ wrap_mode,
+ offset0, i0);
+
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+
+ lp_build_sample_wrap_nearest_int(bld,
+ block_length,
+ coord1,
+ length,
+ stride,
+ is_pot,
+ wrap_mode,
+ offset1, i1);
+
+ return;
+ }
+
+ /*
+ * Scalar pixels -- try to compute offset0 and offset1 with a single stride
+ * multiplication.
+ */
+
+ *i0 = uint_coord_bld->zero;
+ *i1 = uint_coord_bld->zero;
+
+ length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
+
+ switch(wrap_mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ if (is_pot) {
+ coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
+ }
+ else {
+ /* Signed remainder won't give the right results for negative
+ * dividends but unsigned remainder does.*/
+ coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
+ }
+
+ mask = lp_build_compare(bld->builder, int_coord_bld->type,
+ PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
+
+ *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+ *offset1 = LLVMBuildAnd(bld->builder,
+ lp_build_add(uint_coord_bld, *offset0, stride),
+ mask, "");
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
+ PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
+ umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
+ PIPE_FUNC_LESS, coord0, length_minus_one);
+
+ coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
+ coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
+
+ mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
+
+ *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+ *offset1 = lp_build_add(uint_coord_bld,
+ *offset0,
+ LLVMBuildAnd(bld->builder, stride, mask, ""));
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ default:
+ assert(0);
+ *offset0 = uint_coord_bld->zero;
+ *offset1 = uint_coord_bld->zero;
+ break;
+ }
}
@@ -1740,16 +1820,21 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef i32_c8, i32_c128, i32_c255;
LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
- LLVMValueRef x0, x1;
- LLVMValueRef y0, y1;
- LLVMValueRef neighbors[2][2];
+ LLVMValueRef data_ptr;
+ LLVMValueRef x_stride, y_stride;
+ LLVMValueRef x_offset0, x_offset1;
+ LLVMValueRef y_offset0, y_offset1;
+ LLVMValueRef offset[2][2];
+ LLVMValueRef x_subcoord[2], y_subcoord[2];
LLVMValueRef neighbors_lo[2][2];
LLVMValueRef neighbors_hi[2][2];
LLVMValueRef packed, packed_lo, packed_hi;
LLVMValueRef unswizzled[4];
- LLVMValueRef stride;
+ const unsigned level = 0;
+ unsigned i, j;
- assert(bld->static_state->target == PIPE_TEXTURE_2D);
+ assert(bld->static_state->target == PIPE_TEXTURE_2D
+ || bld->static_state->target == PIPE_TEXTURE_RECT);
assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
@@ -1793,21 +1878,30 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
- x0 = s_ipart;
- y0 = t_ipart;
-
- x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
- y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
-
- x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width,
- bld->static_state->wrap_s);
- y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
- bld->static_state->wrap_t);
-
- x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width,
- bld->static_state->wrap_s);
- y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
- bld->static_state->wrap_t);
+ x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+ bld->format_desc->block.bits/8);
+
+ y_stride = lp_build_get_const_level_stride_vec(bld, stride_array, level);
+
+ lp_build_sample_wrap_linear_int(bld,
+ bld->format_desc->block.width,
+ s_ipart, width, x_stride,
+ bld->static_state->pot_width,
+ bld->static_state->wrap_s,
+ &x_offset0, &x_offset1,
+ &x_subcoord[0], &x_subcoord[1]);
+ lp_build_sample_wrap_linear_int(bld,
+ bld->format_desc->block.height,
+ t_ipart, height, y_stride,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_t,
+ &y_offset0, &y_offset1,
+ &y_subcoord[0], &y_subcoord[1]);
+
+ offset[0][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset0);
+ offset[0][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset0);
+ offset[1][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset1);
+ offset[1][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset1);
/*
* Transform 4 x i32 in
@@ -1836,7 +1930,6 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
LLVMValueRef shuffle_lo;
LLVMValueRef shuffle_hi;
- unsigned i, j;
for(j = 0; j < h16.type.length; j += 4) {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
@@ -1864,7 +1957,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
}
- stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
+ /*
+ * get pointer to mipmap level 0 data
+ */
+ data_ptr = lp_build_get_const_mipmap_level(bld, data_array, level);
/*
* Fetch the pixels as 4 x 32bit (rgba order might differ):
@@ -1883,20 +1979,38 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
* The higher 8 bits of the resulting elements will be zero.
*/
- neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
- neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
- neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
- neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
+ for (j = 0; j < 2; ++j) {
+ for (i = 0; i < 2; ++i) {
+ LLVMValueRef rgba8;
- neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
- neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
- neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
- neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ /*
+ * Given the format is a rgba8, just read the pixels as is,
+ * without any swizzling. Swizzling will be done later.
+ */
+ rgba8 = lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset[j][i]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
+ rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
+
+ }
+ else {
+ rgba8 = lp_build_fetch_rgba_aos(bld->builder,
+ bld->format_desc,
+ u8n.type,
+ data_ptr, offset[j][i],
+ x_subcoord[i],
+ y_subcoord[j]);
+ }
+
+ lp_build_unpack2(builder, u8n.type, h16.type,
+ rgba8,
+ &neighbors_lo[j][i], &neighbors_hi[j][i]);
+ }
+ }
/*
* Linear interpolate with 8.8 fixed point.
@@ -2077,7 +2191,8 @@ lp_build_sample_soa(LLVMBuilderRef builder,
}
else if (util_format_fits_8unorm(bld.format_desc) &&
bld.format_desc->nr_channels > 1 &&
- static_state->target == PIPE_TEXTURE_2D &&
+ (static_state->target == PIPE_TEXTURE_2D ||
+ static_state->target == PIPE_TEXTURE_RECT) &&
static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 0aa64affac..0e07f7f3f3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -200,8 +200,10 @@ static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
}
mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
assert(LLVMTypeOf(val) == mask->int_vec_type);
- mask->cond_mask = val;
-
+ mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
+ mask->cond_mask,
+ val,
+ "");
lp_exec_mask_update(mask);
}
@@ -802,7 +804,7 @@ emit_store(
case TGSI_FILE_PREDICATE:
lp_exec_mask_store(&bld->exec_mask, pred, value,
- bld->preds[index][chan_index]);
+ bld->preds[reg->Register.Index][chan_index]);
break;
default:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index 3ffe916f8e..fec1d3dfbc 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -128,16 +128,16 @@ struct lp_build_context
*/
struct lp_type type;
- /** Same as lp_build_undef(type) */
+ /** Same as lp_build_elem_type(type) */
LLVMTypeRef elem_type;
- /** Same as lp_build_undef(type) */
+ /** Same as lp_build_vec_type(type) */
LLVMTypeRef vec_type;
- /** Same as lp_build_undef(type) */
+ /** Same as lp_build_int_elem_type(type) */
LLVMTypeRef int_elem_type;
- /** Same as lp_build_undef(type) */
+ /** Same as lp_build_int_vec_type(type) */
LLVMTypeRef int_vec_type;
/** Same as lp_build_undef(type) */
diff --git a/src/gallium/auxiliary/os/os_stream.c b/src/gallium/auxiliary/os/os_stream.c
new file mode 100644
index 0000000000..3c55fc00d9
--- /dev/null
+++ b/src/gallium/auxiliary/os/os_stream.c
@@ -0,0 +1,58 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_config.h"
+
+#include "os_stream.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+
+/**
+ * Generic vprintf hook for streams that only implement write().
+ *
+ * The text is first formatted into a fixed stack buffer; when it does not
+ * fit, a heap buffer of the reported size is used instead (this assumes
+ * util_vsnprintf reports the full untruncated length).
+ *
+ * Returns the value reported by util_vsnprintf (values <= 0 are passed
+ * through and nothing is written), or -1 if the heap buffer cannot be
+ * allocated.
+ */
+int
+os_default_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
+{
+   char stack_buf[1024];
+   char *heap_buf;
+   va_list ap_copy;
+   int len;
+
+   /* Format from a copy so that ap stays untouched for a second pass. */
+   va_copy(ap_copy, ap);
+   len = util_vsnprintf(stack_buf, sizeof(stack_buf), format, ap_copy);
+   va_end(ap_copy);
+
+   if(len <= 0)
+      return len;
+
+   if(len < sizeof(stack_buf)) {
+      stream->write(stream, stack_buf, len);
+      return len;
+   }
+
+   /* Did not fit on the stack: retry with an exactly sized heap buffer. */
+   heap_buf = MALLOC(len + 1);
+   if(!heap_buf)
+      return -1;
+
+   len = util_vsnprintf(heap_buf, len + 1, format, ap);
+   if(len > 0)
+      stream->write(stream, heap_buf, len);
+   FREE(heap_buf);
+
+   return len;
+}
diff --git a/src/gallium/auxiliary/os/os_stream.h b/src/gallium/auxiliary/os/os_stream.h
index 693a0621e2..6c6050bb02 100644
--- a/src/gallium/auxiliary/os/os_stream.h
+++ b/src/gallium/auxiliary/os/os_stream.h
@@ -50,6 +50,9 @@ struct os_stream
void
(*flush)(struct os_stream *stream);
+
+ int
+ (*vprintf)(struct os_stream *stream, const char* format, va_list ap);
};
@@ -90,6 +93,27 @@ os_stream_flush(struct os_stream *stream)
stream->flush(stream);
}
+int
+os_default_stream_vprintf (struct os_stream* stream, const char *format, va_list ap);
+
+static INLINE int /* Forward a va_list-based formatted write to the stream's vprintf hook. */
+os_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
+{
+ return stream->vprintf(stream, format, ap); /* result comes straight from the hook */
+}
+
+/**
+ * printf-style convenience wrapper: gathers the variadic arguments into a
+ * va_list and hands them to the stream's vprintf hook, returning whatever
+ * the hook returns.
+ */
+static INLINE int
+os_stream_printf (struct os_stream* stream, const char *format, ...)
+{
+   va_list ap;
+   int result;
+
+   va_start(ap, format);
+   result = os_stream_vprintf(stream, format, ap);
+   va_end(ap);
+
+   return result;
+}
struct os_stream *
os_file_stream_create(const char *filename);
@@ -118,5 +142,4 @@ os_str_stream_get_and_close(struct os_stream *stream);
#define os_file_stream_create(_filename) os_null_stream_create()
#endif
-
#endif /* _OS_STREAM_H_ */
diff --git a/src/gallium/auxiliary/os/os_stream_log.c b/src/gallium/auxiliary/os/os_stream_log.c
index 7cc2028a22..b01377c346 100644
--- a/src/gallium/auxiliary/os/os_stream_log.c
+++ b/src/gallium/auxiliary/os/os_stream_log.c
@@ -73,7 +73,8 @@ static struct os_stream
os_log_stream_struct = {
&os_log_stream_close,
&os_log_stream_write,
- &os_log_stream_flush
+ &os_log_stream_flush,
+ &os_default_stream_vprintf,
};
diff --git a/src/gallium/auxiliary/os/os_stream_null.c b/src/gallium/auxiliary/os/os_stream_null.c
index 128c4e8f0e..a549a789e6 100644
--- a/src/gallium/auxiliary/os/os_stream_null.c
+++ b/src/gallium/auxiliary/os/os_stream_null.c
@@ -56,12 +56,18 @@ os_null_stream_flush(struct os_stream *stream)
(void)stream;
}
+/**
+ * vprintf hook of the null stream: the formatted output is discarded.
+ *
+ * Always returns 0. The arguments are deliberately unused; they are cast
+ * to void like in the other null-stream callbacks to avoid unused-parameter
+ * warnings.
+ */
+static int
+os_null_stream_vprintf (struct os_stream* stream, const char *format, va_list ap)
+{
+   (void)stream;
+   (void)format;
+   (void)ap;
+   return 0;
+}
static struct os_stream
os_null_stream = {
&os_null_stream_close,
&os_null_stream_write,
- &os_null_stream_flush
+ &os_null_stream_flush,
+ &os_null_stream_vprintf
};
diff --git a/src/gallium/auxiliary/os/os_stream_stdc.c b/src/gallium/auxiliary/os/os_stream_stdc.c
index 9e7ed71107..37e7d063e2 100644
--- a/src/gallium/auxiliary/os/os_stream_stdc.c
+++ b/src/gallium/auxiliary/os/os_stream_stdc.c
@@ -83,6 +83,14 @@ os_stdc_stream_flush(struct os_stream *_stream)
fflush(stream->file);
}
+static int /* vprintf hook for FILE-backed streams */
+os_stdc_stream_vprintf (struct os_stream* _stream, const char *format, va_list ap)
+{
+ struct os_stdc_stream *stream = os_stdc_stream(_stream); /* recover the stdc stream wrapper from the base pointer */
+
+ return vfprintf(stream->file, format, ap); /* returns vfprintf's result unchanged */
+}
+
struct os_stream *
os_file_stream_create(const char *filename)
@@ -96,6 +104,7 @@ os_file_stream_create(const char *filename)
stream->base.close = &os_stdc_stream_close;
stream->base.write = &os_stdc_stream_write;
stream->base.flush = &os_stdc_stream_flush;
+ stream->base.vprintf = &os_stdc_stream_vprintf;
stream->file = fopen(filename, "w");
if(!stream->file)
diff --git a/src/gallium/auxiliary/os/os_stream_str.c b/src/gallium/auxiliary/os/os_stream_str.c
index b5c7270d2a..be9478b2a1 100644
--- a/src/gallium/auxiliary/os/os_stream_str.c
+++ b/src/gallium/auxiliary/os/os_stream_str.c
@@ -118,6 +118,7 @@ os_str_stream_create(size_t size)
stream->base.close = &os_str_stream_close;
stream->base.write = &os_str_stream_write;
stream->base.flush = &os_str_stream_flush;
+ stream->base.vprintf = &os_default_stream_vprintf;
stream->str = os_malloc(size);
if(!stream->str)
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
index cec2524da2..2ef02160f2 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -50,8 +50,7 @@
#define PB_BUFMGR_H_
-#include "pipe/p_compiler.h"
-#include "pipe/p_defines.h"
+#include "pb_buffer.h"
#ifdef __cplusplus
diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
index 2e15751e50..0461c81550 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
@@ -30,7 +30,7 @@
#include "rtasm_cpu.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
static boolean rtasm_sse_enabled(void)
{
static boolean firsttime = 1;
@@ -49,7 +49,7 @@ static boolean rtasm_sse_enabled(void)
int rtasm_cpu_has_sse(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
return rtasm_sse_enabled();
#else
return 0;
@@ -59,7 +59,7 @@ int rtasm_cpu_has_sse(void)
int rtasm_cpu_has_sse2(void)
{
/* FIXME: actually detect this at run-time */
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
return rtasm_sse_enabled();
#else
return 0;
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 9f70b73698..75b0f6a68e 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -22,8 +22,9 @@
**************************************************************************/
#include "pipe/p_config.h"
+#include "util/u_cpu_detect.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
@@ -231,6 +232,10 @@ static void emit_modrm( struct x86_function *p,
assert(reg.mod == mod_REG);
+ /* TODO: support extended x86-64 registers */
+ assert(reg.idx < 8);
+ assert(regmem.idx < 8);
+
val |= regmem.mod << 6; /* mod field */
val |= reg.idx << 3; /* reg field */
val |= regmem.idx; /* r/m field */
@@ -363,6 +368,12 @@ int x86_get_label( struct x86_function *p )
*/
+void x64_rexw(struct x86_function *p) /* Emit a REX.W prefix byte for the next instruction; no-op on 32-bit targets. */
+{
+ if(x86_target(p) != X86_32)
+ emit_1ub(p, 0x48); /* 0x48 = REX prefix with the W (64-bit operand) bit set */
+}
+
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
int label )
@@ -449,6 +460,52 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
emit_1i(p, imm);
}
+/**
+ * Store an int immediate into a register or memory operand.
+ *
+ * Register destinations are delegated to x86_mov_reg_imm(); any other
+ * destination is encoded as opcode 0xc7 with ModRM extension 0, followed
+ * by the immediate.
+ */
+void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm )
+{
+   DUMP_RI( dst, imm );
+
+   if(dst.mod == mod_REG) {
+      x86_mov_reg_imm(p, dst, imm);
+      return;
+   }
+
+   emit_1ub(p, 0xc7);
+   emit_modrm_noreg(p, 0, dst);
+   emit_1i(p, imm);
+}
+
+/**
+ * Store a 16-bit immediate into a register or memory operand.
+ *
+ * Emits the 0x66 prefix, then either the short 0xb8+idx register form or
+ * opcode 0xc7 with ModRM extension 0, and finally the two immediate bytes.
+ */
+void x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm )
+{
+   DUMP_RI( dst, imm );
+
+   emit_1ub(p, 0x66);
+   if(dst.mod == mod_REG) {
+      emit_1ub(p, 0xb8 + dst.idx);
+   }
+   else {
+      emit_1ub(p, 0xc7);
+      emit_modrm_noreg(p, 0, dst);
+   }
+
+   /* both encodings end with the immediate: low byte, then high byte */
+   emit_2ub(p, imm & 0xff, imm >> 8);
+}
+
+/**
+ * Store an 8-bit immediate into a register or memory operand.
+ *
+ * Registers use the short 0xb0+idx form; other destinations use opcode
+ * 0xc6 with ModRM extension 0. The immediate byte follows in both cases.
+ */
+void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm )
+{
+   DUMP_RI( dst, imm );
+
+   if(dst.mod == mod_REG) {
+      emit_1ub(p, 0xb0 + dst.idx);
+   }
+   else {
+      emit_1ub(p, 0xc6);
+      emit_modrm_noreg(p, 0, dst);
+   }
+
+   emit_1ub(p, imm);
+}
+
/**
* Immediate group 1 instructions.
*/
@@ -520,7 +577,7 @@ void x86_push( struct x86_function *p,
}
- p->stack_offset += 4;
+ p->stack_offset += sizeof(void*);
}
void x86_push_imm32( struct x86_function *p,
@@ -530,7 +587,7 @@ void x86_push_imm32( struct x86_function *p,
emit_1ub(p, 0x68);
emit_1i(p, imm32);
- p->stack_offset += 4;
+ p->stack_offset += sizeof(void*);
}
@@ -540,23 +597,33 @@ void x86_pop( struct x86_function *p,
DUMP_R( reg );
assert(reg.mod == mod_REG);
emit_1ub(p, 0x58 + reg.idx);
- p->stack_offset -= 4;
+ p->stack_offset -= sizeof(void*);
}
void x86_inc( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
- assert(reg.mod == mod_REG);
- emit_1ub(p, 0x40 + reg.idx);
+ if(x86_target(p) == X86_32 && reg.mod == mod_REG) /* one-byte 0x40+idx form: 32-bit targets only (0x40-0x4f are REX prefixes in 64-bit mode) */
+ {
+ emit_1ub(p, 0x40 + reg.idx);
+ return;
+ }
+ emit_1ub(p, 0xff); /* general form: opcode 0xff with ModRM extension 0 (INC r/m) */
+ emit_modrm_noreg(p, 0, reg);
}
void x86_dec( struct x86_function *p,
struct x86_reg reg )
{
DUMP_R( reg );
- assert(reg.mod == mod_REG);
- emit_1ub(p, 0x48 + reg.idx);
+ if(x86_target(p) == X86_32 && reg.mod == mod_REG) /* one-byte 0x48+idx form: 32-bit targets only (0x48 is a REX prefix in 64-bit mode) */
+ {
+ emit_1ub(p, 0x48 + reg.idx);
+ return;
+ }
+ emit_1ub(p, 0xff); /* general form: opcode 0xff with ModRM extension 1 (DEC r/m) */
+ emit_modrm_noreg(p, 1, reg);
}
void x86_ret( struct x86_function *p )
@@ -583,9 +650,82 @@ void x86_mov( struct x86_function *p,
struct x86_reg src )
{
DUMP_RR( dst, src );
+ /* special hack for reading arguments until we support x86-64 registers everywhere */
+ if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
+ {
+ uint8_t rex = 0x40;
+ if(dst.idx >= 8)
+ {
+ rex |= 4;
+ dst.idx -= 8;
+ }
+ if(src.idx >= 8)
+ {
+ rex |= 1;
+ src.idx -= 8;
+ }
+ emit_1ub(p, rex);
+ }
+ emit_op_modrm( p, 0x8b, 0x89, dst, src );
+}
+
+void x86_mov16( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{ /* 16-bit MOV: same opcodes as x86_mov, preceded by the 0x66 operand-size prefix */
+ DUMP_RR( dst, src );
+ emit_1ub(p, 0x66);
+ emit_op_modrm( p, 0x8b, 0x89, dst, src ); /* presumably 0x8b when dst is a register, 0x89 otherwise -- see emit_op_modrm */
+}
+
+void x86_mov8( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{ /* 8-bit MOV between a register and a register/memory operand */
+ DUMP_RR( dst, src );
+ emit_op_modrm( p, 0x8a, 0x88, dst, src ); /* byte-sized MOV opcodes 0x8a / 0x88 */
+}
+
+void x64_mov64( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ uint8_t rex = 0x48; /* REX prefix with W set; always emitted, so the move is 64 bits wide */
+ DUMP_RR( dst, src );
+ assert(x86_target(p) != X86_32); /* only valid when generating 64-bit code */
+
+ /* special hack for reading arguments until we support x86-64 registers everywhere */
+ if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8))
+ {
+ if(dst.idx >= 8)
+ {
+ rex |= 4; /* REX.R: extends the ModRM reg field, which presumably holds dst here */
+ dst.idx -= 8; /* keep only the low 3 bits for the ModRM byte */
+ }
+ if(src.idx >= 8)
+ {
+ rex |= 1; /* REX.B: extends the ModRM r/m field, which presumably holds src here */
+ src.idx -= 8;
+ }
+ }
+ emit_1ub(p, rex);
emit_op_modrm( p, 0x8b, 0x89, dst, src );
}
+void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src ) /* MOVZX from an 8-bit source (0F B6) */
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, 0x0f, 0xb6);
+ emit_modrm(p, dst, src); /* dst goes in the ModRM reg field, so it must be a plain register */
+}
+
+void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src ) /* MOVZX from a 16-bit source (0F B7) */
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, 0x0f, 0xb7);
+ emit_modrm(p, dst, src); /* dst goes in the ModRM reg field, so it must be a plain register */
+}
+
void x86_xor( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -680,6 +820,61 @@ void x86_div( struct x86_function *p,
emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src);
}
+void x86_bswap( struct x86_function *p, struct x86_reg reg ) /* BSWAP on a general-purpose register (0F C8+idx) */
+{
+ DUMP_R(reg);
+ assert(reg.file == file_REG32); /* general-purpose registers only */
+ assert(reg.mod == mod_REG); /* no memory operand form */
+ emit_2ub(p, 0x0f, 0xc8 + reg.idx);
+}
+
+/**
+ * Logical right shift (SHR) of a register/memory operand by an immediate.
+ *
+ * A count of 1 uses the short 0xd1 form; any other count uses 0xc1
+ * followed by the count as one byte. Both use ModRM extension 5.
+ */
+void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
+{
+   DUMP_RI(reg, imm);
+
+   emit_1ub(p, imm == 1 ? 0xd1 : 0xc1);
+   emit_modrm_noreg(p, 5, reg);
+   if(imm != 1)
+      emit_1ub(p, imm);
+}
+
+/**
+ * Arithmetic right shift (SAR) of a register/memory operand by an immediate.
+ *
+ * A count of 1 uses the short 0xd1 form; any other count uses 0xc1
+ * followed by the count as one byte. Both use ModRM extension 7.
+ */
+void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
+{
+   DUMP_RI(reg, imm);
+
+   emit_1ub(p, imm == 1 ? 0xd1 : 0xc1);
+   emit_modrm_noreg(p, 7, reg);
+   if(imm != 1)
+      emit_1ub(p, imm);
+}
+
+/**
+ * Left shift (SHL) of a register/memory operand by an immediate.
+ *
+ * A count of 1 uses the short 0xd1 form; any other count uses 0xc1
+ * followed by the count as one byte. Both use ModRM extension 4.
+ */
+void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm )
+{
+   DUMP_RI(reg, imm);
+
+   emit_1ub(p, imm == 1 ? 0xd1 : 0xc1);
+   emit_modrm_noreg(p, 4, reg);
+   if(imm != 1)
+      emit_1ub(p, imm);
+}
/***********************************************************************
@@ -1013,6 +1208,77 @@ void sse_movmskps( struct x86_function *p,
* SSE2 instructions
*/
+/**
+ * MOVD (66 0F 6E / 7E).
+ *
+ * When the destination is a plain general-purpose register the store form
+ * (0x7e) is emitted by hand with src in the ModRM reg field; every other
+ * operand combination is left to emit_op_modrm().
+ */
+void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   const int dst_is_gpr = (dst.mod == mod_REG && dst.file == file_REG32);
+
+   DUMP_RR(dst, src);
+   emit_2ub(p, 0x66, 0x0f);
+
+   if(!dst_is_gpr) {
+      emit_op_modrm(p, 0x6e, 0x7e, dst, src);
+      return;
+   }
+
+   emit_1ub(p, 0x7e);
+   emit_modrm(p, src, dst);
+}
+
+/**
+ * MOVQ with an XMM register.
+ *
+ * Register destination: load form F3 0F 7E with dst in the ModRM reg field.
+ * Memory destination:   store form 66 0F D6 with src (which must be a
+ *                       register) in the ModRM reg field.
+ */
+void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src )
+{
+   DUMP_RR(dst, src);
+
+   if(dst.mod == mod_REG) {
+      emit_3ub(p, 0xf3, 0x0f, 0x7e);
+      emit_modrm(p, dst, src);
+   }
+   else if(dst.mod == mod_INDIRECT ||
+           dst.mod == mod_DISP32 ||
+           dst.mod == mod_DISP8) {
+      assert(src.mod == mod_REG);
+      emit_3ub(p, 0x66, 0x0f, 0xd6);
+      emit_modrm(p, src, dst);
+   }
+   else {
+      /* no other addressing mode is expected */
+      assert(0);
+   }
+}
+
+void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) /* MOVDQU: F3 0F 6F (load) / 7F (store) */
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0xf3, 0x0f);
+ emit_op_modrm(p, 0x6f, 0x7f, dst, src); /* presumably picks 0x6f when dst is a register, 0x7f otherwise -- see emit_op_modrm */
+}
+
+void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) /* MOVDQA: 66 0F 6F (load) / 7F (store) */
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ emit_op_modrm(p, 0x6f, 0x7f, dst, src); /* same opcode pair as sse2_movdqu; only the prefix byte differs */
+}
+
+void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) /* MOVSD: F2 0F 10 (load) / 11 (store) */
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0xf2, 0x0f);
+ emit_op_modrm(p, 0x10, 0x11, dst, src);
+}
+
+void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) /* MOVUPD: 66 0F 10 (load) / 11 (store) */
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ emit_op_modrm(p, 0x10, 0x11, dst, src);
+}
+
+void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) /* MOVAPD: 66 0F 28 (load) / 29 (store) */
+{
+ DUMP_RR(dst, src);
+ emit_2ub(p, 0x66, 0x0f);
+ emit_op_modrm(p, 0x28, 0x29, dst, src);
+}
+
/**
* Perform a reduced swizzle:
*/
@@ -1027,6 +1293,28 @@ void sse2_pshufd( struct x86_function *p,
emit_1ub(p, shuf);
}
+void sse2_pshuflw( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src,
+ unsigned char shuf)
+{ /* PSHUFLW: F2 0F 70 /r ib -- shuf is emitted as the trailing immediate byte */
+ DUMP_RRI( dst, src, shuf );
+ emit_3ub(p, 0xf2, X86_TWOB, 0x70);
+ emit_modrm(p, dst, src); /* dst must be a plain register (ModRM reg field) */
+ emit_1ub(p, shuf);
+}
+
+void sse2_pshufhw( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src,
+ unsigned char shuf)
+{ /* PSHUFHW: F3 0F 70 /r ib -- shuf is emitted as the trailing immediate byte */
+ DUMP_RRI( dst, src, shuf );
+ emit_3ub(p, 0xf3, X86_TWOB, 0x70);
+ emit_modrm(p, dst, src); /* dst must be a plain register (ModRM reg field) */
+ emit_1ub(p, shuf);
+}
+
void sse2_cvttps2dq( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -1045,6 +1333,24 @@ void sse2_cvtps2dq( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse2_cvtsd2ss( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{ /* CVTSD2SS: F2 0F 5A /r (scalar double -> scalar single) */
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0xf2, 0x0f, 0x5a);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_cvtpd2ps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src )
+{ /* CVTPD2PS: 66 0F 5A /r (packed double -> packed single) */
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x5a);
+ emit_modrm( p, dst, src );
+}
+
void sse2_packssdw( struct x86_function *p,
struct x86_reg dst,
struct x86_reg src )
@@ -1081,6 +1387,97 @@ void sse2_punpcklbw( struct x86_function *p,
emit_modrm( p, dst, src );
}
+void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) /* PUNPCKLWD: 66 0F 61 /r (interleave low words) */
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x61);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) /* PUNPCKLDQ: 66 0F 62 /r (interleave low doublewords) */
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x62);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) /* PUNPCKLQDQ: 66 0F 6C /r (interleave low quadwords) */
+{
+ DUMP_RR( dst, src );
+ emit_3ub(p, 0x66, 0x0f, 0x6c);
+ emit_modrm( p, dst, src );
+}
+
+void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) /* PSLLW by immediate: 66 0F 71 /6 ib (left shift of 16-bit lanes) */
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x71);
+ emit_modrm_noreg(p, 6, dst); /* 6 is the opcode extension, not a register */
+ emit_1ub(p, imm); /* shift count is emitted as a single byte */
+}
+
+void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) /* PSLLD by immediate: 66 0F 72 /6 ib (left shift of 32-bit lanes) */
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x72);
+ emit_modrm_noreg(p, 6, dst); /* 6 is the opcode extension, not a register */
+ emit_1ub(p, imm); /* shift count is emitted as a single byte */
+}
+
+void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) /* PSLLQ by immediate: 66 0F 73 /6 ib (left shift of 64-bit lanes) */
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x73);
+ emit_modrm_noreg(p, 6, dst); /* 6 is the opcode extension, not a register */
+ emit_1ub(p, imm); /* shift count is emitted as a single byte */
+}
+
+void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) /* PSRLW by immediate: 66 0F 71 /2 ib (logical right shift of 16-bit lanes) */
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x71);
+ emit_modrm_noreg(p, 2, dst); /* 2 is the opcode extension, not a register */
+ emit_1ub(p, imm); /* shift count is emitted as a single byte */
+}
+
+void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) /* PSRLD by immediate: 66 0F 72 /2 ib (logical right shift of 32-bit lanes) */
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x72);
+ emit_modrm_noreg(p, 2, dst); /* 2 is the opcode extension, not a register */
+ emit_1ub(p, imm); /* shift count is emitted as a single byte */
+}
+
+void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) /* PSRLQ by immediate: 66 0F 73 /2 ib (logical right shift of 64-bit lanes) */
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x73);
+ emit_modrm_noreg(p, 2, dst); /* 2 is the opcode extension, not a register */
+ emit_1ub(p, imm); /* shift count is emitted as a single byte */
+}
+
+void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) /* PSRAW by immediate: 66 0F 71 /4 ib (arithmetic right shift of 16-bit lanes) */
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x71);
+ emit_modrm_noreg(p, 4, dst); /* 4 is the opcode extension, not a register */
+ emit_1ub(p, imm); /* shift count is emitted as a single byte */
+}
+
+void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) /* PSRAD by immediate: 66 0F 72 /4 ib (arithmetic right shift of 32-bit lanes) */
+{
+ DUMP_RI(dst, imm);
+ emit_3ub(p, 0x66, 0x0f, 0x72);
+ emit_modrm_noreg(p, 4, dst); /* 4 is the opcode extension, not a register */
+ emit_1ub(p, imm); /* shift count is emitted as a single byte */
+}
+
+void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) /* POR: 66 0F EB /r (bitwise OR into dst) */
+{
+ DUMP_RR(dst, src);
+ emit_3ub(p, 0x66, 0x0f, 0xeb);
+ emit_modrm(p, dst, src); /* dst must be a plain register (ModRM reg field) */
+}
void sse2_rcpps( struct x86_function *p,
struct x86_reg dst,
@@ -1100,18 +1497,6 @@ void sse2_rcpss( struct x86_function *p,
emit_modrm( p, dst, src );
}
-void sse2_movd( struct x86_function *p,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( dst, src );
- emit_2ub(p, 0x66, X86_TWOB);
- emit_op_modrm( p, 0x6e, 0x7e, dst, src );
-}
-
-
-
-
/***********************************************************************
* x87 instructions
*/
@@ -1702,23 +2087,80 @@ void x86_cdecl_caller_pop_regs( struct x86_function *p )
}
-/* Retreive a reference to one of the function arguments, taking into
- * account any push/pop activity:
- */
struct x86_reg x86_fn_arg( struct x86_function *p,
- unsigned arg )
+ unsigned arg )
{
- return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+ switch(x86_target(p)) /* where argument number arg (1-based) lives depends on the target ABI */
+ {
+ case X86_64_WIN64_ABI:
+ /* Microsoft uses a different calling convention than the rest of the world */
+ switch(arg)
+ {
+ case 1:
+ return x86_make_reg(file_REG32, reg_CX);
+ case 2:
+ return x86_make_reg(file_REG32, reg_DX);
+ case 3:
+ return x86_make_reg(file_REG32, reg_R8);
+ case 4:
+ return x86_make_reg(file_REG32, reg_R9);
+ default:
+ /* Win64 allocates stack slots as if it pushed the first 4 arguments too */
+ return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+ p->stack_offset + arg * 8);
+ }
+ case X86_64_STD_ABI: /* first six arguments arrive in DI, SI, DX, CX, R8, R9 */
+ switch(arg)
+ {
+ case 1:
+ return x86_make_reg(file_REG32, reg_DI);
+ case 2:
+ return x86_make_reg(file_REG32, reg_SI);
+ case 3:
+ return x86_make_reg(file_REG32, reg_DX);
+ case 4:
+ return x86_make_reg(file_REG32, reg_CX);
+ case 5:
+ return x86_make_reg(file_REG32, reg_R8);
+ case 6:
+ return x86_make_reg(file_REG32, reg_R9);
+ default:
+ return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
+ p->stack_offset + (arg - 6) * 8); /* only arguments past the sixth occupy stack slots; NOTE(review): presumably stack_offset covers pushes made since entry -- confirm */
+ }
+ case X86_32: /* all arguments are read from the stack, 4 bytes apart */
+ return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
p->stack_offset + arg * 4); /* ??? */
+ default:
+ abort(); /* unknown target */
+ }
}
+/**
+ * Initialisation shared by x86_init_func() and x86_init_func_size():
+ * runs CPU detection, records the detected MMX/SSE feature bits in
+ * p->caps and points the write cursor (csr) at the start of the store.
+ */
+static void x86_init_func_common( struct x86_function *p )
+{
+   unsigned detected = 0;
+
+   util_cpu_detect();
+
+   if(util_cpu_caps.has_mmx)
+      detected |= X86_MMX;
+   if(util_cpu_caps.has_mmx2)
+      detected |= X86_MMX2;
+   if(util_cpu_caps.has_sse)
+      detected |= X86_SSE;
+   if(util_cpu_caps.has_sse2)
+      detected |= X86_SSE2;
+   if(util_cpu_caps.has_sse3)
+      detected |= X86_SSE3;
+   if(util_cpu_caps.has_sse4_1)
+      detected |= X86_SSE4_1;
+
+   p->caps = detected;
+   p->csr = p->store;
+   DUMP_START();
+}
void x86_init_func( struct x86_function *p )
{
p->size = 0;
p->store = NULL;
- p->csr = p->store;
- DUMP_START();
+ x86_init_func_common(p); /* fills p->caps from CPU detection and sets csr = store */
}
void x86_init_func_size( struct x86_function *p, unsigned code_size )
@@ -1728,8 +2170,7 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size )
if (p->store == NULL) {
p->store = p->error_overflow;
}
- p->csr = p->store;
- DUMP_START();
+ x86_init_func_common(p);
}
void x86_release_func( struct x86_function *p )
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 6208e8f707..2b9678b176 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -24,22 +24,31 @@
#ifndef _RTASM_X86SSE_H_
#define _RTASM_X86SSE_H_
+#include "pipe/p_compiler.h"
#include "pipe/p_config.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
/* It is up to the caller to ensure that instructions issued are
* suitable for the host cpu. There are no checks made in this module
* for mmx/sse/sse2 support on the cpu.
*/
struct x86_reg {
- unsigned file:3;
- unsigned idx:3;
+ unsigned file:2; /* register file (e.g. file_REG32) */
+ unsigned idx:4; /* 4 bits so the extended registers (indices 8 and up) fit */
unsigned mod:2; /* mod_REG if this is just a register */
int disp:24; /* only +/- 23bits of offset - should be enough... */
};
+#define X86_MMX 1 /* CPU capability bits, OR-ed together in x86_function::caps */
+#define X86_MMX2 2
+#define X86_SSE 4
+#define X86_SSE2 8
+#define X86_SSE3 0x10
+#define X86_SSE4_1 0x20
+
struct x86_function {
+ unsigned caps;
unsigned size;
unsigned char *store;
unsigned char *csr;
@@ -75,7 +84,15 @@ enum x86_reg_name {
reg_SP,
reg_BP,
reg_SI,
- reg_DI
+ reg_DI,
+ reg_R8,
+ reg_R9,
+ reg_R10,
+ reg_R11,
+ reg_R12,
+ reg_R13,
+ reg_R14,
+ reg_R15
};
@@ -110,6 +127,29 @@ typedef void (*x86_func)(void);
/* Begin/end/retrieve function creation:
*/
+enum x86_target /* architecture / calling convention that code is generated for */
+{
+ X86_32, /* 32-bit x86 */
+ X86_64_STD_ABI, /* x86-64 with the non-Windows calling convention */
+ X86_64_WIN64_ABI /* x86-64 with the Microsoft Win64 calling convention */
+};
+
+/* make this read a member of x86_function if target != host is desired */
+static INLINE enum x86_target x86_target( struct x86_function* p ) /* target is chosen at compile time from the host macros; p is currently unused */
+{
+#ifdef PIPE_ARCH_X86
+ return X86_32;
+#elif defined(_WIN64)
+ return X86_64_WIN64_ABI;
+#elif defined(PIPE_ARCH_X86_64)
+ return X86_64_STD_ABI;
+#endif
+}
+
+static INLINE unsigned x86_target_caps( struct x86_function* p ) /* returns the X86_* capability bitmask stored in p->caps */
+{
+ return p->caps;
+}
void x86_init_func( struct x86_function *p );
void x86_init_func_size( struct x86_function *p, unsigned code_size );
@@ -138,6 +178,8 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg );
*/
int x86_get_label( struct x86_function *p );
+void x64_rexw(struct x86_function *p);
+
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
int label );
@@ -178,18 +220,54 @@ void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
+void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
+ unsigned char shuf );
+void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
+ unsigned char shuf );
void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
+void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
+
+void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
+void sse2_pshuflw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
+void sse2_pshufhw( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
+void sse2_pshufd( struct x86_function *p, struct x86_reg dst, struct x86_reg src, uint8_t imm );
void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr);
void sse_prefetch0( struct x86_function *p, struct x86_reg ptr);
@@ -227,7 +305,6 @@ void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg
void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
-void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -237,6 +314,14 @@ void x86_dec( struct x86_function *p, struct x86_reg reg );
void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x64_mov64( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov8( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov16( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm );
+void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm );
+void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm );
void x86_mul( struct x86_function *p, struct x86_reg src );
void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
@@ -250,7 +335,10 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
void x86_div( struct x86_function *p, struct x86_reg src );
-
+void x86_bswap( struct x86_function *p, struct x86_reg src );
+void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
+void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
+void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm );
void x86_cdecl_caller_push_regs( struct x86_function *p );
void x86_cdecl_caller_pop_regs( struct x86_function *p );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
index 4cd27317b3..dd78b36100 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -28,6 +28,7 @@
#ifndef TGSI_DUMP_H
#define TGSI_DUMP_H
+#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 298f3d0a8b..0757f05dfa 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3239,6 +3239,8 @@ exec_instruction(
if (mach->CallStackTop == 0) {
/* returning from main() */
+ mach->CondStackTop = 0;
+ mach->LoopStackTop = 0;
*pc = -1;
return;
}
@@ -3767,6 +3769,9 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
}
#endif
+ /* Strictly speaking, these assertions aren't really needed but they
+ * can potentially catch some bugs in the control flow code.
+ */
assert(mach->CondStackTop == 0);
assert(mach->LoopStackTop == 0);
assert(mach->ContStackTop == 0);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
index 50248884fd..1992d11bbe 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
@@ -28,6 +28,7 @@
#ifndef TGSI_INFO_H
#define TGSI_INFO_H
+#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index db9a342220..1891203abe 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -282,17 +282,6 @@ tgsi_parse_token(
}
-unsigned
-tgsi_num_tokens(const struct tgsi_token *tokens)
-{
- struct tgsi_parse_context ctx;
- if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) {
- unsigned len = (ctx.FullHeader.Header.HeaderSize +
- ctx.FullHeader.Header.BodySize);
- return len;
- }
- return 0;
-}
/**
@@ -319,3 +308,19 @@ tgsi_alloc_tokens(unsigned num_tokens)
unsigned bytes = num_tokens * sizeof(struct tgsi_token);
return (struct tgsi_token *) MALLOC(bytes);
}
+
+
+/**
+ * Print the raw token stream through debug_printf() in the form of a C
+ * array initializer, one hexadecimal word per line.
+ *
+ * The number of words printed is taken from tgsi_num_tokens().
+ */
+void
+tgsi_dump_tokens(const struct tgsi_token *tokens)
+{
+   const unsigned *words = (const unsigned *)tokens;
+   const int count = tgsi_num_tokens(tokens);
+   int idx = 0;
+
+   /* the token stream is reinterpreted as plain unsigned words */
+   assert(sizeof(*tokens) == sizeof(unsigned));
+
+   debug_printf("const unsigned tokens[%d] = {\n", count);
+   while (idx < count) {
+      debug_printf("0x%08x,\n", words[idx]);
+      idx++;
+   }
+   debug_printf("};\n");
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 36de8807b4..d4df585176 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -28,6 +28,7 @@
#ifndef TGSI_PARSE_H
#define TGSI_PARSE_H
+#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
@@ -132,8 +133,15 @@ void
tgsi_parse_token(
struct tgsi_parse_context *ctx );
-unsigned
-tgsi_num_tokens(const struct tgsi_token *tokens);
+/**
+ * Return the length of a token stream as recorded in its header.
+ *
+ * The first token is read as a struct tgsi_header; the result is the
+ * sum of its HeaderSize and BodySize fields.
+ */
+static INLINE unsigned
+tgsi_num_tokens(const struct tgsi_token *tokens)
+{
+   const struct tgsi_header *hdr = (const struct tgsi_header *) tokens;
+
+   return hdr->HeaderSize + hdr->BodySize;
+}
+
+void
+tgsi_dump_tokens(const struct tgsi_token *tokens);
struct tgsi_token *
tgsi_dup_tokens(const struct tgsi_token *tokens);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/tgsi_sse2.h
index d81ee3d00e..00aa8b84fe 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.h
@@ -32,9 +32,12 @@
extern "C" {
#endif
+#include "pipe/p_compiler.h"
+
+struct tgsi_exec_machine;
+struct tgsi_interp_coef;
struct tgsi_token;
struct x86_function;
-struct tgsi_interp_coef;
unsigned
tgsi_emit_sse2(
diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c
index fe638e211f..73287b667d 100644
--- a/src/gallium/auxiliary/translate/translate.c
+++ b/src/gallium/auxiliary/translate/translate.c
@@ -38,7 +38,7 @@ struct translate *translate_create( const struct translate_key *key )
{
struct translate *translate = NULL;
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
translate = translate_sse2_create( key );
if (translate)
return translate;
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index eb6f2cc486..a75380228b 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -85,6 +85,18 @@ struct translate {
unsigned instance_id,
void *output_buffer);
+ void (PIPE_CDECL *run_elts16)( struct translate *,
+ const uint16_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer);
+
+ void (PIPE_CDECL *run_elts8)( struct translate *,
+ const uint8_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer);
+
void (PIPE_CDECL *run)( struct translate *,
unsigned start,
unsigned count,
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 42cfd763e9..ad809db720 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -64,6 +64,14 @@ struct translate_generic {
unsigned input_stride;
unsigned max_index;
+ /* this value is set to -1 if this is a normal element with output_format != input_format:
+ * in this case, u_format is used to do a full conversion
+ *
+ * this value is set to the format size in bytes if output_format == input_format or for 32-bit instance ids:
+ * in this case, memcpy is used to copy this amount of bytes
+ */
+ int copy_size;
+
} attrib[PIPE_MAX_ATTRIBS];
unsigned nr_attrib;
@@ -354,7 +362,65 @@ static emit_func get_emit_func( enum pipe_format format )
}
}
+/**
+ * Translate a single vertex: for every attribute, fetch the source
+ * element (or the instance id) and write it into the output vertex.
+ *
+ * \param tg           translate object holding the per-attribute state
+ * \param elt          vertex index used by attributes without an instance divisor
+ * \param instance_id  current instance; selects the element of instanced
+ *                     attributes and is the value of instance-id attributes
+ * \param vert         start of the output vertex; each attribute is written
+ *                     at its own output_offset from here
+ */
+static ALWAYS_INLINE void PIPE_CDECL generic_run_one( struct translate_generic *tg,
+                                                      unsigned elt,
+                                                      unsigned instance_id,
+                                                      void *vert )
+{
+   unsigned nr_attrs = tg->nr_attrib;
+   unsigned attr;
+
+   for (attr = 0; attr < nr_attrs; attr++) {
+      float data[4];
+      uint8_t *dst = (uint8_t *)vert + tg->attrib[attr].output_offset;
+
+      if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+         const uint8_t *src;
+         unsigned index;
+         int copy_size;
+
+         /* instanced attributes are indexed by instance, not by vertex */
+         if (tg->attrib[attr].instance_divisor) {
+            index = instance_id / tg->attrib[attr].instance_divisor;
+         }
+         else {
+            index = elt;
+         }
+
+         /* clamp to avoid going out of bounds */
+         index = MIN2(index, tg->attrib[attr].max_index);
+
+         src = tg->attrib[attr].input_ptr +
+               tg->attrib[attr].input_stride * index;
+
+         copy_size = tg->attrib[attr].copy_size;
+         if (likely(copy_size >= 0)) {
+            /* input and output formats match: raw byte copy */
+            memcpy(dst, src, copy_size);
+         }
+         else {
+            /* full conversion: fetch to float[4], then emit in the output format */
+            tg->attrib[attr].fetch( data, src, 0, 0 );
+
+            if (0)
+               debug_printf("Fetch linear attr %d from %p stride %d index %d: "
+                            " %f, %f, %f, %f \n",
+                            attr,
+                            tg->attrib[attr].input_ptr,
+                            tg->attrib[attr].input_stride,
+                            index,
+                            data[0], data[1],data[2], data[3]);
+
+            tg->attrib[attr].emit( data, dst );
+         }
+      } else {
+         if (likely(tg->attrib[attr].copy_size >= 0)) {
+            /* 32-bit integer instance id: store it directly in the output
+             * vertex.  The destination must be dst; copying into the local
+             * data[] scratch would leave the output attribute unwritten.
+             */
+            memcpy(dst, &instance_id, 4);
+         }
+         else {
+            data[0] = (float)instance_id;
+            tg->attrib[attr].emit( data, dst );
+         }
+      }
+   }
+}
/**
* Fetch vertex attributes for 'count' vertices.
@@ -367,62 +433,45 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
- unsigned nr_attrs = tg->nr_attrib;
- unsigned attr;
unsigned i;
- /* loop over vertex attributes (vertex shader inputs)
- */
for (i = 0; i < count; i++) {
- const unsigned elt = *elts++;
-
- for (attr = 0; attr < nr_attrs; attr++) {
- float data[4];
- char *dst = vert + tg->attrib[attr].output_offset;
-
- if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
- const uint8_t *src;
- unsigned index;
-
- if (tg->attrib[attr].instance_divisor) {
- index = instance_id / tg->attrib[attr].instance_divisor;
- } else {
- index = elt;
- }
-
- /* clamp to void going out of bounds */
- index = MIN2(index, tg->attrib[attr].max_index);
-
- src = tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride * index;
-
- tg->attrib[attr].fetch( data, src, 0, 0 );
-
- if (0)
- debug_printf("Fetch elt attr %d from %p stride %d div %u max %u index %d: "
- " %f, %f, %f, %f \n",
- attr,
- tg->attrib[attr].input_ptr,
- tg->attrib[attr].input_stride,
- tg->attrib[attr].instance_divisor,
- tg->attrib[attr].max_index,
- index,
- data[0], data[1],data[2], data[3]);
- } else {
- data[0] = (float)instance_id;
- }
+ generic_run_one(tg, *elts++, instance_id, vert);
+ vert += tg->translate.key.output_stride;
+ }
+}
- if (0)
- debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
- i, elt, attr, data[0], data[1], data[2], data[3]);
+static void PIPE_CDECL generic_run_elts16( struct translate *translate,
+ const uint16_t *elts,
+ unsigned count,
+ unsigned instance_id,
+ void *output_buffer )
+{
+ struct translate_generic *tg = translate_generic(translate);
+ char *vert = output_buffer;
+ unsigned i;
- tg->attrib[attr].emit( data, dst );
- }
+ for (i = 0; i < count; i++) {
+ generic_run_one(tg, *elts++, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
+/**
+ * Translate 'count' vertices selected by 8-bit indices.
+ *
+ * Every vertex is produced by generic_run_one(); the output pointer
+ * advances by the key's output_stride after each one.
+ */
+static void PIPE_CDECL generic_run_elts8( struct translate *translate,
+                                          const uint8_t *elts,
+                                          unsigned count,
+                                          unsigned instance_id,
+                                          void *output_buffer )
+{
+   struct translate_generic *tg = translate_generic(translate);
+   char *out = output_buffer;
+   unsigned remaining;
+
+   for (remaining = count; remaining > 0; remaining--) {
+      generic_run_one(tg, *elts, instance_id, out);
+      elts++;
+      out += tg->translate.key.output_stride;
+   }
+}
static void PIPE_CDECL generic_run( struct translate *translate,
unsigned start,
@@ -432,57 +481,10 @@ static void PIPE_CDECL generic_run( struct translate *translate,
{
struct translate_generic *tg = translate_generic(translate);
char *vert = output_buffer;
- unsigned nr_attrs = tg->nr_attrib;
- unsigned attr;
unsigned i;
- /* loop over vertex attributes (vertex shader inputs)
- */
for (i = 0; i < count; i++) {
- unsigned elt = start + i;
-
- for (attr = 0; attr < nr_attrs; attr++) {
- float data[4];
- char *dst = vert + tg->attrib[attr].output_offset;
-
- if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
- const uint8_t *src;
- unsigned index;
-
- if (tg->attrib[attr].instance_divisor) {
- index = instance_id / tg->attrib[attr].instance_divisor;
- }
- else {
- index = elt;
- }
-
- /* clamp to void going out of bounds */
- index = MIN2(index, tg->attrib[attr].max_index);
-
- src = tg->attrib[attr].input_ptr +
- tg->attrib[attr].input_stride * index;
-
- tg->attrib[attr].fetch( data, src, 0, 0 );
-
- if (0)
- debug_printf("Fetch linear attr %d from %p stride %d index %d: "
- " %f, %f, %f, %f \n",
- attr,
- tg->attrib[attr].input_ptr,
- tg->attrib[attr].input_stride,
- index,
- data[0], data[1],data[2], data[3]);
- } else {
- data[0] = (float)instance_id;
- }
-
- if (0)
- debug_printf("vert %d attr %d: %f %f %f %f\n",
- i, attr, data[0], data[1], data[2], data[3]);
-
- tg->attrib[attr].emit( data, dst );
- }
-
+ generic_run_one(tg, start + i, instance_id, vert);
vert += tg->translate.key.output_stride;
}
}
@@ -528,6 +530,8 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->translate.release = generic_release;
tg->translate.set_buffer = generic_set_buffer;
tg->translate.run_elts = generic_run_elts;
+ tg->translate.run_elts16 = generic_run_elts16;
+ tg->translate.run_elts8 = generic_run_elts8;
tg->translate.run = generic_run;
for (i = 0; i < key->nr_elements; i++) {
@@ -544,9 +548,28 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->attrib[i].input_offset = key->element[i].input_offset;
tg->attrib[i].instance_divisor = key->element[i].instance_divisor;
- tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
tg->attrib[i].output_offset = key->element[i].output_offset;
+ tg->attrib[i].copy_size = -1;
+ if (tg->attrib[i].type == TRANSLATE_ELEMENT_INSTANCE_ID)
+ {
+ if(key->element[i].output_format == PIPE_FORMAT_R32_USCALED
+ || key->element[i].output_format == PIPE_FORMAT_R32_SSCALED)
+ tg->attrib[i].copy_size = 4;
+ }
+ else
+ {
+ if(key->element[i].input_format == key->element[i].output_format
+ && format_desc->block.width == 1
+ && format_desc->block.height == 1
+ && !(format_desc->block.bits & 7))
+ tg->attrib[i].copy_size = format_desc->block.bits >> 3;
+ }
+
+ if(tg->attrib[i].copy_size < 0)
+ tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
+ else
+ tg->attrib[i].emit = NULL;
}
tg->nr_attrib = key->nr_elements;
diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c
index ef3aa674a3..f8bf5b4669 100644
--- a/src/gallium/auxiliary/translate/translate_sse.c
+++ b/src/gallium/auxiliary/translate/translate_sse.c
@@ -30,11 +30,12 @@
#include "pipe/p_compiler.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_format.h"
#include "translate.h"
-#if defined(PIPE_ARCH_X86)
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
#include "rtasm/rtasm_cpu.h"
#include "rtasm/rtasm_x86sse.h"
@@ -46,21 +47,9 @@
#define W 3
-typedef void (PIPE_CDECL *run_func)( struct translate *translate,
- unsigned start,
- unsigned count,
- unsigned instance_id,
- void *output_buffer);
-
-typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
- const unsigned *elts,
- unsigned count,
- unsigned instance_id,
- void *output_buffer);
-
struct translate_buffer {
const void *base_ptr;
- unsigned stride;
+ uintptr_t stride;
unsigned max_index;
};
@@ -73,21 +62,43 @@ struct translate_buffer_varient {
#define ELEMENT_BUFFER_INSTANCE_ID 1001
+/* Number of rows in consts[]; must equal the number of enumerators below. */
+#define NUM_CONSTS 7
+
+/* Row indices into the constant table; these are the ids passed to
+ * get_const().  The order must match the initializer of consts[].
+ */
+enum
+{
+ CONST_IDENTITY, /* {0, 0, 0, 1} */
+ CONST_INV_127, /* 1/127 in every channel */
+ CONST_INV_255, /* 1/255 in every channel */
+ CONST_INV_32767, /* 1/32767 in every channel */
+ CONST_INV_65535, /* 1/65535 in every channel */
+ CONST_INV_2147483647, /* 1/2147483647 in every channel */
+ CONST_255 /* 255 in every channel */
+};
+
+/* C(v): initializer that replicates v, converted to float, into all four channels. */
+#define C(v) {(float)(v), (float)(v), (float)(v), (float)(v)}
+static float consts[NUM_CONSTS][4] = {
+ {0, 0, 0, 1},
+ C(1.0 / 127.0),
+ C(1.0 / 255.0),
+ C(1.0 / 32767.0),
+ C(1.0 / 65535.0),
+ C(1.0 / 2147483647.0),
+ C(255.0)
+};
+#undef C
struct translate_sse {
struct translate translate;
struct x86_function linear_func;
struct x86_function elt_func;
+ struct x86_function elt16_func;
+ struct x86_function elt8_func;
struct x86_function *func;
- boolean loaded_identity;
- boolean loaded_255;
- boolean loaded_inv_255;
-
- float identity[4];
- float float_255[4];
- float inv_255[4];
+ PIPE_ALIGN_VAR(16) float consts[NUM_CONSTS][4];
+ int8_t reg_to_const[16];
+ int8_t const_to_reg[NUM_CONSTS];
struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
unsigned nr_buffers;
@@ -102,17 +113,16 @@ struct translate_sse {
boolean use_instancing;
unsigned instance_id;
- run_func gen_run;
- run_elts_func gen_run_elts;
-
/* these are actually known values, but putting them in a struct
* like this is helpful to keep them in sync across the file.
*/
struct x86_reg tmp_EAX;
- struct x86_reg idx_EBX; /* either start+i or &elt[i] */
- struct x86_reg outbuf_ECX;
- struct x86_reg machine_EDX;
- struct x86_reg count_ESI; /* decrements to zero */
+ struct x86_reg tmp2_EDX;
+ struct x86_reg src_ECX;
+ struct x86_reg idx_ESI; /* either start+i or &elt[i] */
+ struct x86_reg machine_EDI;
+ struct x86_reg outbuf_EBX;
+ struct x86_reg count_EBP; /* decrements to zero */
};
static int get_offset( const void *a, const void *b )
@@ -120,281 +130,950 @@ static int get_offset( const void *a, const void *b )
return (const char *)b - (const char *)a;
}
+static struct x86_reg get_const( struct translate_sse *p, unsigned id)
+{
+ struct x86_reg reg;
+ unsigned i;
+ if(p->const_to_reg[id] >= 0)
+ return x86_make_reg(file_XMM, p->const_to_reg[id]);
-static struct x86_reg get_identity( struct translate_sse *p )
-{
- struct x86_reg reg = x86_make_reg(file_XMM, 6);
-
- if (!p->loaded_identity) {
- p->loaded_identity = TRUE;
- p->identity[0] = 0;
- p->identity[1] = 0;
- p->identity[2] = 0;
- p->identity[3] = 1;
-
- sse_movups(p->func, reg,
- x86_make_disp(p->machine_EDX,
- get_offset(p, &p->identity[0])));
+ for(i = 2; i < 8; ++i)
+ {
+ if(p->reg_to_const[i] < 0)
+ break;
}
+ /* TODO: be smarter here */
+ if(i == 8)
+ --i;
+
+ reg = x86_make_reg(file_XMM, i);
+
+ if(p->reg_to_const[i] >= 0)
+ p->const_to_reg[p->reg_to_const[i]] = -1;
+
+ p->reg_to_const[i] = id;
+ p->const_to_reg[id] = i;
+
+ /* TODO: this should happen outside the loop, if possible */
+ sse_movaps(p->func, reg,
+ x86_make_disp(p->machine_EDI,
+ get_offset(p, &p->consts[id][0])));
+
return reg;
}
-static struct x86_reg get_255( struct translate_sse *p )
+/* load the data in a SSE2 register, padding with zeros */
+static boolean emit_load_sse2( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg src,
+ unsigned size)
{
- struct x86_reg reg = x86_make_reg(file_XMM, 7);
-
- if (!p->loaded_255) {
- p->loaded_255 = TRUE;
- p->float_255[0] =
- p->float_255[1] =
- p->float_255[2] =
- p->float_255[3] = 255.0f;
-
- sse_movups(p->func, reg,
- x86_make_disp(p->machine_EDX,
- get_offset(p, &p->float_255[0])));
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+ struct x86_reg tmp = p->tmp_EAX;
+ switch(size)
+ {
+ case 1:
+ x86_movzx8(p->func, tmp, src);
+ sse2_movd(p->func, data, tmp);
+ break;
+ case 2:
+ x86_movzx16(p->func, tmp, src);
+ sse2_movd(p->func, data, tmp);
+ break;
+ case 3:
+ x86_movzx8(p->func, tmp, x86_make_disp(src, 2));
+ x86_shl_imm(p->func, tmp, 16);
+ x86_mov16(p->func, tmp, src);
+ sse2_movd(p->func, data, tmp);
+ break;
+ case 4:
+ sse2_movd(p->func, data, src);
+ break;
+ case 6:
+ sse2_movd(p->func, data, src);
+ x86_movzx16(p->func, tmp, x86_make_disp(src, 4));
+ sse2_movd(p->func, tmpXMM, tmp);
+ sse2_punpckldq(p->func, data, tmpXMM);
+ break;
+ case 8:
+ sse2_movq(p->func, data, src);
+ break;
+ case 12:
+ sse2_movq(p->func, data, src);
+ sse2_movd(p->func, tmpXMM, x86_make_disp(src, 8));
+ sse2_punpcklqdq(p->func, data, tmpXMM);
+ break;
+ case 16:
+ sse2_movdqu(p->func, data, src);
+ break;
+ default:
+ return FALSE;
}
-
- return reg;
+ return TRUE;
}
-static struct x86_reg get_inv_255( struct translate_sse *p )
+/* this value can be passed for the out_chans argument */
+#define CHANNELS_0001 5
+
+/* this function will load #chans float values, and will
+ * pad the register with zeroes at least up to out_chans.
+ *
+ * If out_chans is set to CHANNELS_0001, then the fourth
+ * value will be padded with 1. Only pass this value if
+ * chans < 4 or results are undefined.
+ */
+static void emit_load_float32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0,
+ unsigned out_chans,
+ unsigned chans)
{
- struct x86_reg reg = x86_make_reg(file_XMM, 5);
-
- if (!p->loaded_inv_255) {
- p->loaded_inv_255 = TRUE;
- p->inv_255[0] =
- p->inv_255[1] =
- p->inv_255[2] =
- p->inv_255[3] = 1.0f / 255.0f;
-
- sse_movups(p->func, reg,
- x86_make_disp(p->machine_EDX,
- get_offset(p, &p->inv_255[0])));
+ switch(chans)
+ {
+ case 1:
+ /* a 0 0 0
+ * a 0 0 1
+ */
+ sse_movss(p->func, data, arg0);
+ if(out_chans == CHANNELS_0001)
+ sse_orps(p->func, data, get_const(p, CONST_IDENTITY) );
+ break;
+ case 2:
+ /* 0 0 0 1
+ * a b 0 1
+ */
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
+ else if(out_chans > 2)
+ sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) );
+ sse_movlps(p->func, data, arg0);
+ break;
+ case 3:
+ /* Have to jump through some hoops:
+ *
+ * c 0 0 0
+ * c 0 0 1 if out_chans == CHANNELS_0001
+ * 0 0 c 0/1
+ * a b c 0/1
+ */
+ sse_movss(p->func, data, x86_make_disp(arg0, 8));
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X,Y,Z,W) );
+ sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
+ sse_movlps(p->func, data, arg0);
+ break;
+ case 4:
+ sse_movups(p->func, data, arg0);
+ break;
}
-
- return reg;
}
+/* This function behaves like emit_load_float32, but loads
+ * 64-bit floating point numbers, converting them to 32-bit
+ * ones (precision is lost in the conversion).
+ *
+ * p: code generator state; instructions are appended to p->func
+ * data: destination XMM register
+ * arg0: source operand addressing the first double
+ * out_chans: number of channels that must be valid in data, or CHANNELS_0001
+ * chans: number of doubles to load; values other than 1..4 emit nothing
+ *
+ * XMM1 is used as scratch when three or four doubles are loaded.
+ * get_const() may itself emit a load of the constant, so it is called
+ * only on the paths that need the identity constant.
+ */
+static void emit_load_float64to32( struct translate_sse *p,
+ struct x86_reg data,
+ struct x86_reg arg0,
+ unsigned out_chans,
+ unsigned chans)
+{
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+ switch(chans)
+ {
+ case 1:
+ sse2_movsd(p->func, data, arg0);
+ /* cvtpd2ps also clears the lanes above the converted pair, while
+ * cvtsd2ss only writes lane 0; use the packed form whenever any
+ * other lane has to hold a defined value */
+ if(out_chans > 1)
+ sse2_cvtpd2ps(p->func, data, data);
+ else
+ sse2_cvtsd2ss(p->func, data, data);
+ /* presumably takes z, w from the {0, 0, 0, 1} constant so that w becomes 1 */
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
+ break;
+ case 2:
+ sse2_movupd(p->func, data, arg0);
+ sse2_cvtpd2ps(p->func, data, data);
+ if(out_chans == CHANNELS_0001)
+ sse_shufps(p->func, data, get_const(p, CONST_IDENTITY), SHUF(X, Y, Z, W) );
+ else if(out_chans > 2)
+ sse_movlhps(p->func, data, get_const(p, CONST_IDENTITY) );
+ break;
+ case 3:
+ /* convert a, b in place; convert c separately (third double lives at
+ * byte offset 16) and merge it into the high half of data */
+ sse2_movupd(p->func, data, arg0);
+ sse2_cvtpd2ps(p->func, data, data);
+ sse2_movsd(p->func, tmpXMM, x86_make_disp(arg0, 16));
+ if(out_chans > 3)
+ sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM);
+ else
+ sse2_cvtsd2ss(p->func, tmpXMM, tmpXMM);
+ sse_movlhps(p->func, data, tmpXMM);
+ if(out_chans == CHANNELS_0001)
+ sse_orps(p->func, data, get_const(p, CONST_IDENTITY) );
+ break;
+ case 4:
+ /* convert each pair of doubles, then combine: low half from the
+ * first pair, high half from the second pair */
+ sse2_movupd(p->func, data, arg0);
+ sse2_cvtpd2ps(p->func, data, data);
+ sse2_movupd(p->func, tmpXMM, x86_make_disp(arg0, 16));
+ sse2_cvtpd2ps(p->func, tmpXMM, tmpXMM);
+ sse_movlhps(p->func, data, tmpXMM);
+ break;
+ }
+}
-static void emit_load_R32G32B32A32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+static void emit_mov64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src_gpr, struct x86_reg src_xmm)
{
- sse_movups(p->func, data, arg0);
+ if(x86_target(p->func) != X86_32)
+ x64_mov64(p->func, dst_gpr, src_gpr);
+ else
+ {
+ /* TODO: when/on which CPUs is SSE2 actually better than SSE? */
+ if(x86_target_caps(p->func) & X86_SSE2)
+ sse2_movq(p->func, dst_xmm, src_xmm);
+ else
+ sse_movlps(p->func, dst_xmm, src_xmm);
+ }
}
-static void emit_load_R32G32B32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+static void emit_load64(struct translate_sse *p, struct x86_reg dst_gpr, struct x86_reg dst_xmm, struct x86_reg src)
{
- /* Have to jump through some hoops:
- *
- * c 0 0 0
- * c 0 0 1
- * 0 0 c 1
- * a b c 1
- */
- sse_movss(p->func, data, x86_make_disp(arg0, 8));
- sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
- sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
- sse_movlps(p->func, data, arg0);
+ emit_mov64(p, dst_gpr, dst_xmm, src, src);
}
-static void emit_load_R32G32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+static void emit_store64(struct translate_sse *p, struct x86_reg dst, struct x86_reg src_gpr, struct x86_reg src_xmm)
{
- /* 0 0 0 1
- * a b 0 1
- */
- sse_movups(p->func, data, get_identity(p) );
- sse_movlps(p->func, data, arg0);
+ emit_mov64(p, dst, dst, src_gpr, src_xmm);
}
+/* Emit an unaligned 128-bit move from src to dst: movdqu when the
+ * target reports SSE2, movups otherwise.
+ */
+static void emit_mov128(struct translate_sse *p, struct x86_reg dst, struct x86_reg src)
+{
+   if (!(x86_target_caps(p->func) & X86_SSE2)) {
+      sse_movups(p->func, dst, src);
+      return;
+   }
+
+   sse2_movdqu(p->func, dst, src);
+}
-static void emit_load_R32( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg arg0 )
+/* TODO: this uses unaligned accesses liberally, which is great on Nehalem,
+ * but may or may not be good on older processors
+ * TODO: may perhaps want to use non-temporal stores here if possible
+ */
+static void emit_memcpy(struct translate_sse *p, struct x86_reg dst, struct x86_reg src, unsigned size)
{
- /* a 0 0 0
- * a 0 0 1
- */
- sse_movss(p->func, data, arg0);
- sse_orps(p->func, data, get_identity(p) );
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ struct x86_reg dataXMM2 = x86_make_reg(file_XMM, 1);
+ struct x86_reg dataGPR = p->tmp_EAX;
+ struct x86_reg dataGPR2 = p->tmp2_EDX;
+
+ if(size < 8)
+ {
+ switch (size)
+ {
+ case 1:
+ x86_mov8(p->func, dataGPR, src);
+ x86_mov8(p->func, dst, dataGPR);
+ break;
+ case 2:
+ x86_mov16(p->func, dataGPR, src);
+ x86_mov16(p->func, dst, dataGPR);
+ break;
+ case 3:
+ x86_mov16(p->func, dataGPR, src);
+ x86_mov8(p->func, dataGPR2, x86_make_disp(src, 2));
+ x86_mov16(p->func, dst, dataGPR);
+ x86_mov8(p->func, x86_make_disp(dst, 2), dataGPR2);
+ break;
+ case 4:
+ x86_mov(p->func, dataGPR, src);
+ x86_mov(p->func, dst, dataGPR);
+ break;
+ case 6:
+ x86_mov(p->func, dataGPR, src);
+ x86_mov16(p->func, dataGPR2, x86_make_disp(src, 4));
+ x86_mov(p->func, dst, dataGPR);
+ x86_mov16(p->func, x86_make_disp(dst, 4), dataGPR2);
+ break;
+ }
+ }
+ else if(!(x86_target_caps(p->func) & X86_SSE))
+ {
+ unsigned i = 0;
+ assert((size & 3) == 0);
+ for(i = 0; i < size; i += 4)
+ {
+ x86_mov(p->func, dataGPR, x86_make_disp(src, i));
+ x86_mov(p->func, x86_make_disp(dst, i), dataGPR);
+ }
+ }
+ else
+ {
+ switch(size)
+ {
+ case 8:
+ emit_load64(p, dataGPR, dataXMM, src);
+ emit_store64(p, dst, dataGPR, dataXMM);
+ break;
+ case 12:
+ emit_load64(p, dataGPR2, dataXMM, src);
+ x86_mov(p->func, dataGPR, x86_make_disp(src, 8));
+ emit_store64(p, dst, dataGPR2, dataXMM);
+ x86_mov(p->func, x86_make_disp(dst, 8), dataGPR);
+ break;
+ case 16:
+ emit_mov128(p, dataXMM, src);
+ emit_mov128(p, dst, dataXMM);
+ break;
+ case 24:
+ emit_mov128(p, dataXMM, src);
+ emit_load64(p, dataGPR, dataXMM2, x86_make_disp(src, 16));
+ emit_mov128(p, dst, dataXMM);
+ emit_store64(p, x86_make_disp(dst, 16), dataGPR, dataXMM2);
+ break;
+ case 32:
+ emit_mov128(p, dataXMM, src);
+ emit_mov128(p, dataXMM2, x86_make_disp(src, 16));
+ emit_mov128(p, dst, dataXMM);
+ emit_mov128(p, x86_make_disp(dst, 16), dataXMM2);
+ break;
+ default:
+ assert(0);
+ }
+ }
}
+static boolean translate_attr_convert( struct translate_sse *p,
+ const struct translate_element *a,
+ struct x86_reg src,
+ struct x86_reg dst)
-static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p,
- struct x86_reg data,
- struct x86_reg src )
{
+ const struct util_format_description* input_desc = util_format_description(a->input_format);
+ const struct util_format_description* output_desc = util_format_description(a->output_format);
+ unsigned i;
+ boolean id_swizzle = TRUE;
+ unsigned swizzle[4] = {UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE, UTIL_FORMAT_SWIZZLE_NONE};
+ unsigned needed_chans = 0;
+ unsigned imms[2] = {0, 0x3f800000};
- /* Load and unpack twice:
- */
- sse_movss(p->func, data, src);
- sse2_punpcklbw(p->func, data, get_identity(p));
- sse2_punpcklbw(p->func, data, get_identity(p));
+ if(a->output_format == PIPE_FORMAT_NONE || a->input_format == PIPE_FORMAT_NONE)
+ return FALSE;
- /* Convert to float:
- */
- sse2_cvtdq2ps(p->func, data, data);
+ if(input_desc->channel[0].size & 7)
+ return FALSE;
+ if(input_desc->colorspace != output_desc->colorspace)
+ return FALSE;
- /* Scale by 1/255.0
- */
- sse_mulps(p->func, data, get_inv_255(p));
-}
+ for(i = 1; i < input_desc->nr_channels; ++i)
+ {
+ if(memcmp(&input_desc->channel[i], &input_desc->channel[0], sizeof(input_desc->channel[0])))
+ return FALSE;
+ }
+ for(i = 1; i < output_desc->nr_channels; ++i)
+ {
+ if(memcmp(&output_desc->channel[i], &output_desc->channel[0], sizeof(output_desc->channel[0])))
+ return FALSE;
+ }
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(output_desc->swizzle[i] < 4)
+ swizzle[output_desc->swizzle[i]] = input_desc->swizzle[i];
+ }
+ if((x86_target_caps(p->func) & X86_SSE) && (0
+ || a->output_format == PIPE_FORMAT_R32_FLOAT
+ || a->output_format == PIPE_FORMAT_R32G32_FLOAT
+ || a->output_format == PIPE_FORMAT_R32G32B32_FLOAT
+ || a->output_format == PIPE_FORMAT_R32G32B32A32_FLOAT))
+ {
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
-static void emit_store_R32G32B32A32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- sse_movups(p->func, dest, dataXMM);
-}
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels)
+ swizzle[i] = i;
+ }
-static void emit_store_R32G32B32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- /* Emit two, shuffle, emit one.
- */
- sse_movlps(p->func, dest, dataXMM);
- sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
- sse_movss(p->func, x86_make_disp(dest,8), dataXMM);
-}
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] < 4)
+ needed_chans = MAX2(needed_chans, swizzle[i] + 1);
+ if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i)
+ id_swizzle = FALSE;
+ }
-static void emit_store_R32G32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- sse_movlps(p->func, dest, dataXMM);
-}
+ if(needed_chans > 0)
+ {
+ switch(input_desc->channel[0].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if(!(x86_target_caps(p->func) & X86_SSE2))
+ return FALSE;
+ emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+ /* TODO: add support for SSE4.1 pmovzx */
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ /* TODO: this may be inefficient due to the CONST_IDENTITY register being used both as a float and integer register */
+ sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+ sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+ break;
+ case 16:
+ sse2_punpcklwd(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+ break;
+ case 32: /* we lose precision here */
+ sse2_psrld_imm(p->func, dataXMM, 1);
+ break;
+ default:
+ return FALSE;
+ }
+ sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
+ if(input_desc->channel[0].normalized)
+ {
+ struct x86_reg factor;
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ factor = get_const(p, CONST_INV_255);
+ break;
+ case 16:
+ factor = get_const(p, CONST_INV_65535);
+ break;
+ case 32:
+ factor = get_const(p, CONST_INV_2147483647);
+ break;
+ default:
+ assert(0);
+ factor.disp = 0;
+ factor.file = 0;
+ factor.idx = 0;
+ factor.mod = 0;
+ break;
+ }
+ sse_mulps(p->func, dataXMM, factor);
+ }
+ else if(input_desc->channel[0].size == 32)
+ sse_addps(p->func, dataXMM, dataXMM); /* compensate for the bit we threw away to fit u32 into s32 */
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if(!(x86_target_caps(p->func) & X86_SSE2))
+ return FALSE;
+ emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+ /* TODO: add support for SSE4.1 pmovsx */
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ sse2_psrad_imm(p->func, dataXMM, 24);
+ break;
+ case 16:
+ sse2_punpcklwd(p->func, dataXMM, dataXMM);
+ sse2_psrad_imm(p->func, dataXMM, 16);
+ break;
+ case 32: /* we lose precision here */
+ break;
+ default:
+ return FALSE;
+ }
+ sse2_cvtdq2ps(p->func, dataXMM, dataXMM);
+ if(input_desc->channel[0].normalized)
+ {
+ struct x86_reg factor;
+ switch(input_desc->channel[0].size)
+ {
+ case 8:
+ factor = get_const(p, CONST_INV_127);
+ break;
+ case 16:
+ factor = get_const(p, CONST_INV_32767);
+ break;
+ case 32:
+ factor = get_const(p, CONST_INV_2147483647);
+ break;
+ default:
+ assert(0);
+ factor.disp = 0;
+ factor.file = 0;
+ factor.idx = 0;
+ factor.mod = 0;
+ break;
+ }
+ sse_mulps(p->func, dataXMM, factor);
+ }
+ break;
+
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ if(input_desc->channel[0].size != 32 && input_desc->channel[0].size != 64)
+ return FALSE;
+ if(swizzle[3] == UTIL_FORMAT_SWIZZLE_1 && input_desc->nr_channels <= 3)
+ {
+ swizzle[3] = UTIL_FORMAT_SWIZZLE_W;
+ needed_chans = CHANNELS_0001;
+ }
+ switch(input_desc->channel[0].size)
+ {
+ case 32:
+ emit_load_float32(p, dataXMM, src, needed_chans, input_desc->nr_channels);
+ break;
+ case 64: /* we lose precision here */
+ if(!(x86_target_caps(p->func) & X86_SSE2))
+ return FALSE;
+ emit_load_float64to32(p, dataXMM, src, needed_chans, input_desc->nr_channels);
+ break;
+ default:
+ return FALSE;
+ }
+ break;
+ default:
+ return FALSE;
+ }
-static void emit_store_R32( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- sse_movss(p->func, dest, dataXMM);
-}
+ if(!id_swizzle)
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(swizzle[0], swizzle[1], swizzle[2], swizzle[3]) );
+ }
+ if(output_desc->nr_channels >= 4
+ && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[1] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[3] < UTIL_FORMAT_SWIZZLE_0
+ )
+ sse_movups(p->func, dst, dataXMM);
+ else
+ {
+ if(output_desc->nr_channels >= 2
+ && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+ sse_movlps(p->func, dst, dataXMM);
+ else
+ {
+ if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0)
+ sse_movss(p->func, dst, dataXMM);
+ else
+ x86_mov_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+
+ if(output_desc->nr_channels >= 2)
+ {
+ if(swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(1, 1, 2, 3));
+ sse_movss(p->func, x86_make_disp(dst, 4), dataXMM);
+ }
+ else
+ x86_mov_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ if(output_desc->nr_channels >= 3)
+ {
+ if(output_desc->nr_channels >= 4
+ && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+ sse_movhps(p->func, x86_make_disp(dst, 8), dataXMM);
+ else
+ {
+ if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(2, 2, 2, 3));
+ sse_movss(p->func, x86_make_disp(dst, 8), dataXMM);
+ }
+ else
+ x86_mov_imm(p->func, x86_make_disp(dst, 8), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+
+ if(output_desc->nr_channels >= 4)
+ {
+ if(swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(3, 3, 3, 3));
+ sse_movss(p->func, x86_make_disp(dst, 12), dataXMM);
+ }
+ else
+ x86_mov_imm(p->func, x86_make_disp(dst, 12), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ }
+ }
+ return TRUE;
+ }
+ else if((x86_target_caps(p->func) & X86_SSE2) && input_desc->channel[0].size == 8 && output_desc->channel[0].size == 16
+ && output_desc->channel[0].normalized == input_desc->channel[0].normalized
+ && (0
+ || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED)
+ || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+ || (input_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED && output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+ ))
+ {
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 1);
+ struct x86_reg tmp = p->tmp_EAX;
+ unsigned imms[2] = {0, 1};
+
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0 && i >= input_desc->nr_channels)
+ swizzle[i] = i;
+ }
-static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg dataXMM )
-{
- /* Scale by 255.0
- */
- sse_mulps(p->func, dataXMM, get_255(p));
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ if(swizzle[i] < 4)
+ needed_chans = MAX2(needed_chans, swizzle[i] + 1);
+ if(swizzle[i] < UTIL_FORMAT_SWIZZLE_0 && swizzle[i] != i)
+ id_swizzle = FALSE;
+ }
- /* Pack and emit:
- */
- sse2_cvtps2dq(p->func, dataXMM, dataXMM);
- sse2_packssdw(p->func, dataXMM, dataXMM);
- sse2_packuswb(p->func, dataXMM, dataXMM);
- sse_movss(p->func, dest, dataXMM);
-}
+ if(needed_chans > 0)
+ {
+ emit_load_sse2(p, dataXMM, src, input_desc->channel[0].size * input_desc->nr_channels >> 3);
+
+ switch(input_desc->channel[0].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if(input_desc->channel[0].normalized)
+ {
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ if(output_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
+ sse2_psrlw_imm(p->func, dataXMM, 1);
+ }
+ else
+ sse2_punpcklbw(p->func, dataXMM, get_const(p, CONST_IDENTITY));
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if(input_desc->channel[0].normalized)
+ {
+ sse2_movq(p->func, tmpXMM, get_const(p, CONST_IDENTITY));
+ sse2_punpcklbw(p->func, tmpXMM, dataXMM);
+ sse2_psllw_imm(p->func, dataXMM, 9);
+ sse2_psrlw_imm(p->func, dataXMM, 8);
+ sse2_por(p->func, tmpXMM, dataXMM);
+ sse2_psrlw_imm(p->func, dataXMM, 7);
+ sse2_por(p->func, tmpXMM, dataXMM);
+ {
+ struct x86_reg t = dataXMM;
+ dataXMM = tmpXMM;
+ tmpXMM = t;
+ }
+ }
+ else
+ {
+ sse2_punpcklbw(p->func, dataXMM, dataXMM);
+ sse2_psraw_imm(p->func, dataXMM, 8);
+ }
+ break;
+ default:
+ assert(0);
+ }
+ /* imms[1] is the value written for SWIZZLE_1; it is stored with 16-bit
+  * immediates (or packed as imms[a] << 16 | imms[b]), so it must fit in
+  * 16 bits: 0xffff for unsigned-normalized, 0x7fff for signed-normalized.
+  */
+ if(output_desc->channel[0].normalized)
+ imms[1] = (output_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) ? 0xffff : 0x7fff;
+ if(!id_swizzle)
+ sse2_pshuflw(p->func, dataXMM, dataXMM, (swizzle[0] & 3) | ((swizzle[1] & 3) << 2) | ((swizzle[2] & 3) << 4) | ((swizzle[3] & 3) << 6));
+ }
+ if(output_desc->nr_channels >= 4
+ && swizzle[0] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[1] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[2] < UTIL_FORMAT_SWIZZLE_0
+ && swizzle[3] < UTIL_FORMAT_SWIZZLE_0
+ )
+ sse2_movq(p->func, dst, dataXMM);
+ else
+ {
+ if(swizzle[0] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ if(output_desc->nr_channels >= 2 && swizzle[1] < UTIL_FORMAT_SWIZZLE_0)
+ sse2_movd(p->func, dst, dataXMM);
+ else
+ {
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_mov16(p->func, dst, tmp);
+ if(output_desc->nr_channels >= 2)
+ x86_mov16_imm(p->func, x86_make_disp(dst, 2), imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ else
+ {
+ if(output_desc->nr_channels >= 2 && swizzle[1] >= UTIL_FORMAT_SWIZZLE_0)
+ x86_mov_imm(p->func, dst, (imms[swizzle[1] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+ else
+ {
+ x86_mov16_imm(p->func, dst, imms[swizzle[0] - UTIL_FORMAT_SWIZZLE_0]);
+ if(output_desc->nr_channels >= 2)
+ {
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_shr_imm(p->func, tmp, 16);
+ x86_mov16(p->func, x86_make_disp(dst, 2), tmp);
+ }
+ }
+ }
+ if(output_desc->nr_channels >= 3)
+ {
+ if(swizzle[2] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ if(output_desc->nr_channels >= 4 && swizzle[3] < UTIL_FORMAT_SWIZZLE_0)
+ {
+ sse2_psrlq_imm(p->func, dataXMM, 32);
+ sse2_movd(p->func, x86_make_disp(dst, 4), dataXMM);
+ }
+ else
+ {
+ sse2_psrlq_imm(p->func, dataXMM, 32);
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_mov16(p->func, x86_make_disp(dst, 4), tmp);
+ if(output_desc->nr_channels >= 4)
+ {
+ x86_mov16_imm(p->func, x86_make_disp(dst, 6), imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0]);
+ }
+ }
+ }
+ else
+ {
+ if(output_desc->nr_channels >= 4 && swizzle[3] >= UTIL_FORMAT_SWIZZLE_0)
+ x86_mov_imm(p->func, x86_make_disp(dst, 4), (imms[swizzle[3] - UTIL_FORMAT_SWIZZLE_0] << 16) | imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+ else
+ {
+ x86_mov16_imm(p->func, x86_make_disp(dst, 4), imms[swizzle[2] - UTIL_FORMAT_SWIZZLE_0]);
+
+ if(output_desc->nr_channels >= 4)
+ {
+ sse2_psrlq_imm(p->func, dataXMM, 48);
+ sse2_movd(p->func, tmp, dataXMM);
+ x86_mov16(p->func, x86_make_disp(dst, 6), tmp);
+ }
+ }
+ }
+ }
+ }
+ return TRUE;
+ }
+ else if(!memcmp(&output_desc->channel[0], &input_desc->channel[0], sizeof(output_desc->channel[0])))
+ {
+ struct x86_reg tmp = p->tmp_EAX;
+ unsigned i;
+ if(input_desc->channel[0].size == 8 && input_desc->nr_channels == 4 && output_desc->nr_channels == 4
+ && swizzle[0] == UTIL_FORMAT_SWIZZLE_W
+ && swizzle[1] == UTIL_FORMAT_SWIZZLE_Z
+ && swizzle[2] == UTIL_FORMAT_SWIZZLE_Y
+ && swizzle[3] == UTIL_FORMAT_SWIZZLE_X)
+ {
+ /* TODO: support movbe */
+ x86_mov(p->func, tmp, src);
+ x86_bswap(p->func, tmp);
+ x86_mov(p->func, dst, tmp);
+ return TRUE;
+ }
-/* Extended swizzles? Maybe later.
- */
-static void emit_swizzle( struct translate_sse *p,
- struct x86_reg dest,
- struct x86_reg src,
- unsigned char shuffle )
-{
- sse_shufps(p->func, dest, src, shuffle);
-}
+ for(i = 0; i < output_desc->nr_channels; ++i)
+ {
+ switch(output_desc->channel[0].size)
+ {
+ case 8:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned v = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[0].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ v = output_desc->channel[0].normalized ? 0xff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ v = output_desc->channel[0].normalized ? 0x7f : 1;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov8_imm(p->func, x86_make_disp(dst, i * 1), v);
+ }
+ else
+ {
+ x86_mov8(p->func, tmp, x86_make_disp(src, swizzle[i] * 1));
+ x86_mov8(p->func, x86_make_disp(dst, i * 1), tmp);
+ }
+ break;
+ case 16:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned v = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[1].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ v = output_desc->channel[1].normalized ? 0xffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ v = output_desc->channel[1].normalized ? 0x7fff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ v = 0x3c00;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), v);
+ }
+ else if(swizzle[i] == UTIL_FORMAT_SWIZZLE_0)
+ x86_mov16_imm(p->func, x86_make_disp(dst, i * 2), 0);
+ else
+ {
+ x86_mov16(p->func, tmp, x86_make_disp(src, swizzle[i] * 2));
+ x86_mov16(p->func, x86_make_disp(dst, i * 2), tmp);
+ }
+ break;
+ case 32:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned v = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[1].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ v = output_desc->channel[1].normalized ? 0xffffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ v = output_desc->channel[1].normalized ? 0x7fffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ v = 0x3f800000;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov_imm(p->func, x86_make_disp(dst, i * 4), v);
+ }
+ else
+ {
+ x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 4));
+ x86_mov(p->func, x86_make_disp(dst, i * 4), tmp);
+ }
+ break;
+ case 64:
+ if(swizzle[i] >= UTIL_FORMAT_SWIZZLE_0)
+ {
+ unsigned l = 0;
+ unsigned h = 0;
+ if(swizzle[i] == UTIL_FORMAT_SWIZZLE_1)
+ {
+ switch(output_desc->channel[1].type)
+ {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ h = output_desc->channel[1].normalized ? 0xffffffff : 0;
+ l = output_desc->channel[1].normalized ? 0xffffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ h = output_desc->channel[1].normalized ? 0x7fffffff : 0;
+ l = output_desc->channel[1].normalized ? 0xffffffff : 1;
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ h = 0x3ff00000;
+ l = 0;
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ x86_mov_imm(p->func, x86_make_disp(dst, i * 8), l);
+ x86_mov_imm(p->func, x86_make_disp(dst, i * 8 + 4), h);
+ }
+ else
+ {
+ if(x86_target_caps(p->func) & X86_SSE)
+ {
+ struct x86_reg tmpXMM = x86_make_reg(file_XMM, 0);
+ emit_load64(p, tmp, tmpXMM, x86_make_disp(src, swizzle[i] * 8));
+ emit_store64(p, x86_make_disp(dst, i * 8), tmp, tmpXMM);
+ }
+ else
+ {
+ x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8));
+ x86_mov(p->func, x86_make_disp(dst, i * 8), tmp);
+ x86_mov(p->func, tmp, x86_make_disp(src, swizzle[i] * 8 + 4));
+ x86_mov(p->func, x86_make_disp(dst, i * 8 + 4), tmp);
+ }
+ }
+ break;
+ default:
+ return FALSE;
+ }
+ }
+ return TRUE;
+ }
+ /* special case for draw's EMIT_4UB (RGBA) and EMIT_4UB_BGRA */
+ else if((x86_target_caps(p->func) & X86_SSE2) &&
+ a->input_format == PIPE_FORMAT_R32G32B32A32_FLOAT && (0
+ || a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM
+ || a->output_format == PIPE_FORMAT_R8G8B8A8_UNORM
+ ))
+ {
+ struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ /* load */
+ sse_movups(p->func, dataXMM, src);
-static boolean translate_attr( struct translate_sse *p,
- const struct translate_element *a,
- struct x86_reg srcECX,
- struct x86_reg dstEAX)
-{
- struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
+ if (a->output_format == PIPE_FORMAT_B8G8R8A8_UNORM)
+ sse_shufps(p->func, dataXMM, dataXMM, SHUF(2,1,0,3));
- switch (a->input_format) {
- case PIPE_FORMAT_R32_FLOAT:
- emit_load_R32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_R32G32_FLOAT:
- emit_load_R32G32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- emit_load_R32G32B32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- emit_load_R32G32B32A32(p, dataXMM, srcECX);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
- emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
- break;
- default:
- return FALSE;
- }
+ /* scale by 255.0 */
+ sse_mulps(p->func, dataXMM, get_const(p, CONST_255));
- switch (a->output_format) {
- case PIPE_FORMAT_R32_FLOAT:
- emit_store_R32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R32G32_FLOAT:
- emit_store_R32G32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- emit_store_R32G32B32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- emit_store_R32G32B32A32(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
- emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
- break;
- default:
- return FALSE;
+ /* pack and emit */
+ sse2_cvtps2dq(p->func, dataXMM, dataXMM);
+ sse2_packssdw(p->func, dataXMM, dataXMM);
+ sse2_packuswb(p->func, dataXMM, dataXMM);
+ sse2_movd(p->func, dst, dataXMM);
+
+ return TRUE;
}
- return TRUE;
+ return FALSE;
}
+/**
+ * Emit the code that transfers one attribute from src to dst.
+ *
+ * When the element's input and output formats differ, the work is handed
+ * to translate_attr_convert() and its result is returned.  Otherwise the
+ * attribute is copied verbatim with emit_memcpy(), using the byte count
+ * reported by util_format_get_stride(input_format, 1), and TRUE is returned.
+ */
+static boolean translate_attr( struct translate_sse *p,
+                               const struct translate_element *a,
+                               struct x86_reg src,
+                               struct x86_reg dst)
+{
+   /* formats differ: a real conversion is required */
+   if(a->input_format != a->output_format)
+      return translate_attr_convert(p, a, src, dst);
+
+   /* same format on both sides: plain copy, no conversion needed */
+   emit_memcpy(p, dst, src, util_format_get_stride(a->input_format, 1));
+   return TRUE;
+}
static boolean init_inputs( struct translate_sse *p,
- boolean linear )
+ unsigned index_size )
{
unsigned i;
- struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
+ struct x86_reg instance_id = x86_make_disp(p->machine_EDI,
get_offset(p, &p->instance_id));
for (i = 0; i < p->nr_buffer_varients; i++) {
struct translate_buffer_varient *varient = &p->buffer_varient[i];
struct translate_buffer *buffer = &p->buffer[varient->buffer_index];
- if (linear || varient->instance_divisor) {
- struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
+ if (!index_size || varient->instance_divisor) {
+ struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
get_offset(p, &buffer->stride));
- struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &varient->ptr));
- struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &buffer->base_ptr));
- struct x86_reg elt = p->idx_EBX;
+ struct x86_reg elt = p->idx_ESI;
struct x86_reg tmp_EAX = p->tmp_EAX;
/* Calculate pointer to first attrib:
@@ -406,20 +1085,16 @@ static boolean init_inputs( struct translate_sse *p,
x86_mov(p->func, tmp_EAX, instance_id);
if (varient->instance_divisor != 1) {
- struct x86_reg tmp_EDX = p->machine_EDX;
- struct x86_reg tmp_ECX = p->outbuf_ECX;
+ struct x86_reg tmp_EDX = p->tmp2_EDX;
+ struct x86_reg tmp_ECX = p->src_ECX;
/* TODO: Add x86_shr() to rtasm and use it whenever
* instance divisor is power of two.
*/
- x86_push(p->func, tmp_EDX);
- x86_push(p->func, tmp_ECX);
x86_xor(p->func, tmp_EDX, tmp_EDX);
x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor);
x86_div(p->func, tmp_ECX); /* EAX = EDX:EAX / ECX */
- x86_pop(p->func, tmp_ECX);
- x86_pop(p->func, tmp_EDX);
}
} else {
x86_mov(p->func, tmp_EAX, elt);
@@ -430,16 +1105,23 @@ static boolean init_inputs( struct translate_sse *p,
*/
x86_imul(p->func, tmp_EAX, buf_stride);
+ x64_rexw(p->func);
x86_add(p->func, tmp_EAX, buf_base_ptr);
/* In the linear case, keep the buffer pointer instead of the
* index number.
*/
- if (linear && p->nr_buffer_varients == 1)
+ if (!index_size && p->nr_buffer_varients == 1)
+ {
+ x64_rexw(p->func);
x86_mov(p->func, elt, tmp_EAX);
+ }
else
+ {
+ x64_rexw(p->func);
x86_mov(p->func, buf_ptr, tmp_EAX);
+ }
}
}
@@ -448,44 +1130,57 @@ static boolean init_inputs( struct translate_sse *p,
static struct x86_reg get_buffer_ptr( struct translate_sse *p,
- boolean linear,
+ unsigned index_size,
unsigned var_idx,
struct x86_reg elt )
{
if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) {
- return x86_make_disp(p->machine_EDX,
+ return x86_make_disp(p->machine_EDI,
get_offset(p, &p->instance_id));
}
- if (linear && p->nr_buffer_varients == 1) {
- return p->idx_EBX;
+ if (!index_size && p->nr_buffer_varients == 1) {
+ return p->idx_ESI;
}
- else if (linear || p->buffer_varient[var_idx].instance_divisor) {
- struct x86_reg ptr = p->tmp_EAX;
+ else if (!index_size || p->buffer_varient[var_idx].instance_divisor) {
+ struct x86_reg ptr = p->src_ECX;
struct x86_reg buf_ptr =
- x86_make_disp(p->machine_EDX,
+ x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer_varient[var_idx].ptr));
+ x64_rexw(p->func);
x86_mov(p->func, ptr, buf_ptr);
return ptr;
}
else {
- struct x86_reg ptr = p->tmp_EAX;
+ struct x86_reg ptr = p->src_ECX;
const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];
struct x86_reg buf_stride =
- x86_make_disp(p->machine_EDX,
+ x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[varient->buffer_index].stride));
struct x86_reg buf_base_ptr =
- x86_make_disp(p->machine_EDX,
+ x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[varient->buffer_index].base_ptr));
/* Calculate pointer to current attrib:
*/
- x86_mov(p->func, ptr, buf_stride);
- x86_imul(p->func, ptr, elt);
+ switch(index_size)
+ {
+ case 1:
+ x86_movzx8(p->func, ptr, elt);
+ break;
+ case 2:
+ x86_movzx16(p->func, ptr, elt);
+ break;
+ case 4:
+ x86_mov(p->func, ptr, elt);
+ break;
+ }
+ x86_imul(p->func, ptr, buf_stride);
+ x64_rexw(p->func);
x86_add(p->func, ptr, buf_base_ptr);
return ptr;
}
@@ -494,39 +1189,43 @@ static struct x86_reg get_buffer_ptr( struct translate_sse *p,
static boolean incr_inputs( struct translate_sse *p,
- boolean linear )
+ unsigned index_size )
{
- if (linear && p->nr_buffer_varients == 1) {
- struct x86_reg stride = x86_make_disp(p->machine_EDX,
+ if (!index_size && p->nr_buffer_varients == 1) {
+ struct x86_reg stride = x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[0].stride));
if (p->buffer_varient[0].instance_divisor == 0) {
- x86_add(p->func, p->idx_EBX, stride);
- sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
+ x64_rexw(p->func);
+ x86_add(p->func, p->idx_ESI, stride);
+ sse_prefetchnta(p->func, x86_make_disp(p->idx_ESI, 192));
}
}
- else if (linear) {
+ else if (!index_size) {
unsigned i;
/* Is this worthwhile??
*/
for (i = 0; i < p->nr_buffer_varients; i++) {
struct translate_buffer_varient *varient = &p->buffer_varient[i];
- struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_ptr = x86_make_disp(p->machine_EDI,
get_offset(p, &varient->ptr));
- struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
+ struct x86_reg buf_stride = x86_make_disp(p->machine_EDI,
get_offset(p, &p->buffer[varient->buffer_index].stride));
if (varient->instance_divisor == 0) {
- x86_mov(p->func, p->tmp_EAX, buf_ptr);
- x86_add(p->func, p->tmp_EAX, buf_stride);
+ x86_mov(p->func, p->tmp_EAX, buf_stride);
+ x64_rexw(p->func);
+ x86_add(p->func, p->tmp_EAX, buf_ptr);
if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
+ x64_rexw(p->func);
x86_mov(p->func, buf_ptr, p->tmp_EAX);
}
}
}
else {
- x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, 4));
+ x64_rexw(p->func);
+ x86_lea(p->func, p->idx_ESI, x86_make_disp(p->idx_ESI, index_size));
}
return TRUE;
@@ -551,35 +1250,52 @@ static boolean incr_inputs( struct translate_sse *p,
*/
static boolean build_vertex_emit( struct translate_sse *p,
struct x86_function *func,
- boolean linear )
+ unsigned index_size )
{
int fixup, label;
unsigned j;
+ memset(p->reg_to_const, 0xff, sizeof(p->reg_to_const));
+ memset(p->const_to_reg, 0xff, sizeof(p->const_to_reg));
+
p->tmp_EAX = x86_make_reg(file_REG32, reg_AX);
- p->idx_EBX = x86_make_reg(file_REG32, reg_BX);
- p->outbuf_ECX = x86_make_reg(file_REG32, reg_CX);
- p->machine_EDX = x86_make_reg(file_REG32, reg_DX);
- p->count_ESI = x86_make_reg(file_REG32, reg_SI);
+ p->idx_ESI = x86_make_reg(file_REG32, reg_SI);
+ p->outbuf_EBX = x86_make_reg(file_REG32, reg_BX);
+ p->machine_EDI = x86_make_reg(file_REG32, reg_DI);
+ p->count_EBP = x86_make_reg(file_REG32, reg_BP);
+ p->tmp2_EDX = x86_make_reg(file_REG32, reg_DX);
+ p->src_ECX = x86_make_reg(file_REG32, reg_CX);
p->func = func;
- p->loaded_inv_255 = FALSE;
- p->loaded_255 = FALSE;
- p->loaded_identity = FALSE;
x86_init_func(p->func);
- /* Push a few regs?
- */
- x86_push(p->func, p->idx_EBX);
- x86_push(p->func, p->count_ESI);
+ if(x86_target(p->func) == X86_64_WIN64_ABI)
+ {
+ /* the ABI guarantees a 16-byte aligned 32-byte "shadow space" above the return address */
+ sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8), x86_make_reg(file_XMM, 6));
+ sse2_movdqa(p->func, x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24), x86_make_reg(file_XMM, 7));
+ }
- /* Load arguments into regs:
- */
- x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1));
- x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2));
- x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3));
- x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5));
+ x86_push(p->func, p->outbuf_EBX);
+ x86_push(p->func, p->count_EBP);
+
+/* on non-Win64 x86-64, these are already in the right registers */
+   if(x86_target(p->func) != X86_64_STD_ABI)
+   {
+      x86_push(p->func, p->machine_EDI);
+      x86_push(p->func, p->idx_ESI);
+
+      /* machine_EDI is used as a base address and idx_ESI is dereferenced
+       * in the indexed variants, so on 64-bit targets (Win64 here) the
+       * full 64-bit argument must be copied; a plain x86_mov would only
+       * move the low 32 bits.  Same pattern as the outbuf load below.
+       */
+      if(x86_target(p->func) != X86_32)
+      {
+         x64_mov64(p->func, p->machine_EDI, x86_fn_arg(p->func, 1));
+         x64_mov64(p->func, p->idx_ESI, x86_fn_arg(p->func, 2));
+      }
+      else
+      {
+         x86_mov(p->func, p->machine_EDI, x86_fn_arg(p->func, 1));
+         x86_mov(p->func, p->idx_ESI, x86_fn_arg(p->func, 2));
+      }
+   }
+
+ x86_mov(p->func, p->count_EBP, x86_fn_arg(p->func, 3));
+
+ if(x86_target(p->func) != X86_32)
+ x64_mov64(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5));
+ else
+ x86_mov(p->func, p->outbuf_EBX, x86_fn_arg(p->func, 5));
/* Load instance ID.
*/
@@ -588,25 +1304,25 @@ static boolean build_vertex_emit( struct translate_sse *p,
p->tmp_EAX,
x86_fn_arg(p->func, 4));
x86_mov(p->func,
- x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)),
+ x86_make_disp(p->machine_EDI, get_offset(p, &p->instance_id)),
p->tmp_EAX);
}
/* Get vertex count, compare to zero
*/
x86_xor(p->func, p->tmp_EAX, p->tmp_EAX);
- x86_cmp(p->func, p->count_ESI, p->tmp_EAX);
+ x86_cmp(p->func, p->count_EBP, p->tmp_EAX);
fixup = x86_jcc_forward(p->func, cc_E);
/* always load, needed or not:
*/
- init_inputs(p, linear);
+ init_inputs(p, index_size);
/* Note address for loop jump
*/
label = x86_get_label(p->func);
{
- struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX);
+ struct x86_reg elt = !index_size ? p->idx_ESI : x86_deref(p->idx_ESI);
int last_varient = -1;
struct x86_reg vb;
@@ -618,30 +1334,31 @@ static boolean build_vertex_emit( struct translate_sse *p,
*/
if (varient != last_varient) {
last_varient = varient;
- vb = get_buffer_ptr(p, linear, varient, elt);
+ vb = get_buffer_ptr(p, index_size, varient, elt);
}
if (!translate_attr( p, a,
x86_make_disp(vb, a->input_offset),
- x86_make_disp(p->outbuf_ECX, a->output_offset)))
+ x86_make_disp(p->outbuf_EBX, a->output_offset)))
return FALSE;
}
/* Next output vertex:
*/
+ x64_rexw(p->func);
x86_lea(p->func,
- p->outbuf_ECX,
- x86_make_disp(p->outbuf_ECX,
+ p->outbuf_EBX,
+ x86_make_disp(p->outbuf_EBX,
p->translate.key.output_stride));
/* Incr index
*/
- incr_inputs( p, linear );
+ incr_inputs( p, index_size );
}
/* decr count, loop if not zero
*/
- x86_dec(p->func, p->count_ESI);
+ x86_dec(p->func, p->count_EBP);
x86_jcc(p->func, cc_NZ, label);
/* Exit mmx state?
@@ -656,8 +1373,20 @@ static boolean build_vertex_emit( struct translate_sse *p,
/* Pop regs and return
*/
- x86_pop(p->func, p->count_ESI);
- x86_pop(p->func, p->idx_EBX);
+ if(x86_target(p->func) != X86_64_STD_ABI)
+ {
+ x86_pop(p->func, p->idx_ESI);
+ x86_pop(p->func, p->machine_EDI);
+ }
+
+ x86_pop(p->func, p->count_EBP);
+ x86_pop(p->func, p->outbuf_EBX);
+
+ if(x86_target(p->func) == X86_64_WIN64_ABI)
+ {
+ sse2_movdqa(p->func, x86_make_reg(file_XMM, 6), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 8));
+ sse2_movdqa(p->func, x86_make_reg(file_XMM, 7), x86_make_disp(x86_make_reg(file_REG32, reg_SP), 24));
+ }
x86_ret(p->func);
return TRUE;
@@ -697,37 +1426,7 @@ static void translate_sse_release( struct translate *translate )
x86_release_func( &p->linear_func );
x86_release_func( &p->elt_func );
- FREE(p);
-}
-
-static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
- const unsigned *elts,
- unsigned count,
- unsigned instance_id,
- void *output_buffer )
-{
- struct translate_sse *p = (struct translate_sse *)translate;
-
- p->gen_run_elts( translate,
- elts,
- count,
- instance_id,
- output_buffer);
-}
-
-static void PIPE_CDECL translate_sse_run( struct translate *translate,
- unsigned start,
- unsigned count,
- unsigned instance_id,
- void *output_buffer )
-{
- struct translate_sse *p = (struct translate_sse *)translate;
-
- p->gen_run( translate,
- start,
- count,
- instance_id,
- output_buffer);
+ os_free_aligned(p);
}
@@ -736,18 +1435,19 @@ struct translate *translate_sse2_create( const struct translate_key *key )
struct translate_sse *p = NULL;
unsigned i;
- if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2())
+ /* this is misnamed, it actually refers to whether rtasm is enabled or not */
+ if (!rtasm_cpu_has_sse())
goto fail;
- p = CALLOC_STRUCT( translate_sse );
+ p = os_malloc_aligned(sizeof(struct translate_sse), 16);
if (p == NULL)
goto fail;
+ memset(p, 0, sizeof(*p));
+ memcpy(p->consts, consts, sizeof(consts));
p->translate.key = *key;
p->translate.release = translate_sse_release;
p->translate.set_buffer = translate_sse_set_buffer;
- p->translate.run_elts = translate_sse_run_elts;
- p->translate.run = translate_sse_run;
for (i = 0; i < key->nr_elements; i++) {
if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
@@ -783,18 +1483,32 @@ struct translate *translate_sse2_create( const struct translate_key *key )
if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);
- if (!build_vertex_emit(p, &p->linear_func, TRUE))
+ if (!build_vertex_emit(p, &p->linear_func, 0))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt_func, 4))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt16_func, 2))
+ goto fail;
+
+ if (!build_vertex_emit(p, &p->elt8_func, 1))
+ goto fail;
+
+ p->translate.run = (void*)x86_get_func(&p->linear_func);
+ if (p->translate.run == NULL)
goto fail;
- if (!build_vertex_emit(p, &p->elt_func, FALSE))
+ p->translate.run_elts = (void*)x86_get_func(&p->elt_func);
+ if (p->translate.run_elts == NULL)
goto fail;
- p->gen_run = (run_func)x86_get_func(&p->linear_func);
- if (p->gen_run == NULL)
+ p->translate.run_elts16 = (void*)x86_get_func(&p->elt16_func);
+ if (p->translate.run_elts16 == NULL)
goto fail;
- p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func);
- if (p->gen_run_elts == NULL)
+ p->translate.run_elts8 = (void*)x86_get_func(&p->elt8_func);
+ if (p->translate.run_elts8 == NULL)
goto fail;
return &p->translate;
diff --git a/src/gallium/auxiliary/util/u_bitmask.h b/src/gallium/auxiliary/util/u_bitmask.h
index 87f1110296..98b85ddecd 100644
--- a/src/gallium/auxiliary/util/u_bitmask.h
+++ b/src/gallium/auxiliary/util/u_bitmask.h
@@ -36,6 +36,9 @@
#define U_HANDLE_BITMASK_H_
+#include "pipe/p_compiler.h"
+
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 97fa99ec65..dfb142b9e1 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -42,6 +42,7 @@
#include "util/u_blit.h"
#include "util/u_draw_quad.h"
+#include "util/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_sampler.h"
@@ -56,15 +57,18 @@ struct blit_state
struct cso_context *cso;
struct pipe_blend_state blend;
- struct pipe_depth_stencil_alpha_state depthstencil;
+ struct pipe_depth_stencil_alpha_state depthstencil_keep;
+ struct pipe_depth_stencil_alpha_state depthstencil_write;
struct pipe_rasterizer_state rasterizer;
struct pipe_sampler_state sampler;
struct pipe_viewport_state viewport;
struct pipe_clip_state clip;
struct pipe_vertex_element velem[2];
+ enum pipe_texture_target internal_target;
void *vs;
void *fs[TGSI_WRITEMASK_XYZW + 1];
+ void *fs_depth;
struct pipe_resource *vbuf; /**< quad vertices */
unsigned vbuf_slot;
@@ -95,7 +99,11 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->blend.rt[0].colormask = PIPE_MASK_RGBA;
/* no-op depth/stencil/alpha */
- memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil));
+ memset(&ctx->depthstencil_keep, 0, sizeof(ctx->depthstencil_keep));
+ memset(&ctx->depthstencil_write, 0, sizeof(ctx->depthstencil_write));
+ ctx->depthstencil_write.depth.enabled = 1;
+ ctx->depthstencil_write.depth.writemask = 1;
+ ctx->depthstencil_write.depth.func = PIPE_FUNC_ALWAYS;
/* rasterizer */
memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
@@ -110,7 +118,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
ctx->sampler.min_img_filter = 0; /* set later */
ctx->sampler.mag_img_filter = 0; /* set later */
- ctx->sampler.normalized_coords = 1;
/* vertex elements state */
memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
@@ -145,6 +152,11 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso)
ctx->vertices[i][1][3] = 1.0f; /* q */
}
+ if(pipe->screen->get_param(pipe->screen, PIPE_CAP_NPOT_TEXTURES))
+ ctx->internal_target = PIPE_TEXTURE_2D;
+ else
+ ctx->internal_target = PIPE_TEXTURE_RECT;
+
return ctx;
}
@@ -164,6 +176,9 @@ util_destroy_blit(struct blit_state *ctx)
if (ctx->fs[i])
pipe->delete_fs_state(pipe, ctx->fs[i]);
+ if (ctx->fs_depth)
+ pipe->delete_fs_state(pipe, ctx->fs_depth);
+
pipe_resource_reference(&ctx->vbuf, NULL);
FREE(ctx);
@@ -271,7 +286,7 @@ regions_overlap(int srcX0, int srcY0,
* \param writemask controls which channels in the dest surface are sourced
* from the src surface. Disabled channels are sourced
* from (0,0,0,1).
- * XXX need some control over blitting Z and/or stencil.
+ * XXX need some control over blitting stencil.
*/
void
util_blit_pixels_writemask(struct blit_state *ctx,
@@ -294,8 +309,9 @@ util_blit_pixels_writemask(struct blit_state *ctx,
const int srcW = abs(srcX1 - srcX0);
const int srcH = abs(srcY1 - srcY0);
unsigned offset;
- boolean overlap;
+ boolean overlap, dst_is_depth;
float s0, t0, s1, t1;
+ boolean normalized;
assert(filter == PIPE_TEX_MIPFILTER_NEAREST ||
filter == PIPE_TEX_MIPFILTER_LINEAR);
@@ -335,7 +351,6 @@ util_blit_pixels_writemask(struct blit_state *ctx,
return;
}
-
/* Create a temporary texture when src and dest alias or when src
* is anything other than a 2d texture.
* XXX should just use appropriate shader to access 1d / 3d slice / cube face,
@@ -347,7 +362,8 @@ util_blit_pixels_writemask(struct blit_state *ctx,
dst->face == srcsub.face &&
dst->level == srcsub.level &&
dst->zslice == srcZ0) ||
- src_tex->target != PIPE_TEXTURE_2D)
+ (src_tex->target != PIPE_TEXTURE_2D &&
+ src_tex->target != PIPE_TEXTURE_RECT))
{
struct pipe_resource texTemp;
struct pipe_resource *tex;
@@ -372,7 +388,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
/* create temp texture */
memset(&texTemp, 0, sizeof(texTemp));
- texTemp.target = PIPE_TEXTURE_2D;
+ texTemp.target = ctx->internal_target;
texTemp.format = src_tex->format;
texTemp.last_level = 0;
texTemp.width0 = srcW;
@@ -392,10 +408,19 @@ util_blit_pixels_writemask(struct blit_state *ctx,
src_tex, srcsub, srcLeft, srcTop, srcZ0, /* src */
srcW, srcH); /* size */
- s0 = 0.0f;
- s1 = 1.0f;
- t0 = 0.0f;
- t1 = 1.0f;
+ normalized = tex->target != PIPE_TEXTURE_RECT;
+ if(normalized) {
+ s0 = 0.0f;
+ s1 = 1.0f;
+ t0 = 0.0f;
+ t1 = 1.0f;
+ }
+ else {
+ s0 = 0;
+ s1 = srcW;
+ t0 = 0;
+ t1 = srcH;
+ }
u_sampler_view_default_template(&sv_templ, tex, tex->format);
sampler_view = pipe->create_sampler_view(pipe, tex, &sv_templ);
@@ -415,20 +440,29 @@ util_blit_pixels_writemask(struct blit_state *ctx,
return;
}
- s0 = srcX0 / (float)(u_minify(sampler_view->texture->width0, srcsub.level));
- s1 = srcX1 / (float)(u_minify(sampler_view->texture->width0, srcsub.level));
- t0 = srcY0 / (float)(u_minify(sampler_view->texture->height0, srcsub.level));
- t1 = srcY1 / (float)(u_minify(sampler_view->texture->height0, srcsub.level));
+ s0 = srcX0;
+ s1 = srcX1;
+ t0 = srcY0;
+ t1 = srcY1;
+ normalized = sampler_view->texture->target != PIPE_TEXTURE_RECT;
+ if(normalized)
+ {
+ s0 /= (float)(u_minify(sampler_view->texture->width0, srcsub.level));
+ s1 /= (float)(u_minify(sampler_view->texture->width0, srcsub.level));
+ t0 /= (float)(u_minify(sampler_view->texture->height0, srcsub.level));
+ t1 /= (float)(u_minify(sampler_view->texture->height0, srcsub.level));
+ }
}
+ dst_is_depth = util_format_is_depth_or_stencil(dst->format);
- assert(screen->is_format_supported(screen, sampler_view->format, PIPE_TEXTURE_2D,
+ assert(screen->is_format_supported(screen, sampler_view->format, ctx->internal_target,
sampler_view->texture->nr_samples,
PIPE_BIND_SAMPLER_VIEW, 0));
- assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
+ assert(screen->is_format_supported(screen, dst->format, ctx->internal_target,
dst->texture->nr_samples,
- PIPE_BIND_RENDER_TARGET, 0));
-
+ dst_is_depth ? PIPE_BIND_DEPTH_STENCIL :
+ PIPE_BIND_RENDER_TARGET, 0));
/* save state (restored below) */
cso_save_blend(ctx->cso);
cso_save_depth_stencil_alpha(ctx->cso);
@@ -444,12 +478,15 @@ util_blit_pixels_writemask(struct blit_state *ctx,
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
- cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
+ cso_set_depth_stencil_alpha(ctx->cso,
+ dst_is_depth ? &ctx->depthstencil_write :
+ &ctx->depthstencil_keep);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
cso_set_clip(ctx->cso, &ctx->clip);
cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
/* sampler */
+ ctx->sampler.normalized_coords = normalized;
ctx->sampler.min_img_filter = filter;
ctx->sampler.mag_img_filter = filter;
/* we've limited this already with the sampler view but you never know... */
@@ -472,22 +509,35 @@ util_blit_pixels_writemask(struct blit_state *ctx,
/* texture */
cso_set_fragment_sampler_views(ctx->cso, 1, &sampler_view);
- if (ctx->fs[writemask] == NULL)
- ctx->fs[writemask] =
- util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D,
- TGSI_INTERPOLATE_LINEAR,
- writemask);
-
/* shaders */
- cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
+ if (dst_is_depth) {
+ if (ctx->fs_depth == NULL)
+ ctx->fs_depth =
+ util_make_fragment_tex_shader_writedepth(pipe, TGSI_TEXTURE_2D,
+ TGSI_INTERPOLATE_LINEAR);
+
+ cso_set_fragment_shader_handle(ctx->cso, ctx->fs_depth);
+ } else {
+ if (ctx->fs[writemask] == NULL)
+ ctx->fs[writemask] =
+ util_make_fragment_tex_shader_writemask(pipe, TGSI_TEXTURE_2D,
+ TGSI_INTERPOLATE_LINEAR,
+ writemask);
+
+ cso_set_fragment_shader_handle(ctx->cso, ctx->fs[writemask]);
+ }
cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
/* drawing dest */
memset(&fb, 0, sizeof(fb));
fb.width = dst->width;
fb.height = dst->height;
- fb.nr_cbufs = 1;
- fb.cbufs[0] = dst;
+ if (dst_is_depth) {
+ fb.zsbuf = dst;
+ } else {
+ fb.nr_cbufs = 1;
+ fb.cbufs[0] = dst;
+ }
cso_set_framebuffer(ctx->cso, &fb);
/* draw quad */
@@ -574,6 +624,7 @@ util_blit_pixels_tex(struct blit_state *ctx,
int dstX1, int dstY1,
float z, uint filter)
{
+ boolean normalized = src_sampler_view->texture->target != PIPE_TEXTURE_RECT;
struct pipe_framebuffer_state fb;
float s0, t0, s1, t1;
unsigned offset;
@@ -586,10 +637,18 @@ util_blit_pixels_tex(struct blit_state *ctx,
assert(tex->width0 != 0);
assert(tex->height0 != 0);
- s0 = srcX0 / (float)tex->width0;
- s1 = srcX1 / (float)tex->width0;
- t0 = srcY0 / (float)tex->height0;
- t1 = srcY1 / (float)tex->height0;
+ s0 = srcX0;
+ s1 = srcX1;
+ t0 = srcY0;
+ t1 = srcY1;
+
+ if(normalized)
+ {
+ s0 /= (float)tex->width0;
+ s1 /= (float)tex->width0;
+ t0 /= (float)tex->height0;
+ t1 /= (float)tex->height0;
+ }
assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format,
PIPE_TEXTURE_2D,
@@ -611,12 +670,13 @@ util_blit_pixels_tex(struct blit_state *ctx,
/* set misc state we care about */
cso_set_blend(ctx->cso, &ctx->blend);
- cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
+ cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil_keep);
cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
cso_set_clip(ctx->cso, &ctx->clip);
cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
/* sampler */
+ ctx->sampler.normalized_coords = normalized;
ctx->sampler.min_img_filter = filter;
ctx->sampler.mag_img_filter = filter;
cso_single_sampler(ctx->cso, 0, &ctx->sampler);
diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h
index ef95134f32..b8a0dfce13 100644
--- a/src/gallium/auxiliary/util/u_blit.h
+++ b/src/gallium/auxiliary/util/u_blit.h
@@ -30,18 +30,20 @@
#define U_BLIT_H
+#include "pipe/p_compiler.h"
+
+
#ifdef __cplusplus
extern "C" {
#endif
+struct cso_context;
struct pipe_context;
-struct pipe_surface;
struct pipe_resource;
-struct cso_context;
-
-
-struct blit_state;
+struct pipe_sampler_view;
+struct pipe_subresource;
+struct pipe_surface;
extern struct blit_state *
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index b5b86b7214..f93ef26ae7 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -92,7 +92,7 @@ struct blitter_context_priv
void *velem_state;
/* Sampler state for clamping to a miplevel. */
- void *sampler_state[PIPE_MAX_TEXTURE_LEVELS];
+ void *sampler_state[PIPE_MAX_TEXTURE_LEVELS * 2];
/* Rasterizer state. */
void *rs_state;
@@ -254,6 +254,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
ctx->dsa_write_depth_keep_stencil);
pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil);
pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil);
+ pipe->delete_depth_stencil_alpha_state(pipe, ctx->dsa_flush_depth_stencil);
pipe->delete_rasterizer_state(pipe, ctx->rs_state);
pipe->delete_vs_state(pipe, ctx->vs_col);
@@ -271,7 +272,7 @@ void util_blitter_destroy(struct blitter_context *blitter)
if (ctx->fs_col[i])
pipe->delete_fs_state(pipe, ctx->fs_col[i]);
- for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++)
+ for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS * 2; i++)
if (ctx->sampler_state[i])
pipe->delete_sampler_state(pipe, ctx->sampler_state[i]);
@@ -319,7 +320,7 @@ static void blitter_restore_CSOs(struct blitter_context_priv *ctx)
*/
if (ctx->base.saved_fb_state.nr_cbufs != ~0) {
pipe->set_framebuffer_state(pipe, &ctx->base.saved_fb_state);
- util_assign_framebuffer_state(&ctx->base.saved_fb_state, NULL);
+ util_unreference_framebuffer_state(&ctx->base.saved_fb_state);
ctx->base.saved_fb_state.nr_cbufs = ~0;
}
@@ -417,16 +418,26 @@ static void blitter_set_clear_color(struct blitter_context_priv *ctx,
}
}
-static void get_normalized_texcoords(struct pipe_resource *src,
+static void get_texcoords(struct pipe_resource *src,
struct pipe_subresource subsrc,
unsigned x1, unsigned y1,
unsigned x2, unsigned y2,
- float out[4])
+ boolean normalized, float out[4])
{
- out[0] = x1 / (float)u_minify(src->width0, subsrc.level);
- out[1] = y1 / (float)u_minify(src->height0, subsrc.level);
- out[2] = x2 / (float)u_minify(src->width0, subsrc.level);
- out[3] = y2 / (float)u_minify(src->height0, subsrc.level);
+ if(normalized)
+ {
+ out[0] = x1 / (float)u_minify(src->width0, subsrc.level);
+ out[1] = y1 / (float)u_minify(src->height0, subsrc.level);
+ out[2] = x2 / (float)u_minify(src->width0, subsrc.level);
+ out[3] = y2 / (float)u_minify(src->height0, subsrc.level);
+ }
+ else
+ {
+ out[0] = x1;
+ out[1] = y1;
+ out[2] = x2;
+ out[3] = y2;
+ }
}
static void set_texcoords_in_vertices(const float coord[4],
@@ -454,7 +465,7 @@ static void blitter_set_texcoords_2d(struct blitter_context_priv *ctx,
unsigned i;
float coord[4];
- get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord);
+ get_texcoords(src, subsrc, x1, y1, x2, y2, TRUE, coord);
set_texcoords_in_vertices(coord, &ctx->vertices[0][1][0], 8);
for (i = 0; i < 4; i++) {
@@ -489,7 +500,7 @@ static void blitter_set_texcoords_cube(struct blitter_context_priv *ctx,
float coord[4];
float st[4][2];
- get_normalized_texcoords(src, subsrc, x1, y1, x2, y2, coord);
+ get_texcoords(src, subsrc, x1, y1, x2, y2, TRUE, coord);
set_texcoords_in_vertices(coord, &st[0][0], 2);
util_map_texcoords2d_onto_cubemap(subsrc.face,
@@ -523,7 +534,7 @@ static void blitter_draw_quad(struct blitter_context_priv *ctx)
static INLINE
void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
- int miplevel)
+ int miplevel, boolean normalized)
{
struct pipe_context *pipe = ctx->base.pipe;
struct pipe_sampler_state *sampler_state = &ctx->template_sampler_state;
@@ -531,18 +542,19 @@ void **blitter_get_sampler_state(struct blitter_context_priv *ctx,
assert(miplevel < PIPE_MAX_TEXTURE_LEVELS);
/* Create the sampler state on-demand. */
- if (!ctx->sampler_state[miplevel]) {
+ if (!ctx->sampler_state[miplevel * 2 + normalized]) {
sampler_state->lod_bias = miplevel;
sampler_state->min_lod = miplevel;
sampler_state->max_lod = miplevel;
+ sampler_state->normalized_coords = normalized;
- ctx->sampler_state[miplevel] = pipe->create_sampler_state(pipe,
+ ctx->sampler_state[miplevel * 2 + normalized] = pipe->create_sampler_state(pipe,
sampler_state);
}
/* Return void** so that it can be passed to bind_fragment_sampler_states
* directly. */
- return &ctx->sampler_state[miplevel];
+ return &ctx->sampler_state[miplevel * 2 + normalized];
}
static INLINE
@@ -568,6 +580,8 @@ pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target)
return TGSI_TEXTURE_1D;
case PIPE_TEXTURE_2D:
return TGSI_TEXTURE_2D;
+ case PIPE_TEXTURE_RECT:
+ return TGSI_TEXTURE_RECT;
case PIPE_TEXTURE_3D:
return TGSI_TEXTURE_3D;
case PIPE_TEXTURE_CUBE:
@@ -716,6 +730,7 @@ void util_blitter_copy_region(struct blitter_context *blitter,
struct pipe_sampler_view viewTempl, *view;
unsigned bind;
boolean is_stencil, is_depth;
+ boolean normalized;
/* Give up if textures are not set. */
assert(dst && src);
@@ -787,6 +802,8 @@ void util_blitter_copy_region(struct blitter_context *blitter,
fb_state.zsbuf = 0;
}
+ normalized = src->target != PIPE_TEXTURE_RECT;
+
/* Initialize sampler view. */
u_sampler_view_default_template(&viewTempl, src, src->format);
view = pipe->create_sampler_view(pipe, src, &viewTempl);
@@ -795,7 +812,7 @@ void util_blitter_copy_region(struct blitter_context *blitter,
pipe->bind_rasterizer_state(pipe, ctx->rs_state);
pipe->bind_vs_state(pipe, ctx->vs_tex);
pipe->bind_fragment_sampler_states(pipe, 1,
- blitter_get_sampler_state(ctx, subsrc.level));
+ blitter_get_sampler_state(ctx, subsrc.level, normalized));
pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
pipe->set_fragment_sampler_views(pipe, 1, &view);
pipe->set_framebuffer_state(pipe, &fb_state);
@@ -806,11 +823,12 @@ void util_blitter_copy_region(struct blitter_context *blitter,
/* Draw the quad with the draw_rectangle callback. */
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
{
/* Set texture coordinates. */
float coord[4];
- get_normalized_texcoords(src, subsrc, srcx, srcy,
- srcx+width, srcy+height, coord);
+ get_texcoords(src, subsrc, srcx, srcy,
+ srcx+width, srcy+height, normalized, coord);
/* Draw. */
blitter->draw_rectangle(blitter, dstx, dsty, dstx+width, dsty+height, 0,
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index f316587dea..e33d2e283f 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -27,6 +27,7 @@
#ifndef U_BLITTER_H
#define U_BLITTER_H
+#include "util/u_framebuffer.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
@@ -258,45 +259,12 @@ void util_blitter_save_vertex_shader(struct blitter_context *blitter,
blitter->saved_vs = vs;
}
-/* XXX This should probably be moved elsewhere. */
-static INLINE
-void util_assign_framebuffer_state(struct pipe_framebuffer_state *dst,
- const struct pipe_framebuffer_state *src)
-{
- unsigned i;
-
- if (src) {
- /* Reference all surfaces. */
- for (i = 0; i < src->nr_cbufs; i++) {
- pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]);
- }
- for (; i < dst->nr_cbufs; i++) {
- pipe_surface_reference(&dst->cbufs[i], NULL);
- }
-
- pipe_surface_reference(&dst->zsbuf, src->zsbuf);
-
- dst->nr_cbufs = src->nr_cbufs;
- dst->width = src->width;
- dst->height = src->height;
- } else {
- /* Set all surfaces to NULL. */
- for (i = 0; i < dst->nr_cbufs; i++) {
- pipe_surface_reference(&dst->cbufs[i], NULL);
- }
-
- pipe_surface_reference(&dst->zsbuf, NULL);
-
- dst->nr_cbufs = 0;
- }
-}
-
static INLINE
void util_blitter_save_framebuffer(struct blitter_context *blitter,
const struct pipe_framebuffer_state *state)
{
blitter->saved_fb_state.nr_cbufs = 0; /* It's ~0 now, meaning it's unsaved. */
- util_assign_framebuffer_state(&blitter->saved_fb_state, state);
+ util_copy_framebuffer_state(&blitter->saved_fb_state, state);
}
static INLINE
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index 5056351307..32519b148b 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -73,7 +73,9 @@
#endif
+#ifdef DEBUG
DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", FALSE)
+#endif
struct util_cpu_caps util_cpu_caps;
@@ -83,61 +85,6 @@ static int has_cpuid(void);
#endif
-#if defined(PIPE_ARCH_X86)
-
-/* The sigill handlers */
-#if defined(PIPE_OS_LINUX) /*&& defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)*/
-static void
-sigill_handler_sse(int signal, struct sigcontext sc)
-{
- /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1"
- * instructions are 3 bytes long. We must increment the instruction
- * pointer manually to avoid repeated execution of the offending
- * instruction.
- *
- * If the SIGILL is caused by a divide-by-zero when unmasked
- * exceptions aren't supported, the SIMD FPU status and control
- * word will be restored at the end of the test, so we don't need
- * to worry about doing it here. Besides, we may not be able to...
- */
- sc.eip += 3;
-
- util_cpu_caps.has_sse=0;
-}
-
-static void
-sigfpe_handler_sse(int signal, struct sigcontext sc)
-{
- if (sc.fpstate->magic != 0xffff) {
- /* Our signal context has the extended FPU state, so reset the
- * divide-by-zero exception mask and clear the divide-by-zero
- * exception bit.
- */
- sc.fpstate->mxcsr |= 0x00000200;
- sc.fpstate->mxcsr &= 0xfffffffb;
- } else {
- /* If we ever get here, we're completely hosed.
- */
- }
-}
-#endif /* PIPE_OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */
-
-#if defined(PIPE_OS_WINDOWS)
-static LONG CALLBACK
-win32_sig_handler_sse(EXCEPTION_POINTERS* ep)
-{
- if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){
- ep->ContextRecord->Eip +=3;
- util_cpu_caps.has_sse=0;
- return EXCEPTION_CONTINUE_EXECUTION;
- }
- return EXCEPTION_CONTINUE_SEARCH;
-}
-#endif /* PIPE_OS_WINDOWS */
-
-#endif /* PIPE_ARCH_X86 */
-
-
#if defined(PIPE_ARCH_PPC) && !defined(PIPE_OS_APPLE)
static jmp_buf __lv_powerpc_jmpbuf;
static volatile sig_atomic_t __lv_powerpc_canjump = 0;
@@ -194,123 +141,8 @@ check_os_altivec_support(void)
}
#endif /* PIPE_ARCH_PPC */
-/* If we're running on a processor that can do SSE, let's see if we
- * are allowed to or not. This will catch 2.4.0 or later kernels that
- * haven't been configured for a Pentium III but are running on one,
- * and RedHat patched 2.2 kernels that have broken exception handling
- * support for user space apps that do SSE.
- */
-#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64)
-static void
-check_os_katmai_support(void)
-{
-#if defined(PIPE_ARCH_X86)
-#if defined(PIPE_OS_FREEBSD)
- int has_sse=0, ret;
- int len = sizeof (has_sse);
-
- ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0);
- if (ret || !has_sse)
- util_cpu_caps.has_sse=0;
-
-#elif defined(PIPE_OS_NETBSD) || defined(PIPE_OS_OPENBSD)
- int has_sse, has_sse2, ret, mib[2];
- int varlen;
-
- mib[0] = CTL_MACHDEP;
- mib[1] = CPU_SSE;
- varlen = sizeof (has_sse);
-
- ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0);
- if (ret < 0 || !has_sse) {
- util_cpu_caps.has_sse = 0;
- } else {
- util_cpu_caps.has_sse = 1;
- }
-
- mib[1] = CPU_SSE2;
- varlen = sizeof (has_sse2);
- ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0);
- if (ret < 0 || !has_sse2) {
- util_cpu_caps.has_sse2 = 0;
- } else {
- util_cpu_caps.has_sse2 = 1;
- }
- util_cpu_caps.has_sse = 0; /* FIXME ?!?!? */
-
-#elif defined(PIPE_OS_WINDOWS)
- LPTOP_LEVEL_EXCEPTION_FILTER exc_fil;
- if (util_cpu_caps.has_sse) {
- exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse);
-#if defined(PIPE_CC_GCC)
- __asm __volatile ("xorps %xmm0, %xmm0");
-#elif defined(PIPE_CC_MSVC)
- __asm {
- xorps xmm0, xmm0 /* executing SSE instruction */
- }
-#else
-#error Unsupported compiler
-#endif
- SetUnhandledExceptionFilter(exc_fil);
- }
-#elif defined(PIPE_OS_LINUX)
- struct sigaction saved_sigill;
- struct sigaction saved_sigfpe;
-
- /* Save the original signal handlers.
- */
- sigaction(SIGILL, NULL, &saved_sigill);
- sigaction(SIGFPE, NULL, &saved_sigfpe);
-
- signal(SIGILL, (void (*)(int))sigill_handler_sse);
- signal(SIGFPE, (void (*)(int))sigfpe_handler_sse);
-
- /* Emulate test for OSFXSR in CR4. The OS will set this bit if it
- * supports the extended FPU save and restore required for SSE. If
- * we execute an SSE instruction on a PIII and get a SIGILL, the OS
- * doesn't support Streaming SIMD Exceptions, even if the processor
- * does.
- */
- if (util_cpu_caps.has_sse) {
- __asm __volatile ("xorps %xmm1, %xmm0");
- }
-
- /* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if
- * it supports unmasked SIMD FPU exceptions. If we unmask the
- * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS
- * doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE
- * as expected, we're okay but we need to clean up after it.
- *
- * Are we being too stringent in our requirement that the OS support
- * unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by
- * setting CR4.OSFXSR but don't support unmasked exceptions. Win98
- * doesn't even support them. We at least know the user-space SSE
- * support is good in kernels that do support unmasked exceptions,
- * and therefore to be safe I'm going to leave this test in here.
- */
- if (util_cpu_caps.has_sse) {
- /* test_os_katmai_exception_support(); */
- }
-
- /* Restore the original signal handlers.
- */
- sigaction(SIGILL, &saved_sigill, NULL);
- sigaction(SIGFPE, &saved_sigfpe, NULL);
-
-#else
- /* We can't use POSIX signal handling to test the availability of
- * SSE, so we disable it by default.
- */
- util_cpu_caps.has_sse = 0;
-#endif /* __linux__ */
-#endif
-
-#if defined(PIPE_ARCH_X86_64)
- util_cpu_caps.has_sse = 1;
-#endif
-}
-
+#if defined(PIPE_ARCH_X86) || defined (PIPE_ARCH_X86_64)
static int has_cpuid(void)
{
#if defined(PIPE_ARCH_X86)
@@ -469,9 +301,6 @@ util_cpu_detect(void)
util_cpu_caps.cacheline = regs2[2] & 0xFF;
}
- if (util_cpu_caps.has_sse)
- check_os_katmai_support();
-
if (!util_cpu_caps.has_sse) {
util_cpu_caps.has_sse2 = 0;
util_cpu_caps.has_sse3 = 0;
diff --git a/src/gallium/auxiliary/util/u_debug_describe.c b/src/gallium/auxiliary/util/u_debug_describe.c
new file mode 100644
index 0000000000..1c90ff3106
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_describe.c
@@ -0,0 +1,81 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <pipe/p_state.h>
+#include <util/u_format.h>
+#include <util/u_debug_describe.h>
+#include <util/u_string.h>
+
+void
+debug_describe_reference(char* buf, const struct pipe_reference*ptr)
+{
+ strcpy(buf, "pipe_object");
+}
+
+void
+debug_describe_resource(char* buf, const struct pipe_resource *ptr)
+{
+ switch(ptr->target)
+ {
+ case PIPE_BUFFER:
+ util_sprintf(buf, "pipe_buffer<%u>", (unsigned)util_format_get_stride(ptr->format, ptr->width0));
+ break;
+ case PIPE_TEXTURE_1D:
+ util_sprintf(buf, "pipe_texture1d<%u,%s,%u>", ptr->width0, util_format_short_name(ptr->format), ptr->last_level);
+ break;
+ case PIPE_TEXTURE_2D:
+ util_sprintf(buf, "pipe_texture2d<%u,%u,%s,%u>", ptr->width0, ptr->height0, util_format_short_name(ptr->format), ptr->last_level);
+ break;
+ case PIPE_TEXTURE_RECT:
+ util_sprintf(buf, "pipe_texture_rect<%u,%u,%s>", ptr->width0, ptr->height0, util_format_short_name(ptr->format));
+ break;
+ case PIPE_TEXTURE_CUBE:
+ util_sprintf(buf, "pipe_texture_cube<%u,%u,%s,%u>", ptr->width0, ptr->height0, util_format_short_name(ptr->format), ptr->last_level);
+ break;
+ case PIPE_TEXTURE_3D:
+ util_sprintf(buf, "pipe_texture3d<%u,%u,%u,%s,%u>", ptr->width0, ptr->height0, ptr->depth0, util_format_short_name(ptr->format), ptr->last_level);
+ break;
+ default:
+ util_sprintf(buf, "pipe_martian_resource<%u>", ptr->target);
+ break;
+ }
+}
+
+void
+debug_describe_surface(char* buf, const struct pipe_surface *ptr)
+{
+ char res[128];
+ debug_describe_resource(res, ptr->texture);
+ util_sprintf(buf, "pipe_surface<%s,%u,%u,%u>", res, ptr->face, ptr->level, ptr->zslice);
+}
+
+void
+debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr)
+{
+ char res[128];
+ debug_describe_resource(res, ptr->texture);
+ util_sprintf(buf, "pipe_sampler_view<%s,%s>", res, util_format_short_name(ptr->format));
+}
diff --git a/src/gallium/auxiliary/util/u_debug_describe.h b/src/gallium/auxiliary/util/u_debug_describe.h
new file mode 100644
index 0000000000..26d1f803bf
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_describe.h
@@ -0,0 +1,49 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_DEBUG_DESCRIBE_H_
+#define U_DEBUG_DESCRIBE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct pipe_reference;
+struct pipe_resource;
+struct pipe_surface;
+struct pipe_sampler_view;
+
+/* a 256-byte buffer is necessary and sufficient */
+void debug_describe_reference(char* buf, const struct pipe_reference*ptr);
+void debug_describe_resource(char* buf, const struct pipe_resource *ptr);
+void debug_describe_surface(char* buf, const struct pipe_surface *ptr);
+void debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_DEBUG_DESCRIBE_H_ */
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.c b/src/gallium/auxiliary/util/u_debug_refcnt.c
new file mode 100644
index 0000000000..40a26c9c69
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.c
@@ -0,0 +1,181 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#if defined(DEBUG) && (!defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_WINDOWS_USER))
+
+/* see http://www.mozilla.org/performance/refcnt-balancer.html for what to do with the output
+ * on Linux, use tools/addr2line.sh to postprocess it before anything else
+ **/
+#include <util/u_debug.h>
+#include <util/u_debug_refcnt.h>
+#include <util/u_debug_stack.h>
+#include <util/u_debug_symbol.h>
+#include <util/u_string.h>
+#include <util/u_hash_table.h>
+#include <os/os_thread.h>
+#include <os/os_stream.h>
+
+int debug_refcnt_state;
+
+struct os_stream* stream;
+
+/* TODO: maybe move this serial machinery to a stand-alone module and expose it? */
+static pipe_mutex serials_mutex;
+static struct util_hash_table* serials_hash;
+static unsigned serials_last;
+
+static unsigned hash_ptr(void* p)
+{
+ return (unsigned)(uintptr_t)p;
+}
+
+static int compare_ptr(void* a, void* b)
+{
+ if(a == b)
+ return 0;
+ else if(a < b)
+ return -1;
+ else
+ return 1;
+}
+
+static boolean debug_serial(void* p, unsigned* pserial)
+{
+ unsigned serial;
+ boolean found = TRUE;
+ pipe_mutex_lock(serials_mutex);
+ if(!serials_hash)
+ serials_hash = util_hash_table_create(hash_ptr, compare_ptr);
+ serial = (unsigned)(uintptr_t)util_hash_table_get(serials_hash, p);
+ if(!serial)
+ {
+ /* assign the next serial; if the counter wraps around to 0 it is time to stop
+ * logging (you'll have a 100 GB logfile at least at this point). TODO: avoid this
+ */
+ serial = ++serials_last;
+ if(!serial)
+ {
+ debug_error("More than 2^32 objects detected, aborting.\n");
+ os_abort();
+ }
+
+ util_hash_table_set(serials_hash, p, (void*)(uintptr_t)serial);
+ found = FALSE;
+ }
+ pipe_mutex_unlock(serials_mutex);
+ *pserial = serial;
+ return found;
+}
+
+static void debug_serial_delete(void* p)
+{
+ pipe_mutex_lock(serials_mutex);
+ util_hash_table_remove(serials_hash, p);
+ pipe_mutex_unlock(serials_mutex);
+}
+
+#define STACK_LEN 64
+
+static void dump_stack(const char* symbols[STACK_LEN])
+{
+ unsigned i;
+ for(i = 0; i < STACK_LEN; ++i)
+ {
+ if(symbols[i])
+ os_stream_printf(stream, "%s\n", symbols[i]);
+ }
+ os_stream_write(stream, "\n", 1);
+}
+
+void debug_reference_slowpath(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+{
+ if(debug_refcnt_state < 0)
+ return;
+
+ if(!debug_refcnt_state)
+ {
+ const char* filename = debug_get_option("GALLIUM_REFCNT_LOG", NULL);
+ if(filename && filename[0])
+ stream = os_file_stream_create(filename);
+
+ if(stream)
+ debug_refcnt_state = 1;
+ else
+ debug_refcnt_state = -1;
+ }
+
+ if(debug_refcnt_state > 0)
+ {
+ struct debug_stack_frame frames[STACK_LEN];
+ const char* symbols[STACK_LEN];
+ char buf[1024];
+
+ unsigned i;
+ unsigned refcnt = p->count;
+ unsigned serial;
+ boolean existing = debug_serial((void*)p, &serial);
+
+ debug_backtrace_capture(frames, 1, STACK_LEN);
+ for(i = 0; i < STACK_LEN; ++i)
+ {
+ if(frames[i].function)
+ symbols[i] = debug_symbol_name_cached(frames[i].function);
+ else
+ symbols[i] = 0;
+ }
+
+ get_desc(buf, p);
+
+ if(!existing)
+ {
+ os_stream_printf(stream, "<%s> %p %u Create\n", buf, p, serial);
+ dump_stack(symbols);
+
+ /* this is here to provide a gradual change even if we don't see the initialization */
+ for(i = 1; i <= refcnt - change; ++i)
+ {
+ os_stream_printf(stream, "<%s> %p %u AddRef %u\n", buf, p, serial, i);
+ dump_stack(symbols);
+ }
+ }
+
+ if(change)
+ {
+ os_stream_printf(stream, "<%s> %p %u %s %u\n", buf, p, serial, change > 0 ? "AddRef" : "Release", refcnt);
+ dump_stack(symbols);
+ }
+
+ if(!refcnt)
+ {
+ debug_serial_delete((void*)p);
+ os_stream_printf(stream, "<%s> %p %u Destroy\n", buf, p, serial);
+ dump_stack(symbols);
+ }
+
+ os_stream_flush(stream);
+ }
+}
+#endif
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.h b/src/gallium/auxiliary/util/u_debug_refcnt.h
new file mode 100644
index 0000000000..bea2d1c478
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.h
@@ -0,0 +1,63 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_DEBUG_REFCNT_H_
+#define U_DEBUG_REFCNT_H_
+
+#include <pipe/p_config.h>
+#include <pipe/p_state.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (*debug_reference_descriptor)(char*, const struct pipe_reference*);
+
+#if defined(DEBUG) && (!defined(PIPE_OS_WINDOWS) || defined(PIPE_SUBSYSTEM_WINDOWS_USER))
+
+extern int debug_refcnt_state;
+
+void debug_reference_slowpath(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change);
+
+static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+{
+ if (debug_refcnt_state >= 0)
+ debug_reference_slowpath(p, get_desc, change);
+}
+
+#else
+
+static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+{
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* U_DEBUG_REFCNT_H_ */
diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c
index 6e250575d6..332952af88 100644
--- a/src/gallium/auxiliary/util/u_debug_symbol.c
+++ b/src/gallium/auxiliary/util/u_debug_symbol.c
@@ -33,9 +33,12 @@
*/
#include "pipe/p_compiler.h"
+#include "os/os_thread.h"
+#include "u_string.h"
#include "u_debug.h"
#include "u_debug_symbol.h"
+#include "u_hash_table.h"
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER) && defined(PIPE_ARCH_X86)
@@ -113,8 +116,8 @@ BOOL WINAPI j_SymGetSymFromAddr(HANDLE hProcess, DWORD Address, PDWORD Displacem
}
-static INLINE boolean
-debug_symbol_print_imagehlp(const void *addr)
+static INLINE void
+debug_symbol_name_imagehlp(const void *addr, char* buf, unsigned size)
{
HANDLE hProcess;
BYTE symbolBuffer[1024];
@@ -131,25 +134,95 @@ debug_symbol_print_imagehlp(const void *addr)
if(j_SymInitialize(hProcess, NULL, TRUE))
bSymInitialized = TRUE;
}
-
+
if(!j_SymGetSymFromAddr(hProcess, (DWORD)addr, &dwDisplacement, pSymbol))
- return FALSE;
+ buf[0] = 0;
+ else
+ {
+ strncpy(buf, pSymbol->Name, size);
+ buf[size - 1] = 0;
+ }
+}
+#endif
- debug_printf("\t%s\n", pSymbol->Name);
+#ifdef __GLIBC__
+#include <execinfo.h>
- return TRUE;
-
+/* This can only provide dynamic symbols, or binary offsets into a file.
+ *
+ * To fix this, post-process the output with tools/addr2line.sh
+ */
+static INLINE void
+debug_symbol_name_glibc(const void *addr, char* buf, unsigned size)
+{
+ char** syms = backtrace_symbols((void**)&addr, 1);
+ strncpy(buf, syms[0], size);
+ buf[size - 1] = 0;
+ free(syms);
}
#endif
-
void
-debug_symbol_print(const void *addr)
+debug_symbol_name(const void *addr, char* buf, unsigned size)
{
#if defined(PIPE_SUBSYSTEM_WINDOWS_USER) && defined(PIPE_ARCH_X86)
- if(debug_symbol_print_imagehlp(addr))
+ debug_symbol_name_imagehlp(addr, buf, size);
+ if(buf[0])
return;
#endif
-
- debug_printf("\t%p\n", addr);
+
+#ifdef __GLIBC__
+ debug_symbol_name_glibc(addr, buf, size);
+ if(buf[0])
+ return;
+#endif
+
+ util_snprintf(buf, size, "%p", addr);
+ buf[size - 1] = 0;
+}
+
+/**
+ * Resolve \p addr with debug_symbol_name() and print the result through
+ * debug_printf() as a single tab-indented line.
+ */
+void
+debug_symbol_print(const void *addr)
+{
+   char symbol[1024];
+
+   debug_symbol_name(addr, symbol, sizeof symbol);
+   debug_printf("\t%s\n", symbol);
+}
+
+/* Cache filled by debug_symbol_name_cached(): maps an address to its name
+ * string. Created lazily on first use.
+ * NOTE(review): neither global is static although only this file is seen
+ * using them - confirm nothing else links against these names. */
+struct util_hash_table* symbols_hash;
+/* Held around every access to symbols_hash in debug_symbol_name_cached().
+ * NOTE(review): no initialisation call for this mutex is visible here -
+ * confirm a zero-initialised pipe_mutex is valid on every platform. */
+pipe_mutex symbols_mutex;
+
+/* Hash callback for symbols_hash: the pointer's integer value, truncated
+ * to unsigned. */
+static unsigned hash_ptr(void* p)
+{
+   uintptr_t bits = (uintptr_t)p;
+
+   return (unsigned)bits;
+}
+
+/* Key comparison callback for symbols_hash: orders keys by address.
+ * Returns 0 for equal pointers, -1 when a is the lower address, 1 otherwise. */
+static int compare_ptr(void* a, void* b)
+{
+   if(a == b)
+      return 0;
+
+   return (a < b) ? -1 : 1;
+}
+
+/**
+ * Return the symbol name for \p addr, resolving it at most once per address.
+ *
+ * The first lookup of an address runs debug_symbol_name(), stores a
+ * strdup()ed copy in symbols_hash and returns that copy; later lookups
+ * return the stored pointer. The whole operation runs with symbols_mutex
+ * held. This function never frees the stored strings.
+ *
+ * NOTE(review): the strdup() result is not checked, so the return value is
+ * presumably NULL when the allocation fails - confirm callers cope.
+ */
+const char*
+debug_symbol_name_cached(const void *addr)
+{
+   void* key = (void*)addr;
+   const char* cached;
+
+   pipe_mutex_lock(symbols_mutex);
+
+   /* the table is created lazily by the first caller */
+   if(!symbols_hash)
+      symbols_hash = util_hash_table_create(hash_ptr, compare_ptr);
+
+   cached = util_hash_table_get(symbols_hash, key);
+   if(!cached)
+   {
+      char resolved[1024];
+
+      debug_symbol_name(addr, resolved, sizeof(resolved));
+      cached = strdup(resolved);
+
+      util_hash_table_set(symbols_hash, key, (void*)cached);
+   }
+
+   pipe_mutex_unlock(symbols_mutex);
+   return cached;
+}
diff --git a/src/gallium/auxiliary/util/u_debug_symbol.h b/src/gallium/auxiliary/util/u_debug_symbol.h
index 021586987b..b247706c2a 100644
--- a/src/gallium/auxiliary/util/u_debug_symbol.h
+++ b/src/gallium/auxiliary/util/u_debug_symbol.h
@@ -43,8 +43,13 @@ extern "C" {
void
-debug_symbol_print(const void *addr);
+debug_symbol_name(const void *addr, char* buf, unsigned size);
+
+const char*
+debug_symbol_name_cached(const void *addr);
+void
+debug_symbol_print(const void *addr);
#ifdef __cplusplus
}
diff --git a/src/gallium/auxiliary/util/u_dirty_surfaces.h b/src/gallium/auxiliary/util/u_dirty_surfaces.h
index 99f260bf96..fd1bbe5ffd 100644
--- a/src/gallium/auxiliary/util/u_dirty_surfaces.h
+++ b/src/gallium/auxiliary/util/u_dirty_surfaces.h
@@ -1,9 +1,39 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#ifndef U_DIRTY_SURFACES_H_
#define U_DIRTY_SURFACES_H_
+#include "pipe/p_state.h"
+
#include "util/u_double_list.h"
#include "util/u_math.h"
+struct pipe_context;
+
typedef void (*util_dirty_surface_flush_t) (struct pipe_context *, struct pipe_surface *);
struct util_dirty_surfaces
diff --git a/src/gallium/auxiliary/util/u_draw.h b/src/gallium/auxiliary/util/u_draw.h
index 2a91ea0f9a..f06d09ef91 100644
--- a/src/gallium/auxiliary/util/u_draw.h
+++ b/src/gallium/auxiliary/util/u_draw.h
@@ -31,6 +31,7 @@
#include "pipe/p_compiler.h"
#include "pipe/p_context.h"
+#include "pipe/p_state.h"
static INLINE void
diff --git a/src/gallium/auxiliary/util/u_dynarray.h b/src/gallium/auxiliary/util/u_dynarray.h
index 9d1c1713a7..980cadf22d 100644
--- a/src/gallium/auxiliary/util/u_dynarray.h
+++ b/src/gallium/auxiliary/util/u_dynarray.h
@@ -106,6 +106,9 @@ util_dynarray_trim(struct util_dynarray *buf)
#define util_dynarray_pop_ptr(buf, type) (type*)((char*)(buf)->data + ((buf)->size -= sizeof(type)))
#define util_dynarray_pop(buf, type) *util_dynarray_pop_ptr(buf, type)
#define util_dynarray_contains(buf, type) ((buf)->size >= sizeof(type))
+/* Pointer to element idx when the buffer's data is viewed as an array of type. */
+#define util_dynarray_element(buf, type, idx) ((type*)(buf)->data + (idx))
+/* Start of the stored data. */
+#define util_dynarray_begin(buf) ((buf)->data)
+/* Address (buf)->size chars past the start of the data, as a void pointer. */
+#define util_dynarray_end(buf) ((void*)util_dynarray_element((buf), char, (buf)->size))
#endif /* U_DYNARRAY_H */
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index b7fe2d3003..6a931a9581 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -1255,6 +1255,7 @@ fallback_gen_mipmap(struct gen_mipmap_state *ctx,
make_1d_mipmap(ctx, pt, face, baseLevel, lastLevel);
break;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_CUBE:
make_2d_mipmap(ctx, pt, face, baseLevel, lastLevel);
break;
diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
index 540305c146..78473bf35a 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -33,6 +33,8 @@
#include "pipe/p_state.h"
#include "pipe/p_screen.h"
#include "util/u_debug.h"
+#include "util/u_debug_describe.h"
+#include "util/u_debug_refcnt.h"
#include "util/u_atomic.h"
#include "util/u_box.h"
#include "util/u_math.h"
@@ -67,7 +69,9 @@ pipe_is_referenced(struct pipe_reference *reference)
* \return TRUE if the object's refcount hits zero and should be destroyed.
*/
static INLINE boolean
-pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
+pipe_reference_described(struct pipe_reference *ptr,
+ struct pipe_reference *reference,
+ debug_reference_descriptor get_desc)
{
boolean destroy = FALSE;
@@ -76,6 +80,7 @@ pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
if (reference) {
assert(pipe_is_referenced(reference));
p_atomic_inc(&reference->count);
+ debug_reference(reference, get_desc, 1);
}
if (ptr) {
@@ -83,41 +88,49 @@ pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
if (p_atomic_dec_zero(&ptr->count)) {
destroy = TRUE;
}
+ debug_reference(ptr, get_desc, -1);
}
}
return destroy;
}
+/**
+ * Untyped form of pipe_reference_described(): performs the same reference
+ * update, passing debug_describe_reference as the descriptor callback.
+ *
+ * \return whatever pipe_reference_described() returns for these pointers.
+ */
+static INLINE boolean
+pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
+{
+   debug_reference_descriptor describe =
+      (debug_reference_descriptor)debug_describe_reference;
+
+   return pipe_reference_described(ptr, reference, describe);
+}
static INLINE void
pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
{
struct pipe_surface *old_surf = *ptr;
- if (pipe_reference(&(*ptr)->reference, &surf->reference))
+ if (pipe_reference_described(&(*ptr)->reference, &surf->reference,
+ (debug_reference_descriptor)debug_describe_surface))
old_surf->texture->screen->tex_surface_destroy(old_surf);
*ptr = surf;
}
-
static INLINE void
pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex)
{
struct pipe_resource *old_tex = *ptr;
- if (pipe_reference(&(*ptr)->reference, &tex->reference))
+ if (pipe_reference_described(&(*ptr)->reference, &tex->reference,
+ (debug_reference_descriptor)debug_describe_resource))
old_tex->screen->resource_destroy(old_tex->screen, old_tex);
*ptr = tex;
}
-
static INLINE void
pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_view *view)
{
struct pipe_sampler_view *old_view = *ptr;
- if (pipe_reference(&(*ptr)->reference, &view->reference))
+ if (pipe_reference_described(&(*ptr)->reference, &view->reference,
+ (debug_reference_descriptor)debug_describe_sampler_view))
old_view->context->sampler_view_destroy(old_view->context, old_view);
*ptr = view;
}
diff --git a/src/gallium/auxiliary/util/u_linkage.c b/src/gallium/auxiliary/util/u_linkage.c
new file mode 100644
index 0000000000..2f6f41ba84
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_linkage.c
@@ -0,0 +1,149 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_debug.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+#include "util/u_linkage.h"
+
+/* we must only record the registers that are actually used, not just declared */
+/**
+ * Mark \p value as present in \p set.
+ *
+ * \param set    set to update
+ * \param value  member to add; selects word value / bits-per-word and bit
+ *               value % bits-per-word of set->masks
+ * \return TRUE if \p value was already present before the call, FALSE if
+ *         this call added it.
+ */
+static INLINE boolean
+util_semantic_set_test_and_set(struct util_semantic_set *set, unsigned value)
+{
+   /* The mask words are unsigned long, so the bit has to be built as an
+    * unsigned long too: "1 << n" is an int shift, which is undefined for
+    * n >= 32 and could never reach the upper half of a 64-bit word. */
+   const unsigned bits_per_word = sizeof(unsigned long) * 8;
+   unsigned long mask = 1ul << (value % bits_per_word);
+   unsigned long *p = &set->masks[value / bits_per_word];
+   unsigned long v = *p & mask;
+   *p |= mask;
+   return !!v;
+}
+
+/* Record the semantic index of register idx if its semantic name is GENERIC.
+ * Returns 1 when this added a new index to the set, 0 otherwise. */
+static INLINE unsigned
+util_semantic_set_record_generic(struct util_semantic_set *set, const ubyte *semantic_name, const ubyte *semantic_index, unsigned idx)
+{
+   if(semantic_name[idx] != TGSI_SEMANTIC_GENERIC)
+      return 0;
+
+   return util_semantic_set_test_and_set(set, semantic_index[idx]) ? 0 : 1;
+}
+
+/**
+ * Collect the GENERIC semantic indices that the instructions of a shader
+ * actually reference in one register file.
+ *
+ * \param set     receives the result; cleared first
+ * \param tokens  TGSI token stream of the shader
+ * \param file    TGSI_FILE_INPUT or TGSI_FILE_OUTPUT
+ * \return number of distinct indices stored in \p set. Any other \p file
+ *         asserts and yields an empty set and 0.
+ */
+unsigned
+util_semantic_set_from_program_file(struct util_semantic_set *set, const struct tgsi_token *tokens, enum tgsi_file_type file)
+{
+   struct tgsi_shader_info info;
+   struct tgsi_parse_context parse;
+   unsigned count = 0;
+   ubyte *semantic_name;
+   ubyte *semantic_index;
+
+   memset(set->masks, 0, sizeof(set->masks));
+
+   tgsi_scan_shader(tokens, &info);
+
+   if(file == TGSI_FILE_INPUT)
+   {
+      semantic_name = info.input_semantic_name;
+      semantic_index = info.input_semantic_index;
+   }
+   else if(file == TGSI_FILE_OUTPUT)
+   {
+      semantic_name = info.output_semantic_name;
+      semantic_index = info.output_semantic_index;
+   }
+   else
+   {
+      /* No semantic tables are used for other files. Bail out instead of
+       * walking the instructions with NULL tables, which would be
+       * dereferenced as soon as a register of this file is seen when the
+       * assert is compiled out. */
+      assert(0);
+      return 0;
+   }
+
+   tgsi_parse_init(&parse, tokens);
+
+   while(!tgsi_parse_end_of_tokens(&parse))
+   {
+      const struct tgsi_full_instruction *finst;
+      unsigned i;
+
+      tgsi_parse_token(&parse);
+
+      /* declarations are skipped on purpose: only used registers count */
+      if(parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+         continue;
+
+      finst = &parse.FullToken.FullInstruction;
+
+      for(i = 0; i < finst->Instruction.NumDstRegs; ++i)
+      {
+         if(finst->Dst[i].Register.File == file)
+            count += util_semantic_set_record_generic(set, semantic_name, semantic_index, finst->Dst[i].Register.Index);
+      }
+
+      for(i = 0; i < finst->Instruction.NumSrcRegs; ++i)
+      {
+         if(finst->Src[i].Register.File == file)
+            count += util_semantic_set_record_generic(set, semantic_name, semantic_index, finst->Src[i].Register.Index);
+      }
+   }
+   tgsi_parse_free(&parse);
+
+   return count;
+}
+
+/* Run the following statement once for every value i (0..255) present in
+ * set. The bit is built as unsigned long to match the mask words: an int
+ * "1 << n" is undefined for n >= 32 where long is 64 bits wide. */
+#define UTIL_SEMANTIC_SET_FOR_EACH(i, set) for((i) = 0; (i) < 256; ++(i)) if((set)->masks[(i) / (sizeof(long) * 8)] & (1ul << ((i) % (sizeof(long) * 8))))
+
+/**
+ * Assign every member of \p set to a slot of \p layout.
+ *
+ * \param layout           receives, per slot, the member stored there, or
+ *                         0xff for an unused slot; num_slots bytes
+ * \param set              members to place
+ * \param efficient_slots  slot count up to which holes are accepted
+ * \param num_slots        number of entries in \p layout
+ *
+ * Three strategies are tried in order:
+ *  - highest member below efficient_slots: member i goes to slot i;
+ *  - member range narrower than efficient_slots: member i goes to slot
+ *    i - lowest member;
+ *  - otherwise members are packed into consecutive slots from 0.
+ */
+void
+util_semantic_layout_from_set(unsigned char *layout, const struct util_semantic_set *set, unsigned efficient_slots, unsigned num_slots)
+{
+   int first = -1;
+   int last = -1;
+   unsigned i;
+
+   memset(layout, 0xff, num_slots);
+
+   UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+   {
+      if(first < 0)
+         first = i;
+      last = i;
+   }
+
+   /* Empty set: every slot stays 0xff. Checking here keeps the comparisons
+    * below from silently converting -1 to a huge unsigned value. */
+   if(last < 0)
+      return;
+
+   if((unsigned)last < efficient_slots)
+   {
+      UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+         layout[i] = i;
+   }
+   else if((unsigned)(last - first) < efficient_slots)
+   {
+      UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+         layout[i - first] = i;
+   }
+   else
+   {
+      unsigned idx = 0;
+      UTIL_SEMANTIC_SET_FOR_EACH(i, set)
+      {
+         /* more members than slots: stop rather than write past layout */
+         if(idx >= num_slots)
+            break;
+         layout[idx++] = i;
+      }
+   }
+}
diff --git a/src/gallium/auxiliary/util/u_linkage.h b/src/gallium/auxiliary/util/u_linkage.h
new file mode 100644
index 0000000000..4720e0ee60
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_linkage.h
@@ -0,0 +1,66 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_LINKAGE_H_
+#define U_LINKAGE_H_
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
+
+/* Bit set over the values 0..255: one bit per value, packed into unsigned
+ * long words. */
+struct util_semantic_set
+{
+   unsigned long masks[256 / 8 / sizeof(unsigned long)];
+};
+
+/**
+ * Test whether \p value is a member of \p set.
+ *
+ * \return true if the bit for \p value is set, false otherwise.
+ */
+static INLINE bool
+util_semantic_set_contains(struct util_semantic_set *set, unsigned char value)
+{
+   /* Word index is value / bits-per-word and bit index is
+    * value % bits-per-word, built as unsigned long so that bits above 31
+    * of a 64-bit word can be reached. */
+   const unsigned bits_per_word = sizeof(unsigned long) * 8;
+
+   return !!(set->masks[value / bits_per_word] & (1ul << (value % bits_per_word)));
+}
+
+unsigned util_semantic_set_from_program_file(struct util_semantic_set *set, const struct tgsi_token *tokens, enum tgsi_file_type file);
+
+/* efficient_slots is the number of slots such that hardware performance is
+ * the same for using that amount, with holes, or fewer slots but with fewer
+ * holes.
+ *
+ * num_slots is the size of the layout array and the hardware limit instead.
+ *
+ * efficient_slots == 0 or efficient_slots == num_slots are typical settings.
+ */
+void util_semantic_layout_from_set(unsigned char *layout, const struct util_semantic_set *set, unsigned efficient_slots, unsigned num_slots);
+
+/* Invert a slot layout: for every slot i in [0, num_slots) store
+ * first_slot_value + i at table[layout[i]].
+ *
+ * NOTE(review): table is a pointer parameter, so sizeof(table) below is the
+ * size of a pointer and only that many leading bytes are set to 0xff.
+ * Presumably the whole table was meant to be filled; the table length is not
+ * passed in, so the fix has to be confirmed against the callers.
+ */
+static INLINE void
+util_semantic_table_from_layout(unsigned char *table, unsigned char *layout, unsigned char first_slot_value, unsigned char num_slots)
+{
+ int i;
+ memset(table, 0xff, sizeof(table));
+
+ /* layout[i] is an unsigned char, so indices up to 255 can be written */
+ for(i = 0; i < num_slots; ++i)
+ table[layout[i]] = first_slot_value + i;
+}
+
+#endif /* U_LINKAGE_H_ */
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index fe19466436..69a7681494 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -361,16 +361,6 @@ util_is_inf_or_nan(float x)
/**
- * Test whether x is a power of two.
- */
-static INLINE boolean
-util_is_pot(unsigned x)
-{
- return (x & (x - 1)) == 0;
-}
-
-
-/**
* Find first bit set in word. Least significant bit is 1.
* Return 0 if no bits set.
*/
@@ -566,6 +556,9 @@ util_bswap16(uint16_t n)
#define MIN3( A, B, C ) MIN2( MIN2( A, B ), C )
#define MAX3( A, B, C ) MAX2( MAX2( A, B ), C )
+/* Minimum / maximum of four values, combined pairwise with MIN2 / MAX2. */
+#define MIN4( A, B, C, D ) MIN2( MIN2( A, B ), MIN2(C, D) )
+#define MAX4( A, B, C, D ) MAX2( MAX2( A, B ), MAX2(C, D) )
+
/**
* Align a value, only works pot alignemnts.
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index 5f113f742b..aae8b8bdf1 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -42,12 +42,18 @@
#include "util/u_math.h"
-
+/**
+ * Helper union for packing pixel values.
+ * Will often contain values in formats which are too complex to be described
+ * in simple terms, hence might just effectively contain a number of bytes.
+ * Must be big enough to hold data for all formats (currently 256 bits).
+ */
union util_color {
ubyte ub;
ushort us;
uint ui;
float f[4];
+ double d[4];
};
/**
diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c
index 9bbcf1c8c4..56fcfac069 100644
--- a/src/gallium/auxiliary/util/u_rect.c
+++ b/src/gallium/auxiliary/util/u_rect.c
@@ -32,6 +32,7 @@
#include "util/u_format.h"
#include "util/u_rect.h"
+#include "util/u_pack_color.h"
/**
@@ -94,7 +95,7 @@ util_fill_rect(ubyte * dst,
unsigned dst_y,
unsigned width,
unsigned height,
- uint32_t value)
+ union util_color *uc)
{
unsigned i, j;
unsigned width_size;
@@ -110,40 +111,54 @@ util_fill_rect(ubyte * dst,
dst_y /= blockheight;
width = (width + blockwidth - 1)/blockwidth;
height = (height + blockheight - 1)/blockheight;
-
+
dst += dst_x * blocksize;
dst += dst_y * dst_stride;
width_size = width * blocksize;
-
+
switch (blocksize) {
case 1:
if(dst_stride == width_size)
- memset(dst, (ubyte) value, height * width_size);
+ memset(dst, uc->ub, height * width_size);
else {
- for (i = 0; i < height; i++) {
- memset(dst, (ubyte) value, width_size);
- dst += dst_stride;
- }
+ for (i = 0; i < height; i++) {
+ memset(dst, uc->ub, width_size);
+ dst += dst_stride;
+ }
}
break;
case 2:
for (i = 0; i < height; i++) {
- uint16_t *row = (uint16_t *)dst;
- for (j = 0; j < width; j++)
- *row++ = (uint16_t) value;
- dst += dst_stride;
+ uint16_t *row = (uint16_t *)dst;
+ for (j = 0; j < width; j++)
+ *row++ = uc->us;
+ dst += dst_stride;
}
break;
case 4:
for (i = 0; i < height; i++) {
- uint32_t *row = (uint32_t *)dst;
- for (j = 0; j < width; j++)
- *row++ = value;
- dst += dst_stride;
+ uint32_t *row = (uint32_t *)dst;
+ for (j = 0; j < width; j++)
+ *row++ = uc->ui;
+ dst += dst_stride;
+ }
+ break;
+ case 8:
+ case 12:
+ case 16:
+ case 24:
+ case 32:
+ for (i = 0; i < height; i++) {
+ ubyte *row = dst;
+ for (j = 0; j < width; j++) {
+ memcpy(row, uc, blocksize);
+ row += blocksize;
+ }
+ dst += dst_stride;
}
break;
default:
- assert(0);
- break;
+ assert(0);
+ break;
}
}
diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h
index 40d57e662d..4cb90d3c31 100644
--- a/src/gallium/auxiliary/util/u_rect.h
+++ b/src/gallium/auxiliary/util/u_rect.h
@@ -26,17 +26,67 @@
**************************************************************************/
-/**
- * Pipe copy/fill rect helpers.
+#ifndef U_RECT_H
+#define U_RECT_H
+
+#include "pipe/p_compiler.h"
+
+/* Integer rectangle stored as an x extent (x0, x1) and a y extent (y0, y1). */
+struct u_rect {
+ int x0, x1;
+ int y0, y1;
+};
+
+/* Do two rectangles intersect?
*/
+static INLINE boolean
+u_rect_test_intersection(const struct u_rect *a,
+                         const struct u_rect *b)
+{
+   /* The rectangles meet when their extents overlap on both axes; extents
+    * that merely touch (x1 == x0 or y1 == y0) count as overlapping. */
+   const boolean overlap_x = a->x1 >= b->x0 && b->x1 >= a->x0;
+   const boolean overlap_y = a->y1 >= b->y0 && b->y1 >= a->y0;
+
+   return overlap_x && overlap_y;
+}
+/* Find the intersection of two rectangles known to intersect.
+ */
+static INLINE void
+u_rect_find_intersection(const struct u_rect *a,
+                         struct u_rect *b)
+{
+   /* Shrinks b in place so that it does not extend beyond a on any side.
+    * No overlap check is made here; that is the caller's job.
+    */
+   if (a->x0 > b->x0) {
+      b->x0 = a->x0;
+   }
+   if (a->x1 < b->x1) {
+      b->x1 = a->x1;
+   }
+   if (a->y0 > b->y0) {
+      b->y0 = a->y0;
+   }
+   if (a->y1 < b->y1) {
+      b->y1 = a->y1;
+   }
+}
-#ifndef U_RECT_H
-#define U_RECT_H
+/* Replace b with its intersection with a, or with the all-zero rectangle
+ * when the two do not intersect.
+ */
+static INLINE void
+u_rect_possible_intersection(const struct u_rect *a,
+                             struct u_rect *b)
+{
+   if (!u_rect_test_intersection(a,b)) {
+      b->x0 = b->x1 = b->y0 = b->y1 = 0;
+      return;
+   }
+
+   u_rect_find_intersection(a,b);
+}
#include "pipe/p_format.h"
+#include "util/u_pack_color.h"
+
+
+
+/**********************************************************************
+ * Pipe copy/fill rect helpers.
+ */
+/* These really should move to a different file:
+ */
+#include "pipe/p_format.h"
extern void
util_copy_rect(ubyte * dst, enum pipe_format format,
@@ -47,7 +97,7 @@ util_copy_rect(ubyte * dst, enum pipe_format format,
extern void
util_fill_rect(ubyte * dst, enum pipe_format format,
unsigned dst_stride, unsigned dst_x, unsigned dst_y,
- unsigned width, unsigned height, uint32_t value);
+ unsigned width, unsigned height, union util_color *uc);
#endif /* U_RECT_H */
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index 5b682f496c..58ef68377f 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -37,6 +37,7 @@
#include "pipe/p_context.h"
#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
#include "util/u_simple_shaders.h"
#include "util/u_debug.h"
#include "tgsi/tgsi_ureg.h"
diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h
index 206e1ec311..7f80fc1270 100644
--- a/src/gallium/auxiliary/util/u_split_prim.h
+++ b/src/gallium/auxiliary/util/u_split_prim.h
@@ -1,5 +1,12 @@
/* Originally written by Ben Skeggs for the nv50 driver*/
-#include <pipe/p_defines.h>
+
+#ifndef U_SPLIT_PRIM_H
+#define U_SPLIT_PRIM_H
+
+#include "pipe/p_defines.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_debug.h"
struct util_split_prim {
void *priv;
@@ -48,7 +55,7 @@ util_split_prim_next(struct util_split_prim *s, unsigned max_verts)
}
}
- if (s->p_start + s->close_first + max_verts >= s->p_end) {
+ if ((s->p_end - s->p_start) + s->close_first <= max_verts) {
s->emit(s->priv, s->p_start, s->p_end - s->p_start);
if (s->close_first)
s->emit(s->priv, s->start, 1);
@@ -103,3 +110,5 @@ util_split_prim_next(struct util_split_prim *s, unsigned max_verts)
s->p_start += (max_verts - repeat);
return FALSE;
}
+
+#endif /* U_SPLIT_PRIM_H */
diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c
index 607c31f5ee..c5d68f8df8 100644
--- a/src/gallium/auxiliary/util/u_staging.c
+++ b/src/gallium/auxiliary/util/u_staging.c
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#include "util/u_staging.h"
#include "pipe/p_context.h"
#include "util/u_memory.h"
@@ -8,7 +34,7 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne
{
memset(template, 0, sizeof(struct pipe_resource));
if(pt->target != PIPE_BUFFER && depth <= 1)
- template->target = PIPE_TEXTURE_2D;
+ template->target = PIPE_TEXTURE_RECT;
else
template->target = pt->target;
template->format = pt->format;
@@ -23,20 +49,16 @@ util_staging_resource_template(struct pipe_resource *pt, unsigned width, unsigne
}
struct util_staging_transfer *
-util_staging_transfer_new(struct pipe_context *pipe,
+util_staging_transfer_init(struct pipe_context *pipe,
struct pipe_resource *pt,
struct pipe_subresource sr,
unsigned usage,
const struct pipe_box *box,
- bool direct)
+ bool direct, struct util_staging_transfer *tx)
{
struct pipe_screen *pscreen = pipe->screen;
- struct util_staging_transfer *tx;
- struct pipe_resource staging_resource_template;
- tx = CALLOC_STRUCT(util_staging_transfer);
- if (!tx)
- return NULL;
+ struct pipe_resource staging_resource_template;
pipe_resource_reference(&tx->base.resource, pt);
tx->base.sr = sr;
diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h
index 602faa2971..1aab78cc88 100644
--- a/src/gallium/auxiliary/util/u_staging.h
+++ b/src/gallium/auxiliary/util/u_staging.h
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
/* Direct3D 10/11 has no concept of transfers. Applications instead
* create resources with a STAGING or DYNAMIC usage, copy between them
* and the real resource and use Map to map the STAGING/DYNAMIC resource.
@@ -21,15 +47,15 @@ struct util_staging_transfer {
};
/* user must be stride, slice_stride and offset */
-/* pt->usage == PIPE_USAGE_DYNAMIC should be a good value to pass for direct */
-/* staging resource is currently created with PIPE_USAGE_DYNAMIC */
+/* pt->usage == PIPE_USAGE_DYNAMIC || pt->usage == PIPE_USAGE_STAGING should be a good value to pass for direct */
+/* staging resource is currently created with PIPE_USAGE_STAGING */
struct util_staging_transfer *
-util_staging_transfer_new(struct pipe_context *pipe,
+util_staging_transfer_init(struct pipe_context *pipe,
struct pipe_resource *pt,
struct pipe_subresource sr,
unsigned usage,
const struct pipe_box *box,
- bool direct);
+ bool direct, struct util_staging_transfer *tx);
void
util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx);
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index cab7691c70..af99163b2e 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -216,7 +216,7 @@ util_clear_render_target(struct pipe_context *pipe,
assert(dst->texture);
if (!dst->texture)
return;
- util_pack_color(rgba, dst->texture->format, &uc);
+
dst_trans = pipe_get_transfer(pipe,
dst->texture,
dst->face,
@@ -232,46 +232,10 @@ util_clear_render_target(struct pipe_context *pipe,
if (dst_map) {
assert(dst_trans->stride > 0);
- switch (util_format_get_blocksize(dst->texture->format)) {
- case 1:
- case 2:
- case 4:
- util_pack_color(rgba, dst->texture->format, &uc);
- util_fill_rect(dst_map, dst->texture->format,
- dst_trans->stride,
- 0, 0, width, height, uc.ui);
- break;
- case 8:
- {
- /* expand the 4-byte clear value to an 8-byte value */
- /* should probably not convert back from ubyte but not
- sure what this code really achieved since it doesn't even
- check for format type... */
- ushort *row = (ushort *) dst_map;
- ushort val0 = UBYTE_TO_USHORT((uc.ui >> 0) & 0xff);
- ushort val1 = UBYTE_TO_USHORT((uc.ui >> 8) & 0xff);
- ushort val2 = UBYTE_TO_USHORT((uc.ui >> 16) & 0xff);
- ushort val3 = UBYTE_TO_USHORT((uc.ui >> 24) & 0xff);
- unsigned i, j;
- val0 = (val0 << 8) | val0;
- val1 = (val1 << 8) | val1;
- val2 = (val2 << 8) | val2;
- val3 = (val3 << 8) | val3;
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- row[j*4+0] = val0;
- row[j*4+1] = val1;
- row[j*4+2] = val2;
- row[j*4+3] = val3;
- }
- row += dst_trans->stride/2;
- }
- }
- break;
- default:
- assert(0);
- break;
- }
+ util_pack_color(rgba, dst->texture->format, &uc);
+ util_fill_rect(dst_map, dst->texture->format,
+ dst_trans->stride,
+ 0, 0, width, height, &uc);
}
pipe->transfer_unmap(pipe, dst_trans);
diff --git a/src/gallium/auxiliary/util/u_surfaces.c b/src/gallium/auxiliary/util/u_surfaces.c
index 7733ad24d0..404e121995 100644
--- a/src/gallium/auxiliary/util/u_surfaces.c
+++ b/src/gallium/auxiliary/util/u_surfaces.c
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#include "u_surfaces.h"
#include "util/u_hash_table.h"
#include "util/u_inlines.h"
diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h
index af978c7057..17d8a5d3a5 100644
--- a/src/gallium/auxiliary/util/u_surfaces.h
+++ b/src/gallium/auxiliary/util/u_surfaces.h
@@ -1,3 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
#ifndef U_SURFACES_H_
#define U_SURFACES_H_
@@ -22,7 +48,7 @@ struct pipe_surface *util_surfaces_do_get(struct util_surfaces *us, unsigned sur
static INLINE struct pipe_surface *
util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size, struct pipe_screen *pscreen, struct pipe_resource *pt, unsigned face, unsigned level, unsigned zslice, unsigned flags)
{
- if(likely(pt->target == PIPE_TEXTURE_2D && us->u.array))
+ if(likely((pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT) && us->u.array))
{
struct pipe_surface *ps = us->u.array[level];
if(ps)
@@ -52,7 +78,7 @@ void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps);
static INLINE void
util_surfaces_detach(struct util_surfaces *us, struct pipe_surface *ps)
{
- if(likely(ps->texture->target == PIPE_TEXTURE_2D))
+ if(likely(ps->texture->target == PIPE_TEXTURE_2D || ps->texture->target == PIPE_TEXTURE_RECT))
{
us->u.array[ps->level] = 0;
return;
diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h
index 986eee0743..558351d0ce 100644
--- a/src/gallium/auxiliary/util/u_tile.h
+++ b/src/gallium/auxiliary/util/u_tile.h
@@ -29,7 +29,10 @@
#define P_TILE_H
#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+struct pipe_context;
struct pipe_transfer;
/**
diff --git a/src/gallium/auxiliary/util/u_transfer.h b/src/gallium/auxiliary/util/u_transfer.h
index eb07945d15..e3a38730f2 100644
--- a/src/gallium/auxiliary/util/u_transfer.h
+++ b/src/gallium/auxiliary/util/u_transfer.h
@@ -8,6 +8,7 @@
#include "pipe/p_state.h"
struct pipe_context;
+struct winsys_handle;
boolean u_default_resource_get_handle(struct pipe_screen *screen,
struct pipe_resource *resource,
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h
index a124924fc8..de016df02e 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -32,11 +32,8 @@
#ifndef U_UPLOAD_MGR_H
#define U_UPLOAD_MGR_H
-#include "pipe/p_defines.h"
-
-struct pipe_screen;
+struct pipe_context;
struct pipe_resource;
-struct u_upload_mgr;
struct u_upload_mgr *u_upload_create( struct pipe_context *pipe,