summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/llvmpipe
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
-rw-r--r--src/gallium/drivers/llvmpipe/.gitignore5
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile72
-rw-r--r--src/gallium/drivers/llvmpipe/README166
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript92
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.c64
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_alpha.h54
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend.h98
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c363
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c109
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c326
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.c726
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.h67
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c388
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.h94
-rw-r--r--src/gallium/drivers/llvmpipe/lp_clear.c58
-rw-r--r--src/gallium/drivers/llvmpipe/lp_clear.h43
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c163
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h132
-rw-r--r--src/gallium/drivers/llvmpipe/lp_debug.h73
-rw-r--r--src/gallium/drivers/llvmpipe/lp_draw_arrays.c138
-rw-r--r--src/gallium/drivers/llvmpipe/lp_fence.c152
-rw-r--r--src/gallium/drivers/llvmpipe/lp_fence.h64
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.c146
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.h50
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c207
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.h180
-rw-r--r--src/gallium/drivers/llvmpipe/lp_limits.h75
-rw-r--r--src/gallium/drivers/llvmpipe/lp_perf.c95
-rw-r--r--src/gallium/drivers/llvmpipe/lp_perf.h82
-rw-r--r--src/gallium/drivers/llvmpipe/lp_public.h10
-rw-r--r--src/gallium/drivers/llvmpipe/lp_query.c146
-rw-r--r--src/gallium/drivers/llvmpipe/lp_query.h59
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.c1032
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast.h252
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_priv.h226
-rw-r--r--src/gallium/drivers/llvmpipe/lp_rast_tri.c280
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.c465
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene.h339
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene_queue.c124
-rw-r--r--src/gallium/drivers/llvmpipe/lp_scene_queue.h51
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c367
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.h79
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c953
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.h156
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_context.h162
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_line.c47
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_point.c46
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c723
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_vbuf.c550
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h135
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_blend.c172
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_clip.c94
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c198
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c1322
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.h107
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_gs.c112
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_rasterizer.c97
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_sampler.c231
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_so.c138
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_surface.c83
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_vertex.c101
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_vs.c118
-rw-r--r--src/gallium/drivers/llvmpipe/lp_surface.c158
-rw-r--r--src/gallium/drivers/llvmpipe/lp_surface.h42
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test.h144
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_blend.c905
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_conv.c453
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_format.c279
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_main.c408
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_printf.c175
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_sincos.c207
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample.c219
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample.h48
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.c1288
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.h237
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_image.c328
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_image.h57
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.h97
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.py403
-rw-r--r--src/gallium/drivers/llvmpipe/sse_mathfun.h773
80 files changed, 19478 insertions, 0 deletions
diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore
new file mode 100644
index 0000000000..a1b6f56e0d
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/.gitignore
@@ -0,0 +1,5 @@
+lp_tile_soa.c
+lp_test_blend
+lp_test_conv
+lp_test_format
+lp_test_printf
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
new file mode 100644
index 0000000000..ee28179c30
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -0,0 +1,72 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = llvmpipe
+
+DEFINES += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS
+
+C_SOURCES = \
+ lp_bld_alpha.c \
+ lp_bld_blend_aos.c \
+ lp_bld_blend_logicop.c \
+ lp_bld_blend_soa.c \
+ lp_bld_depth.c \
+ lp_bld_interp.c \
+ lp_clear.c \
+ lp_context.c \
+ lp_draw_arrays.c \
+ lp_fence.c \
+ lp_flush.c \
+ lp_jit.c \
+ lp_perf.c \
+ lp_query.c \
+ lp_rast.c \
+ lp_rast_tri.c \
+ lp_scene.c \
+ lp_scene_queue.c \
+ lp_screen.c \
+ lp_setup.c \
+ lp_setup_line.c \
+ lp_setup_point.c \
+ lp_setup_tri.c \
+ lp_setup_vbuf.c \
+ lp_state_blend.c \
+ lp_state_clip.c \
+ lp_state_derived.c \
+ lp_state_fs.c \
+ lp_state_gs.c \
+ lp_state_rasterizer.c \
+ lp_state_sampler.c \
+ lp_state_so.c \
+ lp_state_surface.c \
+ lp_state_vertex.c \
+ lp_state_vs.c \
+ lp_surface.c \
+ lp_tex_sample.c \
+ lp_texture.c \
+ lp_tile_image.c \
+ lp_tile_soa.c
+
+CPP_SOURCES = \
+
+PROGS := lp_test_format \
+ lp_test_blend \
+ lp_test_conv \
+ lp_test_printf \
+ lp_test_sincos
+
+lp_test_sincos.o : sse_mathfun.h
+
+PROGS_DEPS := ../../auxiliary/libgallium.a
+
+include ../../Makefile.template
+
+lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_pack.py ../../auxiliary/util/u_format.csv
+ python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@
+
+LDFLAGS += $(LLVM_LDFLAGS)
+LIBS += -L../../auxiliary/ -lgallium libllvmpipe.a $(LLVM_LIBS) $(GL_LIB_DEPS)
+LD=g++
+
+$(PROGS): lp_test_main.o libllvmpipe.a
+
diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
new file mode 100644
index 0000000000..8b5539d2c5
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/README
@@ -0,0 +1,166 @@
+LLVMPIPE -- a fork of softpipe that employs LLVM for code generation.
+
+
+Status
+======
+
+Done so far is:
+
+ - the whole fragment pipeline is code generated in a single function
+
+ - input interpolation
+
+ - depth testing
+
+ - texture sampling
+ - 1D/2D/3D/cube maps supported
+ - all texture wrap modes supported
+ - all texture filtering modes supported
+ - perhaps not all texture formats yet supported
+
+ - fragment shader TGSI translation
+ - same level of support as the TGSI SSE2 exec machine, with the exception
+ we don't fallback to TGSI interpretation when an unsupported opcode is
+ found, but just ignore it
+ - done in SoA layout
+ - input interpolation also code generated
+
+ - alpha testing
+
+ - blend (including logic ops)
+ - both in SoA and AoS layouts, but only the former used for now
+
+ - code is generic
+ - intermediates can be vectors of floats, ubytes, fixed point, etc, and of
+ any width and length
+ - not all operations are implemented for these types yet though
+
+Most mesa/progs/demos/* work.
+
+To do (probably by this order):
+
+ - code generate stipple and stencil testing
+
+ - translate TGSI control flow instructions, and all other remaining opcodes
+
+ - integrate with the draw module for VS code generation
+
+ - code generate the triangle setup and rasterization
+
+
+Requirements
+============
+
+ - A x86 or amd64 processor. 64bit mode is preferred.
+
+ Support for sse2 is strongly encouraged. Support for ssse3, and sse4.1 will
+ yield the most efficient code. The less features the CPU has the more
+ likely is that you ran into underperforming, buggy, or incomplete code.
+
+ See /proc/cpuinfo to know what your CPU supports.
+
+ - LLVM 2.6 (or later)
+
+ For Linux, on a recent Debian based distribution do:
+
+ aptitude install llvm-dev
+
+ For Windows download pre-built MSVC 9.0 or MinGW binaries from
+ http://people.freedesktop.org/~jrfonseca/llvm/ and set the LLVM environment
+ variable to the extracted path.
+
+ For MSVC there are two set of binaries: llvm-x.x-msvc32mt.7z and
+ llvm-x.x-msvc32mtd.7z .
+
+ You have to set the LLVM=/path/to/llvm-x.x-msvc32mtd env var when passing
+ debug=yes to scons, and LLVM=/path/to/llvm-x.x-msvc32mt when building with
+ debug=no. This is necessary as LLVM builds as static library so the chosen
+ MS CRT must match.
+
+ The version of LLVM from SVN ("2.7svn") from mid-March 2010 is pretty
+ stable and has some features not in version 2.6.
+
+ - scons (optional)
+
+ - udis86, http://udis86.sourceforge.net/ (optional). My personal repository
+ supports more opcodes which haven't been merged upstream yet:
+
+ git clone git://anongit.freedesktop.org/~jrfonseca/udis86
+ cd udis86
+ ./autogen.sh
+ ./configure --with-pic
+ make
+ sudo make install
+
+
+Building
+========
+
+To build everything on Linux invoke scons as:
+
+ scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=xlib dri=false
+
+Alternatively, you can build it with GNU make, if you prefer, by invoking it as
+
+ make linux-llvm
+
+but the rest of these instructions assume that scons is used.
+
+For windows is everything the except except the winsys:
+
+ scons debug=yes statetrackers=mesa drivers=llvmpipe winsys=gdi dri=false
+
+Using
+=====
+
+On Linux, building will create a drop-in alternative for libGL.so. To use it
+set the environment variables:
+
+ export LD_LIBRARY_PATH=$PWD/build/linux-x86_64-debug/lib:$LD_LIBRARY_PATH
+
+or
+
+ export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/lib:$LD_LIBRARY_PATH
+
+For performance evaluation pass debug=no to scons, and use the corresponding
+lib directory without the "-debug" suffix.
+
+On Windows, building will create a drop-in alternative for opengl32.dll. To use
+it put it in the same directory as the application. It can also be used by
+replacing the native ICD driver, but it's quite an advanced usage, so if you
+need to ask, don't even try it.
+
+
+Unit testing
+============
+
+Building will also create several unit tests in
+build/linux-???-debug/gallium/drivers/llvmpipe:
+
+ - lp_test_blend: blending
+ - lp_test_conv: SIMD vector conversion
+ - lp_test_format: pixel unpacking/packing
+
+Some of this tests can output results and benchmarks to a tab-separated-file
+for posterior analysis, e.g.:
+
+ build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv
+
+
+Development Notes
+=================
+
+- When looking to this code by the first time start in lp_state_fs.c, and
+ then skim through the lp_bld_* functions called in there, and the comments
+ at the top of the lp_bld_*.c functions.
+
+- The driver-independent parts of the LLVM / Gallium code are found in
+ src/gallium/auxiliary/gallivm/. The filenames and function prefixes
+ need to be renamed from "lp_bld_" to something else though.
+
+- We use LLVM-C bindings for now. They are not documented, but follow the C++
+ interfaces very closely, and appear to be complete enough for code
+ generation. See
+ http://npcontemplation.blogspot.com/2008/06/secret-of-llvm-c-bindings.html
+ for a stand-alone example.
+ See the llvm-c/Core.h file for reference.
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
new file mode 100644
index 0000000000..a1ef71da89
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -0,0 +1,92 @@
+Import('*')
+
+if not env['llvm']:
+ print 'warning: LLVM disabled: not building llvmpipe'
+ Return()
+
+env = env.Clone()
+
+env.Tool('udis86')
+
+env.Append(CPPPATH = ['.'])
+
+env.CodeGenerate(
+ target = 'lp_tile_soa.c',
+ script = 'lp_tile_soa.py',
+ source = ['#src/gallium/auxiliary/util/u_format.csv'],
+ command = 'python $SCRIPT $SOURCE > $TARGET'
+)
+
+# XXX: Our dependency scanner only finds depended modules in relative dirs.
+env.Depends('lp_tile_soa.c', [
+ '#src/gallium/auxiliary/util/u_format_parse.py',
+ '#src/gallium/auxiliary/util/u_format_pack.py',
+])
+
+llvmpipe = env.ConvenienceLibrary(
+ target = 'llvmpipe',
+ source = [
+ 'lp_bld_alpha.c',
+ 'lp_bld_blend_aos.c',
+ 'lp_bld_blend_logicop.c',
+ 'lp_bld_blend_soa.c',
+ 'lp_bld_depth.c',
+ 'lp_bld_interp.c',
+ 'lp_clear.c',
+ 'lp_context.c',
+ 'lp_draw_arrays.c',
+ 'lp_fence.c',
+ 'lp_flush.c',
+ 'lp_jit.c',
+ 'lp_perf.c',
+ 'lp_query.c',
+ 'lp_rast.c',
+ 'lp_rast_tri.c',
+ 'lp_scene.c',
+ 'lp_scene_queue.c',
+ 'lp_screen.c',
+ 'lp_setup.c',
+ 'lp_setup_line.c',
+ 'lp_setup_point.c',
+ 'lp_setup_tri.c',
+ 'lp_setup_vbuf.c',
+ 'lp_state_blend.c',
+ 'lp_state_clip.c',
+ 'lp_state_derived.c',
+ 'lp_state_fs.c',
+ 'lp_state_gs.c',
+ 'lp_state_rasterizer.c',
+ 'lp_state_sampler.c',
+ 'lp_state_so.c',
+ 'lp_state_surface.c',
+ 'lp_state_vertex.c',
+ 'lp_state_vs.c',
+ 'lp_surface.c',
+ 'lp_tex_sample.c',
+ 'lp_texture.c',
+ 'lp_tile_image.c',
+ 'lp_tile_soa.c',
+ ])
+
+
+if env['platform'] != 'embedded':
+ env = env.Clone()
+
+ env.Prepend(LIBS = [llvmpipe] + gallium)
+
+ tests = [
+ 'format',
+ 'blend',
+ 'conv',
+ 'printf',
+ 'sincos',
+ ]
+
+ for test in tests:
+ target = env.Program(
+ target = 'lp_test_' + test,
+ source = ['lp_test_' + test + '.c', 'lp_test_main.c'],
+ )
+ env.InstallProgram(target)
+
+Export('llvmpipe')
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.c b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
new file mode 100644
index 0000000000..8514030cde
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.c
@@ -0,0 +1,64 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Alpha testing to LLVM IR translation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_state.h"
+
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_logic.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_debug.h"
+
+#include "lp_bld_alpha.h"
+
+
+void
+lp_build_alpha_test(LLVMBuilderRef builder,
+ const struct pipe_alpha_state *state,
+ struct lp_type type,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef alpha,
+ LLVMValueRef ref)
+{
+ struct lp_build_context bld;
+
+ lp_build_context_init(&bld, builder, type);
+
+ if(state->enabled) {
+ LLVMValueRef test = lp_build_cmp(&bld, state->func, alpha, ref);
+
+ lp_build_name(test, "alpha_mask");
+
+ lp_build_mask_update(mask, test);
+ }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_alpha.h b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
new file mode 100644
index 0000000000..0f99fec65e
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_alpha.h
@@ -0,0 +1,54 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Alpha testing to LLVM IR translation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_ALPHA_H
+#define LP_BLD_ALPHA_H
+
+
+#include "gallivm/lp_bld.h"
+
+struct pipe_alpha_state;
+struct lp_type;
+struct lp_build_mask_context;
+
+
+void
+lp_build_alpha_test(LLVMBuilderRef builder,
+ const struct pipe_alpha_state *state,
+ struct lp_type type,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef alpha,
+ LLVMValueRef ref);
+
+
+#endif /* !LP_BLD_ALPHA_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
new file mode 100644
index 0000000000..5cecec3d7f
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
@@ -0,0 +1,98 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_BLD_BLEND_H
+#define LP_BLD_BLEND_H
+
+
+#include "gallivm/lp_bld.h"
+
+#include "pipe/p_format.h"
+
+
+struct pipe_blend_state;
+struct lp_type;
+struct lp_build_context;
+
+
+/**
+ * Whether the blending function is commutative or not.
+ */
+boolean
+lp_build_blend_func_commutative(unsigned func);
+
+
+/**
+ * Whether the blending functions are the reverse of each other.
+ */
+boolean
+lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func);
+
+
+LLVMValueRef
+lp_build_blend_func(struct lp_build_context *bld,
+ unsigned func,
+ LLVMValueRef term1,
+ LLVMValueRef term2);
+
+
+LLVMValueRef
+lp_build_blend_aos(LLVMBuilderRef builder,
+ const struct pipe_blend_state *blend,
+ struct lp_type type,
+ unsigned rt,
+ LLVMValueRef src,
+ LLVMValueRef dst,
+ LLVMValueRef const_,
+ unsigned alpha_swizzle);
+
+
+void
+lp_build_blend_soa(LLVMBuilderRef builder,
+ const struct pipe_blend_state *blend,
+ struct lp_type type,
+ unsigned rt,
+ LLVMValueRef src[4],
+ LLVMValueRef dst[4],
+ LLVMValueRef const_[4],
+ LLVMValueRef res[4]);
+
+
+/**
+ * Apply a logic op.
+ *
+ * src/dst parameters are packed values. It should work regardless the inputs
+ * are scalars, or a vector.
+ */
+LLVMValueRef
+lp_build_logicop(LLVMBuilderRef builder,
+ unsigned logicop_func,
+ LLVMValueRef src,
+ LLVMValueRef dst);
+
+
+#endif /* !LP_BLD_BLEND_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
new file mode 100644
index 0000000000..70d08e71f6
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -0,0 +1,363 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Blend LLVM IR generation -- AoS layout.
+ *
+ * AoS blending is in general much slower than SoA, but there are some cases
+ * where it might be faster. In particular, if a pixel is rendered only once
+ * then the overhead of tiling and untiling will dominate over the speedup that
+ * SoA gives. So we might want to detect such cases and fallback to AoS in the
+ * future, but for now this function is here for historical/benchmarking
+ * purposes.
+ *
+ * Run lp_blend_test after any change to this file.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_logic.h"
+#include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_debug.h"
+
+#include "lp_bld_blend.h"
+
+
+/**
+ * We may the same values several times, so we keep them here to avoid
+ * recomputing them. Also reusing the values allows us to do simplifications
+ * that LLVM optimization passes wouldn't normally be able to do.
+ */
+struct lp_build_blend_aos_context
+{
+ struct lp_build_context base;
+
+ LLVMValueRef src;
+ LLVMValueRef dst;
+ LLVMValueRef const_;
+
+ LLVMValueRef inv_src;
+ LLVMValueRef inv_dst;
+ LLVMValueRef inv_const;
+ LLVMValueRef saturate;
+
+ LLVMValueRef rgb_src_factor;
+ LLVMValueRef alpha_src_factor;
+ LLVMValueRef rgb_dst_factor;
+ LLVMValueRef alpha_dst_factor;
+};
+
+
+static LLVMValueRef
+lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
+ unsigned factor,
+ boolean alpha)
+{
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ZERO:
+ return bld->base.zero;
+ case PIPE_BLENDFACTOR_ONE:
+ return bld->base.one;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return bld->src;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return bld->dst;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ if(alpha)
+ return bld->base.one;
+ else {
+ if(!bld->inv_dst)
+ bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
+ if(!bld->saturate)
+ bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
+ return bld->saturate;
+ }
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return bld->const_;
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ /* TODO */
+ assert(0);
+ return bld->base.zero;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ if(!bld->inv_src)
+ bld->inv_src = lp_build_comp(&bld->base, bld->src);
+ return bld->inv_src;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ if(!bld->inv_dst)
+ bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
+ return bld->inv_dst;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ if(!bld->inv_const)
+ bld->inv_const = lp_build_comp(&bld->base, bld->const_);
+ return bld->inv_const;
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ /* TODO */
+ assert(0);
+ return bld->base.zero;
+ default:
+ assert(0);
+ return bld->base.zero;
+ }
+}
+
+
+enum lp_build_blend_swizzle {
+ LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
+ LP_BUILD_BLEND_SWIZZLE_AAAA = 1
+};
+
+
+/**
+ * How should we shuffle the base factor.
+ */
+static enum lp_build_blend_swizzle
+lp_build_blend_factor_swizzle(unsigned factor)
+{
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ case PIPE_BLENDFACTOR_ZERO:
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ return LP_BUILD_BLEND_SWIZZLE_RGBA;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ return LP_BUILD_BLEND_SWIZZLE_AAAA;
+ default:
+ assert(0);
+ return LP_BUILD_BLEND_SWIZZLE_RGBA;
+ }
+}
+
+
+static LLVMValueRef
+lp_build_blend_swizzle(struct lp_build_blend_aos_context *bld,
+ LLVMValueRef rgb,
+ LLVMValueRef alpha,
+ enum lp_build_blend_swizzle rgb_swizzle,
+ unsigned alpha_swizzle)
+{
+ if(rgb == alpha) {
+ if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
+ return rgb;
+ if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA)
+ return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
+ }
+ else {
+ if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) {
+ boolean cond[4] = {0, 0, 0, 0};
+ cond[alpha_swizzle] = 1;
+ return lp_build_select_aos(&bld->base, alpha, rgb, cond);
+ }
+ if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
+ unsigned char swizzle[4];
+ swizzle[0] = alpha_swizzle;
+ swizzle[1] = alpha_swizzle;
+ swizzle[2] = alpha_swizzle;
+ swizzle[3] = alpha_swizzle;
+ swizzle[alpha_swizzle] += 4;
+ return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
+ }
+ }
+ assert(0);
+ return bld->base.undef;
+}
+
+
+/**
+ * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendFuncSeparate.xml
+ */
+static LLVMValueRef
+lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
+ LLVMValueRef factor1,
+ unsigned rgb_factor,
+ unsigned alpha_factor,
+ unsigned alpha_swizzle)
+{
+ LLVMValueRef rgb_factor_;
+ LLVMValueRef alpha_factor_;
+ LLVMValueRef factor2;
+ enum lp_build_blend_swizzle rgb_swizzle;
+
+ rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
+ alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
+
+ rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
+
+ factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
+
+ return lp_build_mul(&bld->base, factor1, factor2);
+}
+
+
+/**
+ * Is (a OP b) == (b OP a)?
+ */
+boolean
+lp_build_blend_func_commutative(unsigned func)
+{
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ case PIPE_BLEND_MIN:
+ case PIPE_BLEND_MAX:
+ return TRUE;
+ case PIPE_BLEND_SUBTRACT:
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return FALSE;
+ default:
+ assert(0);
+ return TRUE;
+ }
+}
+
+
+boolean
+lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
+{
+ if(rgb_func == alpha_func)
+ return FALSE;
+ if(rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT)
+ return TRUE;
+ if(rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT)
+ return TRUE;
+ return FALSE;
+}
+
+
+/**
+ * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
+ */
+LLVMValueRef
+lp_build_blend_func(struct lp_build_context *bld,
+ unsigned func,
+ LLVMValueRef term1,
+ LLVMValueRef term2)
+{
+ switch (func) {
+ case PIPE_BLEND_ADD:
+ return lp_build_add(bld, term1, term2);
+ case PIPE_BLEND_SUBTRACT:
+ return lp_build_sub(bld, term1, term2);
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return lp_build_sub(bld, term2, term1);
+ case PIPE_BLEND_MIN:
+ return lp_build_min(bld, term1, term2);
+ case PIPE_BLEND_MAX:
+ return lp_build_max(bld, term1, term2);
+ default:
+ assert(0);
+ return bld->zero;
+ }
+}
+
+
+LLVMValueRef
+lp_build_blend_aos(LLVMBuilderRef builder,
+ const struct pipe_blend_state *blend,
+ struct lp_type type,
+ unsigned rt,
+ LLVMValueRef src,
+ LLVMValueRef dst,
+ LLVMValueRef const_,
+ unsigned alpha_swizzle)
+{
+ struct lp_build_blend_aos_context bld;
+ LLVMValueRef src_term;
+ LLVMValueRef dst_term;
+
+ /* FIXME: color masking not implemented yet */
+ assert(blend->rt[rt].colormask == 0xf);
+
+ if(!blend->rt[rt].blend_enable)
+ return src;
+
+ /* It makes no sense to blend unless values are normalized */
+ assert(type.norm);
+
+ /* Setup build context */
+ memset(&bld, 0, sizeof bld);
+ lp_build_context_init(&bld.base, builder, type);
+ bld.src = src;
+ bld.dst = dst;
+ bld.const_ = const_;
+
+ /* TODO: There are still a few optimization opportunities here. For certain
+ * combinations it is possible to reorder the operations and therefore saving
+ * some instructions. */
+
+ src_term = lp_build_blend_factor(&bld, src, blend->rt[rt].rgb_src_factor,
+ blend->rt[rt].alpha_src_factor, alpha_swizzle);
+ dst_term = lp_build_blend_factor(&bld, dst, blend->rt[rt].rgb_dst_factor,
+ blend->rt[rt].alpha_dst_factor, alpha_swizzle);
+
+ lp_build_name(src_term, "src_term");
+ lp_build_name(dst_term, "dst_term");
+
+ if(blend->rt[rt].rgb_func == blend->rt[rt].alpha_func) {
+ return lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term);
+ }
+ else {
+ /* Seperate RGB / A functions */
+
+ LLVMValueRef rgb;
+ LLVMValueRef alpha;
+
+ rgb = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term);
+ alpha = lp_build_blend_func(&bld.base, blend->rt[rt].alpha_func, src_term, dst_term);
+
+ return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
+ }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c
new file mode 100644
index 0000000000..1eac0a5c89
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_logicop.c
@@ -0,0 +1,109 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Blend LLVM IR generation -- logic ops.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+
+#include "lp_bld_blend.h"
+
+
+LLVMValueRef
+lp_build_logicop(LLVMBuilderRef builder,
+ unsigned logicop_func,
+ LLVMValueRef src,
+ LLVMValueRef dst)
+{
+ LLVMTypeRef type;
+ LLVMValueRef res;
+
+ type = LLVMTypeOf(src);
+
+ switch (logicop_func) {
+ case PIPE_LOGICOP_CLEAR:
+ res = LLVMConstNull(type);
+ break;
+ case PIPE_LOGICOP_NOR:
+ res = LLVMBuildNot(builder, LLVMBuildOr(builder, src, dst, ""), "");
+ break;
+ case PIPE_LOGICOP_AND_INVERTED:
+ res = LLVMBuildAnd(builder, LLVMBuildNot(builder, src, ""), dst, "");
+ break;
+ case PIPE_LOGICOP_COPY_INVERTED:
+ res = LLVMBuildNot(builder, src, "");
+ break;
+ case PIPE_LOGICOP_AND_REVERSE:
+ res = LLVMBuildAnd(builder, src, LLVMBuildNot(builder, dst, ""), "");
+ break;
+ case PIPE_LOGICOP_INVERT:
+ res = LLVMBuildNot(builder, dst, "");
+ break;
+ case PIPE_LOGICOP_XOR:
+ res = LLVMBuildXor(builder, src, dst, "");
+ break;
+ case PIPE_LOGICOP_NAND:
+ res = LLVMBuildNot(builder, LLVMBuildAnd(builder, src, dst, ""), "");
+ break;
+ case PIPE_LOGICOP_AND:
+ res = LLVMBuildAnd(builder, src, dst, "");
+ break;
+ case PIPE_LOGICOP_EQUIV:
+ res = LLVMBuildNot(builder, LLVMBuildXor(builder, src, dst, ""), "");
+ break;
+ case PIPE_LOGICOP_NOOP:
+ res = dst;
+ break;
+ case PIPE_LOGICOP_OR_INVERTED:
+ res = LLVMBuildOr(builder, LLVMBuildNot(builder, src, ""), dst, "");
+ break;
+ case PIPE_LOGICOP_COPY:
+ res = src;
+ break;
+ case PIPE_LOGICOP_OR_REVERSE:
+ res = LLVMBuildOr(builder, src, LLVMBuildNot(builder, dst, ""), "");
+ break;
+ case PIPE_LOGICOP_OR:
+ res = LLVMBuildOr(builder, src, dst, "");
+ break;
+ case PIPE_LOGICOP_SET:
+ res = LLVMConstAllOnes(type);
+ break;
+ default:
+ assert(0);
+ res = src;
+ }
+
+ return res;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
new file mode 100644
index 0000000000..b9c7a6ceed
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
@@ -0,0 +1,326 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Blend LLVM IR generation -- SoA layout.
+ *
+ * Blending in SoA is much faster than AoS, especially when separate rgb/alpha
+ * factors/functions are used, since no channel masking/shuffling is necessary
+ * and we can achieve the full throughput of the SIMD operations. Furthermore
+ * the fragment shader output is also in SoA, so it fits nicely with the rest
+ * of the fragment pipeline.
+ *
+ * The drawback is that to be displayed the color buffer needs to be in AoS
+ * layout, so we need to tile/untile the color buffer before/after rendering.
+ * A color buffer like
+ *
+ * R11 G11 B11 A11 R12 G12 B12 A12 R13 G13 B13 A13 R14 G14 B14 A14 ...
+ * R21 G21 B21 A21 R22 G22 B22 A22 R23 G23 B23 A23 R24 G24 B24 A24 ...
+ *
+ * R31 G31 B31 A31 R32 G32 B32 A32 R33 G33 B33 A33 R34 G34 B34 A34 ...
+ * R41 G41 B41 A41 R42 G42 B42 A42 R43 G43 B43 A43 R44 G44 B44 A44 ...
+ *
+ * ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
+ *
+ * will actually be stored in memory as
+ *
+ * R11 R12 R21 R22 R13 R14 R23 R24 ... G11 G12 G21 G22 G13 G14 G23 G24 ... B11 B12 B21 B22 B13 B14 B23 B24 ... A11 A12 A21 A22 A13 A14 A23 A24 ...
+ * R31 R32 R41 R42 R33 R34 R43 R44 ... G31 G32 G41 G42 G33 G34 G43 G44 ... B31 B32 B41 B42 B33 B34 B43 B44 ... A31 A32 A41 A42 A33 A34 A43 A44 ...
+ * ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
+ *
+ * NOTE: Run lp_blend_test after any change to this file.
+ *
+ * You can also run lp_blend_test to obtain AoS vs SoA benchmarks. Invoking it
+ * as:
+ *
+ * lp_blend_test -o blend.tsv
+ *
+ * will generate a tab-seperated-file with the test results and performance
+ * measurements.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_arit.h"
+#include "lp_bld_blend.h"
+
+
+/**
+ * We may use the same values several times, so we keep them here to avoid
+ * recomputing them. Also reusing the values allows us to do simplifications
+ * that LLVM optimization passes wouldn't normally be able to do.
+ */
+struct lp_build_blend_soa_context
+{
+ struct lp_build_context base;
+
+ LLVMValueRef src[4];
+ LLVMValueRef dst[4];
+ LLVMValueRef con[4];
+
+ LLVMValueRef inv_src[4];
+ LLVMValueRef inv_dst[4];
+ LLVMValueRef inv_con[4];
+
+ LLVMValueRef src_alpha_saturate;
+
+ /**
+ * We store all factors in a table in order to eliminate redundant
+ * multiplications later.
+ * Indexes are: factor[src,dst][color,term][r,g,b,a]
+ */
+ LLVMValueRef factor[2][2][4];
+
+ /**
+ * Table with all terms.
+ * Indexes are: term[src,dst][r,g,b,a]
+ */
+ LLVMValueRef term[2][4];
+};
+
+
+/**
+ * Build a single SOA blend factor for a color channel.
+ * \param i the color channel in [0,3]
+ */
+static LLVMValueRef
+lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,
+ unsigned factor, unsigned i)
+{
+ /*
+ * Compute src/first term RGB
+ */
+ switch (factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ return bld->base.one;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return bld->src[i];
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return bld->src[3];
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return bld->dst[i];
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return bld->dst[3];
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ if(i == 3)
+ return bld->base.one;
+ else {
+ if(!bld->inv_dst[3])
+ bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]);
+ if(!bld->src_alpha_saturate)
+ bld->src_alpha_saturate = lp_build_min(&bld->base, bld->src[3], bld->inv_dst[3]);
+ return bld->src_alpha_saturate;
+ }
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return bld->con[i];
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return bld->con[3];
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ /* TODO */
+ assert(0);
+ return bld->base.zero;
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ /* TODO */
+ assert(0);
+ return bld->base.zero;
+ case PIPE_BLENDFACTOR_ZERO:
+ return bld->base.zero;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ if(!bld->inv_src[i])
+ bld->inv_src[i] = lp_build_comp(&bld->base, bld->src[i]);
+ return bld->inv_src[i];
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ if(!bld->inv_src[3])
+ bld->inv_src[3] = lp_build_comp(&bld->base, bld->src[3]);
+ return bld->inv_src[3];
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ if(!bld->inv_dst[i])
+ bld->inv_dst[i] = lp_build_comp(&bld->base, bld->dst[i]);
+ return bld->inv_dst[i];
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ if(!bld->inv_dst[3])
+ bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]);
+ return bld->inv_dst[3];
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ if(!bld->inv_con[i])
+ bld->inv_con[i] = lp_build_comp(&bld->base, bld->con[i]);
+ return bld->inv_con[i];
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ if(!bld->inv_con[3])
+ bld->inv_con[3] = lp_build_comp(&bld->base, bld->con[3]);
+ return bld->inv_con[3];
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ /* TODO */
+ assert(0);
+ return bld->base.zero;
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ /* TODO */
+ assert(0);
+ return bld->base.zero;
+ default:
+ assert(0);
+ return bld->base.zero;
+ }
+}
+
+
+/**
+ * Generate blend code in SOA mode.
+ * \param rt render target index (to index the blend / colormask state)
+ * \param src src/fragment color
+ * \param dst dst/framebuffer color
+ * \param con constant blend color
+ * \param res the result/output
+ */
+void
+lp_build_blend_soa(LLVMBuilderRef builder,
+ const struct pipe_blend_state *blend,
+ struct lp_type type,
+ unsigned rt,
+ LLVMValueRef src[4],
+ LLVMValueRef dst[4],
+ LLVMValueRef con[4],
+ LLVMValueRef res[4])
+{
+ struct lp_build_blend_soa_context bld;
+ unsigned i, j, k;
+
+ assert(rt < PIPE_MAX_COLOR_BUFS);
+
+ /* Setup build context */
+ memset(&bld, 0, sizeof bld);
+ lp_build_context_init(&bld.base, builder, type);
+ for (i = 0; i < 4; ++i) {
+ bld.src[i] = src[i];
+ bld.dst[i] = dst[i];
+ bld.con[i] = con[i];
+ }
+
+ for (i = 0; i < 4; ++i) {
+ /* only compute blending for the color channels enabled for writing */
+ if (blend->rt[rt].colormask & (1 << i)) {
+ if (blend->logicop_enable) {
+ if(!type.floating) {
+ res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]);
+ }
+ else
+ res[i] = dst[i];
+ }
+ else if (blend->rt[rt].blend_enable) {
+ unsigned src_factor = i < 3 ? blend->rt[rt].rgb_src_factor : blend->rt[rt].alpha_src_factor;
+ unsigned dst_factor = i < 3 ? blend->rt[rt].rgb_dst_factor : blend->rt[rt].alpha_dst_factor;
+ unsigned func = i < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func;
+ boolean func_commutative = lp_build_blend_func_commutative(func);
+
+ /* It makes no sense to blend unless values are normalized */
+ assert(type.norm);
+
+ /*
+ * Compute src/dst factors.
+ */
+
+ bld.factor[0][0][i] = src[i];
+ bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i);
+ bld.factor[1][0][i] = dst[i];
+ bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i);
+
+ /*
+ * Compute src/dst terms
+ */
+
+ for(k = 0; k < 2; ++k) {
+ /* See if this multiplication has been previously computed */
+ for(j = 0; j < i; ++j) {
+ if((bld.factor[k][0][j] == bld.factor[k][0][i] &&
+ bld.factor[k][1][j] == bld.factor[k][1][i]) ||
+ (bld.factor[k][0][j] == bld.factor[k][1][i] &&
+ bld.factor[k][1][j] == bld.factor[k][0][i]))
+ break;
+ }
+
+ if(j < i)
+ bld.term[k][i] = bld.term[k][j];
+ else
+ bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i], bld.factor[k][1][i]);
+
+ if (src_factor == PIPE_BLENDFACTOR_ZERO &&
+ (dst_factor == PIPE_BLENDFACTOR_DST_ALPHA ||
+ dst_factor == PIPE_BLENDFACTOR_INV_DST_ALPHA)) {
+ /* XXX special case these combos to work around an apparent
+ * bug in LLVM.
+ * This hack disables the check for multiplication by zero
+ * in lp_bld_mul(). When we optimize away the
+ * multiplication, something goes wrong during code
+ * generation and we segfault at runtime.
+ */
+ LLVMValueRef zeroSave = bld.base.zero;
+ bld.base.zero = NULL;
+ bld.term[k][i] = lp_build_mul(&bld.base, bld.factor[k][0][i],
+ bld.factor[k][1][i]);
+ bld.base.zero = zeroSave;
+ }
+ }
+
+ /*
+ * Combine terms
+ */
+
+ /* See if this function has been previously applied */
+ for(j = 0; j < i; ++j) {
+ unsigned prev_func = j < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func;
+ unsigned func_reverse = lp_build_blend_func_reverse(func, prev_func);
+
+ if((!func_reverse &&
+ bld.term[0][j] == bld.term[0][i] &&
+ bld.term[1][j] == bld.term[1][i]) ||
+ ((func_commutative || func_reverse) &&
+ bld.term[0][j] == bld.term[1][i] &&
+ bld.term[1][j] == bld.term[0][i]))
+ break;
+ }
+
+ if(j < i)
+ res[i] = res[j];
+ else
+ res[i] = lp_build_blend_func(&bld.base, func, bld.term[0][i], bld.term[1][i]);
+ }
+ else {
+ res[i] = src[i];
+ }
+ }
+ else {
+ res[i] = dst[i];
+ }
+ }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
new file mode 100644
index 0000000000..e05bbe5011
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -0,0 +1,726 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Depth/stencil testing to LLVM IR translation.
+ *
+ * To be done accurately/efficiently the depth/stencil test must be done with
+ * the same type/format of the depth/stencil buffer, which implies massaging
+ * the incoming depths to fit into place. Using a more straightforward
+ * type/format for depth/stencil values internally and only convert when
+ * flushing would avoid this, but it would most likely result in depth fighting
+ * artifacts.
+ *
+ * We are free to use a different pixel layout though. Since our basic
+ * processing unit is a quad (2x2 pixel block) we store the depth/stencil
+ * values tiled, a quad at time. That is, a depth buffer containing
+ *
+ * Z11 Z12 Z13 Z14 ...
+ * Z21 Z22 Z23 Z24 ...
+ * Z31 Z32 Z33 Z34 ...
+ * Z41 Z42 Z43 Z44 ...
+ * ... ... ... ... ...
+ *
+ * will actually be stored in memory as
+ *
+ * Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
+ * Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
+ * ... ... ... ... ... ... ... ... ...
+ *
+ *
+ * Stencil test:
+ * Two-sided stencil test is supported but probably not as efficient as
+ * it could be. Currently, we use if/then/else constructs to do the
+ * operations for front vs. back-facing polygons. We could probably do
+ * both the front and back arithmetic then use a Select() instruction to
+ * choose the result depending on polyon orientation. We'd have to
+ * measure performance both ways and see which is better.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_logic.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_swizzle.h"
+
+#include "lp_bld_depth.h"
+
+
+/** Used to select fields from pipe_stencil_state */
+enum stencil_op {
+ S_FAIL_OP,
+ Z_FAIL_OP,
+ Z_PASS_OP
+};
+
+
+
+/**
+ * Do the stencil test comparison (compare FB stencil values against ref value).
+ * This will be used twice when generating two-sided stencil code.
+ * \param stencil the front/back stencil state
+ * \param stencilRef the stencil reference value, replicated as a vector
+ * \param stencilVals vector of stencil values from framebuffer
+ * \return vector mask of pass/fail values (~0 or 0)
+ */
+static LLVMValueRef
+lp_build_stencil_test_single(struct lp_build_context *bld,
+ const struct pipe_stencil_state *stencil,
+ LLVMValueRef stencilRef,
+ LLVMValueRef stencilVals)
+{
+ const unsigned stencilMax = 255; /* XXX fix */
+ struct lp_type type = bld->type;
+ LLVMValueRef res;
+
+ assert(type.sign);
+
+ assert(stencil->enabled);
+
+ if (stencil->valuemask != stencilMax) {
+ /* compute stencilRef = stencilRef & valuemask */
+ LLVMValueRef valuemask = lp_build_const_int_vec(type, stencil->valuemask);
+ stencilRef = LLVMBuildAnd(bld->builder, stencilRef, valuemask, "");
+ /* compute stencilVals = stencilVals & valuemask */
+ stencilVals = LLVMBuildAnd(bld->builder, stencilVals, valuemask, "");
+ }
+
+ res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
+
+ return res;
+}
+
+
+/**
+ * Do the one or two-sided stencil test comparison.
+ * \sa lp_build_stencil_test_single
+ * \param face an integer indicating front (+) or back (-) facing polygon.
+ * If NULL, assume front-facing.
+ */
+static LLVMValueRef
+lp_build_stencil_test(struct lp_build_context *bld,
+ const struct pipe_stencil_state stencil[2],
+ LLVMValueRef stencilRefs[2],
+ LLVMValueRef stencilVals,
+ LLVMValueRef face)
+{
+ LLVMValueRef res;
+
+ assert(stencil[0].enabled);
+
+ if (stencil[1].enabled && face) {
+ /* do two-sided test */
+ struct lp_build_flow_context *flow_ctx;
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef front_facing;
+ LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
+ LLVMValueRef result = bld->undef;
+
+ flow_ctx = lp_build_flow_create(bld->builder);
+ lp_build_flow_scope_begin(flow_ctx);
+
+ lp_build_flow_scope_declare(flow_ctx, &result);
+
+ /* front_facing = face > 0.0 */
+ front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, "");
+
+ lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing);
+ {
+ result = lp_build_stencil_test_single(bld, &stencil[0],
+ stencilRefs[0], stencilVals);
+ }
+ lp_build_else(&if_ctx);
+ {
+ result = lp_build_stencil_test_single(bld, &stencil[1],
+ stencilRefs[1], stencilVals);
+ }
+ lp_build_endif(&if_ctx);
+
+ lp_build_flow_scope_end(flow_ctx);
+ lp_build_flow_destroy(flow_ctx);
+
+ res = result;
+ }
+ else {
+ /* do single-side test */
+ res = lp_build_stencil_test_single(bld, &stencil[0],
+ stencilRefs[0], stencilVals);
+ }
+
+ return res;
+}
+
+
+/**
+ * Apply the stencil operator (add/sub/keep/etc) to the given vector
+ * of stencil values.
+ * \return new stencil values vector
+ */
+static LLVMValueRef
+lp_build_stencil_op_single(struct lp_build_context *bld,
+ const struct pipe_stencil_state *stencil,
+ enum stencil_op op,
+ LLVMValueRef stencilRef,
+ LLVMValueRef stencilVals,
+ LLVMValueRef mask)
+
+{
+ const unsigned stencilMax = 255; /* XXX fix */
+ struct lp_type type = bld->type;
+ LLVMValueRef res;
+ LLVMValueRef max = lp_build_const_int_vec(type, stencilMax);
+ unsigned stencil_op;
+
+ assert(type.sign);
+
+ switch (op) {
+ case S_FAIL_OP:
+ stencil_op = stencil->fail_op;
+ break;
+ case Z_FAIL_OP:
+ stencil_op = stencil->zfail_op;
+ break;
+ case Z_PASS_OP:
+ stencil_op = stencil->zpass_op;
+ break;
+ default:
+ assert(0 && "Invalid stencil_op mode");
+ stencil_op = PIPE_STENCIL_OP_KEEP;
+ }
+
+ switch (stencil_op) {
+ case PIPE_STENCIL_OP_KEEP:
+ res = stencilVals;
+ /* we can return early for this case */
+ return res;
+ case PIPE_STENCIL_OP_ZERO:
+ res = bld->zero;
+ break;
+ case PIPE_STENCIL_OP_REPLACE:
+ res = stencilRef;
+ break;
+ case PIPE_STENCIL_OP_INCR:
+ res = lp_build_add(bld, stencilVals, bld->one);
+ res = lp_build_min(bld, res, max);
+ break;
+ case PIPE_STENCIL_OP_DECR:
+ res = lp_build_sub(bld, stencilVals, bld->one);
+ res = lp_build_max(bld, res, bld->zero);
+ break;
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ res = lp_build_add(bld, stencilVals, bld->one);
+ res = LLVMBuildAnd(bld->builder, res, max, "");
+ break;
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ res = lp_build_sub(bld, stencilVals, bld->one);
+ res = LLVMBuildAnd(bld->builder, res, max, "");
+ break;
+ case PIPE_STENCIL_OP_INVERT:
+ res = LLVMBuildNot(bld->builder, stencilVals, "");
+ res = LLVMBuildAnd(bld->builder, res, max, "");
+ break;
+ default:
+ assert(0 && "bad stencil op mode");
+ res = NULL;
+ }
+
+ if (stencil->writemask != stencilMax) {
+ /* compute res = (res & mask) | (stencilVals & ~mask) */
+ LLVMValueRef mask = lp_build_const_int_vec(type, stencil->writemask);
+ LLVMValueRef cmask = LLVMBuildNot(bld->builder, mask, "notWritemask");
+ LLVMValueRef t1 = LLVMBuildAnd(bld->builder, res, mask, "t1");
+ LLVMValueRef t2 = LLVMBuildAnd(bld->builder, stencilVals, cmask, "t2");
+ res = LLVMBuildOr(bld->builder, t1, t2, "t1_or_t2");
+ }
+
+ /* only the update the vector elements enabled by 'mask' */
+ res = lp_build_select(bld, mask, res, stencilVals);
+
+ return res;
+}
+
+
+/**
+ * Do the one or two-sided stencil test op/update.
+ */
+static LLVMValueRef
+lp_build_stencil_op(struct lp_build_context *bld,
+ const struct pipe_stencil_state stencil[2],
+ enum stencil_op op,
+ LLVMValueRef stencilRefs[2],
+ LLVMValueRef stencilVals,
+ LLVMValueRef mask,
+ LLVMValueRef face)
+
+{
+ assert(stencil[0].enabled);
+
+ if (stencil[1].enabled && face) {
+ /* do two-sided op */
+ struct lp_build_flow_context *flow_ctx;
+ struct lp_build_if_state if_ctx;
+ LLVMValueRef front_facing;
+ LLVMValueRef zero = LLVMConstReal(LLVMFloatType(), 0.0);
+ LLVMValueRef result = bld->undef;
+
+ flow_ctx = lp_build_flow_create(bld->builder);
+ lp_build_flow_scope_begin(flow_ctx);
+
+ lp_build_flow_scope_declare(flow_ctx, &result);
+
+ /* front_facing = face > 0.0 */
+ front_facing = LLVMBuildFCmp(bld->builder, LLVMRealUGT, face, zero, "");
+
+ lp_build_if(&if_ctx, flow_ctx, bld->builder, front_facing);
+ {
+ result = lp_build_stencil_op_single(bld, &stencil[0], op,
+ stencilRefs[0], stencilVals, mask);
+ }
+ lp_build_else(&if_ctx);
+ {
+ result = lp_build_stencil_op_single(bld, &stencil[1], op,
+ stencilRefs[1], stencilVals, mask);
+ }
+ lp_build_endif(&if_ctx);
+
+ lp_build_flow_scope_end(flow_ctx);
+ lp_build_flow_destroy(flow_ctx);
+
+ return result;
+ }
+ else {
+ /* do single-sided op */
+ return lp_build_stencil_op_single(bld, &stencil[0], op,
+ stencilRefs[0], stencilVals, mask);
+ }
+}
+
+
+
+/**
+ * Return a type appropriate for depth/stencil testing.
+ */
+struct lp_type
+lp_depth_type(const struct util_format_description *format_desc,
+ unsigned length)
+{
+ struct lp_type type;
+ unsigned swizzle;
+
+ assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
+ assert(format_desc->block.width == 1);
+ assert(format_desc->block.height == 1);
+
+ swizzle = format_desc->swizzle[0];
+ assert(swizzle < 4);
+
+ memset(&type, 0, sizeof type);
+ type.width = format_desc->block.bits;
+
+ if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
+ type.floating = TRUE;
+ assert(swizzle == 0);
+ assert(format_desc->channel[swizzle].size == format_desc->block.bits);
+ }
+ else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+ assert(format_desc->block.bits <= 32);
+ if(format_desc->channel[swizzle].normalized)
+ type.norm = TRUE;
+ }
+ else
+ assert(0);
+
+ assert(type.width <= length);
+ type.length = length / type.width;
+
+ return type;
+}
+
+
+/**
+ * Compute bitmask and bit shift to apply to the incoming fragment Z values
+ * and the Z buffer values needed before doing the Z comparison.
+ *
+ * Note that we leave the Z bits in the position that we find them
+ * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us
+ * get by with fewer bit twiddling steps.
+ */
+static boolean
+get_z_shift_and_mask(const struct util_format_description *format_desc,
+ unsigned *shift, unsigned *mask)
+{
+ const unsigned total_bits = format_desc->block.bits;
+ unsigned z_swizzle;
+ unsigned chan;
+ unsigned padding_left, padding_right;
+
+ assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
+ assert(format_desc->block.width == 1);
+ assert(format_desc->block.height == 1);
+
+ z_swizzle = format_desc->swizzle[0];
+
+ if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+ return FALSE;
+
+ padding_right = 0;
+ for (chan = 0; chan < z_swizzle; ++chan)
+ padding_right += format_desc->channel[chan].size;
+
+ padding_left =
+ total_bits - (padding_right + format_desc->channel[z_swizzle].size);
+
+ if (padding_left || padding_right) {
+ unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
+ unsigned long long mask_right = (1ULL << (padding_right)) - 1;
+ *mask = mask_left ^ mask_right;
+ }
+ else {
+ *mask = 0xffffffff;
+ }
+
+ *shift = padding_left;
+
+ return TRUE;
+}
+
+
+/**
+ * Compute bitmask and bit shift to apply to the framebuffer pixel values
+ * to put the stencil bits in the least significant position.
+ * (i.e. 0x000000ff)
+ */
+static boolean
+get_s_shift_and_mask(const struct util_format_description *format_desc,
+ unsigned *shift, unsigned *mask)
+{
+ unsigned s_swizzle;
+ unsigned chan, sz;
+
+ s_swizzle = format_desc->swizzle[1];
+
+ if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
+ return FALSE;
+
+ *shift = 0;
+ for (chan = 0; chan < s_swizzle; chan++)
+ *shift += format_desc->channel[chan].size;
+
+ sz = format_desc->channel[s_swizzle].size;
+ *mask = (1U << sz) - 1U;
+
+ return TRUE;
+}
+
+
+/**
+ * Perform the occlusion test and increase the counter.
+ * Test the depth mask. Add the number of channel which has none zero mask
+ * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
+ * The counter will add 4.
+ *
+ * \param type holds element type of the mask vector.
+ * \param maskvalue is the depth test mask.
+ * \param counter is a pointer of the uint32 counter.
+ */
+static void
+lp_build_occlusion_count(LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef maskvalue,
+ LLVMValueRef counter)
+{
+ LLVMValueRef countmask = lp_build_const_int_vec(type, 1);
+ LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
+ LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8Type(), 16);
+ LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti");
+ LLVMValueRef maskarray[4] = {
+ LLVMConstInt(LLVMInt32Type(), 0, 0),
+ LLVMConstInt(LLVMInt32Type(), 4, 0),
+ LLVMConstInt(LLVMInt32Type(), 8, 0),
+ LLVMConstInt(LLVMInt32Type(), 12, 0),
+ };
+ LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4);
+ LLVMValueRef shufflev = LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev");
+ LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32Type(), "shuffle");
+ LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32Type(), shuffle);
+ LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig");
+ LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr");
+ LLVMBuildStore(builder, incr, counter);
+}
+
+
+
+/**
+ * Generate code for performing depth and/or stencil tests.
+ * We operate on a vector of values (typically a 2x2 quad).
+ *
+ * \param depth the depth test state
+ * \param stencil the front/back stencil state
+ * \param type the data type of the fragment depth/stencil values
+ * \param format_desc description of the depth/stencil surface
+ * \param mask the alive/dead pixel mask for the quad (vector)
+ * \param stencil_refs the front/back stencil ref values (scalar)
+ * \param z_src the incoming depth/stencil values (a 2x2 quad)
+ * \param zs_dst_ptr pointer to depth/stencil values in framebuffer
+ * \param facing contains float value indicating front/back facing polygon
+ */
+void
+lp_build_depth_stencil_test(LLVMBuilderRef builder,
+ const struct pipe_depth_state *depth,
+ const struct pipe_stencil_state stencil[2],
+ struct lp_type type,
+ const struct util_format_description *format_desc,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef stencil_refs[2],
+ LLVMValueRef z_src,
+ LLVMValueRef zs_dst_ptr,
+ LLVMValueRef face,
+ LLVMValueRef counter)
+{
+ struct lp_build_context bld;
+ struct lp_build_context sbld;
+ struct lp_type s_type;
+ LLVMValueRef zs_dst, z_dst = NULL;
+ LLVMValueRef stencil_vals = NULL;
+ LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
+ LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
+ LLVMValueRef orig_mask = mask->value;
+
+ /* Sanity checking */
+ {
+ const unsigned z_swizzle = format_desc->swizzle[0];
+ const unsigned s_swizzle = format_desc->swizzle[1];
+
+ assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
+ s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
+
+ assert(depth->enabled || stencil[0].enabled);
+
+ assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
+ assert(format_desc->block.width == 1);
+ assert(format_desc->block.height == 1);
+
+ if (stencil[0].enabled) {
+ assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED ||
+ format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM);
+ }
+
+ assert(z_swizzle < 4);
+ assert(format_desc->block.bits == type.width);
+ if (type.floating) {
+ assert(z_swizzle == 0);
+ assert(format_desc->channel[z_swizzle].type ==
+ UTIL_FORMAT_TYPE_FLOAT);
+ assert(format_desc->channel[z_swizzle].size ==
+ format_desc->block.bits);
+ }
+ else {
+ assert(format_desc->channel[z_swizzle].type ==
+ UTIL_FORMAT_TYPE_UNSIGNED);
+ assert(format_desc->channel[z_swizzle].normalized);
+ assert(!type.fixed);
+ assert(!type.sign);
+ assert(type.norm);
+ }
+ }
+
+
+ /* Setup build context for Z vals */
+ lp_build_context_init(&bld, builder, type);
+
+ /* Setup build context for stencil vals */
+ s_type = lp_type_int_vec(type.width);
+ lp_build_context_init(&sbld, builder, s_type);
+
+ /* Load current z/stencil value from z/stencil buffer */
+ zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
+
+ lp_build_name(zs_dst, "zsbufval");
+
+
+ /* Compute and apply the Z/stencil bitmasks and shifts.
+ */
+ {
+ unsigned z_shift, z_mask;
+ unsigned s_shift, s_mask;
+
+ if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) {
+ if (z_shift) {
+ LLVMValueRef shift = lp_build_const_int_vec(type, z_shift);
+ z_src = LLVMBuildLShr(builder, z_src, shift, "");
+ }
+
+ if (z_mask != 0xffffffff) {
+ LLVMValueRef mask = lp_build_const_int_vec(type, z_mask);
+ z_src = LLVMBuildAnd(builder, z_src, mask, "");
+ z_dst = LLVMBuildAnd(builder, zs_dst, mask, "");
+ z_bitmask = mask; /* used below */
+ }
+ else {
+ z_dst = zs_dst;
+ }
+
+ lp_build_name(z_dst, "zsbuf.z");
+ }
+
+ if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
+ if (s_shift) {
+ LLVMValueRef shift = lp_build_const_int_vec(type, s_shift);
+ stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
+ stencil_shift = shift; /* used below */
+ }
+ else {
+ stencil_vals = zs_dst;
+ }
+
+ if (s_mask != 0xffffffff) {
+ LLVMValueRef mask = lp_build_const_int_vec(type, s_mask);
+ stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
+ }
+
+ lp_build_name(stencil_vals, "stencil");
+ }
+ }
+
+
+ if (stencil[0].enabled) {
+ /* convert scalar stencil refs into vectors */
+ stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]);
+ stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]);
+
+ s_pass_mask = lp_build_stencil_test(&sbld, stencil,
+ stencil_refs, stencil_vals, face);
+
+ /* apply stencil-fail operator */
+ {
+ LLVMValueRef s_fail_mask = lp_build_andc(&bld, orig_mask, s_pass_mask);
+ stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP,
+ stencil_refs, stencil_vals,
+ s_fail_mask, face);
+ }
+ }
+
+ if (depth->enabled) {
+ /* compare src Z to dst Z, returning 'pass' mask */
+ z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst);
+
+ if (!stencil[0].enabled) {
+ /* We can potentially skip all remaining operations here, but only
+ * if stencil is disabled because we still need to update the stencil
+ * buffer values. Don't need to update Z buffer values.
+ */
+ lp_build_mask_update(mask, z_pass);
+ }
+
+ if (depth->writemask) {
+ LLVMValueRef zselectmask = mask->value;
+
+ /* mask off bits that failed Z test */
+ zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, "");
+
+ /* mask off bits that failed stencil test */
+ if (s_pass_mask) {
+ zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
+ }
+
+ /* if combined Z/stencil format, mask off the stencil bits */
+ if (z_bitmask) {
+ zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, "");
+ }
+
+ /* Mix the old and new Z buffer values.
+ * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
+ */
+ z_dst = lp_build_select(&bld, zselectmask, z_src, z_dst);
+ }
+
+ if (stencil[0].enabled) {
+ /* update stencil buffer values according to z pass/fail result */
+ LLVMValueRef z_fail_mask, z_pass_mask;
+
+ /* apply Z-fail operator */
+ z_fail_mask = lp_build_andc(&bld, orig_mask, z_pass);
+ stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP,
+ stencil_refs, stencil_vals,
+ z_fail_mask, face);
+
+ /* apply Z-pass operator */
+ z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, "");
+ stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
+ stencil_refs, stencil_vals,
+ z_pass_mask, face);
+ }
+ }
+ else {
+ /* No depth test: apply Z-pass operator to stencil buffer values which
+ * passed the stencil test.
+ */
+ s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, "");
+ stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
+ stencil_refs, stencil_vals,
+ s_pass_mask, face);
+ }
+
+ /* The Z bits are already in the right place but we may need to shift the
+ * stencil bits before ORing Z with Stencil to make the final pixel value.
+ */
+ if (stencil_vals && stencil_shift)
+ stencil_vals = LLVMBuildShl(bld.builder, stencil_vals,
+ stencil_shift, "");
+
+ /* Finally, merge/store the z/stencil values */
+ if ((depth->enabled && depth->writemask) ||
+ (stencil[0].enabled && stencil[0].writemask)) {
+
+ if (z_dst && stencil_vals)
+ zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, "");
+ else if (z_dst)
+ zs_dst = z_dst;
+ else
+ zs_dst = stencil_vals;
+
+ LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
+ }
+
+ if (s_pass_mask)
+ lp_build_mask_update(mask, s_pass_mask);
+
+ if (depth->enabled && stencil[0].enabled)
+ lp_build_mask_update(mask, z_pass);
+
+ if (counter)
+ lp_build_occlusion_count(builder, type, mask->value, counter);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
new file mode 100644
index 0000000000..e257a5bd7d
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -0,0 +1,67 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * Depth/stencil testing to LLVM IR translation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_DEPTH_H
+#define LP_BLD_DEPTH_H
+
+
+#include "gallivm/lp_bld.h"
+
+
+struct pipe_depth_state;
+struct util_format_description;
+struct lp_type;
+struct lp_build_mask_context;
+
+
+struct lp_type
+lp_depth_type(const struct util_format_description *format_desc,
+ unsigned length);
+
+
+void
+lp_build_depth_stencil_test(LLVMBuilderRef builder,
+ const struct pipe_depth_state *depth,
+ const struct pipe_stencil_state stencil[2],
+ struct lp_type type,
+ const struct util_format_description *format_desc,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef stencil_refs[2],
+ LLVMValueRef zs_src,
+ LLVMValueRef zs_dst_ptr,
+ LLVMValueRef facing,
+ LLVMValueRef counter);
+
+
+#endif /* !LP_BLD_DEPTH_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
new file mode 100644
index 0000000000..90d2b26f9f
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -0,0 +1,388 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Position and shader input interpolation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_shader_tokens.h"
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "tgsi/tgsi_scan.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_swizzle.h"
+#include "lp_bld_interp.h"
+
+
+/*
+ * The shader JIT function operates on blocks of quads.
+ * Each block has 2x2 quads and each quad has 2x2 pixels.
+ *
+ * We iterate over the quads in order 0, 1, 2, 3:
+ *
+ * #################
+ * # | # | #
+ * #---0---#---1---#
+ * # | # | #
+ * #################
+ * # | # | #
+ * #---2---#---3---#
+ * # | # | #
+ * #################
+ *
+ * Within each quad, we have four pixels which are represented in SOA
+ * order:
+ *
+ * #########
+ * # 0 | 1 #
+ * #---+---#
+ * # 2 | 3 #
+ * #########
+ *
+ * So the green channel (for example) of the four pixels is stored in
+ * a single vector register: {g0, g1, g2, g3}.
+ */
+
+
+static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
+static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
+
+
+static void
+attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
+{
+ if(attrib == 0)
+ lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
+ else
+ lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
+}
+
+
+/**
+ * Initialize the bld->a0, dadx, dady fields. This involves fetching
+ * those values from the arrays which are passed into the JIT function.
+ */
+static void
+coeffs_init(struct lp_build_interp_soa_context *bld,
+ LLVMValueRef a0_ptr,
+ LLVMValueRef dadx_ptr,
+ LLVMValueRef dady_ptr)
+{
+ struct lp_build_context *coeff_bld = &bld->coeff_bld;
+ LLVMBuilderRef builder = coeff_bld->builder;
+ LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type);
+ LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0);
+ LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
+ LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
+ LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
+ LLVMValueRef oow = NULL;
+ unsigned attrib;
+ unsigned chan;
+
+ /* TODO: Use more vector operations */
+
+ for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
+ const unsigned mask = bld->mask[attrib];
+ const unsigned interp = bld->interp[attrib];
+ for (chan = 0; chan < NUM_CHANNELS; ++chan) {
+ if (mask & (1 << chan)) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
+ LLVMValueRef a0 = zero;
+ LLVMValueRef dadx = zero;
+ LLVMValueRef dady = zero;
+ LLVMValueRef dadxy = zero;
+ LLVMValueRef dadq;
+ LLVMValueRef dadq2;
+ LLVMValueRef a;
+
+ switch (interp) {
+ case LP_INTERP_PERSPECTIVE:
+ /* fall-through */
+
+ case LP_INTERP_LINEAR:
+ if (attrib == 0 && chan == 0) {
+ dadxy = dadx = one;
+ }
+ else if (attrib == 0 && chan == 1) {
+ dadxy = dady = one;
+ }
+ else {
+ dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
+ dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
+ dadxy = LLVMBuildAdd(builder, dadx, dady, "");
+ attrib_name(dadx, attrib, chan, ".dadx");
+ attrib_name(dady, attrib, chan, ".dady");
+ attrib_name(dadxy, attrib, chan, ".dadxy");
+ }
+ /* fall-through */
+
+ case LP_INTERP_CONSTANT:
+ case LP_INTERP_FACING:
+ a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
+ attrib_name(a0, attrib, chan, ".a0");
+ break;
+
+ case LP_INTERP_POSITION:
+ /* Nothing to do as the position coeffs are already setup in slot 0 */
+ continue;
+
+ default:
+ assert(0);
+ break;
+ }
+
+ /*
+ * dadq = {0, dadx, dady, dadx + dady}
+ */
+
+ dadq = coeff_bld->undef;
+ dadq = LLVMBuildInsertElement(builder, dadq, zero, i0, "");
+ dadq = LLVMBuildInsertElement(builder, dadq, dadx, i1, "");
+ dadq = LLVMBuildInsertElement(builder, dadq, dady, i2, "");
+ dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, "");
+
+ /*
+ * dadq2 = 2 * dq
+ */
+
+ dadq2 = LLVMBuildAdd(builder, dadq, dadq, "");
+
+ /*
+ * a = a0 + x * dadx + y * dady
+ */
+
+ if (attrib == 0 && chan == 0) {
+ a = bld->x;
+ }
+ else if (attrib == 0 && chan == 1) {
+ a = bld->y;
+ }
+ else {
+ a = a0;
+ if (interp != LP_INTERP_CONSTANT &&
+ interp != LP_INTERP_FACING) {
+ a = LLVMBuildAdd(builder, a,
+ LLVMBuildMul(builder, bld->x, dadx, ""),
+ "");
+ a = LLVMBuildAdd(builder, a,
+ LLVMBuildMul(builder, bld->y, dady, ""),
+ "");
+ }
+ }
+
+ /*
+ * a = {a, a, a, a}
+ */
+
+ a = lp_build_broadcast(builder, coeff_bld->vec_type, a);
+
+ /*
+ * Compute the attrib values on the upper-left corner of each quad.
+ */
+
+ a = LLVMBuildAdd(builder, a, dadq2, "");
+
+ /*
+ * a *= 1 / w
+ * dadq *= 1 / w
+ */
+
+ if (interp == LP_INTERP_PERSPECTIVE) {
+ LLVMValueRef w = bld->a[0][3];
+ assert(attrib != 0);
+ assert(bld->mask[0] & TGSI_WRITEMASK_W);
+ if (!oow) {
+ oow = lp_build_rcp(coeff_bld, w);
+ lp_build_name(oow, "oow");
+ }
+ a = lp_build_mul(coeff_bld, a, oow);
+ dadq = lp_build_mul(coeff_bld, dadq, oow);
+ }
+
+ attrib_name(a, attrib, chan, ".a");
+ attrib_name(dadq, attrib, chan, ".dadq");
+
+ bld->a [attrib][chan] = a;
+ bld->dadq[attrib][chan] = dadq;
+ }
+ }
+ }
+}
+
+
+/**
+ * Increment the shader input attribute values.
+ * This is called when we move from one quad to the next.
+ */
+static void
+attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
+{
+ struct lp_build_context *coeff_bld = &bld->coeff_bld;
+ LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index);
+ unsigned attrib;
+ unsigned chan;
+
+ assert(quad_index < 4);
+
+ for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
+ const unsigned mask = bld->mask[attrib];
+ const unsigned interp = bld->interp[attrib];
+ for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+ if(mask & (1 << chan)) {
+ LLVMValueRef a = coeff_bld->undef;
+ if (interp == LP_INTERP_CONSTANT ||
+ interp == LP_INTERP_FACING) {
+ a = bld->a[attrib][chan];
+ }
+ else if (interp == LP_INTERP_POSITION) {
+ assert(attrib > 0);
+ a = bld->attribs[0][chan];
+ }
+ else {
+ a = bld->a[attrib][chan];
+
+ /*
+ * Broadcast the attribute value for this quad into all elements
+ */
+
+ a = LLVMBuildShuffleVector(coeff_bld->builder,
+ a, coeff_bld->undef, shuffle, "");
+
+ /*
+ * Add the derivatives
+ */
+
+ a = lp_build_add(coeff_bld, a, bld->dadq[attrib][chan]);
+
+ attrib_name(a, attrib, chan, "");
+ }
+ bld->attribs[attrib][chan] = a;
+ }
+ }
+ }
+}
+
+
+/**
+ * Generate the position vectors.
+ *
+ * Parameter x0, y0 are the integer values with upper left coordinates.
+ */
+static void
+pos_init(struct lp_build_interp_soa_context *bld,
+ LLVMValueRef x0,
+ LLVMValueRef y0)
+{
+ struct lp_build_context *coeff_bld = &bld->coeff_bld;
+
+ bld->x = LLVMBuildSIToFP(coeff_bld->builder, x0, coeff_bld->elem_type, "");
+ bld->y = LLVMBuildSIToFP(coeff_bld->builder, y0, coeff_bld->elem_type, "");
+}
+
+
+/**
+ * Initialize fragment shader input attribute info.
+ */
+void
+lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
+ unsigned num_inputs,
+ const struct lp_shader_input *inputs,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef a0_ptr,
+ LLVMValueRef dadx_ptr,
+ LLVMValueRef dady_ptr,
+ LLVMValueRef x0,
+ LLVMValueRef y0)
+{
+ struct lp_type coeff_type;
+ unsigned attrib;
+ unsigned chan;
+
+ memset(bld, 0, sizeof *bld);
+
+ memset(&coeff_type, 0, sizeof coeff_type);
+ coeff_type.floating = TRUE;
+ coeff_type.sign = TRUE;
+ coeff_type.width = 32;
+ coeff_type.length = QUAD_SIZE;
+
+ /* XXX: we don't support interpolating into any other types */
+ assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
+
+ lp_build_context_init(&bld->coeff_bld, builder, coeff_type);
+
+ /* For convenience */
+ bld->pos = bld->attribs[0];
+ bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];
+
+ /* Position */
+ bld->num_attribs = 1;
+ bld->mask[0] = TGSI_WRITEMASK_XYZW;
+ bld->interp[0] = LP_INTERP_LINEAR;
+
+ /* Inputs */
+ for (attrib = 0; attrib < num_inputs; ++attrib) {
+ bld->mask[1 + attrib] = inputs[attrib].usage_mask;
+ bld->interp[1 + attrib] = inputs[attrib].interp;
+ }
+ bld->num_attribs = 1 + num_inputs;
+
+ /* Ensure all masked out input channels have a valid value */
+ for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
+ for (chan = 0; chan < NUM_CHANNELS; ++chan) {
+ bld->attribs[attrib][chan] = bld->coeff_bld.undef;
+ }
+ }
+
+ pos_init(bld, x0, y0);
+
+ coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
+
+ attribs_update(bld, 0);
+}
+
+
+/**
+ * Advance the position and inputs to the given quad within the block.
+ */
+void
+lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
+ int quad_index)
+{
+ assert(quad_index < 4);
+
+ attribs_update(bld, quad_index);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
new file mode 100644
index 0000000000..2905513301
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -0,0 +1,94 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Position and shader input interpolation.
+ *
+ * Special attention is given to the interpolation of side by side quads.
+ * Multiplications are made only for the first quad. Interpolation of
+ * inputs for posterior quads are done exclusively with additions, and
+ * perspective divide if necessary.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_BLD_INTERP_H
+#define LP_BLD_INTERP_H
+
+
+#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_type.h"
+
+#include "tgsi/tgsi_exec.h"
+
+#include "lp_setup.h"
+
+
+struct lp_build_interp_soa_context
+{
+ /* QUAD_SIZE x float */
+ struct lp_build_context coeff_bld;
+
+ unsigned num_attribs;
+ unsigned mask[1 + PIPE_MAX_SHADER_INPUTS]; /**< TGSI_WRITE_MASK_x */
+ enum lp_interp interp[1 + PIPE_MAX_SHADER_INPUTS];
+
+ LLVMValueRef x;
+ LLVMValueRef y;
+
+ LLVMValueRef a [1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ LLVMValueRef dadq[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+ LLVMValueRef attribs[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+
+ /*
+ * Convenience pointers. Callers may access this one.
+ */
+ const LLVMValueRef *pos;
+ const LLVMValueRef (*inputs)[NUM_CHANNELS];
+};
+
+
+void
+lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
+ unsigned num_inputs,
+ const struct lp_shader_input *inputs,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef a0_ptr,
+ LLVMValueRef dadx_ptr,
+ LLVMValueRef dady_ptr,
+ LLVMValueRef x,
+ LLVMValueRef y);
+
+void
+lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
+ int quad_index);
+
+
+#endif /* LP_BLD_INTERP_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c
new file mode 100644
index 0000000000..3e8c410925
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_clear.c
@@ -0,0 +1,58 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * Copyright 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Brian Paul
+ * Michel Dänzer
+ */
+
+
+#include "pipe/p_defines.h"
+#include "lp_clear.h"
+#include "lp_context.h"
+#include "lp_setup.h"
+
+
+/**
+ * Clear the given buffers to the specified values.
+ * No masking, no scissor (clear entire buffer).
+ */
+void
+llvmpipe_clear(struct pipe_context *pipe,
+ unsigned buffers,
+ const float *rgba,
+ double depth,
+ unsigned stencil)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if (llvmpipe->no_rast)
+ return;
+
+ lp_setup_clear( llvmpipe->setup, rgba, depth, stencil, buffers );
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_clear.h b/src/gallium/drivers/llvmpipe/lp_clear.h
new file mode 100644
index 0000000000..6d4ffccdf4
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_clear.h
@@ -0,0 +1,43 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Brian Paul
+ */
+
+#ifndef LP_CLEAR_H
+#define LP_CLEAR_H
+
+#include "pipe/p_state.h"
+struct pipe_context;
+
+extern void
+llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
+ double depth, unsigned stencil);
+
+
+#endif /* LP_CLEAR_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
new file mode 100644
index 0000000000..3db4f12ebb
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -0,0 +1,163 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * Copyright 2008 VMware, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+#include "lp_clear.h"
+#include "lp_context.h"
+#include "lp_flush.h"
+#include "lp_perf.h"
+#include "lp_state.h"
+#include "lp_surface.h"
+#include "lp_query.h"
+#include "lp_setup.h"
+
+static void llvmpipe_destroy( struct pipe_context *pipe )
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
+ uint i, j;
+
+ lp_print_counters();
+
+ /* This will also destroy llvmpipe->setup:
+ */
+ if (llvmpipe->draw)
+ draw_destroy( llvmpipe->draw );
+
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+ pipe_surface_reference(&llvmpipe->framebuffer.cbufs[i], NULL);
+ }
+
+ pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL);
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ pipe_sampler_view_reference(&llvmpipe->fragment_sampler_views[i], NULL);
+ }
+
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ pipe_sampler_view_reference(&llvmpipe->vertex_sampler_views[i], NULL);
+ }
+
+ for (i = 0; i < Elements(llvmpipe->constants); i++) {
+ for (j = 0; j < Elements(llvmpipe->constants[i]); j++) {
+ pipe_resource_reference(&llvmpipe->constants[i][j], NULL);
+ }
+ }
+
+ align_free( llvmpipe );
+}
+
+
+struct pipe_context *
+llvmpipe_create_context( struct pipe_screen *screen, void *priv )
+{
+ struct llvmpipe_context *llvmpipe;
+
+ llvmpipe = align_malloc(sizeof(struct llvmpipe_context), 16);
+ if (!llvmpipe)
+ return NULL;
+
+ util_init_math();
+
+ memset(llvmpipe, 0, sizeof *llvmpipe);
+
+ make_empty_list(&llvmpipe->fs_variants_list);
+
+ llvmpipe->pipe.winsys = screen->winsys;
+ llvmpipe->pipe.screen = screen;
+ llvmpipe->pipe.priv = priv;
+
+ /* Init the pipe context methods */
+ llvmpipe->pipe.destroy = llvmpipe_destroy;
+ llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state;
+ llvmpipe->pipe.clear = llvmpipe_clear;
+ llvmpipe->pipe.flush = llvmpipe_flush;
+
+ llvmpipe_init_blend_funcs(llvmpipe);
+ llvmpipe_init_clip_funcs(llvmpipe);
+ llvmpipe_init_draw_funcs(llvmpipe);
+ llvmpipe_init_sampler_funcs(llvmpipe);
+ llvmpipe_init_query_funcs( llvmpipe );
+ llvmpipe_init_vertex_funcs(llvmpipe);
+ llvmpipe_init_so_funcs(llvmpipe);
+ llvmpipe_init_fs_funcs(llvmpipe);
+ llvmpipe_init_vs_funcs(llvmpipe);
+ llvmpipe_init_gs_funcs(llvmpipe);
+ llvmpipe_init_rasterizer_funcs(llvmpipe);
+ llvmpipe_init_context_resource_funcs( &llvmpipe->pipe );
+ llvmpipe_init_surface_functions(llvmpipe);
+
+ /*
+ * Create drawing context and plug our rendering stage into it.
+ */
+ llvmpipe->draw = draw_create(&llvmpipe->pipe);
+ if (!llvmpipe->draw)
+ goto fail;
+
+ /* FIXME: devise alternative to draw_texture_samplers */
+
+ if (debug_get_bool_option( "LP_NO_RAST", FALSE ))
+ llvmpipe->no_rast = TRUE;
+
+ llvmpipe->setup = lp_setup_create( &llvmpipe->pipe,
+ llvmpipe->draw );
+ if (!llvmpipe->setup)
+ goto fail;
+
+ /* plug in AA line/point stages */
+ draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe);
+ draw_install_aapoint_stage(llvmpipe->draw, &llvmpipe->pipe);
+ draw_install_pstipple_stage(llvmpipe->draw, &llvmpipe->pipe);
+
+ /* convert points and lines into triangles: */
+ draw_wide_point_threshold(llvmpipe->draw, 0.0);
+ draw_wide_line_threshold(llvmpipe->draw, 0.0);
+
+#if USE_DRAW_STAGE_PSTIPPLE
+ /* Do polygon stipple w/ texture map + frag prog? */
+ draw_install_pstipple_stage(llvmpipe->draw, &llvmpipe->pipe);
+#endif
+
+ lp_reset_counters();
+
+ return &llvmpipe->pipe;
+
+ fail:
+ llvmpipe_destroy(&llvmpipe->pipe);
+ return NULL;
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
new file mode 100644
index 0000000000..986e604ce7
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -0,0 +1,132 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef LP_CONTEXT_H
+#define LP_CONTEXT_H
+
+#include "pipe/p_context.h"
+
+#include "draw/draw_vertex.h"
+
+#include "lp_tex_sample.h"
+#include "lp_jit.h"
+#include "lp_setup.h"
+#include "lp_state_fs.h"
+
+
+struct llvmpipe_vbuf_render;
+struct draw_context;
+struct draw_stage;
+struct lp_fragment_shader;
+struct lp_vertex_shader;
+struct lp_blend_state;
+struct lp_setup_context;
+struct lp_velems_state;
+
+struct llvmpipe_context {
+ struct pipe_context pipe; /**< base class */
+
+ /** Constant state objects */
+ const struct pipe_blend_state *blend;
+ const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS];
+ const struct pipe_depth_stencil_alpha_state *depth_stencil;
+ const struct pipe_rasterizer_state *rasterizer;
+ struct lp_fragment_shader *fs;
+ const struct lp_vertex_shader *vs;
+ const struct lp_geometry_shader *gs;
+ const struct lp_velems_state *velems;
+ const struct lp_so_state *so;
+
+ /** Other rendering state */
+ struct pipe_blend_color blend_color;
+ struct pipe_stencil_ref stencil_ref;
+ struct pipe_clip_state clip;
+ struct pipe_resource *constants[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_poly_stipple poly_stipple;
+ struct pipe_scissor_state scissor;
+ struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
+ struct pipe_sampler_view *vertex_sampler_views[PIPE_MAX_VERTEX_SAMPLERS];
+ struct pipe_viewport_state viewport;
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ struct {
+ struct llvmpipe_resource *buffer[PIPE_MAX_SO_BUFFERS];
+ int offset[PIPE_MAX_SO_BUFFERS];
+ int so_count[PIPE_MAX_SO_BUFFERS];
+ int num_buffers;
+ } so_target;
+
+ unsigned num_samplers;
+ unsigned num_fragment_sampler_views;
+ unsigned num_vertex_samplers;
+ unsigned num_vertex_sampler_views;
+ unsigned num_vertex_buffers;
+
+ unsigned dirty; /**< Mask of LP_NEW_x flags */
+
+ int active_query_count;
+
+ /** Mapped vertex buffers */
+ ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS];
+
+ /** Vertex format */
+ struct vertex_info vertex_info;
+
+ /** Fragment shader input interpolation info */
+ unsigned num_inputs;
+ struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
+
+ /** The tiling engine */
+ struct lp_setup_context *setup;
+
+ /** The primitive drawing context */
+ struct draw_context *draw;
+
+ unsigned tex_timestamp;
+ boolean no_rast;
+
+ struct lp_fs_variant_list_item fs_variants_list;
+ unsigned nr_fs_variants;
+};
+
+
+struct pipe_context *
+llvmpipe_create_context( struct pipe_screen *screen, void *priv );
+
+
+static INLINE struct llvmpipe_context *
+llvmpipe_context( struct pipe_context *pipe )
+{
+ return (struct llvmpipe_context *)pipe;
+}
+
+#endif /* LP_CONTEXT_H */
+
diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h
new file mode 100644
index 0000000000..92fb2b3ee5
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_debug.h
@@ -0,0 +1,73 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_DEBUG_H
+#define LP_DEBUG_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+
+extern void
+st_print_current(void);
+
+
+#define DEBUG_PIPE 0x1
+#define DEBUG_TGSI 0x2
+#define DEBUG_TEX 0x4
+#define DEBUG_SETUP 0x10
+#define DEBUG_RAST 0x20
+#define DEBUG_QUERY 0x40
+#define DEBUG_SCREEN 0x80
+#define DEBUG_SHOW_TILES 0x200
+#define DEBUG_SHOW_SUBTILES 0x400
+#define DEBUG_COUNTERS 0x800
+
+
+#ifdef DEBUG
+extern int LP_DEBUG;
+#else
+#define LP_DEBUG 0
+#endif
+
+void st_debug_init( void );
+
+static INLINE void
+LP_DBG( unsigned flag, const char *fmt, ... )
+{
+ if (LP_DEBUG & flag)
+ {
+ va_list args;
+
+ va_start( args, fmt );
+ debug_vprintf( fmt, args );
+ va_end( args );
+ }
+}
+
+
+#endif /* LP_DEBUG_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
new file mode 100644
index 0000000000..98780d7631
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -0,0 +1,138 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Brian Paul
+ * Keith Whitwell
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_context.h"
+#include "util/u_prim.h"
+
+#include "lp_context.h"
+#include "lp_state.h"
+
+#include "draw/draw_context.h"
+
+
+
+/**
+ * Draw vertex arrays, with optional indexing.
+ * Basically, map the vertex buffers (and drawing surfaces), then hand off
+ * the drawing to the 'draw' module.
+ */
+static void
+llvmpipe_draw_range_elements(struct pipe_context *pipe,
+ struct pipe_resource *indexBuffer,
+ unsigned indexSize,
+ int indexBias,
+ unsigned min_index,
+ unsigned max_index,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct llvmpipe_context *lp = llvmpipe_context(pipe);
+ struct draw_context *draw = lp->draw;
+ unsigned i;
+
+ if (lp->dirty)
+ llvmpipe_update_derived( lp );
+
+ /*
+ * Map vertex buffers
+ */
+ for (i = 0; i < lp->num_vertex_buffers; i++) {
+ void *buf = llvmpipe_resource_data(lp->vertex_buffer[i].buffer);
+ draw_set_mapped_vertex_buffer(draw, i, buf);
+ }
+
+ /* Map index buffer, if present */
+ if (indexBuffer) {
+ void *mapped_indexes = llvmpipe_resource_data(indexBuffer);
+ draw_set_mapped_element_buffer_range(draw, indexSize, indexBias,
+ min_index,
+ max_index,
+ mapped_indexes);
+ }
+ else {
+ /* no index/element buffer */
+ draw_set_mapped_element_buffer_range(draw, 0, 0, start,
+ start + count - 1, NULL);
+ }
+
+ /* draw! */
+ draw_arrays(draw, mode, start, count);
+
+ /*
+ * unmap vertex/index buffers
+ */
+ for (i = 0; i < lp->num_vertex_buffers; i++) {
+ draw_set_mapped_vertex_buffer(draw, i, NULL);
+ }
+ if (indexBuffer) {
+ draw_set_mapped_element_buffer(draw, 0, 0, NULL);
+ }
+
+ /*
+ * TODO: Flush only when a user vertex/index buffer is present
+ * (or even better, modify draw module to do this
+ * internally when this condition is seen?)
+ */
+ draw_flush(draw);
+}
+
+
+static void
+llvmpipe_draw_elements(struct pipe_context *pipe,
+ struct pipe_resource *indexBuffer,
+ unsigned indexSize,
+ int indexBias,
+ unsigned mode, unsigned start, unsigned count)
+{
+ llvmpipe_draw_range_elements( pipe, indexBuffer,
+ indexSize, indexBias,
+ 0, 0xffffffff,
+ mode, start, count );
+}
+
+
+static void
+llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
+ unsigned start, unsigned count)
+{
+ llvmpipe_draw_elements(pipe, NULL, 0, 0, mode, start, count);
+}
+
+
+void
+llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays;
+ llvmpipe->pipe.draw_elements = llvmpipe_draw_elements;
+ llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c b/src/gallium/drivers/llvmpipe/lp_fence.c
new file mode 100644
index 0000000000..75d8d2b825
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_fence.c
@@ -0,0 +1,152 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "lp_debug.h"
+#include "lp_fence.h"
+
+
+/**
+ * Create a new fence object.
+ *
+ * The rank will be the number of bins in the scene. Whenever a rendering
+ * thread hits a fence command, it'll increment the fence counter. When
+ * the counter == the rank, the fence is finished.
+ *
+ * \param rank the expected finished value of the fence counter.
+ */
+struct lp_fence *
+lp_fence_create(unsigned rank)
+{
+ struct lp_fence *fence = CALLOC_STRUCT(lp_fence);
+
+ pipe_reference_init(&fence->reference, 1);
+
+ pipe_mutex_init(fence->mutex);
+ pipe_condvar_init(fence->signalled);
+
+ fence->rank = rank;
+
+ return fence;
+}
+
+
+/** Destroy a fence. Called when refcount hits zero. */
+static void
+lp_fence_destroy(struct lp_fence *fence)
+{
+ pipe_mutex_destroy(fence->mutex);
+ pipe_condvar_destroy(fence->signalled);
+ FREE(fence);
+}
+
+
+/**
+ * For reference counting.
+ * This is a Gallium API function.
+ */
+static void
+llvmpipe_fence_reference(struct pipe_screen *screen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+ struct lp_fence *old = (struct lp_fence *) *ptr;
+ struct lp_fence *f = (struct lp_fence *) fence;
+
+ if (pipe_reference(&old->reference, &f->reference)) {
+ lp_fence_destroy(old);
+ }
+}
+
+
+/**
+ * Has the fence been executed/finished?
+ * This is a Gallium API function.
+ */
+static int
+llvmpipe_fence_signalled(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flag)
+{
+ struct lp_fence *f = (struct lp_fence *) fence;
+
+ return f->count == f->rank;
+}
+
+
+/**
+ * Wait for the fence to finish.
+ * This is a Gallium API function.
+ */
+static int
+llvmpipe_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence_handle,
+ unsigned flag)
+{
+ struct lp_fence *fence = (struct lp_fence *) fence_handle;
+
+ pipe_mutex_lock(fence->mutex);
+ while (fence->count < fence->rank) {
+ pipe_condvar_wait(fence->signalled, fence->mutex);
+ }
+ pipe_mutex_unlock(fence->mutex);
+
+ return 0;
+}
+
+
+/**
+ * Called by the rendering threads to increment the fence counter.
+ * When the counter == the rank, the fence is finished.
+ */
+void
+lp_fence_signal(struct lp_fence *fence)
+{
+ pipe_mutex_lock(fence->mutex);
+
+ fence->count++;
+ assert(fence->count <= fence->rank);
+
+ LP_DBG(DEBUG_RAST, "%s count=%u rank=%u\n", __FUNCTION__,
+ fence->count, fence->rank);
+
+ pipe_condvar_signal(fence->signalled);
+
+ pipe_mutex_unlock(fence->mutex);
+}
+
+
+void
+llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen)
+{
+ screen->fence_reference = llvmpipe_fence_reference;
+ screen->fence_signalled = llvmpipe_fence_signalled;
+ screen->fence_finish = llvmpipe_fence_finish;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h
new file mode 100644
index 0000000000..d9270f5784
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_fence.h
@@ -0,0 +1,64 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_FENCE_H
+#define LP_FENCE_H
+
+
+#include "os/os_thread.h"
+#include "pipe/p_state.h"
+
+
+struct pipe_screen;
+
+
+struct lp_fence
+{
+ struct pipe_reference reference;
+
+ pipe_mutex mutex;
+ pipe_condvar signalled;
+
+ unsigned rank;
+ unsigned count;
+};
+
+
+struct lp_fence *
+lp_fence_create(unsigned rank);
+
+
+void
+lp_fence_signal(struct lp_fence *fence);
+
+
+void
+llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen);
+
+
+#endif /* LP_FENCE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
new file mode 100644
index 0000000000..0cd288bb73
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -0,0 +1,146 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Author:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_defines.h"
+#include "util/u_string.h"
+#include "draw/draw_context.h"
+#include "lp_flush.h"
+#include "lp_context.h"
+#include "lp_setup.h"
+
+
+/**
+ * \param flags bitmask of PIPE_FLUSH_x flags
+ * \param fence if non-null, returns pointer to a fench which can be waited on
+ */
+void
+llvmpipe_flush( struct pipe_context *pipe,
+ unsigned flags,
+ struct pipe_fence_handle **fence )
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ draw_flush(llvmpipe->draw);
+
+ if (fence) {
+ /* if we're going to flush the setup/rasterization modules, emit
+ * a fence.
+ * XXX this (and the code below) may need fine tuning...
+ */
+ *fence = lp_setup_fence( llvmpipe->setup );
+ }
+
+ /* ask the setup module to flush */
+ lp_setup_flush(llvmpipe->setup, flags);
+
+ /* Enable to dump BMPs of the color/depth buffers each frame */
+ if (0) {
+ if (flags & PIPE_FLUSH_FRAME) {
+ static unsigned frame_no = 1;
+ char filename[256];
+ unsigned i;
+
+ for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) {
+ util_snprintf(filename, sizeof(filename), "cbuf%u_%u", i, frame_no);
+ debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.cbufs[0]);
+ }
+
+ if (0) {
+ util_snprintf(filename, sizeof(filename), "zsbuf_%u", frame_no);
+ debug_dump_surface_bmp(&llvmpipe->pipe, filename, llvmpipe->framebuffer.zsbuf);
+ }
+
+ ++frame_no;
+ }
+ }
+}
+
+
+/**
+ * Flush context if necessary.
+ *
+ * Returns FALSE if it would have block, but do_not_block was set, TRUE
+ * otherwise.
+ *
+ * TODO: move this logic to an auxiliary library?
+ */
+boolean
+llvmpipe_flush_resource(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned face,
+ unsigned level,
+ unsigned flush_flags,
+ boolean read_only,
+ boolean cpu_access,
+ boolean do_not_block)
+{
+ unsigned referenced;
+
+ referenced = pipe->is_resource_referenced(pipe, resource, face, level);
+
+ if ((referenced & PIPE_REFERENCED_FOR_WRITE) ||
+ ((referenced & PIPE_REFERENCED_FOR_READ) && !read_only)) {
+
+ if (cpu_access) {
+ /*
+ * Flush and wait.
+ */
+
+ struct pipe_fence_handle *fence = NULL;
+
+ if (do_not_block)
+ return FALSE;
+
+ /*
+ * Do the unswizzling in parallel.
+ *
+ * XXX: Don't abuse the PIPE_FLUSH_FRAME flag for this.
+ */
+ flush_flags |= PIPE_FLUSH_FRAME;
+
+ llvmpipe_flush(pipe, flush_flags, &fence);
+
+ if (fence) {
+ pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_reference(pipe->screen, &fence, NULL);
+ }
+ } else {
+ /*
+ * Just flush.
+ */
+
+ llvmpipe_flush(pipe, flush_flags, NULL);
+ }
+ }
+
+ return TRUE;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.h b/src/gallium/drivers/llvmpipe/lp_flush.h
new file mode 100644
index 0000000000..7b605681a9
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_flush.h
@@ -0,0 +1,50 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_FLUSH_H
+#define LP_FLUSH_H
+
+#include "pipe/p_compiler.h"
+
+struct pipe_context;
+struct pipe_fence_handle;
+
+void
+llvmpipe_flush(struct pipe_context *pipe, unsigned flags,
+ struct pipe_fence_handle **fence);
+
+boolean
+llvmpipe_flush_resource(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ unsigned face,
+ unsigned level,
+ unsigned flush_flags,
+ boolean read_only,
+ boolean cpu_access,
+ boolean do_not_block);
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
new file mode 100644
index 0000000000..23aa34ddec
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -0,0 +1,207 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * C - JIT interfaces
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include <llvm-c/Transforms/Scalar.h>
+
+#include "util/u_memory.h"
+#include "util/u_cpu_detect.h"
+#include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_debug.h"
+#include "lp_screen.h"
+#include "gallivm/lp_bld_intr.h"
+#include "lp_jit.h"
+
+
+static void
+lp_jit_init_globals(struct llvmpipe_screen *screen)
+{
+ LLVMTypeRef texture_type;
+
+ /* struct lp_jit_texture */
+ {
+ LLVMTypeRef elem_types[LP_JIT_TEXTURE_NUM_FIELDS];
+
+ elem_types[LP_JIT_TEXTURE_WIDTH] = LLVMInt32Type();
+ elem_types[LP_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
+ elem_types[LP_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
+ elem_types[LP_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
+ elem_types[LP_JIT_TEXTURE_ROW_STRIDE] =
+ LLVMArrayType(LLVMInt32Type(), LP_MAX_TEXTURE_LEVELS);
+ elem_types[LP_JIT_TEXTURE_IMG_STRIDE] =
+ LLVMArrayType(LLVMInt32Type(), LP_MAX_TEXTURE_LEVELS);
+ elem_types[LP_JIT_TEXTURE_DATA] =
+ LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
+ LP_MAX_TEXTURE_LEVELS);
+
+ texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, width,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_WIDTH);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, height,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_HEIGHT);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, depth,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_DEPTH);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, last_level,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_LAST_LEVEL);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, row_stride,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_ROW_STRIDE);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, img_stride,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_IMG_STRIDE);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_texture, data,
+ screen->target, texture_type,
+ LP_JIT_TEXTURE_DATA);
+ LP_CHECK_STRUCT_SIZE(struct lp_jit_texture,
+ screen->target, texture_type);
+
+ LLVMAddTypeName(screen->module, "texture", texture_type);
+ }
+
+ /* struct lp_jit_context */
+ {
+ LLVMTypeRef elem_types[LP_JIT_CTX_COUNT];
+ LLVMTypeRef context_type;
+
+ elem_types[LP_JIT_CTX_CONSTANTS] = LLVMPointerType(LLVMFloatType(), 0);
+ elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatType();
+ elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] = LLVMInt32Type();
+ elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32Type();
+ elem_types[LP_JIT_CTX_SCISSOR_XMIN] = LLVMFloatType();
+ elem_types[LP_JIT_CTX_SCISSOR_YMIN] = LLVMFloatType();
+ elem_types[LP_JIT_CTX_SCISSOR_XMAX] = LLVMFloatType();
+ elem_types[LP_JIT_CTX_SCISSOR_YMAX] = LLVMFloatType();
+ elem_types[LP_JIT_CTX_BLEND_COLOR] = LLVMPointerType(LLVMInt8Type(), 0);
+ elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
+ PIPE_MAX_SAMPLERS);
+
+ context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
+
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants,
+ screen->target, context_type,
+ LP_JIT_CTX_CONSTANTS);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value,
+ screen->target, context_type,
+ LP_JIT_CTX_ALPHA_REF);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_front,
+ screen->target, context_type,
+ LP_JIT_CTX_STENCIL_REF_FRONT);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back,
+ screen->target, context_type,
+ LP_JIT_CTX_STENCIL_REF_BACK);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmin,
+ screen->target, context_type,
+ LP_JIT_CTX_SCISSOR_XMIN);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymin,
+ screen->target, context_type,
+ LP_JIT_CTX_SCISSOR_YMIN);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmax,
+ screen->target, context_type,
+ LP_JIT_CTX_SCISSOR_XMAX);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymax,
+ screen->target, context_type,
+ LP_JIT_CTX_SCISSOR_YMAX);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
+ screen->target, context_type,
+ LP_JIT_CTX_BLEND_COLOR);
+ LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
+ screen->target, context_type,
+ LP_JIT_CTX_TEXTURES);
+ LP_CHECK_STRUCT_SIZE(struct lp_jit_context,
+ screen->target, context_type);
+
+ LLVMAddTypeName(screen->module, "context", context_type);
+
+ screen->context_ptr_type = LLVMPointerType(context_type, 0);
+ }
+
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ LLVMDumpModule(screen->module);
+ }
+}
+
+
+void
+lp_jit_screen_cleanup(struct llvmpipe_screen *screen)
+{
+ if(screen->engine)
+ LLVMDisposeExecutionEngine(screen->engine);
+
+ if(screen->pass)
+ LLVMDisposePassManager(screen->pass);
+}
+
+
+void
+lp_jit_screen_init(struct llvmpipe_screen *screen)
+{
+ lp_build_init();
+
+ screen->module = lp_build_module;
+ screen->provider = lp_build_provider;
+ screen->engine = lp_build_engine;
+ screen->target = lp_build_target;
+
+ screen->pass = LLVMCreateFunctionPassManager(screen->provider);
+ LLVMAddTargetData(screen->target, screen->pass);
+
+ if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
+ /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+ * but there are more on SVN. */
+ /* TODO: Add more passes */
+ LLVMAddCFGSimplificationPass(screen->pass);
+ LLVMAddPromoteMemoryToRegisterPass(screen->pass);
+ LLVMAddConstantPropagationPass(screen->pass);
+ if(util_cpu_caps.has_sse4_1) {
+ /* FIXME: There is a bug in this pass, whereby the combination of fptosi
+ * and sitofp (necessary for trunc/floor/ceil/round implementation)
+ * somehow becomes invalid code.
+ */
+ LLVMAddInstructionCombiningPass(screen->pass);
+ }
+ LLVMAddGVNPass(screen->pass);
+ } else {
+ /* We need at least this pass to prevent the backends to fail in
+ * unexpected ways.
+ */
+ LLVMAddPromoteMemoryToRegisterPass(screen->pass);
+ }
+
+ lp_jit_init_globals(screen);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
new file mode 100644
index 0000000000..8d06e65725
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -0,0 +1,180 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * C - JIT interfaces
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#ifndef LP_JIT_H
+#define LP_JIT_H
+
+
+#include "gallivm/lp_bld_struct.h"
+
+#include "pipe/p_state.h"
+#include "lp_texture.h"
+
+
+struct llvmpipe_screen;
+
+
+struct lp_jit_texture
+{
+ uint32_t width;
+ uint32_t height;
+ uint32_t depth;
+ uint32_t last_level;
+ uint32_t row_stride[LP_MAX_TEXTURE_LEVELS];
+ uint32_t img_stride[LP_MAX_TEXTURE_LEVELS];
+ const void *data[LP_MAX_TEXTURE_LEVELS];
+};
+
+
+enum {
+ LP_JIT_TEXTURE_WIDTH = 0,
+ LP_JIT_TEXTURE_HEIGHT,
+ LP_JIT_TEXTURE_DEPTH,
+ LP_JIT_TEXTURE_LAST_LEVEL,
+ LP_JIT_TEXTURE_ROW_STRIDE,
+ LP_JIT_TEXTURE_IMG_STRIDE,
+ LP_JIT_TEXTURE_DATA,
+ LP_JIT_TEXTURE_NUM_FIELDS /* number of fields above */
+};
+
+
+
+/**
+ * This structure is passed directly to the generated fragment shader.
+ *
+ * It contains the derived state.
+ *
+ * Changes here must be reflected in the lp_jit_context_* macros and
+ * lp_jit_init_types function. Changes to the ordering should be avoided.
+ *
+ * Only use types with a clear size and padding here, in particular prefer the
+ * stdint.h types to the basic integer types.
+ */
+struct lp_jit_context
+{
+ const float *constants;
+
+ float alpha_ref_value;
+
+ uint32_t stencil_ref_front, stencil_ref_back;
+
+ /** floats, not ints */
+ float scissor_xmin, scissor_ymin, scissor_xmax, scissor_ymax;
+
+ /* FIXME: store (also?) in floats */
+ uint8_t *blend_color;
+
+ struct lp_jit_texture textures[PIPE_MAX_SAMPLERS];
+};
+
+
+/**
+ * These enum values must match the position of the fields in the
+ * lp_jit_context struct above.
+ */
+enum {
+ LP_JIT_CTX_CONSTANTS = 0,
+ LP_JIT_CTX_ALPHA_REF,
+ LP_JIT_CTX_STENCIL_REF_FRONT,
+ LP_JIT_CTX_STENCIL_REF_BACK,
+ LP_JIT_CTX_SCISSOR_XMIN,
+ LP_JIT_CTX_SCISSOR_YMIN,
+ LP_JIT_CTX_SCISSOR_XMAX,
+ LP_JIT_CTX_SCISSOR_YMAX,
+ LP_JIT_CTX_BLEND_COLOR,
+ LP_JIT_CTX_TEXTURES,
+ LP_JIT_CTX_COUNT
+};
+
+
+#define lp_jit_context_constants(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_CONSTANTS, "constants")
+
+#define lp_jit_context_alpha_ref_value(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_ALPHA_REF, "alpha_ref_value")
+
+#define lp_jit_context_stencil_ref_front_value(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_STENCIL_REF_FRONT, "stencil_ref_front")
+
+#define lp_jit_context_stencil_ref_back_value(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back")
+
+#define lp_jit_context_scissor_xmin_value(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMIN, "scissor_xmin")
+
+#define lp_jit_context_scissor_ymin_value(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMIN, "scissor_ymin")
+
+#define lp_jit_context_scissor_xmax_value(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMAX, "scissor_xmax")
+
+#define lp_jit_context_scissor_ymax_value(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMAX, "scissor_ymax")
+
+#define lp_jit_context_blend_color(_builder, _ptr) \
+ lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color")
+
+#define lp_jit_context_textures(_builder, _ptr) \
+ lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CTX_TEXTURES, "textures")
+
+
+
+typedef void
+(*lp_jit_frag_func)(const struct lp_jit_context *context,
+ uint32_t x,
+ uint32_t y,
+ float facing,
+ const void *a0,
+ const void *dadx,
+ const void *dady,
+ uint8_t **color,
+ void *depth,
+ const int32_t c1,
+ const int32_t c2,
+ const int32_t c3,
+ const int32_t *step1,
+ const int32_t *step2,
+ const int32_t *step3,
+ uint32_t *counter);
+
+
+void
+lp_jit_screen_cleanup(struct llvmpipe_screen *screen);
+
+
+void
+lp_jit_screen_init(struct llvmpipe_screen *screen);
+
+
+#endif /* LP_JIT_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_limits.h b/src/gallium/drivers/llvmpipe/lp_limits.h
new file mode 100644
index 0000000000..d1c431475d
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_limits.h
@@ -0,0 +1,75 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Implementation limits for LLVMpipe driver.
+ */
+
+#ifndef LP_LIMITS_H
+#define LP_LIMITS_H
+
+
+/**
+ * Tile size (width and height). This needs to be a power of two.
+ */
+#define TILE_ORDER 6
+#define TILE_SIZE (1 << TILE_ORDER)
+
+
+/**
+ * Max texture sizes
+ */
+#define LP_MAX_TEXTURE_2D_LEVELS 13 /* 4K x 4K for now */
+#define LP_MAX_TEXTURE_3D_LEVELS 10 /* 512 x 512 x 512 for now */
+
+
+/** This must be the larger of LP_MAX_TEXTURE_2D/3D_LEVELS */
+#define LP_MAX_TEXTURE_LEVELS LP_MAX_TEXTURE_2D_LEVELS
+
+
+/**
+ * Max drawing surface size is the max texture size
+ */
+#define LP_MAX_HEIGHT (1 << (LP_MAX_TEXTURE_LEVELS - 1))
+#define LP_MAX_WIDTH (1 << (LP_MAX_TEXTURE_LEVELS - 1))
+
+
+#define LP_MAX_THREADS 8
+
+
+/**
+ * Max bytes per scene. This may be replaced by a runtime parameter.
+ */
+#define LP_MAX_SCENE_SIZE (512 * 1024 * 1024)
+
+/**
+ * Max number of shader variants (for all shaders combined,
+ * per context) that will be kept around.
+ */
+#define LP_MAX_SHADER_VARIANTS 1024
+
+#endif /* LP_LIMITS_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.c b/src/gallium/drivers/llvmpipe/lp_perf.c
new file mode 100644
index 0000000000..a316597675
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_perf.c
@@ -0,0 +1,95 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_debug.h"
+#include "lp_debug.h"
+#include "lp_perf.h"
+
+
+
+struct lp_counters lp_count;
+
+
+void
+lp_reset_counters(void)
+{
+ memset(&lp_count, 0, sizeof(lp_count));
+}
+
+
+void
+lp_print_counters(void)
+{
+ if (LP_DEBUG & DEBUG_COUNTERS) {
+ unsigned total_64, total_16, total_4;
+ float p1, p2, p3;
+
+ debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris);
+ debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris);
+
+ total_64 = (lp_count.nr_empty_64 +
+ lp_count.nr_fully_covered_64 +
+ lp_count.nr_partially_covered_64);
+
+ p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64;
+ p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64;
+ p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64;
+
+ debug_printf("llvmpipe: nr_empty_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64);
+ debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64);
+ debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64);
+
+ total_16 = (lp_count.nr_empty_16 +
+ lp_count.nr_fully_covered_16 +
+ lp_count.nr_partially_covered_16);
+
+ p1 = 100.0 * (float) lp_count.nr_empty_16 / (float) total_16;
+ p2 = 100.0 * (float) lp_count.nr_fully_covered_16 / (float) total_16;
+ p3 = 100.0 * (float) lp_count.nr_partially_covered_16 / (float) total_16;
+
+ debug_printf("llvmpipe: nr_empty_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16);
+ debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16);
+ debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16);
+
+ total_4 = (lp_count.nr_empty_4 + lp_count.nr_non_empty_4);
+
+ p1 = 100.0 * (float) lp_count.nr_empty_4 / (float) total_4;
+ p2 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4;
+
+ debug_printf("llvmpipe: nr_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4);
+ debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4);
+
+ debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear);
+ debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load);
+ debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store);
+
+ debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles);
+ debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0);
+ debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles);
+
+ }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_perf.h b/src/gallium/drivers/llvmpipe/lp_perf.h
new file mode 100644
index 0000000000..a9629dae3c
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_perf.h
@@ -0,0 +1,82 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Performance / statistic counters, etc.
+ */
+
+
+#ifndef LP_PERF_H
+#define LP_PERF_H
+
+
+/**
+ * Various counters
+ */
+struct lp_counters
+{
+ unsigned nr_tris;
+ unsigned nr_culled_tris;
+ unsigned nr_empty_64;
+ unsigned nr_fully_covered_64;
+ unsigned nr_partially_covered_64;
+ unsigned nr_empty_16;
+ unsigned nr_fully_covered_16;
+ unsigned nr_partially_covered_16;
+ unsigned nr_empty_4;
+ unsigned nr_non_empty_4;
+ unsigned nr_llvm_compiles;
+ int64_t llvm_compile_time; /**< total, in microseconds */
+
+ unsigned nr_color_tile_clear;
+ unsigned nr_color_tile_load;
+ unsigned nr_color_tile_store;
+};
+
+
+extern struct lp_counters lp_count;
+
+
+/** Increment the named counter (only for debug builds) */
+#ifdef DEBUG
+#define LP_COUNT(counter) lp_count.counter++
+#define LP_COUNT_ADD(counter, incr) lp_count.counter += (incr)
+#else
+#define LP_COUNT(counter)
+#define LP_COUNT_ADD(counter, incr) (void) incr
+#endif
+
+
+extern void
+lp_reset_counters(void);
+
+
+extern void
+lp_print_counters(void);
+
+
+#endif /* LP_PERF_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_public.h b/src/gallium/drivers/llvmpipe/lp_public.h
new file mode 100644
index 0000000000..ec6b660b48
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_public.h
@@ -0,0 +1,10 @@
+#ifndef LP_PUBLIC_H
+#define LP_PUBLIC_H
+
+struct pipe_screen;
+struct sw_winsys;
+
+struct pipe_screen *
+llvmpipe_create_screen(struct sw_winsys *winsys);
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c
new file mode 100644
index 0000000000..c902c04684
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_query.c
@@ -0,0 +1,146 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors:
+ * Keith Whitwell, Qicheng Christopher Li, Brian Paul
+ */
+
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "lp_context.h"
+#include "lp_flush.h"
+#include "lp_query.h"
+#include "lp_rast.h"
+#include "lp_rast_priv.h"
+#include "lp_state.h"
+
+
+static struct llvmpipe_query *llvmpipe_query( struct pipe_query *p )
+{
+ return (struct llvmpipe_query *)p;
+}
+
+static struct pipe_query *
+llvmpipe_create_query(struct pipe_context *pipe,
+ unsigned type)
+{
+ struct llvmpipe_query *pq;
+
+ assert(type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+ pq = CALLOC_STRUCT( llvmpipe_query );
+ if (pq) {
+ pipe_mutex_init(pq->mutex);
+ }
+
+ return (struct pipe_query *) pq;
+}
+
+
+static void
+llvmpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct llvmpipe_query *pq = llvmpipe_query(q);
+ pipe_mutex_destroy(pq->mutex);
+ FREE(pq);
+}
+
+
+static boolean
+llvmpipe_get_query_result(struct pipe_context *pipe,
+ struct pipe_query *q,
+ boolean wait,
+ void *vresult)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
+ struct llvmpipe_query *pq = llvmpipe_query(q);
+ uint64_t *result = (uint64_t *)vresult;
+
+ if (!pq->done) {
+ lp_setup_flush(llvmpipe->setup, 0);
+ }
+
+ if (pq->done) {
+ *result = pq->result;
+ }
+
+ return pq->done;
+}
+
+
+static void
+llvmpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
+ struct llvmpipe_query *pq = llvmpipe_query(q);
+
+ /* Check if the query is already in the scene. If so, we need to
+ * flush the scene now. Real apps shouldn't re-use a query in a
+ * frame of rendering.
+ */
+ if (pq->binned) {
+ struct pipe_fence_handle *fence;
+ llvmpipe_flush(pipe, 0, &fence);
+ if (fence) {
+ pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_reference(pipe->screen, &fence, NULL);
+ }
+ }
+
+ lp_setup_begin_query(llvmpipe->setup, pq);
+
+ llvmpipe->active_query_count++;
+ llvmpipe->dirty |= LP_NEW_QUERY;
+}
+
+
+static void
+llvmpipe_end_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
+ struct llvmpipe_query *pq = llvmpipe_query(q);
+
+ lp_setup_end_query(llvmpipe->setup, pq);
+
+ assert(llvmpipe->active_query_count);
+ llvmpipe->active_query_count--;
+ llvmpipe->dirty |= LP_NEW_QUERY;
+}
+
+
+void llvmpipe_init_query_funcs(struct llvmpipe_context *llvmpipe )
+{
+ llvmpipe->pipe.create_query = llvmpipe_create_query;
+ llvmpipe->pipe.destroy_query = llvmpipe_destroy_query;
+ llvmpipe->pipe.begin_query = llvmpipe_begin_query;
+ llvmpipe->pipe.end_query = llvmpipe_end_query;
+ llvmpipe->pipe.get_query_result = llvmpipe_get_query_result;
+}
+
+
diff --git a/src/gallium/drivers/llvmpipe/lp_query.h b/src/gallium/drivers/llvmpipe/lp_query.h
new file mode 100644
index 0000000000..721c41cb5c
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_query.h
@@ -0,0 +1,59 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors:
+ * Keith Whitwell, Qicheng Christopher Li, Brian Paul
+ */
+
+#ifndef LP_QUERY_H
+#define LP_QUERY_H
+
+#include <limits.h>
+#include "os/os_thread.h"
+#include "lp_limits.h"
+
+
+struct llvmpipe_context;
+
+
+struct llvmpipe_query {
+ uint64_t count[LP_MAX_THREADS]; /**< a counter for each thread */
+ uint64_t result; /**< total of all counters */
+
+ pipe_mutex mutex;
+ unsigned num_tiles, tile_count;
+
+ boolean done;
+ boolean binned; /**< has this query been binned in the scene? */
+};
+
+
+extern void llvmpipe_init_query_funcs(struct llvmpipe_context * );
+
+
+#endif /* LP_QUERY_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
new file mode 100644
index 0000000000..50e44dcb2b
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -0,0 +1,1032 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <limits.h>
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_surface.h"
+
+#include "lp_scene_queue.h"
+#include "lp_debug.h"
+#include "lp_fence.h"
+#include "lp_perf.h"
+#include "lp_query.h"
+#include "lp_rast.h"
+#include "lp_rast_priv.h"
+#include "lp_tile_soa.h"
+#include "gallivm/lp_bld_debug.h"
+#include "lp_scene.h"
+
+
+/**
+ * Begin rasterizing a scene.
+ * Called once per scene by one thread.
+ */
+static void
+lp_rast_begin( struct lp_rasterizer *rast,
+ struct lp_scene *scene )
+{
+ const struct pipe_framebuffer_state *fb = &scene->fb;
+ int i;
+
+ rast->curr_scene = scene;
+
+ LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
+
+ rast->state.nr_cbufs = scene->fb.nr_cbufs;
+
+ for (i = 0; i < rast->state.nr_cbufs; i++) {
+ struct pipe_surface *cbuf = scene->fb.cbufs[i];
+ llvmpipe_resource_map(cbuf->texture,
+ cbuf->face,
+ cbuf->level,
+ cbuf->zslice,
+ LP_TEX_USAGE_READ_WRITE,
+ LP_TEX_LAYOUT_NONE);
+ }
+
+ if (fb->zsbuf) {
+ struct pipe_surface *zsbuf = scene->fb.zsbuf;
+ rast->zsbuf.stride = llvmpipe_resource_stride(zsbuf->texture, zsbuf->level);
+ rast->zsbuf.blocksize =
+ util_format_get_blocksize(zsbuf->texture->format);
+
+ rast->zsbuf.map = llvmpipe_resource_map(zsbuf->texture,
+ zsbuf->face,
+ zsbuf->level,
+ zsbuf->zslice,
+ LP_TEX_USAGE_READ_WRITE,
+ LP_TEX_LAYOUT_NONE);
+ assert(rast->zsbuf.map);
+ }
+
+ lp_scene_bin_iter_begin( scene );
+}
+
+
+static void
+lp_rast_end( struct lp_rasterizer *rast )
+{
+ struct lp_scene *scene = rast->curr_scene;
+ unsigned i;
+
+ /* Unmap color buffers */
+ for (i = 0; i < rast->state.nr_cbufs; i++) {
+ struct pipe_surface *cbuf = scene->fb.cbufs[i];
+ llvmpipe_resource_unmap(cbuf->texture,
+ cbuf->face,
+ cbuf->level,
+ cbuf->zslice);
+ }
+
+ /* Unmap z/stencil buffer */
+ if (rast->zsbuf.map) {
+ struct pipe_surface *zsbuf = scene->fb.zsbuf;
+ llvmpipe_resource_unmap(zsbuf->texture,
+ zsbuf->face,
+ zsbuf->level,
+ zsbuf->zslice);
+ rast->zsbuf.map = NULL;
+ }
+
+ lp_scene_reset( rast->curr_scene );
+
+ rast->curr_scene = NULL;
+
+#ifdef DEBUG
+ if (0)
+ debug_printf("Post render scene: tile unswizzle: %u tile swizzle: %u\n",
+ lp_tile_unswizzle_count, lp_tile_swizzle_count);
+#endif
+}
+
+
+/**
+ * Begining rasterization of a tile.
+ * \param x window X position of the tile, in pixels
+ * \param y window Y position of the tile, in pixels
+ */
+static void
+lp_rast_tile_begin(struct lp_rasterizer_task *task,
+ unsigned x, unsigned y)
+{
+ struct lp_rasterizer *rast = task->rast;
+ struct lp_scene *scene = rast->curr_scene;
+ enum lp_texture_usage usage;
+ unsigned buf;
+
+ LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
+
+ assert(x % TILE_SIZE == 0);
+ assert(y % TILE_SIZE == 0);
+
+ task->x = x;
+ task->y = y;
+
+ if (scene->has_color_clear)
+ usage = LP_TEX_USAGE_WRITE_ALL;
+ else
+ usage = LP_TEX_USAGE_READ_WRITE;
+
+ /* get pointers to color tile(s) */
+ for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
+ struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf];
+ struct llvmpipe_resource *lpt;
+ assert(cbuf);
+ lpt = llvmpipe_resource(cbuf->texture);
+ task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
+ cbuf->face + cbuf->zslice,
+ cbuf->level,
+ usage,
+ x, y);
+ assert(task->color_tiles[buf]);
+ }
+
+ /* get pointer to depth/stencil tile */
+ {
+ struct pipe_surface *zsbuf = rast->curr_scene->fb.zsbuf;
+ if (zsbuf) {
+ struct llvmpipe_resource *lpt = llvmpipe_resource(zsbuf->texture);
+
+ if (scene->has_depthstencil_clear)
+ usage = LP_TEX_USAGE_WRITE_ALL;
+ else
+ usage = LP_TEX_USAGE_READ_WRITE;
+
+ /* "prime" the tile: convert data from linear to tiled if necessary
+ * and update the tile's layout info.
+ */
+ (void) llvmpipe_get_texture_tile(lpt,
+ zsbuf->face + zsbuf->zslice,
+ zsbuf->level,
+ usage,
+ x, y);
+ /* Get actual pointer to the tile data. Note that depth/stencil
+ * data is tiled differently than color data.
+ */
+ task->depth_tile = lp_rast_get_depth_block_pointer(rast, x, y);
+
+ assert(task->depth_tile);
+ }
+ else {
+ task->depth_tile = NULL;
+ }
+ }
+}
+
+
+/**
+ * Clear the rasterizer's current color tile.
+ * This is a bin command called during bin processing.
+ */
+void
+lp_rast_clear_color(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ struct lp_rasterizer *rast = task->rast;
+ const uint8_t *clear_color = arg.clear_color;
+
+ unsigned i;
+
+ LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
+ clear_color[0],
+ clear_color[1],
+ clear_color[2],
+ clear_color[3]);
+
+ if (clear_color[0] == clear_color[1] &&
+ clear_color[1] == clear_color[2] &&
+ clear_color[2] == clear_color[3]) {
+ /* clear to grayscale value {x, x, x, x} */
+ for (i = 0; i < rast->state.nr_cbufs; i++) {
+ uint8_t *ptr = task->color_tiles[i];
+ memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
+ }
+ }
+ else {
+ /* Non-gray color.
+ * Note: if the swizzled tile layout changes (see TILE_PIXEL) this code
+ * will need to change. It'll be pretty obvious when clearing no longer
+ * works.
+ */
+ const unsigned chunk = TILE_SIZE / 4;
+ for (i = 0; i < rast->state.nr_cbufs; i++) {
+ uint8_t *c = task->color_tiles[i];
+ unsigned j;
+
+ for (j = 0; j < 4 * TILE_SIZE; j++) {
+ memset(c, clear_color[0], chunk);
+ c += chunk;
+ memset(c, clear_color[1], chunk);
+ c += chunk;
+ memset(c, clear_color[2], chunk);
+ c += chunk;
+ memset(c, clear_color[3], chunk);
+ c += chunk;
+ }
+ }
+ }
+
+ LP_COUNT(nr_color_tile_clear);
+}
+
+
+/**
+ * Clear the rasterizer's current z/stencil tile.
+ * This is a bin command called during bin processing.
+ */
+void
+lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ struct lp_rasterizer *rast = task->rast;
+ const struct lp_rast_clearzs *clearzs = arg.clear_zstencil;
+ unsigned clear_value = clearzs->clearzs_value;
+ unsigned clear_mask = clearzs->clearzs_mask;
+ const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT;
+ const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT;
+ const unsigned block_size = rast->zsbuf.blocksize;
+ const unsigned dst_stride = rast->zsbuf.stride * TILE_VECTOR_HEIGHT;
+ uint8_t *dst;
+ unsigned i, j;
+
+ LP_DBG(DEBUG_RAST, "%s 0x%x%x\n", __FUNCTION__, clear_value, clear_mask);
+
+ /*
+ * Clear the aera of the swizzled depth/depth buffer matching this tile, in
+ * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time.
+ *
+ * The swizzled depth format is such that the depths for
+ * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets.
+ */
+
+ dst = task->depth_tile;
+
+ assert(dst == lp_rast_get_depth_block_pointer(rast, task->x, task->y));
+
+ switch (block_size) {
+ case 1:
+ memset(dst, (uint8_t) clear_value, height * width);
+ break;
+ case 2:
+ for (i = 0; i < height; i++) {
+ uint16_t *row = (uint16_t *)dst;
+ for (j = 0; j < width; j++)
+ *row++ = (uint16_t) clear_value;
+ dst += dst_stride;
+ }
+ break;
+ case 4:
+ if (clear_mask == 0xffffffff) {
+ for (i = 0; i < height; i++) {
+ uint32_t *row = (uint32_t *)dst;
+ for (j = 0; j < width; j++)
+ *row++ = clear_value;
+ dst += dst_stride;
+ }
+ }
+ else {
+ for (i = 0; i < height; i++) {
+ uint32_t *row = (uint32_t *)dst;
+ for (j = 0; j < width; j++) {
+ uint32_t tmp = ~clear_mask & *row;
+ *row++ = (clear_value & clear_mask) | tmp;
+ }
+ dst += dst_stride;
+ }
+ }
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
+
+
+/**
+ * Load tile color from the framebuffer surface.
+ * This is a bin command called during bin processing.
+ */
+#if 0
+void
+lp_rast_load_color(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ struct lp_rasterizer *rast = task->rast;
+ unsigned buf;
+ enum lp_texture_usage usage;
+
+ LP_DBG(DEBUG_RAST, "%s at %u, %u\n", __FUNCTION__, x, y);
+
+ if (scene->has_color_clear)
+ usage = LP_TEX_USAGE_WRITE_ALL;
+ else
+ usage = LP_TEX_USAGE_READ_WRITE;
+
+ /* Get pointers to color tile(s).
+ * This will convert linear data to tiled if needed.
+ */
+ for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
+ struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf];
+ struct llvmpipe_texture *lpt;
+ assert(cbuf);
+ lpt = llvmpipe_texture(cbuf->texture);
+ task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
+ cbuf->face + cbuf->zslice,
+ cbuf->level,
+ usage,
+ task->x, task->y);
+ assert(task->color_tiles[buf]);
+ }
+}
+#endif
+
+
+/**
+ * Convert the color tile from tiled to linear layout.
+ * This is generally only done when we're flushing the scene just prior to
+ * SwapBuffers. If we didn't do this here, we'd have to convert the entire
+ * tiled color buffer to linear layout in the llvmpipe_texture_unmap()
+ * function. It's better to do it here to take advantage of
+ * threading/parallelism.
+ * This is a bin command which is stored in all bins.
+ */
+void
+lp_rast_store_color( struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ struct lp_rasterizer *rast = task->rast;
+ struct lp_scene *scene = rast->curr_scene;
+ unsigned buf;
+
+ for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
+ struct pipe_surface *cbuf = scene->fb.cbufs[buf];
+ const unsigned face = cbuf->face, level = cbuf->level;
+ struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
+ /* this will convert the tiled data to linear if needed */
+ (void) llvmpipe_get_texture_tile_linear(lpt, face, level,
+ LP_TEX_USAGE_READ,
+ task->x, task->y);
+ }
+}
+
+
+/**
+ * This is a bin command called during bin processing.
+ */
+void
+lp_rast_set_state(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_state *state = arg.set_state;
+
+ LP_DBG(DEBUG_RAST, "%s %p\n", __FUNCTION__, (void *) state);
+
+ /* just set the current state pointer for this rasterizer */
+ task->current_state = state;
+}
+
+
+/**
+ * Run the shader on all blocks in a tile. This is used when a tile is
+ * completely contained inside a triangle.
+ * This is a bin command called during bin processing.
+ */
+void
+lp_rast_shade_tile(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ struct lp_rasterizer *rast = task->rast;
+ const struct lp_rast_state *state = task->current_state;
+ const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
+ struct lp_fragment_shader_variant *variant = state->variant;
+ const unsigned tile_x = task->x, tile_y = task->y;
+ unsigned x, y;
+
+ LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
+
+ /* render the whole 64x64 tile in 4x4 chunks */
+ for (y = 0; y < TILE_SIZE; y += 4){
+ for (x = 0; x < TILE_SIZE; x += 4) {
+ uint8_t *color[PIPE_MAX_COLOR_BUFS];
+ uint32_t *depth;
+ unsigned i;
+
+ /* color buffer */
+ for (i = 0; i < rast->state.nr_cbufs; i++)
+ color[i] = lp_rast_get_color_block_pointer(task, i,
+ tile_x + x, tile_y + y);
+
+ /* depth buffer */
+ depth = lp_rast_get_depth_block_pointer(rast, tile_x + x, tile_y + y);
+
+ /* run shader on 4x4 block */
+ variant->jit_function[RAST_WHOLE]( &state->jit_context,
+ tile_x + x, tile_y + y,
+ inputs->facing,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ INT_MIN, INT_MIN, INT_MIN,
+ NULL, NULL, NULL, &task->vis_counter);
+ }
+ }
+}
+
+
+/**
+ * Compute shading for a 4x4 block of pixels.
+ * This is a bin command called during bin processing.
+ * \param x X position of quad in window coords
+ * \param y Y position of quad in window coords
+ */
+void lp_rast_shade_quads( struct lp_rasterizer_task *task,
+ const struct lp_rast_shader_inputs *inputs,
+ unsigned x, unsigned y,
+ int32_t c1, int32_t c2, int32_t c3)
+{
+ const struct lp_rast_state *state = task->current_state;
+ struct lp_fragment_shader_variant *variant = state->variant;
+ struct lp_rasterizer *rast = task->rast;
+ uint8_t *color[PIPE_MAX_COLOR_BUFS];
+ void *depth;
+ unsigned i;
+
+ assert(state);
+
+ /* Sanity checks */
+ assert(x % TILE_VECTOR_WIDTH == 0);
+ assert(y % TILE_VECTOR_HEIGHT == 0);
+
+ assert((x % 4) == 0);
+ assert((y % 4) == 0);
+
+ /* color buffer */
+ for (i = 0; i < rast->state.nr_cbufs; i++) {
+ color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
+ assert(lp_check_alignment(color[i], 16));
+ }
+
+ /* depth buffer */
+ depth = lp_rast_get_depth_block_pointer(rast, x, y);
+
+
+ assert(lp_check_alignment(state->jit_context.blend_color, 16));
+
+ assert(lp_check_alignment(inputs->step[0], 16));
+ assert(lp_check_alignment(inputs->step[1], 16));
+ assert(lp_check_alignment(inputs->step[2], 16));
+
+ /* run shader on 4x4 block */
+ variant->jit_function[RAST_EDGE_TEST]( &state->jit_context,
+ x, y,
+ inputs->facing,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ c1, c2, c3,
+ inputs->step[0],
+ inputs->step[1],
+ inputs->step[2],
+ &task->vis_counter);
+}
+
+
+/**
+ * Set top row and left column of the tile's pixels to white. For debugging.
+ */
+static void
+outline_tile(uint8_t *tile)
+{
+ const uint8_t val = 0xff;
+ unsigned i;
+
+ for (i = 0; i < TILE_SIZE; i++) {
+ TILE_PIXEL(tile, i, 0, 0) = val;
+ TILE_PIXEL(tile, i, 0, 1) = val;
+ TILE_PIXEL(tile, i, 0, 2) = val;
+ TILE_PIXEL(tile, i, 0, 3) = val;
+
+ TILE_PIXEL(tile, 0, i, 0) = val;
+ TILE_PIXEL(tile, 0, i, 1) = val;
+ TILE_PIXEL(tile, 0, i, 2) = val;
+ TILE_PIXEL(tile, 0, i, 3) = val;
+ }
+}
+
+
+/**
+ * Draw grid of gray lines at 16-pixel intervals across the tile to
+ * show the sub-tile boundaries. For debugging.
+ */
+static void
+outline_subtiles(uint8_t *tile)
+{
+ const uint8_t val = 0x80;
+ const unsigned step = 16;
+ unsigned i, j;
+
+ for (i = 0; i < TILE_SIZE; i += step) {
+ for (j = 0; j < TILE_SIZE; j++) {
+ TILE_PIXEL(tile, i, j, 0) = val;
+ TILE_PIXEL(tile, i, j, 1) = val;
+ TILE_PIXEL(tile, i, j, 2) = val;
+ TILE_PIXEL(tile, i, j, 3) = val;
+
+ TILE_PIXEL(tile, j, i, 0) = val;
+ TILE_PIXEL(tile, j, i, 1) = val;
+ TILE_PIXEL(tile, j, i, 2) = val;
+ TILE_PIXEL(tile, j, i, 3) = val;
+ }
+ }
+
+ outline_tile(tile);
+}
+
+
+
+/**
+ * Called when we're done writing to a color tile.
+ */
+static void
+lp_rast_tile_end(struct lp_rasterizer_task *task)
+{
+#ifdef DEBUG
+ if (LP_DEBUG & (DEBUG_SHOW_SUBTILES | DEBUG_SHOW_TILES)) {
+ struct lp_rasterizer *rast = task->rast;
+ unsigned buf;
+
+ for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
+ uint8_t *color = lp_rast_get_color_block_pointer(task, buf,
+ task->x, task->y);
+
+ if (LP_DEBUG & DEBUG_SHOW_SUBTILES)
+ outline_subtiles(color);
+ else if (LP_DEBUG & DEBUG_SHOW_TILES)
+ outline_tile(color);
+ }
+ }
+#else
+ (void) outline_subtiles;
+#endif
+
+ /* debug */
+ memset(task->color_tiles, 0, sizeof(task->color_tiles));
+ task->depth_tile = NULL;
+}
+
+
+
+/**
+ * Signal on a fence. This is called during bin execution/rasterization.
+ * Called per thread.
+ */
+void
+lp_rast_fence(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ struct lp_fence *fence = arg.fence;
+ lp_fence_signal(fence);
+}
+
+
+/**
+ * Begin a new occlusion query.
+ * This is a bin command put in all bins.
+ * Called per thread.
+ */
+void
+lp_rast_begin_query(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ /* Reset the the per-task counter */
+ task->vis_counter = 0;
+}
+
+
+/**
+ * End the current occlusion query.
+ * This is a bin command put in all bins.
+ * Called per thread.
+ */
+void
+lp_rast_end_query(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ struct llvmpipe_query *pq = arg.query_obj;
+
+ pipe_mutex_lock(pq->mutex);
+ {
+ /* Accumulate the visible fragment counter from this tile in
+ * the query object.
+ */
+ pq->count[task->thread_index] += task->vis_counter;
+
+ /* check if this is the last tile in the scene */
+ pq->tile_count++;
+ if (pq->tile_count == pq->num_tiles) {
+ uint i;
+
+ /* sum the per-thread counters for the query */
+ pq->result = 0;
+ for (i = 0; i < LP_MAX_THREADS; i++) {
+ pq->result += pq->count[i];
+ }
+
+ /* reset counters (in case this query is re-used in the scene) */
+ memset(pq->count, 0, sizeof(pq->count));
+
+ pq->tile_count = 0;
+ pq->binned = FALSE;
+ pq->done = TRUE;
+ }
+ }
+ pipe_mutex_unlock(pq->mutex);
+}
+
+
+
+/**
+ * Rasterize commands for a single bin.
+ * \param x, y position of the bin's tile in the framebuffer
+ * Must be called between lp_rast_begin() and lp_rast_end().
+ * Called per thread.
+ */
+static void
+rasterize_bin(struct lp_rasterizer_task *task,
+ const struct cmd_bin *bin,
+ int x, int y)
+{
+ const struct cmd_block_list *commands = &bin->commands;
+ struct cmd_block *block;
+ unsigned k;
+
+ lp_rast_tile_begin( task, x * TILE_SIZE, y * TILE_SIZE );
+
+ /* simply execute each of the commands in the block list */
+ for (block = commands->head; block; block = block->next) {
+ for (k = 0; k < block->count; k++) {
+ block->cmd[k]( task, block->arg[k] );
+ }
+ }
+
+ lp_rast_tile_end(task);
+
+ /* Free data for this bin.
+ */
+ lp_scene_bin_reset( task->rast->curr_scene, x, y);
+}
+
+
+#define RAST(x) { lp_rast_##x, #x }
+
+static struct {
+ lp_rast_cmd cmd;
+ const char *name;
+} cmd_names[] =
+{
+ RAST(clear_color),
+ RAST(clear_zstencil),
+ RAST(triangle),
+ RAST(shade_tile),
+ RAST(set_state),
+ RAST(store_color),
+ RAST(fence),
+ RAST(begin_query),
+ RAST(end_query),
+};
+
+static void
+debug_bin( const struct cmd_bin *bin )
+{
+ const struct cmd_block *head = bin->commands.head;
+ int i, j;
+
+ for (i = 0; i < head->count; i++) {
+ debug_printf("%d: ", i);
+ for (j = 0; j < Elements(cmd_names); j++) {
+ if (head->cmd[i] == cmd_names[j].cmd) {
+ debug_printf("%s\n", cmd_names[j].name);
+ break;
+ }
+ }
+ if (j == Elements(cmd_names))
+ debug_printf("...other\n");
+ }
+
+}
+
+/* An empty bin is one that just loads the contents of the tile and
+ * stores them again unchanged. This typically happens when bins have
+ * been flushed for some reason in the middle of a frame, or when
+ * incremental updates are being made to a render target.
+ *
+ * Try to avoid doing pointless work in this case.
+ */
+static boolean
+is_empty_bin( const struct cmd_bin *bin )
+{
+ const struct cmd_block *head = bin->commands.head;
+ int i;
+
+ if (0)
+ debug_bin(bin);
+
+ /* We emit at most two load-tile commands at the start of the first
+ * command block. In addition we seem to emit a couple of
+ * set-state commands even in empty bins.
+ *
+ * As a heuristic, if a bin has more than 4 commands, consider it
+ * non-empty.
+ */
+ if (head->next != NULL ||
+ head->count > 4) {
+ return FALSE;
+ }
+
+ for (i = 0; i < head->count; i++)
+ if (head->cmd[i] != lp_rast_set_state) {
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+
+/**
+ * Rasterize/execute all bins within a scene.
+ * Called per thread.
+ */
+static void
+rasterize_scene(struct lp_rasterizer_task *task,
+ struct lp_scene *scene)
+{
+ /* loop over scene bins, rasterize each */
+#if 0
+ {
+ unsigned i, j;
+ for (i = 0; i < scene->tiles_x; i++) {
+ for (j = 0; j < scene->tiles_y; j++) {
+ struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
+ rasterize_bin(task, bin, i, j);
+ }
+ }
+ }
+#else
+ {
+ struct cmd_bin *bin;
+ int x, y;
+
+ assert(scene);
+ while ((bin = lp_scene_bin_iter_next(scene, &x, &y))) {
+ if (!is_empty_bin( bin ))
+ rasterize_bin(task, bin, x, y);
+ }
+ }
+#endif
+}
+
+
+/**
+ * Called by setup module when it has something for us to render.
+ */
+void
+lp_rast_queue_scene( struct lp_rasterizer *rast,
+ struct lp_scene *scene)
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ if (rast->num_threads == 0) {
+ /* no threading */
+
+ lp_rast_begin( rast, scene );
+
+ rasterize_scene( &rast->tasks[0], scene );
+
+ lp_scene_reset( scene );
+
+ lp_rast_end( rast );
+
+ rast->curr_scene = NULL;
+ }
+ else {
+ /* threaded rendering! */
+ unsigned i;
+
+ lp_scene_enqueue( rast->full_scenes, scene );
+
+ /* signal the threads that there's work to do */
+ for (i = 0; i < rast->num_threads; i++) {
+ pipe_semaphore_signal(&rast->tasks[i].work_ready);
+ }
+ }
+
+ LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
+}
+
+
+void
+lp_rast_finish( struct lp_rasterizer *rast )
+{
+ if (rast->num_threads == 0) {
+ /* nothing to do */
+ }
+ else {
+ int i;
+
+ /* wait for work to complete */
+ for (i = 0; i < rast->num_threads; i++) {
+ pipe_semaphore_wait(&rast->tasks[i].work_done);
+ }
+ }
+}
+
+
+/**
+ * This is the thread's main entrypoint.
+ * It's a simple loop:
+ * 1. wait for work
+ * 2. do work
+ * 3. signal that we're done
+ */
+static PIPE_THREAD_ROUTINE( thread_func, init_data )
+{
+ struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
+ struct lp_rasterizer *rast = task->rast;
+ boolean debug = false;
+
+ while (1) {
+ /* wait for work */
+ if (debug)
+ debug_printf("thread %d waiting for work\n", task->thread_index);
+ pipe_semaphore_wait(&task->work_ready);
+
+ if (rast->exit_flag)
+ break;
+
+ if (task->thread_index == 0) {
+ /* thread[0]:
+ * - get next scene to rasterize
+ * - map the framebuffer surfaces
+ */
+ lp_rast_begin( rast,
+ lp_scene_dequeue( rast->full_scenes, TRUE ) );
+ }
+
+ /* Wait for all threads to get here so that threads[1+] don't
+ * get a null rast->curr_scene pointer.
+ */
+ pipe_barrier_wait( &rast->barrier );
+
+ /* do work */
+ if (debug)
+ debug_printf("thread %d doing work\n", task->thread_index);
+
+ rasterize_scene(task,
+ rast->curr_scene);
+
+ /* wait for all threads to finish with this scene */
+ pipe_barrier_wait( &rast->barrier );
+
+ /* XXX: shouldn't be necessary:
+ */
+ if (task->thread_index == 0) {
+ lp_rast_end( rast );
+ }
+
+ /* signal done with work */
+ if (debug)
+ debug_printf("thread %d done working\n", task->thread_index);
+
+ pipe_semaphore_signal(&task->work_done);
+ }
+
+ return NULL;
+}
+
+
+/**
+ * Initialize semaphores and spawn the threads.
+ */
+static void
+create_rast_threads(struct lp_rasterizer *rast)
+{
+ unsigned i;
+
+ /* NOTE: if num_threads is zero, we won't use any threads */
+ for (i = 0; i < rast->num_threads; i++) {
+ pipe_semaphore_init(&rast->tasks[i].work_ready, 0);
+ pipe_semaphore_init(&rast->tasks[i].work_done, 0);
+ rast->threads[i] = pipe_thread_create(thread_func,
+ (void *) &rast->tasks[i]);
+ }
+}
+
+
+
+/**
+ * Create new lp_rasterizer. If num_threads is zero, don't create any
+ * new threads, do rendering synchronously.
+ * \param num_threads number of rasterizer threads to create
+ */
+struct lp_rasterizer *
+lp_rast_create( unsigned num_threads )
+{
+ struct lp_rasterizer *rast;
+ unsigned i;
+
+ rast = CALLOC_STRUCT(lp_rasterizer);
+ if(!rast)
+ return NULL;
+
+ rast->full_scenes = lp_scene_queue_create();
+
+ for (i = 0; i < Elements(rast->tasks); i++) {
+ struct lp_rasterizer_task *task = &rast->tasks[i];
+ task->rast = rast;
+ task->thread_index = i;
+ }
+
+ rast->num_threads = num_threads;
+
+ create_rast_threads(rast);
+
+ /* for synchronizing rasterization threads */
+ pipe_barrier_init( &rast->barrier, rast->num_threads );
+
+ return rast;
+}
+
+
+/* Shutdown:
+ */
+void lp_rast_destroy( struct lp_rasterizer *rast )
+{
+ unsigned i;
+
+ /* Set exit_flag and signal each thread's work_ready semaphore.
+ * Each thread will be woken up, notice that the exit_flag is set and
+ * break out of its main loop. The thread will then exit.
+ */
+ rast->exit_flag = TRUE;
+ for (i = 0; i < rast->num_threads; i++) {
+ pipe_semaphore_signal(&rast->tasks[i].work_ready);
+ }
+
+ /* Wait for threads to terminate before cleaning up per-thread data */
+ for (i = 0; i < rast->num_threads; i++) {
+ pipe_thread_wait(rast->threads[i]);
+ }
+
+ /* Clean up per-thread data */
+ for (i = 0; i < rast->num_threads; i++) {
+ pipe_semaphore_destroy(&rast->tasks[i].work_ready);
+ pipe_semaphore_destroy(&rast->tasks[i].work_done);
+ }
+
+ /* for synchronizing rasterization threads */
+ pipe_barrier_destroy( &rast->barrier );
+
+ lp_scene_queue_destroy(rast->full_scenes);
+
+ FREE(rast);
+}
+
+
+/** Return number of rasterization threads */
+unsigned
+lp_rast_get_num_threads( struct lp_rasterizer *rast )
+{
+ return rast->num_threads;
+}
+
+
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
new file mode 100644
index 0000000000..80ca68f5a2
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -0,0 +1,252 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * The rast code is concerned with rasterization of command bins.
+ * Each screen tile has a bin associated with it. To render the
+ * scene we iterate over the tile bins and execute the commands
+ * in each bin.
+ * We'll do that with multiple threads...
+ */
+
+
+#ifndef LP_RAST_H
+#define LP_RAST_H
+
+#include "pipe/p_compiler.h"
+#include "lp_jit.h"
+
+
+struct lp_rasterizer;
+struct lp_scene;
+struct lp_fence;
+struct cmd_bin;
+
+/** For sub-pixel positioning */
+#define FIXED_ORDER 4
+#define FIXED_ONE (1<<FIXED_ORDER)
+
+
+struct lp_rasterizer_task;
+
+
+/**
+ * Rasterization state.
+ * Objects of this type are put into the shared data bin and pointed
+ * to by commands in the per-tile bins.
+ */
+struct lp_rast_state {
+ /* State for the shader. This also contains state which feeds into
+ * the fragment shader, such as blend color and alpha ref value.
+ */
+ struct lp_jit_context jit_context;
+
+ /* The shader itself. Probably we also need to pass a pointer to
+ * the tile color/z/stencil data somehow
+ */
+ struct lp_fragment_shader_variant *variant;
+};
+
+
+/**
+ * Coefficients necessary to run the shader at a given location.
+ * First coefficient is position.
+ * These pointers point into the bin data buffer.
+ */
+struct lp_rast_shader_inputs {
+ float facing; /** Positive for front-facing, negative for back-facing */
+
+ float (*a0)[4];
+ float (*dadx)[4];
+ float (*dady)[4];
+
+ /* edge/step info for 3 edges and 4x4 block of pixels */
+ PIPE_ALIGN_VAR(16) int step[3][16];
+};
+
+struct lp_rast_clearzs {
+ unsigned clearzs_value;
+ unsigned clearzs_mask;
+};
+
+
+/**
+ * Rasterization information for a triangle known to be in this bin,
+ * plus inputs to run the shader:
+ * These fields are tile- and bin-independent.
+ * Objects of this type are put into the lp_setup_context::data buffer.
+ */
+struct lp_rast_triangle {
+#ifdef DEBUG
+ float v[3][2];
+#endif
+
+ /* one-pixel sized trivial accept offsets for each plane */
+ int ei1;
+ int ei2;
+ int ei3;
+
+ /* one-pixel sized trivial reject offsets for each plane */
+ int eo1;
+ int eo2;
+ int eo3;
+
+ /* y deltas for vertex pairs (in fixed pt) */
+ int dy12;
+ int dy23;
+ int dy31;
+
+ /* x deltas for vertex pairs (in fixed pt) */
+ int dx12;
+ int dx23;
+ int dx31;
+
+ /* edge function values at minx,miny ?? */
+ int c1, c2, c3;
+
+ /* inputs for the shader */
+ PIPE_ALIGN_VAR(16) struct lp_rast_shader_inputs inputs;
+};
+
+
+
+struct lp_rasterizer *
+lp_rast_create( unsigned num_threads );
+
+void
+lp_rast_destroy( struct lp_rasterizer * );
+
+unsigned
+lp_rast_get_num_threads( struct lp_rasterizer * );
+
+void
+lp_rast_queue_scene( struct lp_rasterizer *rast,
+ struct lp_scene *scene );
+
+void
+lp_rast_finish( struct lp_rasterizer *rast );
+
+
+union lp_rast_cmd_arg {
+ const struct lp_rast_shader_inputs *shade_tile;
+ const struct lp_rast_triangle *triangle;
+ const struct lp_rast_state *set_state;
+ uint8_t clear_color[4];
+ const struct lp_rast_clearzs *clear_zstencil;
+ struct lp_fence *fence;
+ struct llvmpipe_query *query_obj;
+};
+
+
+/* Cast wrappers. Hopefully these compile to noops!
+ */
+static INLINE union lp_rast_cmd_arg
+lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile )
+{
+ union lp_rast_cmd_arg arg;
+ arg.shade_tile = shade_tile;
+ return arg;
+}
+
+static INLINE union lp_rast_cmd_arg
+lp_rast_arg_triangle( const struct lp_rast_triangle *triangle )
+{
+ union lp_rast_cmd_arg arg;
+ arg.triangle = triangle;
+ return arg;
+}
+
+static INLINE union lp_rast_cmd_arg
+lp_rast_arg_state( const struct lp_rast_state *state )
+{
+ union lp_rast_cmd_arg arg;
+ arg.set_state = state;
+ return arg;
+}
+
+static INLINE union lp_rast_cmd_arg
+lp_rast_arg_fence( struct lp_fence *fence )
+{
+ union lp_rast_cmd_arg arg;
+ arg.fence = fence;
+ return arg;
+}
+
+
+static INLINE union lp_rast_cmd_arg
+lp_rast_arg_clearzs( const struct lp_rast_clearzs *clearzs )
+{
+ union lp_rast_cmd_arg arg;
+ arg.clear_zstencil = clearzs;
+ return arg;
+}
+
+static INLINE union lp_rast_cmd_arg
+lp_rast_arg_null( void )
+{
+ union lp_rast_cmd_arg arg;
+ arg.set_state = NULL;
+ return arg;
+}
+
+
+/**
+ * Binnable Commands.
+ * These get put into bins by the setup code and are called when
+ * the bins are executed.
+ */
+
+void lp_rast_clear_color( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_clear_zstencil( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_set_state( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_triangle( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_shade_tile( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_fence( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_store_color( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+
+void lp_rast_begin_query(struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+void lp_rast_end_query(struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
new file mode 100644
index 0000000000..d33dd49f3a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -0,0 +1,226 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_RAST_PRIV_H
+#define LP_RAST_PRIV_H
+
+#include "os/os_thread.h"
+#include "util/u_format.h"
+#include "gallivm/lp_bld_debug.h"
+#include "lp_rast.h"
+#include "lp_scene.h"
+#include "lp_state.h"
+#include "lp_texture.h"
+#include "lp_tile_soa.h"
+#include "lp_limits.h"
+
+
+struct lp_rasterizer;
+
+
+/**
+ * Per-thread rasterization state
+ */
+struct lp_rasterizer_task
+{
+ unsigned x, y; /**< Pos of this tile in framebuffer, in pixels */
+
+ uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS];
+ uint8_t *depth_tile;
+
+ const struct lp_rast_state *current_state;
+
+ /** "back" pointer */
+ struct lp_rasterizer *rast;
+
+ /** "my" index */
+ unsigned thread_index;
+
+ /* occlude counter for visiable pixels */
+ uint32_t vis_counter;
+
+ pipe_semaphore work_ready;
+ pipe_semaphore work_done;
+};
+
+
+/**
+ * This is the state required while rasterizing tiles.
+ * Note that this contains per-thread information too.
+ * The tile size is TILE_SIZE x TILE_SIZE pixels.
+ */
+struct lp_rasterizer
+{
+ boolean exit_flag;
+
+ /* Framebuffer stuff
+ */
+ struct {
+ uint8_t *map;
+ unsigned stride;
+ unsigned blocksize;
+ } zsbuf;
+
+ struct {
+ unsigned nr_cbufs;
+ unsigned clear_color;
+ unsigned clear_depth;
+ char clear_stencil;
+ } state;
+
+ /** The incoming queue of scenes ready to rasterize */
+ struct lp_scene_queue *full_scenes;
+
+ /**
+ * The outgoing queue of processed scenes to return to setup module
+ *
+ * XXX: while scenes are per-context but the rasterizer is
+ * (potentially) shared, these empty scenes should be returned to
+ * the context which created them rather than retained here.
+ */
+ /* struct lp_scene_queue *empty_scenes; */
+
+ /** The scene currently being rasterized by the threads */
+ struct lp_scene *curr_scene;
+
+ /** A task object for each rasterization thread */
+ struct lp_rasterizer_task tasks[LP_MAX_THREADS];
+
+ unsigned num_threads;
+ pipe_thread threads[LP_MAX_THREADS];
+
+ /** For synchronizing the rasterization threads */
+ pipe_barrier barrier;
+};
+
+
+void lp_rast_shade_quads( struct lp_rasterizer_task *task,
+ const struct lp_rast_shader_inputs *inputs,
+ unsigned x, unsigned y,
+ int32_t c1, int32_t c2, int32_t c3);
+
+
+/**
+ * Get the pointer to a 4x4 depth/stencil block.
+ * We'll map the z/stencil buffer on demand here.
+ * Note that this may be called even when there's no z/stencil buffer - return
+ * NULL in that case.
+ * \param x, y location of 4x4 block in window coords
+ */
+static INLINE void *
+lp_rast_get_depth_block_pointer(const struct lp_rasterizer *rast,
+ unsigned x, unsigned y)
+{
+ void *depth;
+
+ assert((x % TILE_VECTOR_WIDTH) == 0);
+ assert((y % TILE_VECTOR_HEIGHT) == 0);
+
+ assert(rast->zsbuf.map || !rast->curr_scene->fb.zsbuf);
+
+ if (!rast->zsbuf.map)
+ return NULL;
+
+ depth = (rast->zsbuf.map +
+ rast->zsbuf.stride * y +
+ rast->zsbuf.blocksize * x * TILE_VECTOR_HEIGHT);
+
+ assert(lp_check_alignment(depth, 16));
+ return depth;
+}
+
+
+/**
+ * Get the pointer to a 4x4 color block (within a 64x64 tile).
+ * We'll map the color buffer on demand here.
+ * Note that this may be called even when there's no color buffers - return
+ * NULL in that case.
+ * \param x, y location of 4x4 block in window coords
+ */
+static INLINE uint8_t *
+lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
+ unsigned buf, unsigned x, unsigned y)
+{
+ unsigned px, py, pixel_offset;
+ uint8_t *color;
+
+ assert((x % TILE_VECTOR_WIDTH) == 0);
+ assert((y % TILE_VECTOR_HEIGHT) == 0);
+
+ color = task->color_tiles[buf];
+ assert(color);
+
+ px = x % TILE_SIZE;
+ py = y % TILE_SIZE;
+ pixel_offset = tile_pixel_offset(px, py, 0);
+
+ color = color + pixel_offset;
+
+ assert(lp_check_alignment(color, 16));
+ return color;
+}
+
+
+
+/**
+ * Shade all pixels in a 4x4 block. The fragment code omits the
+ * triangle in/out tests.
+ * \param x, y location of 4x4 block in window coords
+ */
+static INLINE void
+lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
+ const struct lp_rast_shader_inputs *inputs,
+ unsigned x, unsigned y )
+{
+ struct lp_rasterizer *rast = task->rast;
+ const struct lp_rast_state *state = task->current_state;
+ struct lp_fragment_shader_variant *variant = state->variant;
+ uint8_t *color[PIPE_MAX_COLOR_BUFS];
+ void *depth;
+ unsigned i;
+
+ /* color buffer */
+ for (i = 0; i < rast->state.nr_cbufs; i++)
+ color[i] = lp_rast_get_color_block_pointer(task, i, x, y);
+
+ depth = lp_rast_get_depth_block_pointer(rast, x, y);
+
+ /* run shader on 4x4 block */
+ variant->jit_function[RAST_WHOLE]( &state->jit_context,
+ x, y,
+ inputs->facing,
+ inputs->a0,
+ inputs->dadx,
+ inputs->dady,
+ color,
+ depth,
+ INT_MIN, INT_MIN, INT_MIN,
+ NULL, NULL, NULL, &task->vis_counter );
+}
+
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
new file mode 100644
index 0000000000..a5f0d14c95
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -0,0 +1,280 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Rasterization for binned triangles within a tile
+ */
+
+#include <limits.h>
+#include "util/u_math.h"
+#include "lp_debug.h"
+#include "lp_perf.h"
+#include "lp_rast_priv.h"
+#include "lp_tile_soa.h"
+
+
+/**
+ * Map an index in [0,15] to an x,y position, multiplied by 4.
+ * This is used to get the position of each subtile in a 4x4
+ * grid of edge step values.
+ * Note: we can use some bit twiddling to compute these values instead
+ * of using a look-up table, but there's no measurable performance
+ * difference.
+ */
+static const int pos_table4[16][2] = {
+ { 0, 0 },
+ { 4, 0 },
+ { 0, 4 },
+ { 4, 4 },
+ { 8, 0 },
+ { 12, 0 },
+ { 8, 4 },
+ { 12, 4 },
+ { 0, 8 },
+ { 4, 8 },
+ { 0, 12 },
+ { 4, 12 },
+ { 8, 8 },
+ { 12, 8 },
+ { 8, 12 },
+ { 12, 12 }
+};
+
+
+static const int pos_table16[16][2] = {
+ { 0, 0 },
+ { 16, 0 },
+ { 0, 16 },
+ { 16, 16 },
+ { 32, 0 },
+ { 48, 0 },
+ { 32, 16 },
+ { 48, 16 },
+ { 0, 32 },
+ { 16, 32 },
+ { 0, 48 },
+ { 16, 48 },
+ { 32, 32 },
+ { 48, 32 },
+ { 32, 48 },
+ { 48, 48 }
+};
+
+
+/**
+ * Shade all pixels in a 4x4 block.
+ */
+static void
+block_full_4(struct lp_rasterizer_task *task,
+ const struct lp_rast_triangle *tri,
+ int x, int y)
+{
+ lp_rast_shade_quads_all(task, &tri->inputs, x, y);
+}
+
+
+/**
+ * Shade all pixels in a 16x16 block.
+ */
+static void
+block_full_16(struct lp_rasterizer_task *task,
+ const struct lp_rast_triangle *tri,
+ int x, int y)
+{
+ unsigned ix, iy;
+ assert(x % 16 == 0);
+ assert(y % 16 == 0);
+ for (iy = 0; iy < 16; iy += 4)
+ for (ix = 0; ix < 16; ix += 4)
+ block_full_4(task, tri, x + ix, y + iy);
+}
+
+
+/**
+ * Pass the 4x4 pixel block to the shader function.
+ * Determination of which of the 16 pixels lies inside the triangle
+ * will be done as part of the fragment shader.
+ */
+static void
+do_block_4(struct lp_rasterizer_task *task,
+ const struct lp_rast_triangle *tri,
+ int x, int y,
+ int c1, int c2, int c3)
+{
+ assert(x >= 0);
+ assert(y >= 0);
+
+ lp_rast_shade_quads(task, &tri->inputs, x, y, -c1, -c2, -c3);
+}
+
+
+/**
+ * Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out
+ * of the triangle's bounds.
+ */
+static void
+do_block_16(struct lp_rasterizer_task *task,
+ const struct lp_rast_triangle *tri,
+ int x, int y,
+ int c0, int c1, int c2)
+{
+ unsigned mask = 0;
+ int eo[3];
+ int c[3];
+ int i, j;
+
+ assert(x >= 0);
+ assert(y >= 0);
+ assert(x % 16 == 0);
+ assert(y % 16 == 0);
+
+ eo[0] = tri->eo1 * 4;
+ eo[1] = tri->eo2 * 4;
+ eo[2] = tri->eo3 * 4;
+
+ c[0] = c0;
+ c[1] = c1;
+ c[2] = c2;
+
+ for (j = 0; j < 3; j++) {
+ const int *step = tri->inputs.step[j];
+ const int cx = c[j] + eo[j];
+
+ /* Mask has bits set whenever we are outside any of the edges.
+ */
+ for (i = 0; i < 16; i++) {
+ int out = cx + step[i] * 4;
+ mask |= (out >> 31) & (1 << i);
+ }
+ }
+
+ mask = ~mask & 0xffff;
+ while (mask) {
+ int i = ffs(mask) - 1;
+ int px = x + pos_table4[i][0];
+ int py = y + pos_table4[i][1];
+ int cx1 = c0 + tri->inputs.step[0][i] * 4;
+ int cx2 = c1 + tri->inputs.step[1][i] * 4;
+ int cx3 = c2 + tri->inputs.step[2][i] * 4;
+
+ mask &= ~(1 << i);
+
+ /* Don't bother testing if the 4x4 block is entirely in/out of
+ * the triangle. It's a little faster to do it in the jit code.
+ */
+ LP_COUNT(nr_non_empty_4);
+ do_block_4(task, tri, px, py, cx1, cx2, cx3);
+ }
+}
+
+
+/**
+ * Scan the tile in chunks and figure out which pixels to rasterize
+ * for this triangle.
+ */
+void
+lp_rast_triangle(struct lp_rasterizer_task *task,
+ const union lp_rast_cmd_arg arg)
+{
+ const struct lp_rast_triangle *tri = arg.triangle;
+ const int x = task->x, y = task->y;
+ int ei[3], eo[3], c[3];
+ unsigned outmask, inmask, partial_mask;
+ unsigned i, j;
+
+ c[0] = tri->c1 + tri->dx12 * y - tri->dy12 * x;
+ c[1] = tri->c2 + tri->dx23 * y - tri->dy23 * x;
+ c[2] = tri->c3 + tri->dx31 * y - tri->dy31 * x;
+
+ eo[0] = tri->eo1 * 16;
+ eo[1] = tri->eo2 * 16;
+ eo[2] = tri->eo3 * 16;
+
+ ei[0] = tri->ei1 * 16;
+ ei[1] = tri->ei2 * 16;
+ ei[2] = tri->ei3 * 16;
+
+ outmask = 0;
+ inmask = 0xffff;
+
+ for (j = 0; j < 3; j++) {
+ const int *step = tri->inputs.step[j];
+ const int cox = c[j] + eo[j];
+ const int cio = ei[j]- eo[j];
+
+ /* Outmask has bits set whenever we are outside any of the
+ * edges.
+ */
+ /* Inmask has bits set whenever we are inside all of the edges.
+ */
+ for (i = 0; i < 16; i++) {
+ int out = cox + step[i] * 16;
+ int in = out + cio;
+ outmask |= (out >> 31) & (1 << i);
+ inmask &= ~((in >> 31) & (1 << i));
+ }
+ }
+
+ assert((outmask & inmask) == 0);
+
+ if (outmask == 0xffff)
+ return;
+
+ /* Invert mask, so that bits are set whenever we are at least
+ * partially inside all of the edges:
+ */
+ partial_mask = ~inmask & ~outmask & 0xffff;
+
+ /* Iterate over partials:
+ */
+ while (partial_mask) {
+ int i = ffs(partial_mask) - 1;
+ int px = x + pos_table16[i][0];
+ int py = y + pos_table16[i][1];
+ int cx1 = c[0] + tri->inputs.step[0][i] * 16;
+ int cx2 = c[1] + tri->inputs.step[1][i] * 16;
+ int cx3 = c[2] + tri->inputs.step[2][i] * 16;
+
+ partial_mask &= ~(1 << i);
+
+ LP_COUNT(nr_partially_covered_16);
+ do_block_16(task, tri, px, py, cx1, cx2, cx3);
+ }
+
+ /* Iterate over fulls:
+ */
+ while (inmask) {
+ int i = ffs(inmask) - 1;
+ int px = x + pos_table16[i][0];
+ int py = y + pos_table16[i][1];
+
+ inmask &= ~(1 << i);
+
+ LP_COUNT(nr_fully_covered_16);
+ block_full_16(task, tri, px, py);
+ }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
new file mode 100644
index 0000000000..845c175cf2
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -0,0 +1,465 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_framebuffer.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_simple_list.h"
+#include "lp_scene.h"
+#include "lp_scene_queue.h"
+
+
+/** List of texture references */
+struct texture_ref {
+ struct pipe_resource *texture;
+ struct texture_ref *prev, *next; /**< linked list w/ u_simple_list.h */
+};
+
+
+
+/**
+ * Create a new scene object.
+ * \param queue the queue to put newly rendered/emptied scenes into
+ */
+struct lp_scene *
+lp_scene_create( struct pipe_context *pipe,
+ struct lp_scene_queue *queue )
+{
+ unsigned i, j;
+ struct lp_scene *scene = CALLOC_STRUCT(lp_scene);
+ if (!scene)
+ return NULL;
+
+ scene->pipe = pipe;
+ scene->empty_queue = queue;
+
+ for (i = 0; i < TILES_X; i++) {
+ for (j = 0; j < TILES_Y; j++) {
+ struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
+ bin->commands.head = bin->commands.tail = CALLOC_STRUCT(cmd_block);
+ }
+ }
+
+ scene->data.head =
+ scene->data.tail = CALLOC_STRUCT(data_block);
+
+ make_empty_list(&scene->resources);
+
+ pipe_mutex_init(scene->mutex);
+
+ return scene;
+}
+
+
+/**
+ * Free all data associated with the given scene, and the scene itself.
+ */
+void
+lp_scene_destroy(struct lp_scene *scene)
+{
+ unsigned i, j;
+
+ lp_scene_reset(scene);
+
+ for (i = 0; i < TILES_X; i++)
+ for (j = 0; j < TILES_Y; j++) {
+ struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
+ assert(bin->commands.head == bin->commands.tail);
+ FREE(bin->commands.head);
+ bin->commands.head = NULL;
+ bin->commands.tail = NULL;
+ }
+
+ FREE(scene->data.head);
+ scene->data.head = NULL;
+
+ pipe_mutex_destroy(scene->mutex);
+
+ FREE(scene);
+}
+
+
+/**
+ * Check if the scene's bins are all empty.
+ * For debugging purposes.
+ */
+boolean
+lp_scene_is_empty(struct lp_scene *scene )
+{
+ unsigned x, y;
+
+ for (y = 0; y < TILES_Y; y++) {
+ for (x = 0; x < TILES_X; x++) {
+ const struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
+ const struct cmd_block_list *list = &bin->commands;
+ if (list->head != list->tail || list->head->count > 0) {
+ return FALSE;
+ }
+ }
+ }
+ return TRUE;
+}
+
+
+/* Free data for one particular bin. May be called from the
+ * rasterizer thread(s).
+ */
+void
+lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y)
+{
+ struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
+ struct cmd_block_list *list = &bin->commands;
+ struct cmd_block *block;
+ struct cmd_block *tmp;
+
+ assert(x < TILES_X);
+ assert(y < TILES_Y);
+
+ for (block = list->head; block != list->tail; block = tmp) {
+ tmp = block->next;
+ FREE(block);
+ }
+
+ assert(list->tail->next == NULL);
+ list->head = list->tail;
+ list->head->count = 0;
+}
+
+
+/**
+ * Free all the temporary data in a scene. May be called from the
+ * rasterizer thread(s).
+ */
+void
+lp_scene_reset(struct lp_scene *scene )
+{
+ unsigned i, j;
+
+ /* Free all but last binner command lists:
+ */
+ for (i = 0; i < scene->tiles_x; i++) {
+ for (j = 0; j < scene->tiles_y; j++) {
+ lp_scene_bin_reset(scene, i, j);
+ }
+ }
+
+ assert(lp_scene_is_empty(scene));
+
+ /* Free all but last binned data block:
+ */
+ {
+ struct data_block_list *list = &scene->data;
+ struct data_block *block, *tmp;
+
+ for (block = list->head; block != list->tail; block = tmp) {
+ tmp = block->next;
+ FREE(block);
+ }
+
+ assert(list->tail->next == NULL);
+ list->head = list->tail;
+ list->head->used = 0;
+ }
+
+ /* Release texture refs
+ */
+ {
+ struct resource_ref *ref, *next, *ref_list = &scene->resources;
+ for (ref = ref_list->next; ref != ref_list; ref = next) {
+ next = next_elem(ref);
+ pipe_resource_reference(&ref->resource, NULL);
+ FREE(ref);
+ }
+ make_empty_list(ref_list);
+ }
+
+ scene->scene_size = 0;
+
+ scene->has_color_clear = FALSE;
+ scene->has_depthstencil_clear = FALSE;
+}
+
+
+
+
+
+
+struct cmd_block *
+lp_bin_new_cmd_block( struct cmd_block_list *list )
+{
+ struct cmd_block *block = MALLOC_STRUCT(cmd_block);
+ if (block) {
+ list->tail->next = block;
+ list->tail = block;
+ block->next = NULL;
+ block->count = 0;
+ }
+ return block;
+}
+
+
+struct data_block *
+lp_bin_new_data_block( struct data_block_list *list )
+{
+ struct data_block *block = MALLOC_STRUCT(data_block);
+ if (block) {
+ list->tail->next = block;
+ list->tail = block;
+ block->next = NULL;
+ block->used = 0;
+ }
+ return block;
+}
+
+
+/**
+ * Return number of bytes used for all bin data within a scene.
+ * This does not include resources (textures) referenced by the scene.
+ */
+unsigned
+lp_scene_data_size( const struct lp_scene *scene )
+{
+ unsigned size = 0;
+ const struct data_block *block;
+ for (block = scene->data.head; block; block = block->next) {
+ size += block->used;
+ }
+ return size;
+}
+
+
+/** Return number of bytes used for a single bin */
+unsigned
+lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y )
+{
+ struct cmd_bin *bin = lp_scene_get_bin((struct lp_scene *) scene, x, y);
+ const struct cmd_block *cmd;
+ unsigned size = 0;
+ for (cmd = bin->commands.head; cmd; cmd = cmd->next) {
+ size += (cmd->count *
+ (sizeof(lp_rast_cmd) + sizeof(union lp_rast_cmd_arg)));
+ }
+ return size;
+}
+
+
+/**
+ * Add a reference to a resource by the scene.
+ */
+void
+lp_scene_add_resource_reference(struct lp_scene *scene,
+ struct pipe_resource *resource)
+{
+ struct resource_ref *ref = CALLOC_STRUCT(resource_ref);
+ if (ref) {
+ struct resource_ref *ref_list = &scene->resources;
+ pipe_resource_reference(&ref->resource, resource);
+ insert_at_tail(ref_list, ref);
+ }
+
+ scene->scene_size += llvmpipe_resource_size(resource);
+}
+
+
+/**
+ * Does this scene have a reference to the given resource?
+ */
+boolean
+lp_scene_is_resource_referenced(const struct lp_scene *scene,
+ const struct pipe_resource *resource)
+{
+ const struct resource_ref *ref_list = &scene->resources;
+ const struct resource_ref *ref;
+ foreach (ref, ref_list) {
+ if (ref->resource == resource)
+ return TRUE;
+ }
+ return FALSE;
+}
+
+
+/**
+ * Return last command in the bin
+ */
+static lp_rast_cmd
+lp_get_last_command( const struct cmd_bin *bin )
+{
+ const struct cmd_block *tail = bin->commands.tail;
+ const unsigned i = tail->count;
+ if (i > 0)
+ return tail->cmd[i - 1];
+ else
+ return NULL;
+}
+
+
+/**
+ * Replace the arg of the last command in the bin.
+ */
+static void
+lp_replace_last_command_arg( struct cmd_bin *bin,
+ const union lp_rast_cmd_arg arg )
+{
+ struct cmd_block *tail = bin->commands.tail;
+ const unsigned i = tail->count;
+ assert(i > 0);
+ tail->arg[i - 1] = arg;
+}
+
+
+
+/**
+ * Put a state-change command into all bins.
+ * If we find that the last command in a bin was also a state-change
+ * command, we can simply replace that one with the new one.
+ */
+void
+lp_scene_bin_state_command( struct lp_scene *scene,
+ lp_rast_cmd cmd,
+ const union lp_rast_cmd_arg arg )
+{
+ unsigned i, j;
+ for (i = 0; i < scene->tiles_x; i++) {
+ for (j = 0; j < scene->tiles_y; j++) {
+ struct cmd_bin *bin = lp_scene_get_bin(scene, i, j);
+ lp_rast_cmd last_cmd = lp_get_last_command(bin);
+ if (last_cmd == cmd) {
+ lp_replace_last_command_arg(bin, arg);
+ }
+ else {
+ lp_scene_bin_command( scene, i, j, cmd, arg );
+ }
+ }
+ }
+}
+
+
+/** advance curr_x,y to the next bin */
+static boolean
+next_bin(struct lp_scene *scene)
+{
+ scene->curr_x++;
+ if (scene->curr_x >= scene->tiles_x) {
+ scene->curr_x = 0;
+ scene->curr_y++;
+ }
+ if (scene->curr_y >= scene->tiles_y) {
+ /* no more bins */
+ return FALSE;
+ }
+ return TRUE;
+}
+
+
+void
+lp_scene_bin_iter_begin( struct lp_scene *scene )
+{
+ scene->curr_x = scene->curr_y = -1;
+}
+
+
+/**
+ * Return pointer to next bin to be rendered.
+ * The lp_scene::curr_x and ::curr_y fields will be advanced.
+ * Multiple rendering threads will call this function to get a chunk
+ * of work (a bin) to work on.
+ */
+struct cmd_bin *
+lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y )
+{
+ struct cmd_bin *bin = NULL;
+
+ pipe_mutex_lock(scene->mutex);
+
+ if (scene->curr_x < 0) {
+ /* first bin */
+ scene->curr_x = 0;
+ scene->curr_y = 0;
+ }
+ else if (!next_bin(scene)) {
+ /* no more bins left */
+ goto end;
+ }
+
+ bin = lp_scene_get_bin(scene, scene->curr_x, scene->curr_y);
+ *bin_x = scene->curr_x;
+ *bin_y = scene->curr_y;
+
+end:
+ /*printf("return bin %p at %d, %d\n", (void *) bin, *bin_x, *bin_y);*/
+ pipe_mutex_unlock(scene->mutex);
+ return bin;
+}
+
+
+void lp_scene_begin_binning( struct lp_scene *scene,
+ struct pipe_framebuffer_state *fb )
+{
+ assert(lp_scene_is_empty(scene));
+
+ util_copy_framebuffer_state(&scene->fb, fb);
+
+ scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE;
+ scene->tiles_y = align(fb->height, TILE_SIZE) / TILE_SIZE;
+
+ assert(scene->tiles_x <= TILES_X);
+ assert(scene->tiles_y <= TILES_Y);
+}
+
+
+void lp_scene_rasterize( struct lp_scene *scene,
+ struct lp_rasterizer *rast )
+{
+ if (0) {
+ unsigned x, y;
+ debug_printf("rasterize scene:\n");
+ debug_printf(" data size: %u\n", lp_scene_data_size(scene));
+ for (y = 0; y < scene->tiles_y; y++) {
+ for (x = 0; x < scene->tiles_x; x++) {
+ debug_printf(" bin %u, %u size: %u\n", x, y,
+ lp_scene_bin_size(scene, x, y));
+ }
+ }
+ }
+
+ /* Enqueue the scene for rasterization, then immediately wait for
+ * it to finish.
+ */
+ lp_rast_queue_scene( rast, scene );
+
+ /* Currently just wait for the rasterizer to finish. Some
+ * threading interactions need to be worked out, particularly once
+ * transfers become per-context:
+ */
+ lp_rast_finish( rast );
+
+ util_unreference_framebuffer_state( &scene->fb );
+
+ /* put scene into the empty list */
+ lp_scene_enqueue( scene->empty_queue, scene );
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
new file mode 100644
index 0000000000..4e55d43174
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -0,0 +1,339 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * Binner data structures and bin-related functions.
+ * Note: the "setup" code is concerned with building scenes while
+ * The "rast" code is concerned with consuming/executing scenes.
+ */
+
+#ifndef LP_SCENE_H
+#define LP_SCENE_H
+
+#include "os/os_thread.h"
+#include "lp_tile_soa.h"
+#include "lp_rast.h"
+
+struct lp_scene_queue;
+
+/* We're limited to 2K by 2K for 32bit fixed point rasterization.
+ * Will need a 64-bit version for larger framebuffers.
+ */
+#define TILES_X (LP_MAX_WIDTH / TILE_SIZE)
+#define TILES_Y (LP_MAX_HEIGHT / TILE_SIZE)
+
+
+#define CMD_BLOCK_MAX 128
+#define DATA_BLOCK_SIZE (16 * 1024 - sizeof(unsigned) - sizeof(void *))
+
+
+
+/* switch to a non-pointer value for this:
+ */
+typedef void (*lp_rast_cmd)( struct lp_rasterizer_task *,
+ const union lp_rast_cmd_arg );
+
+struct cmd_block {
+ lp_rast_cmd cmd[CMD_BLOCK_MAX];
+ union lp_rast_cmd_arg arg[CMD_BLOCK_MAX];
+ unsigned count;
+ struct cmd_block *next;
+};
+
+struct data_block {
+ ubyte data[DATA_BLOCK_SIZE];
+ unsigned used;
+ struct data_block *next;
+};
+
+struct cmd_block_list {
+ struct cmd_block *head;
+ struct cmd_block *tail;
+};
+
+/**
+ * For each screen tile we have one of these bins.
+ */
+struct cmd_bin {
+ struct cmd_block_list commands;
+};
+
+
+/**
+ * This stores bulk data which is shared by all bins within a scene.
+ * Examples include triangle data and state data. The commands in
+ * the per-tile bins will point to chunks of data in this structure.
+ */
+struct data_block_list {
+ struct data_block *head;
+ struct data_block *tail;
+};
+
+
+/** List of resource references */
+struct resource_ref {
+ struct pipe_resource *resource;
+ struct resource_ref *prev, *next; /**< linked list w/ u_simple_list.h */
+};
+
+
+/**
+ * All bins and bin data are contained here.
+ * Per-bin data goes into the 'tile' bins.
+ * Shared data goes into the 'data' buffer.
+ *
+ * When there are multiple threads, will want to double-buffer between
+ * scenes:
+ */
+struct lp_scene {
+ struct pipe_context *pipe;
+
+ /** the framebuffer to render the scene into */
+ struct pipe_framebuffer_state fb;
+
+ /** list of resources referenced by the scene commands */
+ struct resource_ref resources;
+
+ /** Approx memory used by the scene (in bytes). This includes the
+ * shared and per-tile bins plus any referenced resources/textures.
+ */
+ unsigned scene_size;
+
+ boolean has_color_clear;
+ boolean has_depthstencil_clear;
+
+ /**
+ * Number of active tiles in each dimension.
+ * This basically the framebuffer size divided by tile size
+ */
+ unsigned tiles_x, tiles_y;
+
+ int curr_x, curr_y; /**< for iterating over bins */
+ pipe_mutex mutex;
+
+ /* Where to place this scene once it has been rasterized:
+ */
+ struct lp_scene_queue *empty_queue;
+
+ struct cmd_bin tile[TILES_X][TILES_Y];
+ struct data_block_list data;
+};
+
+
+
+struct lp_scene *lp_scene_create(struct pipe_context *pipe,
+ struct lp_scene_queue *empty_queue);
+
+void lp_scene_destroy(struct lp_scene *scene);
+
+
+
+boolean lp_scene_is_empty(struct lp_scene *scene );
+
+void lp_scene_reset(struct lp_scene *scene );
+
+
+struct data_block *lp_bin_new_data_block( struct data_block_list *list );
+
+struct cmd_block *lp_bin_new_cmd_block( struct cmd_block_list *list );
+
+unsigned lp_scene_data_size( const struct lp_scene *scene );
+
+unsigned lp_scene_bin_size( const struct lp_scene *scene, unsigned x, unsigned y );
+
+void lp_scene_add_resource_reference(struct lp_scene *scene,
+ struct pipe_resource *resource);
+
+boolean lp_scene_is_resource_referenced(const struct lp_scene *scene,
+ const struct pipe_resource *resource );
+
+
+/**
+ * Allocate space for a command/data in the bin's data buffer.
+ * Grow the block list if needed.
+ */
+static INLINE void *
+lp_scene_alloc( struct lp_scene *scene, unsigned size)
+{
+ struct data_block_list *list = &scene->data;
+ struct data_block *tail = list->tail;
+
+ if (tail->used + size > DATA_BLOCK_SIZE) {
+ tail = lp_bin_new_data_block( list );
+ if (!tail) {
+ /* out of memory */
+ return NULL;
+ }
+ }
+
+ scene->scene_size += size;
+
+ {
+ ubyte *data = tail->data + tail->used;
+ tail->used += size;
+ return data;
+ }
+}
+
+
+/**
+ * As above, but with specific alignment.
+ */
+static INLINE void *
+lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size,
+ unsigned alignment )
+{
+ struct data_block_list *list = &scene->data;
+ struct data_block *tail = list->tail;
+
+ if (tail->used + size + alignment - 1 > DATA_BLOCK_SIZE) {
+ tail = lp_bin_new_data_block( list );
+ if (!tail)
+ return NULL;
+ }
+
+ scene->scene_size += size;
+
+ {
+ ubyte *data = tail->data + tail->used;
+ unsigned offset = (((uintptr_t)data + alignment - 1) & ~(alignment - 1)) - (uintptr_t)data;
+ tail->used += offset + size;
+ return data + offset;
+ }
+}
+
+
+/* Put back data if we decide not to use it, eg. culled triangles.
+ */
+static INLINE void
+lp_scene_putback_data( struct lp_scene *scene, unsigned size)
+{
+ struct data_block_list *list = &scene->data;
+ scene->scene_size -= size;
+ assert(list->tail->used >= size);
+ list->tail->used -= size;
+}
+
+
+/** Return pointer to a particular tile's bin. */
+static INLINE struct cmd_bin *
+lp_scene_get_bin(struct lp_scene *scene, unsigned x, unsigned y)
+{
+ return &scene->tile[x][y];
+}
+
+
+/** Remove all commands from a bin */
+void
+lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y);
+
+
+/* Add a command to bin[x][y].
+ */
+static INLINE void
+lp_scene_bin_command( struct lp_scene *scene,
+ unsigned x, unsigned y,
+ lp_rast_cmd cmd,
+ union lp_rast_cmd_arg arg )
+{
+ struct cmd_bin *bin = lp_scene_get_bin(scene, x, y);
+ struct cmd_block_list *list = &bin->commands;
+ struct cmd_block *tail = list->tail;
+
+ assert(x < scene->tiles_x);
+ assert(y < scene->tiles_y);
+
+ if (tail->count == CMD_BLOCK_MAX) {
+ tail = lp_bin_new_cmd_block( list );
+ if (!tail) {
+ /* out of memory - simply ignore this command (for now) */
+ return;
+ }
+ assert(tail->count == 0);
+ }
+
+ {
+ unsigned i = tail->count;
+ tail->cmd[i] = cmd;
+ tail->arg[i] = arg;
+ tail->count++;
+ }
+}
+
+
+/* Add a command to all active bins.
+ */
+static INLINE void
+lp_scene_bin_everywhere( struct lp_scene *scene,
+ lp_rast_cmd cmd,
+ const union lp_rast_cmd_arg arg )
+{
+ unsigned i, j;
+ for (i = 0; i < scene->tiles_x; i++)
+ for (j = 0; j < scene->tiles_y; j++)
+ lp_scene_bin_command( scene, i, j, cmd, arg );
+}
+
+
+void
+lp_scene_bin_state_command( struct lp_scene *scene,
+ lp_rast_cmd cmd,
+ const union lp_rast_cmd_arg arg );
+
+
+static INLINE unsigned
+lp_scene_get_num_bins( const struct lp_scene *scene )
+{
+ return scene->tiles_x * scene->tiles_y;
+}
+
+
+void
+lp_scene_bin_iter_begin( struct lp_scene *scene );
+
+struct cmd_bin *
+lp_scene_bin_iter_next( struct lp_scene *scene, int *bin_x, int *bin_y );
+
+
+void
+lp_scene_rasterize( struct lp_scene *scene,
+ struct lp_rasterizer *rast );
+
+void
+lp_scene_begin_binning( struct lp_scene *scene,
+ struct pipe_framebuffer_state *fb );
+
+
+static INLINE unsigned
+lp_scene_get_size(const struct lp_scene *scene)
+{
+ return scene->scene_size;
+}
+
+
+#endif /* LP_BIN_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.c b/src/gallium/drivers/llvmpipe/lp_scene_queue.c
new file mode 100644
index 0000000000..975db43c4e
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.c
@@ -0,0 +1,124 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * Scene queue. We'll use two queues. One contains "full" scenes which
+ * are produced by the "setup" code. The other contains "empty" scenes
+ * which are produced by the "rast" code when it finishes rendering a scene.
+ */
+
+#include "util/u_ringbuffer.h"
+#include "util/u_memory.h"
+#include "lp_scene_queue.h"
+
+
+
+#define MAX_SCENE_QUEUE 4
+
+struct scene_packet {
+ struct util_packet header;
+ struct lp_scene *scene;
+};
+
+/**
+ * A queue of scenes
+ */
+struct lp_scene_queue
+{
+ struct util_ringbuffer *ring;
+};
+
+
+
+/** Allocate a new scene queue */
+struct lp_scene_queue *
+lp_scene_queue_create(void)
+{
+ struct lp_scene_queue *queue = CALLOC_STRUCT(lp_scene_queue);
+ if (queue == NULL)
+ return NULL;
+
+ queue->ring = util_ringbuffer_create( MAX_SCENE_QUEUE *
+ sizeof( struct scene_packet ) / 4);
+ if (queue->ring == NULL)
+ goto fail;
+
+ return queue;
+
+fail:
+ FREE(queue);
+ return NULL;
+}
+
+
+/** Delete a scene queue */
+void
+lp_scene_queue_destroy(struct lp_scene_queue *queue)
+{
+ util_ringbuffer_destroy(queue->ring);
+ FREE(queue);
+}
+
+
+/** Remove first lp_scene from head of queue */
+struct lp_scene *
+lp_scene_dequeue(struct lp_scene_queue *queue, boolean wait)
+{
+ struct scene_packet packet;
+ enum pipe_error ret;
+
+ packet.scene = NULL;
+
+ ret = util_ringbuffer_dequeue(queue->ring,
+ &packet.header,
+ sizeof packet / 4,
+ wait );
+ if (ret != PIPE_OK)
+ return NULL;
+
+ return packet.scene;
+}
+
+
+/** Add an lp_scene to tail of queue */
+void
+lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *scene)
+{
+ struct scene_packet packet;
+
+ packet.header.dwords = sizeof packet / 4;
+ packet.header.data24 = 0;
+ packet.scene = scene;
+
+ util_ringbuffer_enqueue(queue->ring, &packet.header);
+}
+
+
+
+
+
diff --git a/src/gallium/drivers/llvmpipe/lp_scene_queue.h b/src/gallium/drivers/llvmpipe/lp_scene_queue.h
new file mode 100644
index 0000000000..fd7c65a2c8
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_scene_queue.h
@@ -0,0 +1,51 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_SCENE_QUEUE
+#define LP_SCENE_QUEUE
+
+struct lp_scene_queue;
+struct lp_scene;
+
+
+struct lp_scene_queue *
+lp_scene_queue_create(void);
+
+void
+lp_scene_queue_destroy(struct lp_scene_queue *queue);
+
+struct lp_scene *
+lp_scene_dequeue(struct lp_scene_queue *queue, boolean wait);
+
+void
+lp_scene_enqueue(struct lp_scene_queue *queue, struct lp_scene *scene);
+
+
+
+
+#endif /* LP_BIN_QUEUE */
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
new file mode 100644
index 0000000000..6432cea862
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -0,0 +1,367 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+
+#include "gallivm/lp_bld_limits.h"
+#include "lp_texture.h"
+#include "lp_fence.h"
+#include "lp_jit.h"
+#include "lp_screen.h"
+#include "lp_context.h"
+#include "lp_debug.h"
+#include "lp_public.h"
+#include "lp_limits.h"
+
+#include "state_tracker/sw_winsys.h"
+
+#ifdef DEBUG
+int LP_DEBUG = 0;
+
+static const struct debug_named_value lp_debug_flags[] = {
+ { "pipe", DEBUG_PIPE, NULL },
+ { "tgsi", DEBUG_TGSI, NULL },
+ { "tex", DEBUG_TEX, NULL },
+ { "setup", DEBUG_SETUP, NULL },
+ { "rast", DEBUG_RAST, NULL },
+ { "query", DEBUG_QUERY, NULL },
+ { "screen", DEBUG_SCREEN, NULL },
+ { "show_tiles", DEBUG_SHOW_TILES, NULL },
+ { "show_subtiles", DEBUG_SHOW_SUBTILES, NULL },
+ { "counters", DEBUG_COUNTERS, NULL },
+ DEBUG_NAMED_VALUE_END
+};
+#endif
+
+
+static const char *
+llvmpipe_get_vendor(struct pipe_screen *screen)
+{
+ return "VMware, Inc.";
+}
+
+
+static const char *
+llvmpipe_get_name(struct pipe_screen *screen)
+{
+ return "llvmpipe";
+}
+
+
+static int
+llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ return PIPE_MAX_SAMPLERS;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 0;
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS;
+ case PIPE_CAP_NPOT_TEXTURES:
+ return 1;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ return 1;
+ case PIPE_CAP_GLSL:
+ return 1;
+ case PIPE_CAP_SM3:
+ return 1;
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ return 0;
+ case PIPE_CAP_POINT_SPRITE:
+ return 1;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return PIPE_MAX_COLOR_BUFS;
+ case PIPE_CAP_OCCLUSION_QUERY:
+ return 1;
+ case PIPE_CAP_TIMER_QUERY:
+ return 0;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ return 1;
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ return 1;
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ return 1;
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ return LP_MAX_TEXTURE_2D_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return LP_MAX_TEXTURE_3D_LEVELS;
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return LP_MAX_TEXTURE_2D_LEVELS;
+ case PIPE_CAP_TGSI_CONT_SUPPORTED:
+ return 1;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ return 1;
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ return 1;
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ return 0;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return 0;
+ case PIPE_CAP_MAX_VS_INSTRUCTIONS:
+ case PIPE_CAP_MAX_FS_INSTRUCTIONS:
+ case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS:
+ case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS:
+ case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS:
+ case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS:
+ case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS:
+ case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS:
+ /* There is no limit in number of instructions beyond available memory */
+ return 32768;
+ case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH:
+ case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH:
+ return LP_MAX_TGSI_NESTING;
+ case PIPE_CAP_MAX_VS_INPUTS:
+ case PIPE_CAP_MAX_FS_INPUTS:
+ return PIPE_MAX_ATTRIBS;
+ case PIPE_CAP_MAX_FS_CONSTS:
+ case PIPE_CAP_MAX_VS_CONSTS:
+ /* There is no limit in number of constants beyond available memory */
+ return 32768;
+ case PIPE_CAP_MAX_VS_TEMPS:
+ case PIPE_CAP_MAX_FS_TEMPS:
+ return LP_MAX_TGSI_TEMPS;
+ case PIPE_CAP_MAX_VS_ADDRS:
+ case PIPE_CAP_MAX_FS_ADDRS:
+ return LP_MAX_TGSI_ADDRS;
+ case PIPE_CAP_MAX_VS_PREDS:
+ case PIPE_CAP_MAX_FS_PREDS:
+ return LP_MAX_TGSI_PREDS;
+ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
+ return 1;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+
+static float
+llvmpipe_get_paramf(struct pipe_screen *screen, enum pipe_cap param)
+{
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ return 255.0; /* arbitrary */
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ /* fall-through */
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ return 255.0; /* arbitrary */
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 16.0; /* not actually signficant at this time */
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 16.0; /* arbitrary */
+ case PIPE_CAP_GUARD_BAND_LEFT:
+ case PIPE_CAP_GUARD_BAND_TOP:
+ case PIPE_CAP_GUARD_BAND_RIGHT:
+ case PIPE_CAP_GUARD_BAND_BOTTOM:
+ return 0.0;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+
+/**
+ * Query format support for creating a texture, drawing surface, etc.
+ * \param format the format to test
+ * \param type one of PIPE_TEXTURE, PIPE_SURFACE
+ */
+static boolean
+llvmpipe_is_format_supported( struct pipe_screen *_screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned bind,
+ unsigned geom_flags )
+{
+ struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
+ struct sw_winsys *winsys = screen->winsys;
+ const struct util_format_description *format_desc;
+
+ format_desc = util_format_description(format);
+ if (!format_desc)
+ return FALSE;
+
+ assert(target == PIPE_BUFFER ||
+ target == PIPE_TEXTURE_1D ||
+ target == PIPE_TEXTURE_2D ||
+ target == PIPE_TEXTURE_3D ||
+ target == PIPE_TEXTURE_CUBE);
+
+ if (sample_count > 1)
+ return FALSE;
+
+ if (bind & PIPE_BIND_RENDER_TARGET) {
+ if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
+ return FALSE;
+
+ if (format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+ return FALSE;
+
+ if (format_desc->block.width != 1 ||
+ format_desc->block.height != 1)
+ return FALSE;
+ }
+
+ if (bind & PIPE_BIND_DISPLAY_TARGET) {
+ if(!winsys->is_displaytarget_format_supported(winsys, bind, format))
+ return FALSE;
+ }
+
+ if (bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+ return FALSE;
+
+ if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+ return FALSE;
+
+ /* FIXME: Temporary restriction. See lp_state_fs.c. */
+ if (format_desc->block.bits != 32)
+ return FALSE;
+ }
+
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+ return util_format_s3tc_enabled;
+ }
+
+ /*
+ * Everything else should be supported by u_format.
+ */
+ return TRUE;
+}
+
+
+
+
+static void
+llvmpipe_flush_frontbuffer(struct pipe_screen *_screen,
+ struct pipe_surface *surface,
+ void *context_private)
+{
+ struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
+ struct sw_winsys *winsys = screen->winsys;
+ struct llvmpipe_resource *texture = llvmpipe_resource(surface->texture);
+
+ assert(texture->dt);
+ if (texture->dt)
+ winsys->displaytarget_display(winsys, texture->dt, context_private);
+}
+
+
+static void
+llvmpipe_destroy_screen( struct pipe_screen *_screen )
+{
+ struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
+ struct sw_winsys *winsys = screen->winsys;
+
+ lp_jit_screen_cleanup(screen);
+
+ if(winsys->destroy)
+ winsys->destroy(winsys);
+
+ FREE(screen);
+}
+
+
+
+/**
+ * Create a new pipe_screen object
+ * Note: we're not presently subclassing pipe_screen (no llvmpipe_screen).
+ */
+struct pipe_screen *
+llvmpipe_create_screen(struct sw_winsys *winsys)
+{
+ struct llvmpipe_screen *screen;
+
+#ifdef PIPE_ARCH_X86
+ /* require SSE2 due to LLVM PR6960. */
+ util_cpu_detect();
+ if (!util_cpu_caps.has_sse2)
+ return NULL;
+#endif
+
+ screen = CALLOC_STRUCT(llvmpipe_screen);
+
+#ifdef DEBUG
+ LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 );
+#endif
+
+ if (!screen)
+ return NULL;
+
+ screen->winsys = winsys;
+
+ screen->base.destroy = llvmpipe_destroy_screen;
+
+ screen->base.get_name = llvmpipe_get_name;
+ screen->base.get_vendor = llvmpipe_get_vendor;
+ screen->base.get_param = llvmpipe_get_param;
+ screen->base.get_paramf = llvmpipe_get_paramf;
+ screen->base.is_format_supported = llvmpipe_is_format_supported;
+
+ screen->base.context_create = llvmpipe_create_context;
+ screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer;
+
+ llvmpipe_init_screen_resource_funcs(&screen->base);
+ llvmpipe_init_screen_fence_funcs(&screen->base);
+
+ lp_jit_screen_init(screen);
+
+#ifdef PIPE_OS_WINDOWS
+ /* Multithreading not supported on windows until conditions and barriers are
+ * properly implemented. */
+ screen->num_threads = 0;
+#else
+#ifdef PIPE_OS_EMBEDDED
+ screen->num_threads = 0;
+#else
+ screen->num_threads = util_cpu_caps.nr_cpus;
+#endif
+ screen->num_threads = debug_get_num_option("LP_NUM_THREADS", screen->num_threads);
+ screen->num_threads = MIN2(screen->num_threads, LP_MAX_THREADS);
+#endif
+
+ util_format_s3tc_init();
+
+ return &screen->base;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h
new file mode 100644
index 0000000000..eb40f6823f
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_screen.h
@@ -0,0 +1,79 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ * @author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef LP_SCREEN_H
+#define LP_SCREEN_H
+
+#include "gallivm/lp_bld.h"
+#include <llvm-c/ExecutionEngine.h>
+
+#include "pipe/p_screen.h"
+#include "pipe/p_defines.h"
+
+
+struct sw_winsys;
+
+
+struct llvmpipe_screen
+{
+ struct pipe_screen base;
+
+ struct sw_winsys *winsys;
+
+ LLVMModuleRef module;
+ LLVMExecutionEngineRef engine;
+ LLVMModuleProviderRef provider;
+ LLVMTargetDataRef target;
+ LLVMPassManagerRef pass;
+
+ LLVMTypeRef context_ptr_type;
+
+ unsigned num_threads;
+
+ /* Increments whenever textures are modified. Contexts can track this.
+ */
+ unsigned timestamp;
+};
+
+
+
+
+static INLINE struct llvmpipe_screen *
+llvmpipe_screen( struct pipe_screen *pipe )
+{
+ return (struct llvmpipe_screen *)pipe;
+}
+
+
+
+#endif /* LP_SCREEN_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
new file mode 100644
index 0000000000..e8aafee33f
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -0,0 +1,953 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Tiling engine.
+ *
+ * Builds per-tile display lists and executes them on calls to
+ * lp_setup_flush().
+ */
+
+#include <limits.h>
+
+#include "pipe/p_defines.h"
+#include "util/u_framebuffer.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
+#include "lp_context.h"
+#include "lp_scene.h"
+#include "lp_scene_queue.h"
+#include "lp_texture.h"
+#include "lp_debug.h"
+#include "lp_fence.h"
+#include "lp_query.h"
+#include "lp_rast.h"
+#include "lp_setup_context.h"
+#include "lp_screen.h"
+#include "lp_state.h"
+#include "state_tracker/sw_winsys.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+
+
+static void set_scene_state( struct lp_setup_context *, enum setup_state );
+
+
+struct lp_scene *
+lp_setup_get_current_scene(struct lp_setup_context *setup)
+{
+ if (!setup->scene) {
+
+ /* wait for a free/empty scene
+ */
+ setup->scene = lp_scene_dequeue(setup->empty_scenes, TRUE);
+
+ assert(lp_scene_is_empty(setup->scene));
+
+ lp_scene_begin_binning(setup->scene,
+ &setup->fb );
+ }
+ return setup->scene;
+}
+
+
+/**
+ * Check if the size of the current scene has exceeded the limit.
+ * If so, flush/render it.
+ */
+static void
+setup_check_scene_size_and_flush(struct lp_setup_context *setup)
+{
+ if (setup->scene) {
+ struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ unsigned size = lp_scene_get_size(scene);
+
+ if (size > LP_MAX_SCENE_SIZE) {
+ /*printf("LLVMPIPE: scene size = %u, flushing.\n", size);*/
+ set_scene_state( setup, SETUP_FLUSHED );
+ /*assert(lp_scene_get_size(scene) == 0);*/
+ }
+ }
+}
+
+
+static void
+first_triangle( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4])
+{
+ set_scene_state( setup, SETUP_ACTIVE );
+ lp_setup_choose_triangle( setup );
+ setup->triangle( setup, v0, v1, v2 );
+}
+
+static void
+first_line( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4])
+{
+ set_scene_state( setup, SETUP_ACTIVE );
+ lp_setup_choose_line( setup );
+ setup->line( setup, v0, v1 );
+}
+
+static void
+first_point( struct lp_setup_context *setup,
+ const float (*v0)[4])
+{
+ set_scene_state( setup, SETUP_ACTIVE );
+ lp_setup_choose_point( setup );
+ setup->point( setup, v0 );
+}
+
+static void reset_context( struct lp_setup_context *setup )
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ /* Reset derived state */
+ setup->constants.stored_size = 0;
+ setup->constants.stored_data = NULL;
+ setup->fs.stored = NULL;
+ setup->dirty = ~0;
+
+ /* no current bin */
+ setup->scene = NULL;
+
+ /* Reset some state:
+ */
+ setup->clear.flags = 0;
+ setup->clear.clearzs.clearzs_mask = 0;
+
+ /* Have an explicit "start-binning" call and get rid of this
+ * pointer twiddling?
+ */
+ setup->line = first_line;
+ setup->point = first_point;
+ setup->triangle = first_triangle;
+}
+
+
+/** Rasterize all scene's bins */
+static void
+lp_setup_rasterize_scene( struct lp_setup_context *setup )
+{
+ struct lp_scene *scene = lp_setup_get_current_scene(setup);
+
+ lp_scene_rasterize(scene, setup->rast);
+
+ reset_context( setup );
+
+ LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__);
+}
+
+
+
+static void
+begin_binning( struct lp_setup_context *setup )
+{
+ struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ boolean need_zsload = FALSE;
+ if (setup->fb.zsbuf &&
+ ((setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
+ util_format_is_depth_and_stencil(setup->fb.zsbuf->format))
+ need_zsload = TRUE;
+
+ LP_DBG(DEBUG_SETUP, "%s color: %s depth: %s\n", __FUNCTION__,
+ (setup->clear.flags & PIPE_CLEAR_COLOR) ? "clear": "load",
+ need_zsload ? "clear": "load");
+
+ if (setup->fb.nr_cbufs) {
+ if (setup->clear.flags & PIPE_CLEAR_COLOR) {
+ lp_scene_bin_everywhere( scene,
+ lp_rast_clear_color,
+ setup->clear.color );
+ scene->has_color_clear = TRUE;
+ }
+ }
+
+ if (setup->fb.zsbuf) {
+ if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) {
+ if (!need_zsload)
+ scene->has_depthstencil_clear = TRUE;
+ lp_scene_bin_everywhere( scene,
+ lp_rast_clear_zstencil,
+ lp_rast_arg_clearzs(&setup->clear.clearzs) );
+ }
+ }
+
+ LP_DBG(DEBUG_SETUP, "%s done\n", __FUNCTION__);
+}
+
+
+/* This basically bins and then flushes any outstanding full-screen
+ * clears.
+ *
+ * TODO: fast path for fullscreen clears and no triangles.
+ */
+static void
+execute_clears( struct lp_setup_context *setup )
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ begin_binning( setup );
+ lp_setup_rasterize_scene( setup );
+}
+
+
+static void
+set_scene_state( struct lp_setup_context *setup,
+ enum setup_state new_state )
+{
+ unsigned old_state = setup->state;
+
+ if (old_state == new_state)
+ return;
+
+ LP_DBG(DEBUG_SETUP, "%s old %d new %d\n", __FUNCTION__, old_state, new_state);
+
+ switch (new_state) {
+ case SETUP_ACTIVE:
+ begin_binning( setup );
+ break;
+
+ case SETUP_CLEARED:
+ if (old_state == SETUP_ACTIVE) {
+ assert(0);
+ return;
+ }
+ break;
+
+ case SETUP_FLUSHED:
+ if (old_state == SETUP_CLEARED)
+ execute_clears( setup );
+ else
+ lp_setup_rasterize_scene( setup );
+ break;
+
+ default:
+ assert(0 && "invalid setup state mode");
+ }
+
+ setup->state = new_state;
+}
+
+
+/**
+ * \param flags bitmask of PIPE_FLUSH_x flags
+ */
+void
+lp_setup_flush( struct lp_setup_context *setup,
+ unsigned flags )
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ if (setup->scene) {
+ struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ union lp_rast_cmd_arg dummy = {0};
+
+ if (flags & (PIPE_FLUSH_SWAPBUFFERS |
+ PIPE_FLUSH_FRAME)) {
+ /* Store colors in the linear color buffer(s).
+ * If we don't do this here, we'll end up converting the tiled
+ * data to linear in the texture_unmap() function, which will
+ * not be a parallel/threaded operation as here.
+ */
+ lp_scene_bin_everywhere(scene, lp_rast_store_color, dummy);
+ }
+ }
+
+ set_scene_state( setup, SETUP_FLUSHED );
+}
+
+
+void
+lp_setup_bind_framebuffer( struct lp_setup_context *setup,
+ const struct pipe_framebuffer_state *fb )
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ /* Flush any old scene.
+ */
+ set_scene_state( setup, SETUP_FLUSHED );
+
+ /* Set new state. This will be picked up later when we next need a
+ * scene.
+ */
+ util_copy_framebuffer_state(&setup->fb, fb);
+}
+
+
+void
+lp_setup_clear( struct lp_setup_context *setup,
+ const float *color,
+ double depth,
+ unsigned stencil,
+ unsigned flags )
+{
+ struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ unsigned i;
+ boolean full_zs_clear = TRUE;
+ uint32_t mask = 0;
+
+ LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);
+
+
+ if (flags & PIPE_CLEAR_COLOR) {
+ for (i = 0; i < 4; ++i)
+ setup->clear.color.clear_color[i] = float_to_ubyte(color[i]);
+ }
+
+ if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
+ if (setup->fb.zsbuf &&
+ ((flags & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
+ util_format_is_depth_and_stencil(setup->fb.zsbuf->format))
+ full_zs_clear = FALSE;
+
+ if (full_zs_clear) {
+ setup->clear.clearzs.clearzs_value =
+ util_pack_z_stencil(setup->fb.zsbuf->format,
+ depth,
+ stencil);
+ setup->clear.clearzs.clearzs_mask = 0xffffffff;
+ }
+ else {
+ /* hmm */
+ uint32_t tmpval;
+ if (flags & PIPE_CLEAR_DEPTH) {
+ tmpval = util_pack_z(setup->fb.zsbuf->format,
+ depth);
+ switch (setup->fb.zsbuf->format) {
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ mask = 0xffffff;
+ break;
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ mask = 0xffffff00;
+ break;
+ default:
+ assert(0);
+ }
+ }
+ else {
+ switch (setup->fb.zsbuf->format) {
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ mask = 0xff000000;
+ tmpval = stencil << 24;
+ break;
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ mask = 0xff;
+ tmpval = stencil;
+ break;
+ default:
+ assert(0);
+ tmpval = 0;
+ }
+ }
+ setup->clear.clearzs.clearzs_mask |= mask;
+ setup->clear.clearzs.clearzs_value =
+ (setup->clear.clearzs.clearzs_value & ~mask) | (tmpval & mask);
+ }
+ }
+
+ if (setup->state == SETUP_ACTIVE) {
+ /* Add the clear to existing scene. In the unusual case where
+ * both color and depth-stencil are being cleared when there's
+ * already been some rendering, we could discard the currently
+ * binned scene and start again, but I don't see that as being
+ * a common usage.
+ */
+ if (flags & PIPE_CLEAR_COLOR) {
+ lp_scene_bin_everywhere( scene,
+ lp_rast_clear_color,
+ setup->clear.color );
+ scene->has_color_clear = TRUE;
+ }
+
+ if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
+ if (full_zs_clear)
+ scene->has_depthstencil_clear = TRUE;
+ else
+ setup->clear.clearzs.clearzs_mask = mask;
+ lp_scene_bin_everywhere( scene,
+ lp_rast_clear_zstencil,
+ lp_rast_arg_clearzs(&setup->clear.clearzs) );
+
+
+ }
+
+ }
+ else {
+ /* Put ourselves into the 'pre-clear' state, specifically to try
+ * and accumulate multiple clears to color and depth_stencil
+ * buffers which the app or state-tracker might issue
+ * separately.
+ */
+ set_scene_state( setup, SETUP_CLEARED );
+
+ setup->clear.flags |= flags;
+ }
+}
+
+
+/**
+ * Emit a fence.
+ */
+struct pipe_fence_handle *
+lp_setup_fence( struct lp_setup_context *setup )
+{
+ if (setup->num_threads == 0) {
+ return NULL;
+ }
+ else {
+ struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ const unsigned rank = lp_scene_get_num_bins( scene ); /* xxx */
+ struct lp_fence *fence = lp_fence_create(rank);
+
+ LP_DBG(DEBUG_SETUP, "%s rank %u\n", __FUNCTION__, rank);
+
+ set_scene_state( setup, SETUP_ACTIVE );
+
+ /* insert the fence into all command bins */
+ lp_scene_bin_everywhere( scene,
+ lp_rast_fence,
+ lp_rast_arg_fence(fence) );
+
+ return (struct pipe_fence_handle *) fence;
+ }
+}
+
+
+void
+lp_setup_set_triangle_state( struct lp_setup_context *setup,
+ unsigned cull_mode,
+ boolean ccw_is_frontface,
+ boolean scissor,
+ boolean gl_rasterization_rules)
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ setup->ccw_is_frontface = ccw_is_frontface;
+ setup->cullmode = cull_mode;
+ setup->triangle = first_triangle;
+ setup->scissor_test = scissor;
+ setup->pixel_offset = gl_rasterization_rules ? 0.5f : 0.0f;
+}
+
+
+
+void
+lp_setup_set_fs_inputs( struct lp_setup_context *setup,
+ const struct lp_shader_input *input,
+ unsigned nr )
+{
+ LP_DBG(DEBUG_SETUP, "%s %p %u\n", __FUNCTION__, (void *) input, nr);
+
+ memcpy( setup->fs.input, input, nr * sizeof input[0] );
+ setup->fs.nr_inputs = nr;
+}
+
+void
+lp_setup_set_fs_variant( struct lp_setup_context *setup,
+ struct lp_fragment_shader_variant *variant)
+{
+ LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__,
+ variant);
+ /* FIXME: reference count */
+
+ setup->fs.current.variant = variant;
+ setup->dirty |= LP_SETUP_NEW_FS;
+}
+
+void
+lp_setup_set_fs_constants(struct lp_setup_context *setup,
+ struct pipe_resource *buffer)
+{
+ LP_DBG(DEBUG_SETUP, "%s %p\n", __FUNCTION__, (void *) buffer);
+
+ pipe_resource_reference(&setup->constants.current, buffer);
+
+ setup->dirty |= LP_SETUP_NEW_CONSTANTS;
+}
+
+
+void
+lp_setup_set_alpha_ref_value( struct lp_setup_context *setup,
+ float alpha_ref_value )
+{
+ LP_DBG(DEBUG_SETUP, "%s %f\n", __FUNCTION__, alpha_ref_value);
+
+ if(setup->fs.current.jit_context.alpha_ref_value != alpha_ref_value) {
+ setup->fs.current.jit_context.alpha_ref_value = alpha_ref_value;
+ setup->dirty |= LP_SETUP_NEW_FS;
+ }
+}
+
+void
+lp_setup_set_stencil_ref_values( struct lp_setup_context *setup,
+ const ubyte refs[2] )
+{
+ LP_DBG(DEBUG_SETUP, "%s %d %d\n", __FUNCTION__, refs[0], refs[1]);
+
+ if (setup->fs.current.jit_context.stencil_ref_front != refs[0] ||
+ setup->fs.current.jit_context.stencil_ref_back != refs[1]) {
+ setup->fs.current.jit_context.stencil_ref_front = refs[0];
+ setup->fs.current.jit_context.stencil_ref_back = refs[1];
+ setup->dirty |= LP_SETUP_NEW_FS;
+ }
+}
+
+void
+lp_setup_set_blend_color( struct lp_setup_context *setup,
+ const struct pipe_blend_color *blend_color )
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ assert(blend_color);
+
+ if(memcmp(&setup->blend_color.current, blend_color, sizeof *blend_color) != 0) {
+ memcpy(&setup->blend_color.current, blend_color, sizeof *blend_color);
+ setup->dirty |= LP_SETUP_NEW_BLEND_COLOR;
+ }
+}
+
+
+void
+lp_setup_set_scissor( struct lp_setup_context *setup,
+ const struct pipe_scissor_state *scissor )
+{
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ assert(scissor);
+
+ if (memcmp(&setup->scissor.current, scissor, sizeof(*scissor)) != 0) {
+ setup->scissor.current = *scissor; /* struct copy */
+ setup->dirty |= LP_SETUP_NEW_SCISSOR;
+ }
+}
+
+
+void
+lp_setup_set_flatshade_first( struct lp_setup_context *setup,
+ boolean flatshade_first )
+{
+ setup->flatshade_first = flatshade_first;
+}
+
+
+void
+lp_setup_set_vertex_info( struct lp_setup_context *setup,
+ struct vertex_info *vertex_info )
+{
+ /* XXX: just silently holding onto the pointer:
+ */
+ setup->vertex_info = vertex_info;
+}
+
+
+/**
+ * Called during state validation when LP_NEW_SAMPLER_VIEW is set.
+ */
+void
+lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
+ unsigned num,
+ struct pipe_sampler_view **views)
+{
+ unsigned i;
+
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ struct pipe_sampler_view *view = i < num ? views[i] : NULL;
+
+ if(view) {
+ struct pipe_resource *tex = view->texture;
+ struct llvmpipe_resource *lp_tex = llvmpipe_resource(tex);
+ struct lp_jit_texture *jit_tex;
+ jit_tex = &setup->fs.current.jit_context.textures[i];
+ jit_tex->width = tex->width0;
+ jit_tex->height = tex->height0;
+ jit_tex->depth = tex->depth0;
+ jit_tex->last_level = tex->last_level;
+
+ /* We're referencing the texture's internal data, so save a
+ * reference to it.
+ */
+ pipe_resource_reference(&setup->fs.current_tex[i], tex);
+
+ if (!lp_tex->dt) {
+ /* regular texture - setup array of mipmap level pointers */
+ int j;
+ for (j = 0; j <= tex->last_level; j++) {
+ jit_tex->data[j] =
+ llvmpipe_get_texture_image_all(lp_tex, j, LP_TEX_USAGE_READ,
+ LP_TEX_LAYOUT_LINEAR);
+ jit_tex->row_stride[j] = lp_tex->row_stride[j];
+ jit_tex->img_stride[j] = lp_tex->img_stride[j];
+ }
+ }
+ else {
+ /* display target texture/surface */
+ /*
+ * XXX: Where should this be unmapped?
+ */
+
+ struct llvmpipe_screen *screen = llvmpipe_screen(tex->screen);
+ struct sw_winsys *winsys = screen->winsys;
+ jit_tex->data[0] = winsys->displaytarget_map(winsys, lp_tex->dt,
+ PIPE_TRANSFER_READ);
+ jit_tex->row_stride[0] = lp_tex->row_stride[0];
+ jit_tex->img_stride[0] = lp_tex->img_stride[0];
+ assert(jit_tex->data[0]);
+ }
+ }
+ }
+
+ setup->dirty |= LP_SETUP_NEW_FS;
+}
+
+
+/**
+ * Is the given texture referenced by any scene?
+ * Note: we have to check all scenes including any scenes currently
+ * being rendered and the current scene being built.
+ */
+unsigned
+lp_setup_is_resource_referenced( const struct lp_setup_context *setup,
+ const struct pipe_resource *texture )
+{
+ unsigned i;
+
+ /* check the render targets */
+ for (i = 0; i < setup->fb.nr_cbufs; i++) {
+ if (setup->fb.cbufs[i]->texture == texture)
+ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+ }
+ if (setup->fb.zsbuf && setup->fb.zsbuf->texture == texture) {
+ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+ }
+
+ /* check textures referenced by the scene */
+ for (i = 0; i < Elements(setup->scenes); i++) {
+ if (lp_scene_is_resource_referenced(setup->scenes[i], texture)) {
+ return PIPE_REFERENCED_FOR_READ;
+ }
+ }
+
+ return PIPE_UNREFERENCED;
+}
+
+
+/**
+ * Called by vbuf code when we're about to draw something.
+ */
+void
+lp_setup_update_state( struct lp_setup_context *setup )
+{
+ struct lp_scene *scene;
+
+ LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
+
+ setup_check_scene_size_and_flush(setup);
+
+ scene = lp_setup_get_current_scene(setup);
+
+ assert(setup->fs.current.variant);
+
+ /* Some of the 'draw' pipeline stages may have changed some driver state.
+ * Make sure we've processed those state changes before anything else.
+ *
+ * XXX this is the only place where llvmpipe_context is used in the
+ * setup code. This may get refactored/changed...
+ */
+ {
+ struct llvmpipe_context *lp = llvmpipe_context(scene->pipe);
+ if (lp->dirty) {
+ llvmpipe_update_derived(lp);
+ }
+ assert(lp->dirty == 0);
+ }
+
+ if(setup->dirty & LP_SETUP_NEW_BLEND_COLOR) {
+ uint8_t *stored;
+ unsigned i, j;
+
+ stored = lp_scene_alloc_aligned(scene, 4 * 16, 16);
+
+ if (stored) {
+ /* smear each blend color component across 16 ubyte elements */
+ for (i = 0; i < 4; ++i) {
+ uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]);
+ for (j = 0; j < 16; ++j)
+ stored[i*16 + j] = c;
+ }
+
+ setup->blend_color.stored = stored;
+
+ setup->fs.current.jit_context.blend_color = setup->blend_color.stored;
+ }
+
+ setup->dirty |= LP_SETUP_NEW_FS;
+ }
+
+ if (setup->dirty & LP_SETUP_NEW_SCISSOR) {
+ float *stored;
+
+ stored = lp_scene_alloc_aligned(scene, 4 * sizeof(int32_t), 16);
+
+ if (stored) {
+ stored[0] = (float) setup->scissor.current.minx;
+ stored[1] = (float) setup->scissor.current.miny;
+ stored[2] = (float) setup->scissor.current.maxx;
+ stored[3] = (float) setup->scissor.current.maxy;
+
+ setup->scissor.stored = stored;
+
+ setup->fs.current.jit_context.scissor_xmin = stored[0];
+ setup->fs.current.jit_context.scissor_ymin = stored[1];
+ setup->fs.current.jit_context.scissor_xmax = stored[2];
+ setup->fs.current.jit_context.scissor_ymax = stored[3];
+ }
+
+ setup->dirty |= LP_SETUP_NEW_FS;
+ }
+
+ if(setup->dirty & LP_SETUP_NEW_CONSTANTS) {
+ struct pipe_resource *buffer = setup->constants.current;
+
+ if(buffer) {
+ unsigned current_size = buffer->width0;
+ const void *current_data = llvmpipe_resource_data(buffer);
+
+ /* TODO: copy only the actually used constants? */
+
+ if(setup->constants.stored_size != current_size ||
+ !setup->constants.stored_data ||
+ memcmp(setup->constants.stored_data,
+ current_data,
+ current_size) != 0) {
+ void *stored;
+
+ stored = lp_scene_alloc(scene, current_size);
+ if(stored) {
+ memcpy(stored,
+ current_data,
+ current_size);
+ setup->constants.stored_size = current_size;
+ setup->constants.stored_data = stored;
+ }
+ }
+ }
+ else {
+ setup->constants.stored_size = 0;
+ setup->constants.stored_data = NULL;
+ }
+
+ setup->fs.current.jit_context.constants = setup->constants.stored_data;
+ setup->dirty |= LP_SETUP_NEW_FS;
+ }
+
+
+ if(setup->dirty & LP_SETUP_NEW_FS) {
+ if(!setup->fs.stored ||
+ memcmp(setup->fs.stored,
+ &setup->fs.current,
+ sizeof setup->fs.current) != 0) {
+ /* The fs state that's been stored in the scene is different from
+ * the new, current state. So allocate a new lp_rast_state object
+ * and append it to the bin's setup data buffer.
+ */
+ uint i;
+ struct lp_rast_state *stored =
+ (struct lp_rast_state *) lp_scene_alloc(scene, sizeof *stored);
+ if(stored) {
+ memcpy(stored,
+ &setup->fs.current,
+ sizeof setup->fs.current);
+ setup->fs.stored = stored;
+
+ /* put the state-set command into all bins */
+ lp_scene_bin_state_command( scene,
+ lp_rast_set_state,
+ lp_rast_arg_state(setup->fs.stored) );
+ }
+
+ /* The scene now references the textures in the rasterization
+ * state record. Note that now.
+ */
+ for (i = 0; i < Elements(setup->fs.current_tex); i++) {
+ if (setup->fs.current_tex[i])
+ lp_scene_add_resource_reference(scene, setup->fs.current_tex[i]);
+ }
+ }
+ }
+
+ setup->dirty = 0;
+
+ assert(setup->fs.stored);
+}
+
+
+
+/* Only caller is lp_setup_vbuf_destroy()
+ */
+void
+lp_setup_destroy( struct lp_setup_context *setup )
+{
+ uint i;
+
+ reset_context( setup );
+
+ util_unreference_framebuffer_state(&setup->fb);
+
+ for (i = 0; i < Elements(setup->fs.current_tex); i++) {
+ pipe_resource_reference(&setup->fs.current_tex[i], NULL);
+ }
+
+ pipe_resource_reference(&setup->constants.current, NULL);
+
+ /* free the scenes in the 'empty' queue */
+ while (1) {
+ struct lp_scene *scene = lp_scene_dequeue(setup->empty_scenes, FALSE);
+ if (!scene)
+ break;
+ lp_scene_destroy(scene);
+ }
+
+ lp_scene_queue_destroy(setup->empty_scenes);
+
+ lp_rast_destroy( setup->rast );
+
+ FREE( setup );
+}
+
+
+/**
+ * Create a new primitive tiling engine. Plug it into the backend of
+ * the draw module. Currently also creates a rasterizer to use with
+ * it.
+ */
+struct lp_setup_context *
+lp_setup_create( struct pipe_context *pipe,
+ struct draw_context *draw )
+{
+ struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
+ struct lp_setup_context *setup = CALLOC_STRUCT(lp_setup_context);
+ unsigned i;
+
+ if (!setup)
+ return NULL;
+
+ lp_setup_init_vbuf(setup);
+
+ setup->empty_scenes = lp_scene_queue_create();
+ if (!setup->empty_scenes)
+ goto fail;
+
+ /* XXX: move this to the screen and share between contexts:
+ */
+ setup->num_threads = screen->num_threads;
+ setup->rast = lp_rast_create(screen->num_threads);
+ if (!setup->rast)
+ goto fail;
+
+ setup->vbuf = draw_vbuf_stage(draw, &setup->base);
+ if (!setup->vbuf)
+ goto fail;
+
+ draw_set_rasterize_stage(draw, setup->vbuf);
+ draw_set_render(draw, &setup->base);
+
+ /* create some empty scenes */
+ for (i = 0; i < MAX_SCENES; i++) {
+ setup->scenes[i] = lp_scene_create( pipe, setup->empty_scenes );
+
+ lp_scene_enqueue(setup->empty_scenes, setup->scenes[i]);
+ }
+
+ setup->triangle = first_triangle;
+ setup->line = first_line;
+ setup->point = first_point;
+
+ setup->dirty = ~0;
+
+ return setup;
+
+fail:
+ if (setup->rast)
+ lp_rast_destroy( setup->rast );
+
+ if (setup->vbuf)
+ ;
+
+ if (setup->empty_scenes)
+ lp_scene_queue_destroy(setup->empty_scenes);
+
+ FREE(setup);
+ return NULL;
+}
+
+
+/**
+ * Put a BeginQuery command into all bins.
+ */
+void
+lp_setup_begin_query(struct lp_setup_context *setup,
+ struct llvmpipe_query *pq)
+{
+ struct lp_scene * scene = lp_setup_get_current_scene(setup);
+ union lp_rast_cmd_arg cmd_arg;
+
+ /* init the query to its beginning state */
+ pq->done = FALSE;
+ pq->tile_count = 0;
+ pq->num_tiles = scene->tiles_x * scene->tiles_y;
+ assert(pq->num_tiles > 0);
+
+ memset(pq->count, 0, sizeof(pq->count)); /* reset all counters */
+
+ cmd_arg.query_obj = pq;
+ lp_scene_bin_everywhere(scene, lp_rast_begin_query, cmd_arg);
+ pq->binned = TRUE;
+}
+
+
+/**
+ * Put an EndQuery command into all bins.
+ */
+void
+lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq)
+{
+ struct lp_scene * scene = lp_setup_get_current_scene(setup);
+ union lp_rast_cmd_arg cmd_arg;
+
+ cmd_arg.query_obj = pq;
+ lp_scene_bin_everywhere(scene, lp_rast_end_query, cmd_arg);
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
new file mode 100644
index 0000000000..6a0dc55129
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -0,0 +1,156 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#ifndef LP_SETUP_H
+#define LP_SETUP_H
+
+#include "pipe/p_compiler.h"
+#include "lp_jit.h"
+
+struct draw_context;
+struct vertex_info;
+
+enum lp_interp {
+ LP_INTERP_CONSTANT,
+ LP_INTERP_LINEAR,
+ LP_INTERP_PERSPECTIVE,
+ LP_INTERP_POSITION,
+ LP_INTERP_FACING
+};
+
+
+/**
+ * Describes how to compute the interpolation coefficients (a0, dadx, dady)
+ * from the vertices passed into our triangle/line/point functions by the
+ * draw module.
+ *
+ * Vertices are treated as an array of float[4] values, indexed by
+ * src_index.
+ */
+struct lp_shader_input {
+ enum lp_interp interp; /* how to interpolate values */
+ unsigned src_index; /* where to find values in incoming vertices */
+ unsigned usage_mask; /* bitmask of TGSI_WRITEMASK_x flags */
+};
+
+struct pipe_resource;
+struct pipe_query;
+struct pipe_surface;
+struct pipe_blend_color;
+struct pipe_screen;
+struct pipe_framebuffer_state;
+struct lp_fragment_shader_variant;
+struct lp_jit_context;
+struct llvmpipe_query;
+
+
+struct lp_setup_context *
+lp_setup_create( struct pipe_context *pipe,
+ struct draw_context *draw );
+
+void
+lp_setup_clear(struct lp_setup_context *setup,
+ const float *clear_color,
+ double clear_depth,
+ unsigned clear_stencil,
+ unsigned flags);
+
+struct pipe_fence_handle *
+lp_setup_fence( struct lp_setup_context *setup );
+
+
+void
+lp_setup_flush( struct lp_setup_context *setup,
+ unsigned flags );
+
+
+void
+lp_setup_bind_framebuffer( struct lp_setup_context *setup,
+ const struct pipe_framebuffer_state *fb );
+
+void
+lp_setup_set_triangle_state( struct lp_setup_context *setup,
+ unsigned cullmode,
+ boolean front_is_ccw,
+ boolean scissor,
+ boolean gl_rasterization_rules );
+
+void
+lp_setup_set_fs_inputs( struct lp_setup_context *setup,
+ const struct lp_shader_input *interp,
+ unsigned nr );
+
+void
+lp_setup_set_fs_variant( struct lp_setup_context *setup,
+ struct lp_fragment_shader_variant *variant );
+
+void
+lp_setup_set_fs_constants(struct lp_setup_context *setup,
+ struct pipe_resource *buffer);
+
+
+void
+lp_setup_set_alpha_ref_value( struct lp_setup_context *setup,
+ float alpha_ref_value );
+
+void
+lp_setup_set_stencil_ref_values( struct lp_setup_context *setup,
+ const ubyte refs[2] );
+
+void
+lp_setup_set_blend_color( struct lp_setup_context *setup,
+ const struct pipe_blend_color *blend_color );
+
+void
+lp_setup_set_scissor( struct lp_setup_context *setup,
+ const struct pipe_scissor_state *scissor );
+
+void
+lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
+ unsigned num,
+ struct pipe_sampler_view **views);
+
+unsigned
+lp_setup_is_resource_referenced( const struct lp_setup_context *setup,
+ const struct pipe_resource *texture );
+
+void
+lp_setup_set_flatshade_first( struct lp_setup_context *setup,
+ boolean flatshade_first );
+
+void
+lp_setup_set_vertex_info( struct lp_setup_context *setup,
+ struct vertex_info *info );
+
+void
+lp_setup_begin_query(struct lp_setup_context *setup,
+ struct llvmpipe_query *pq);
+
+void
+lp_setup_end_query(struct lp_setup_context *setup,
+ struct llvmpipe_query *pq);
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
new file mode 100644
index 0000000000..c8b8a2480b
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -0,0 +1,162 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * The setup code is concerned with point/line/triangle setup and
+ * putting commands/data into the bins.
+ */
+
+
+#ifndef LP_SETUP_CONTEXT_H
+#define LP_SETUP_CONTEXT_H
+
+#include "lp_setup.h"
+#include "lp_rast.h"
+#include "lp_tile_soa.h" /* for TILE_SIZE */
+#include "lp_scene.h"
+
+#include "draw/draw_vbuf.h"
+
+#define LP_SETUP_NEW_FS 0x01
+#define LP_SETUP_NEW_CONSTANTS 0x02
+#define LP_SETUP_NEW_BLEND_COLOR 0x04
+#define LP_SETUP_NEW_SCISSOR 0x08
+
+
+struct lp_scene_queue;
+
+
+/** Max number of scenes */
+#define MAX_SCENES 2
+
+
+
+/**
+ * Point/line/triangle setup context.
+ * Note: "stored" below indicates data which is stored in the bins,
+ * not arbitrary malloc'd memory.
+ *
+ *
+ * Subclass of vbuf_render, plugged directly into the draw module as
+ * the rendering backend.
+ */
+struct lp_setup_context
+{
+ struct vbuf_render base;
+
+ struct vertex_info *vertex_info;
+ uint prim;
+ uint vertex_size;
+ uint nr_vertices;
+ uint vertex_buffer_size;
+ void *vertex_buffer;
+
+ /* Final pipeline stage for draw module. Draw module should
+ * create/install this itself now.
+ */
+ struct draw_stage *vbuf;
+ unsigned num_threads;
+ struct lp_rasterizer *rast;
+ struct lp_scene *scenes[MAX_SCENES]; /**< all the scenes */
+ struct lp_scene *scene; /**< current scene being built */
+ struct lp_scene_queue *empty_scenes; /**< queue of empty scenes */
+
+ boolean flatshade_first;
+ boolean ccw_is_frontface;
+ boolean scissor_test;
+ unsigned cullmode;
+ float pixel_offset;
+
+ struct pipe_framebuffer_state fb;
+
+ struct {
+ unsigned flags;
+ union lp_rast_cmd_arg color; /**< lp_rast_clear_color() cmd */
+ struct lp_rast_clearzs clearzs; /**< lp_rast_clear_zstencil() cmd */
+ } clear;
+
+ enum setup_state {
+ SETUP_FLUSHED,
+ SETUP_CLEARED,
+ SETUP_ACTIVE
+ } state;
+
+ struct {
+ struct lp_shader_input input[PIPE_MAX_ATTRIBS];
+ unsigned nr_inputs;
+
+ const struct lp_rast_state *stored; /**< what's in the scene */
+ struct lp_rast_state current; /**< currently set state */
+ struct pipe_resource *current_tex[PIPE_MAX_SAMPLERS];
+ } fs;
+
+ /** fragment shader constants */
+ struct {
+ struct pipe_resource *current;
+ unsigned stored_size;
+ const void *stored_data;
+ } constants;
+
+ struct {
+ struct pipe_blend_color current;
+ uint8_t *stored;
+ } blend_color;
+
+ struct {
+ struct pipe_scissor_state current;
+ const void *stored;
+ } scissor;
+
+ unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */
+
+ void (*point)( struct lp_setup_context *,
+ const float (*v0)[4]);
+
+ void (*line)( struct lp_setup_context *,
+ const float (*v0)[4],
+ const float (*v1)[4]);
+
+ void (*triangle)( struct lp_setup_context *,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4]);
+};
+
+void lp_setup_choose_triangle( struct lp_setup_context *setup );
+void lp_setup_choose_line( struct lp_setup_context *setup );
+void lp_setup_choose_point( struct lp_setup_context *setup );
+
+struct lp_scene *lp_setup_get_current_scene(struct lp_setup_context *setup);
+
+void lp_setup_init_vbuf(struct lp_setup_context *setup);
+
+void lp_setup_update_state( struct lp_setup_context *setup );
+
+void lp_setup_destroy( struct lp_setup_context *setup );
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
new file mode 100644
index 0000000000..be41c44e6f
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -0,0 +1,47 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Binning code for lines
+ */
+
+#include "lp_setup_context.h"
+
+static void line_nop( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4] )
+{
+}
+
+
+void
+lp_setup_choose_line( struct lp_setup_context *setup )
+{
+ setup->line = line_nop;
+}
+
+
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c
new file mode 100644
index 0000000000..9f69e6c5ce
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -0,0 +1,46 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Binning code for points
+ */
+
+#include "lp_setup_context.h"
+
+static void point_nop( struct lp_setup_context *setup,
+ const float (*v0)[4] )
+{
+}
+
+
+void
+lp_setup_choose_point( struct lp_setup_context *setup )
+{
+ setup->point = point_nop;
+}
+
+
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
new file mode 100644
index 0000000000..0557d35f8b
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -0,0 +1,723 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Binning code for triangles
+ */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "lp_perf.h"
+#include "lp_setup_context.h"
+#include "lp_rast.h"
+#include "lp_state_fs.h"
+
+#define NUM_CHANNELS 4
+
+
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ */
+static void constant_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ unsigned slot,
+ const float value,
+ unsigned i )
+{
+ tri->inputs.a0[slot][i] = value;
+ tri->inputs.dadx[slot][i] = 0.0f;
+ tri->inputs.dady[slot][i] = 0.0f;
+}
+
+
+/**
+ * Compute a0, dadx and dady for a linearly interpolated coefficient,
+ * for a triangle.
+ */
+static void linear_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ float oneoverarea,
+ unsigned slot,
+ const float (*v1)[4],
+ const float (*v2)[4],
+ const float (*v3)[4],
+ unsigned vert_attr,
+ unsigned i)
+{
+ float a1 = v1[vert_attr][i];
+ float a2 = v2[vert_attr][i];
+ float a3 = v3[vert_attr][i];
+
+ float da12 = a1 - a2;
+ float da31 = a3 - a1;
+ float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
+ float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
+
+ tri->inputs.dadx[slot][i] = dadx;
+ tri->inputs.dady[slot][i] = dady;
+
+ /* calculate a0 as the value which would be sampled for the
+ * fragment at (0,0), taking into account that we want to sample at
+ * pixel centers, in other words (0.5, 0.5).
+ *
+ * this is neat but unfortunately not a good way to do things for
+ * triangles with very large values of dadx or dady as it will
+ * result in the subtraction and re-addition from a0 of a very
+ * large number, which means we'll end up loosing a lot of the
+ * fractional bits and precision from a0. the way to fix this is
+ * to define a0 as the sample at a pixel center somewhere near vmin
+ * instead - i'll switch to this later.
+ */
+ tri->inputs.a0[slot][i] = (a1 -
+ (dadx * (v1[0][0] - setup->pixel_offset) +
+ dady * (v1[0][1] - setup->pixel_offset)));
+}
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a triangle.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady).
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ */
+static void perspective_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ float oneoverarea,
+ unsigned slot,
+ const float (*v1)[4],
+ const float (*v2)[4],
+ const float (*v3)[4],
+ unsigned vert_attr,
+ unsigned i)
+{
+ /* premultiply by 1/w (v[0][3] is always 1/w):
+ */
+ float a1 = v1[vert_attr][i] * v1[0][3];
+ float a2 = v2[vert_attr][i] * v2[0][3];
+ float a3 = v3[vert_attr][i] * v3[0][3];
+ float da12 = a1 - a2;
+ float da31 = a3 - a1;
+ float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
+ float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
+
+ tri->inputs.dadx[slot][i] = dadx;
+ tri->inputs.dady[slot][i] = dady;
+ tri->inputs.a0[slot][i] = (a1 -
+ (dadx * (v1[0][0] - setup->pixel_offset) +
+ dady * (v1[0][1] - setup->pixel_offset)));
+}
+
+
+/**
+ * Special coefficient setup for gl_FragCoord.
+ * X and Y are trivial
+ * Z and W are copied from position_coef which should have already been computed.
+ * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
+ */
+static void
+setup_fragcoord_coef(struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ float oneoverarea,
+ unsigned slot,
+ const float (*v1)[4],
+ const float (*v2)[4],
+ const float (*v3)[4],
+ unsigned usage_mask)
+{
+ /*X*/
+ if (usage_mask & TGSI_WRITEMASK_X) {
+ tri->inputs.a0[slot][0] = 0.0;
+ tri->inputs.dadx[slot][0] = 1.0;
+ tri->inputs.dady[slot][0] = 0.0;
+ }
+
+ /*Y*/
+ if (usage_mask & TGSI_WRITEMASK_Y) {
+ tri->inputs.a0[slot][1] = 0.0;
+ tri->inputs.dadx[slot][1] = 0.0;
+ tri->inputs.dady[slot][1] = 1.0;
+ }
+
+ /*Z*/
+ if (usage_mask & TGSI_WRITEMASK_Z) {
+ linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2);
+ }
+
+ /*W*/
+ if (usage_mask & TGSI_WRITEMASK_W) {
+ linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3);
+ }
+}
+
+
+/**
+ * Setup the fragment input attribute with the front-facing value.
+ * \param frontface is the triangle front facing?
+ */
+static void setup_facing_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ unsigned slot,
+ boolean frontface,
+ unsigned usage_mask)
+{
+ /* convert TRUE to 1.0 and FALSE to -1.0 */
+ if (usage_mask & TGSI_WRITEMASK_X)
+ constant_coef( setup, tri, slot, 2.0f * frontface - 1.0f, 0 );
+
+ if (usage_mask & TGSI_WRITEMASK_Y)
+ constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */
+
+ if (usage_mask & TGSI_WRITEMASK_Z)
+ constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */
+
+ if (usage_mask & TGSI_WRITEMASK_W)
+ constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */
+}
+
+
+/**
+ * Compute the tri->coef[] array dadx, dady, a0 values.
+ */
+static void setup_tri_coefficients( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ float oneoverarea,
+ const float (*v1)[4],
+ const float (*v2)[4],
+ const float (*v3)[4],
+ boolean frontface)
+{
+ unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
+ unsigned slot;
+
+ /* setup interpolation for all the remaining attributes:
+ */
+ for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
+ unsigned vert_attr = setup->fs.input[slot].src_index;
+ unsigned usage_mask = setup->fs.input[slot].usage_mask;
+ unsigned i;
+
+ switch (setup->fs.input[slot].interp) {
+ case LP_INTERP_CONSTANT:
+ if (setup->flatshade_first) {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(setup, tri, slot+1, v1[vert_attr][i], i);
+ }
+ else {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ constant_coef(setup, tri, slot+1, v3[vert_attr][i], i);
+ }
+ break;
+
+ case LP_INTERP_LINEAR:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
+ break;
+
+ case LP_INTERP_PERSPECTIVE:
+ for (i = 0; i < NUM_CHANNELS; i++)
+ if (usage_mask & (1 << i))
+ perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
+ fragcoord_usage_mask |= TGSI_WRITEMASK_W;
+ break;
+
+ case LP_INTERP_POSITION:
+ /*
+ * The generated pixel interpolators will pick up the coeffs from
+ * slot 0, so all need to ensure that the usage mask is covers all
+ * usages.
+ */
+ fragcoord_usage_mask |= usage_mask;
+ break;
+
+ case LP_INTERP_FACING:
+ setup_facing_coef(setup, tri, slot+1, frontface, usage_mask);
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+
+ /* The internal position input is in slot zero:
+ */
+ setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3,
+ fragcoord_usage_mask);
+}
+
+
+
+static INLINE int subpixel_snap( float a )
+{
+ return util_iround(FIXED_ONE * a - (FIXED_ONE / 2));
+}
+
+
+
+/**
+ * Alloc space for a new triangle plus the input.a0/dadx/dady arrays
+ * immediately after it.
+ * The memory is allocated from the per-scene pool, not per-tile.
+ * \param tri_size returns number of bytes allocated
+ * \param nr_inputs number of fragment shader inputs
+ * \return pointer to triangle space
+ */
+static INLINE struct lp_rast_triangle *
+alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size)
+{
+ unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
+ struct lp_rast_triangle *tri;
+ unsigned bytes;
+ char *inputs;
+
+ assert(sizeof(*tri) % 16 == 0);
+
+ bytes = sizeof(*tri) + (3 * input_array_sz);
+
+ tri = lp_scene_alloc_aligned( scene, bytes, 16 );
+
+ if (tri) {
+ inputs = (char *) (tri + 1);
+ tri->inputs.a0 = (float (*)[4]) inputs;
+ tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz);
+ tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz);
+
+ *tri_size = bytes;
+ }
+
+ return tri;
+}
+
+
+/**
+ * Print triangle vertex attribs (for debug).
+ */
+static void
+print_triangle(struct lp_setup_context *setup,
+ const float (*v1)[4],
+ const float (*v2)[4],
+ const float (*v3)[4])
+{
+ uint i;
+
+ debug_printf("llvmpipe triangle\n");
+ for (i = 0; i < setup->fs.nr_inputs; i++) {
+ debug_printf(" v1[%d]: %f %f %f %f\n", i,
+ v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
+ }
+ for (i = 0; i < setup->fs.nr_inputs; i++) {
+ debug_printf(" v2[%d]: %f %f %f %f\n", i,
+ v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
+ }
+ for (i = 0; i < setup->fs.nr_inputs; i++) {
+ debug_printf(" v3[%d]: %f %f %f %f\n", i,
+ v3[i][0], v3[i][1], v3[i][2], v3[i][3]);
+ }
+}
+
+
+/**
+ * Do basic setup for triangle rasterization and determine which
+ * framebuffer tiles are touched. Put the triangle in the scene's
+ * bins for the tiles which we overlap.
+ */
+static void
+do_triangle_ccw(struct lp_setup_context *setup,
+ const float (*v1)[4],
+ const float (*v2)[4],
+ const float (*v3)[4],
+ boolean frontfacing )
+{
+ /* x/y positions in fixed point */
+ const int x1 = subpixel_snap(v1[0][0] + 0.5 - setup->pixel_offset);
+ const int x2 = subpixel_snap(v2[0][0] + 0.5 - setup->pixel_offset);
+ const int x3 = subpixel_snap(v3[0][0] + 0.5 - setup->pixel_offset);
+ const int y1 = subpixel_snap(v1[0][1] + 0.5 - setup->pixel_offset);
+ const int y2 = subpixel_snap(v2[0][1] + 0.5 - setup->pixel_offset);
+ const int y3 = subpixel_snap(v3[0][1] + 0.5 - setup->pixel_offset);
+
+ struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct lp_rast_triangle *tri;
+ int area;
+ float oneoverarea;
+ int minx, maxx, miny, maxy;
+ unsigned tri_bytes;
+
+ if (0)
+ print_triangle(setup, v1, v2, v3);
+
+ tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes);
+ if (!tri)
+ return;
+
+#ifdef DEBUG
+ tri->v[0][0] = v1[0][0];
+ tri->v[1][0] = v2[0][0];
+ tri->v[2][0] = v3[0][0];
+ tri->v[0][1] = v1[0][1];
+ tri->v[1][1] = v2[0][1];
+ tri->v[2][1] = v3[0][1];
+#endif
+
+ tri->dx12 = x1 - x2;
+ tri->dx23 = x2 - x3;
+ tri->dx31 = x3 - x1;
+
+ tri->dy12 = y1 - y2;
+ tri->dy23 = y2 - y3;
+ tri->dy31 = y3 - y1;
+
+ area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12);
+
+ LP_COUNT(nr_tris);
+
+ /* Cull non-ccw and zero-sized triangles.
+ *
+ * XXX: subject to overflow??
+ */
+ if (area <= 0) {
+ lp_scene_putback_data( scene, tri_bytes );
+ LP_COUNT(nr_culled_tris);
+ return;
+ }
+
+ /* Bounding rectangle (in pixels) */
+ minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
+
+ if (setup->scissor_test) {
+ minx = MAX2(minx, setup->scissor.current.minx);
+ maxx = MIN2(maxx, setup->scissor.current.maxx);
+ miny = MAX2(miny, setup->scissor.current.miny);
+ maxy = MIN2(maxy, setup->scissor.current.maxy);
+ }
+
+ if (miny == maxy ||
+ minx == maxx) {
+ lp_scene_putback_data( scene, tri_bytes );
+ LP_COUNT(nr_culled_tris);
+ return;
+ }
+
+ /*
+ */
+ oneoverarea = ((float)FIXED_ONE) / (float)area;
+
+ /* Setup parameter interpolants:
+ */
+ setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing );
+
+ tri->inputs.facing = frontfacing ? 1.0F : -1.0F;
+
+ /* half-edge constants, will be interated over the whole render target.
+ */
+ tri->c1 = tri->dy12 * x1 - tri->dx12 * y1;
+ tri->c2 = tri->dy23 * x2 - tri->dx23 * y2;
+ tri->c3 = tri->dy31 * x3 - tri->dx31 * y3;
+
+ /* correct for top-left fill convention:
+ */
+ if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++;
+ if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++;
+ if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++;
+
+ tri->dy12 *= FIXED_ONE;
+ tri->dy23 *= FIXED_ONE;
+ tri->dy31 *= FIXED_ONE;
+
+ tri->dx12 *= FIXED_ONE;
+ tri->dx23 *= FIXED_ONE;
+ tri->dx31 *= FIXED_ONE;
+
+ /* find trivial reject offsets for each edge for a single-pixel
+ * sized block. These will be scaled up at each recursive level to
+ * match the active blocksize. Scaling in this way works best if
+ * the blocks are square.
+ */
+ tri->eo1 = 0;
+ if (tri->dy12 < 0) tri->eo1 -= tri->dy12;
+ if (tri->dx12 > 0) tri->eo1 += tri->dx12;
+
+ tri->eo2 = 0;
+ if (tri->dy23 < 0) tri->eo2 -= tri->dy23;
+ if (tri->dx23 > 0) tri->eo2 += tri->dx23;
+
+ tri->eo3 = 0;
+ if (tri->dy31 < 0) tri->eo3 -= tri->dy31;
+ if (tri->dx31 > 0) tri->eo3 += tri->dx31;
+
+ /* Calculate trivial accept offsets from the above.
+ */
+ tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1;
+ tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2;
+ tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3;
+
+ /* Fill in the inputs.step[][] arrays.
+ * We've manually unrolled some loops here.
+ */
+ {
+ const int xstep1 = -tri->dy12;
+ const int xstep2 = -tri->dy23;
+ const int xstep3 = -tri->dy31;
+ const int ystep1 = tri->dx12;
+ const int ystep2 = tri->dx23;
+ const int ystep3 = tri->dx31;
+
+#define SETUP_STEP(i, x, y) \
+ do { \
+ tri->inputs.step[0][i] = x * xstep1 + y * ystep1; \
+ tri->inputs.step[1][i] = x * xstep2 + y * ystep2; \
+ tri->inputs.step[2][i] = x * xstep3 + y * ystep3; \
+ } while (0)
+
+ SETUP_STEP(0, 0, 0);
+ SETUP_STEP(1, 1, 0);
+ SETUP_STEP(2, 0, 1);
+ SETUP_STEP(3, 1, 1);
+
+ SETUP_STEP(4, 2, 0);
+ SETUP_STEP(5, 3, 0);
+ SETUP_STEP(6, 2, 1);
+ SETUP_STEP(7, 3, 1);
+
+ SETUP_STEP(8, 0, 2);
+ SETUP_STEP(9, 1, 2);
+ SETUP_STEP(10, 0, 3);
+ SETUP_STEP(11, 1, 3);
+
+ SETUP_STEP(12, 2, 2);
+ SETUP_STEP(13, 3, 2);
+ SETUP_STEP(14, 2, 3);
+ SETUP_STEP(15, 3, 3);
+#undef STEP
+ }
+
+ /*
+ * All fields of 'tri' are now set. The remaining code here is
+ * concerned with binning.
+ */
+
+ /* Convert to tile coordinates:
+ */
+ minx = minx / TILE_SIZE;
+ miny = miny / TILE_SIZE;
+ maxx = maxx / TILE_SIZE;
+ maxy = maxy / TILE_SIZE;
+
+ /*
+ * Clamp to framebuffer size
+ */
+ minx = MAX2(minx, 0);
+ miny = MAX2(miny, 0);
+ maxx = MIN2(maxx, scene->tiles_x - 1);
+ maxy = MIN2(maxy, scene->tiles_y - 1);
+
+ /* Determine which tile(s) intersect the triangle's bounding box
+ */
+ if (miny == maxy && minx == maxx)
+ {
+ /* Triangle is contained in a single tile:
+ */
+ lp_scene_bin_command( scene, minx, miny, lp_rast_triangle,
+ lp_rast_arg_triangle(tri) );
+ }
+ else
+ {
+ int c1 = (tri->c1 +
+ tri->dx12 * miny * TILE_SIZE -
+ tri->dy12 * minx * TILE_SIZE);
+ int c2 = (tri->c2 +
+ tri->dx23 * miny * TILE_SIZE -
+ tri->dy23 * minx * TILE_SIZE);
+ int c3 = (tri->c3 +
+ tri->dx31 * miny * TILE_SIZE -
+ tri->dy31 * minx * TILE_SIZE);
+
+ int ei1 = tri->ei1 << TILE_ORDER;
+ int ei2 = tri->ei2 << TILE_ORDER;
+ int ei3 = tri->ei3 << TILE_ORDER;
+
+ int eo1 = tri->eo1 << TILE_ORDER;
+ int eo2 = tri->eo2 << TILE_ORDER;
+ int eo3 = tri->eo3 << TILE_ORDER;
+
+ int xstep1 = -(tri->dy12 << TILE_ORDER);
+ int xstep2 = -(tri->dy23 << TILE_ORDER);
+ int xstep3 = -(tri->dy31 << TILE_ORDER);
+
+ int ystep1 = tri->dx12 << TILE_ORDER;
+ int ystep2 = tri->dx23 << TILE_ORDER;
+ int ystep3 = tri->dx31 << TILE_ORDER;
+ int x, y;
+
+
+ /* Test tile-sized blocks against the triangle.
+ * Discard blocks fully outside the tri. If the block is fully
+ * contained inside the tri, bin an lp_rast_shade_tile command.
+ * Else, bin a lp_rast_triangle command.
+ */
+ for (y = miny; y <= maxy; y++)
+ {
+ int cx1 = c1;
+ int cx2 = c2;
+ int cx3 = c3;
+ boolean in = FALSE; /* are we inside the triangle? */
+
+ for (x = minx; x <= maxx; x++)
+ {
+ if (cx1 + eo1 < 0 ||
+ cx2 + eo2 < 0 ||
+ cx3 + eo3 < 0)
+ {
+ /* do nothing */
+ LP_COUNT(nr_empty_64);
+ if (in)
+ break; /* exiting triangle, all done with this row */
+ }
+ else if (cx1 + ei1 > 0 &&
+ cx2 + ei2 > 0 &&
+ cx3 + ei3 > 0)
+ {
+ /* triangle covers the whole tile- shade whole tile */
+ LP_COUNT(nr_fully_covered_64);
+ in = TRUE;
+ if (setup->fs.current.variant->opaque) {
+ lp_scene_bin_reset( scene, x, y );
+ lp_scene_bin_command( scene, x, y,
+ lp_rast_set_state,
+ lp_rast_arg_state(setup->fs.stored) );
+ }
+ lp_scene_bin_command( scene, x, y,
+ lp_rast_shade_tile,
+ lp_rast_arg_inputs(&tri->inputs) );
+ }
+ else
+ {
+ /* rasterizer/shade partial tile */
+ LP_COUNT(nr_partially_covered_64);
+ in = TRUE;
+ lp_scene_bin_command( scene, x, y,
+ lp_rast_triangle,
+ lp_rast_arg_triangle(tri) );
+ }
+
+ /* Iterate cx values across the region:
+ */
+ cx1 += xstep1;
+ cx2 += xstep2;
+ cx3 += xstep3;
+ }
+
+ /* Iterate c values down the region:
+ */
+ c1 += ystep1;
+ c2 += ystep2;
+ c3 += ystep3;
+ }
+ }
+}
+
+
+/**
+ * Draw triangle if it's CW, cull otherwise.
+ */
+static void triangle_cw( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] )
+{
+ do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface );
+}
+
+
+/**
+ * Draw triangle if it's CCW, cull otherwise.
+ */
+static void triangle_ccw( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] )
+{
+ do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface );
+}
+
+
+
+/**
+ * Draw triangle whether it's CW or CCW.
+ */
+static void triangle_both( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] )
+{
+ /* edge vectors e = v0 - v2, f = v1 - v2 */
+ const float ex = v0[0][0] - v2[0][0];
+ const float ey = v0[0][1] - v2[0][1];
+ const float fx = v1[0][0] - v2[0][0];
+ const float fy = v1[0][1] - v2[0][1];
+
+ /* det = cross(e,f).z */
+ if (ex * fy - ey * fx < 0.0f)
+ triangle_ccw( setup, v0, v1, v2 );
+ else
+ triangle_cw( setup, v0, v1, v2 );
+}
+
+
+static void triangle_nop( struct lp_setup_context *setup,
+ const float (*v0)[4],
+ const float (*v1)[4],
+ const float (*v2)[4] )
+{
+}
+
+
+void
+lp_setup_choose_triangle( struct lp_setup_context *setup )
+{
+ switch (setup->cullmode) {
+ case PIPE_FACE_NONE:
+ setup->triangle = triangle_both;
+ break;
+ case PIPE_FACE_BACK:
+ setup->triangle = setup->ccw_is_frontface ? triangle_ccw : triangle_cw;
+ break;
+ case PIPE_FACE_FRONT:
+ setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw;
+ break;
+ default:
+ setup->triangle = triangle_nop;
+ break;
+ }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
new file mode 100644
index 0000000000..f6a424f25a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
@@ -0,0 +1,550 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Interface between 'draw' module's output and the llvmpipe rasterizer/setup
+ * code. When the 'draw' module has finished filling a vertex buffer, the
+ * draw_arrays() functions below will be called. Loop over the vertices and
+ * call the point/line/tri setup functions.
+ *
+ * Authors
+ * Brian Paul
+ */
+
+
+#include "lp_setup_context.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "util/u_memory.h"
+
+
+#define LP_MAX_VBUF_INDEXES 1024
+#define LP_MAX_VBUF_SIZE 4096
+
+
+
+/** cast wrapper */
+static struct lp_setup_context *
+lp_setup_context(struct vbuf_render *vbr)
+{
+ return (struct lp_setup_context *) vbr;
+}
+
+
+
+static const struct vertex_info *
+lp_setup_get_vertex_info(struct vbuf_render *vbr)
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+
+ /* vertex size/info depends on the latest state */
+ lp_setup_update_state(setup);
+
+ return setup->vertex_info;
+}
+
+
+static boolean
+lp_setup_allocate_vertices(struct vbuf_render *vbr,
+ ushort vertex_size, ushort nr_vertices)
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+ unsigned size = vertex_size * nr_vertices;
+
+ if (setup->vertex_buffer_size < size) {
+ align_free(setup->vertex_buffer);
+ setup->vertex_buffer = align_malloc(size, 16);
+ setup->vertex_buffer_size = size;
+ }
+
+ setup->vertex_size = vertex_size;
+ setup->nr_vertices = nr_vertices;
+
+ return setup->vertex_buffer != NULL;
+}
+
+static void
+lp_setup_release_vertices(struct vbuf_render *vbr)
+{
+ /* keep the old allocation for next time */
+}
+
+static void *
+lp_setup_map_vertices(struct vbuf_render *vbr)
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+ return setup->vertex_buffer;
+}
+
+static void
+lp_setup_unmap_vertices(struct vbuf_render *vbr,
+ ushort min_index,
+ ushort max_index )
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+ assert( setup->vertex_buffer_size >= (max_index+1) * setup->vertex_size );
+ /* do nothing */
+}
+
+
+static boolean
+lp_setup_set_primitive(struct vbuf_render *vbr, unsigned prim)
+{
+ lp_setup_context(vbr)->prim = prim;
+ return TRUE;
+}
+
+typedef const float (*const_float4_ptr)[4];
+
+static INLINE const_float4_ptr get_vert( const void *vertex_buffer,
+ int index,
+ int stride )
+{
+ return (const_float4_ptr)((char *)vertex_buffer + index * stride);
+}
+
+/**
+ * draw elements / indexed primitives
+ */
+static void
+lp_setup_draw_elements(struct vbuf_render *vbr, const ushort *indices, uint nr)
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+ const unsigned stride = setup->vertex_info->size * sizeof(float);
+ const void *vertex_buffer = setup->vertex_buffer;
+ const boolean flatshade_first = setup->flatshade_first;
+ unsigned i;
+
+ lp_setup_update_state(setup);
+
+ switch (setup->prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < nr; i++) {
+ setup->point( setup,
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ for (i = 1; i < nr; i += 2) {
+ setup->line( setup,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ for (i = 1; i < nr; i ++) {
+ setup->line( setup,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ for (i = 1; i < nr; i ++) {
+ setup->line( setup,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ if (nr) {
+ setup->line( setup,
+ get_vert(vertex_buffer, indices[nr-1], stride),
+ get_vert(vertex_buffer, indices[0], stride) );
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 2; i < nr; i += 3) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ if (flatshade_first) {
+ for (i = 2; i < nr; i += 1) {
+ /* emit first triangle vertex as first triangle vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i+(i&1)-1], stride),
+ get_vert(vertex_buffer, indices[i-(i&1)], stride) );
+
+ }
+ }
+ else {
+ for (i = 2; i < nr; i += 1) {
+ /* emit last triangle vertex as last triangle vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i+(i&1)-2], stride),
+ get_vert(vertex_buffer, indices[i-(i&1)-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (flatshade_first) {
+ for (i = 2; i < nr; i += 1) {
+ /* emit first non-spoke vertex as first vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride),
+ get_vert(vertex_buffer, indices[0], stride) );
+ }
+ }
+ else {
+ for (i = 2; i < nr; i += 1) {
+ /* emit last non-spoke vertex as last vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[0], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUADS:
+ /* GL quads don't follow provoking vertex convention */
+ if (flatshade_first) {
+ /* emit last quad vertex as first triangle vertex */
+ for (i = 3; i < nr; i += 4) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-0], stride),
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-2], stride) );
+
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-0], stride),
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-1], stride) );
+ }
+ }
+ else {
+ /* emit last quad vertex as last triangle vertex */
+ for (i = 3; i < nr; i += 4) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ /* GL quad strips don't follow provoking vertex convention */
+ if (flatshade_first) {
+ /* emit last quad vertex as first triangle vertex */
+ for (i = 3; i < nr; i += 2) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-0], stride),
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-2], stride) );
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-0], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-3], stride) );
+ }
+ }
+ else {
+ /* emit last quad vertex as last triangle vertex */
+ for (i = 3; i < nr; i += 2) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-2], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-3], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ /* Almost same as tri fan but the _first_ vertex specifies the flat
+ * shading color.
+ */
+ if (flatshade_first) {
+ /* emit first polygon vertex as first triangle vertex */
+ for (i = 2; i < nr; i += 1) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[0], stride),
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride) );
+ }
+ }
+ else {
+ /* emit first polygon vertex as last triangle vertex */
+ for (i = 2; i < nr; i += 1) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, indices[i-1], stride),
+ get_vert(vertex_buffer, indices[i-0], stride),
+ get_vert(vertex_buffer, indices[0], stride) );
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ }
+}
+
+
+/**
+ * This function is hit when the draw module is working in pass-through mode.
+ * It's up to us to convert the vertex array into point/line/tri prims.
+ */
+static void
+lp_setup_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+ const unsigned stride = setup->vertex_info->size * sizeof(float);
+ const void *vertex_buffer =
+ (void *) get_vert(setup->vertex_buffer, start, stride);
+ const boolean flatshade_first = setup->flatshade_first;
+ unsigned i;
+
+ lp_setup_update_state(setup);
+
+ switch (setup->prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < nr; i++) {
+ setup->point( setup,
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ for (i = 1; i < nr; i += 2) {
+ setup->line( setup,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ for (i = 1; i < nr; i ++) {
+ setup->line( setup,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ for (i = 1; i < nr; i ++) {
+ setup->line( setup,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ if (nr) {
+ setup->line( setup,
+ get_vert(vertex_buffer, nr-1, stride),
+ get_vert(vertex_buffer, 0, stride) );
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ for (i = 2; i < nr; i += 3) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ if (flatshade_first) {
+ for (i = 2; i < nr; i++) {
+ /* emit first triangle vertex as first triangle vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i+(i&1)-1, stride),
+ get_vert(vertex_buffer, i-(i&1), stride) );
+ }
+ }
+ else {
+ for (i = 2; i < nr; i++) {
+ /* emit last triangle vertex as last triangle vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i+(i&1)-2, stride),
+ get_vert(vertex_buffer, i-(i&1)-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (flatshade_first) {
+ for (i = 2; i < nr; i += 1) {
+ /* emit first non-spoke vertex as first vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride),
+ get_vert(vertex_buffer, 0, stride) );
+ }
+ }
+ else {
+ for (i = 2; i < nr; i += 1) {
+ /* emit last non-spoke vertex as last vertex */
+ setup->triangle( setup,
+ get_vert(vertex_buffer, 0, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUADS:
+ /* GL quads don't follow provoking vertex convention */
+ if (flatshade_first) {
+ /* emit last quad vertex as first triangle vertex */
+ for (i = 3; i < nr; i += 4) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-0, stride),
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-2, stride) );
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-0, stride),
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-1, stride) );
+ }
+ }
+ else {
+ /* emit last quad vertex as last triangle vertex */
+ for (i = 3; i < nr; i += 4) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ /* GL quad strips don't follow provoking vertex convention */
+ if (flatshade_first) {
+ /* emit last quad vertex as first triangle vertex */
+ for (i = 3; i < nr; i += 2) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-0, stride),
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-2, stride) );
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-0, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-3, stride) );
+ }
+ }
+ else {
+ /* emit last quad vertex as last triangle vertex */
+ for (i = 3; i < nr; i += 2) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-2, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-3, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ /* Almost same as tri fan but the _first_ vertex specifies the flat
+ * shading color.
+ */
+ if (flatshade_first) {
+ /* emit first polygon vertex as first triangle vertex */
+ for (i = 2; i < nr; i += 1) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, 0, stride),
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride) );
+ }
+ }
+ else {
+ /* emit first polygon vertex as last triangle vertex */
+ for (i = 2; i < nr; i += 1) {
+ setup->triangle( setup,
+ get_vert(vertex_buffer, i-1, stride),
+ get_vert(vertex_buffer, i-0, stride),
+ get_vert(vertex_buffer, 0, stride) );
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ }
+}
+
+
+
+static void
+lp_setup_vbuf_destroy(struct vbuf_render *vbr)
+{
+ struct lp_setup_context *setup = lp_setup_context(vbr);
+ if (setup->vertex_buffer) {
+ align_free(setup->vertex_buffer);
+ setup->vertex_buffer = NULL;
+ }
+ lp_setup_destroy(setup);
+}
+
+
+/**
+ * Create the post-transform vertex handler for the given context.
+ */
+void
+lp_setup_init_vbuf(struct lp_setup_context *setup)
+{
+ setup->base.max_indices = LP_MAX_VBUF_INDEXES;
+ setup->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE;
+
+ setup->base.get_vertex_info = lp_setup_get_vertex_info;
+ setup->base.allocate_vertices = lp_setup_allocate_vertices;
+ setup->base.map_vertices = lp_setup_map_vertices;
+ setup->base.unmap_vertices = lp_setup_unmap_vertices;
+ setup->base.set_primitive = lp_setup_set_primitive;
+ setup->base.draw_elements = lp_setup_draw_elements;
+ setup->base.draw_arrays = lp_setup_draw_arrays;
+ setup->base.release_vertices = lp_setup_release_vertices;
+ setup->base.destroy = lp_setup_vbuf_destroy;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
new file mode 100644
index 0000000000..05d1b93794
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -0,0 +1,135 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef LP_STATE_H
+#define LP_STATE_H
+
+#include "pipe/p_state.h"
+#include "lp_jit.h"
+#include "lp_state_fs.h"
+#include "gallivm/lp_bld.h"
+
+
+#define LP_NEW_VIEWPORT 0x1
+#define LP_NEW_RASTERIZER 0x2
+#define LP_NEW_FS 0x4
+#define LP_NEW_BLEND 0x8
+#define LP_NEW_CLIP 0x10
+#define LP_NEW_SCISSOR 0x20
+#define LP_NEW_STIPPLE 0x40
+#define LP_NEW_FRAMEBUFFER 0x80
+#define LP_NEW_DEPTH_STENCIL_ALPHA 0x100
+#define LP_NEW_CONSTANTS 0x200
+#define LP_NEW_SAMPLER 0x400
+#define LP_NEW_SAMPLER_VIEW 0x800
+#define LP_NEW_VERTEX 0x1000
+#define LP_NEW_VS 0x2000
+#define LP_NEW_QUERY 0x4000
+#define LP_NEW_BLEND_COLOR 0x8000
+#define LP_NEW_GS 0x10000
+#define LP_NEW_SO 0x20000
+#define LP_NEW_SO_BUFFERS 0x40000
+
+
+
+struct vertex_info;
+struct pipe_context;
+struct llvmpipe_context;
+
+
+
+/** Subclass of pipe_shader_state */
+struct lp_vertex_shader
+{
+ struct pipe_shader_state shader;
+ struct draw_vertex_shader *draw_data;
+};
+
+/** Subclass of pipe_shader_state */
+struct lp_geometry_shader {
+ struct pipe_shader_state shader;
+ struct draw_geometry_shader *draw_data;
+};
+
+/** Vertex element state */
+struct lp_velems_state
+{
+ unsigned count;
+ struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+};
+
+struct lp_so_state {
+ struct pipe_stream_output_state base;
+};
+
+
+void
+llvmpipe_set_framebuffer_state(struct pipe_context *,
+ const struct pipe_framebuffer_state *);
+
+void
+llvmpipe_update_fs(struct llvmpipe_context *lp);
+
+void
+llvmpipe_update_derived(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_blend_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_draw_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_clip_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_vs_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_gs_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe);
+
+void
+llvmpipe_init_so_funcs(struct llvmpipe_context *llvmpipe);
+
+
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c
new file mode 100644
index 0000000000..5b39d9d1a9
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c
@@ -0,0 +1,172 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ * @author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_dump.h"
+#include "draw/draw_context.h"
+#include "lp_screen.h"
+#include "lp_context.h"
+#include "lp_state.h"
+
+
+static void *
+llvmpipe_create_blend_state(struct pipe_context *pipe,
+ const struct pipe_blend_state *blend)
+{
+ return mem_dup(blend, sizeof(*blend));
+}
+
+
+static void
+llvmpipe_bind_blend_state(struct pipe_context *pipe, void *blend)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if (llvmpipe->blend == blend)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ llvmpipe->blend = blend;
+
+ llvmpipe->dirty |= LP_NEW_BLEND;
+}
+
+
+static void
+llvmpipe_delete_blend_state(struct pipe_context *pipe, void *blend)
+{
+ FREE( blend );
+}
+
+
+static void
+llvmpipe_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *blend_color)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if(!blend_color)
+ return;
+
+ if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color);
+
+ llvmpipe->dirty |= LP_NEW_BLEND_COLOR;
+}
+
+
+/** XXX move someday? Or consolidate all these simple state setters
+ * into one file.
+ */
+
+
+static void *
+llvmpipe_create_depth_stencil_state(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *depth_stencil)
+{
+ return mem_dup(depth_stencil, sizeof(*depth_stencil));
+}
+
+
+static void
+llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe,
+ void *depth_stencil)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if (llvmpipe->depth_stencil == depth_stencil)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ llvmpipe->depth_stencil = depth_stencil;
+
+ llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA;
+}
+
+
+static void
+llvmpipe_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
+{
+ FREE( depth );
+}
+
+
+static void
+llvmpipe_set_stencil_ref(struct pipe_context *pipe,
+ const struct pipe_stencil_ref *stencil_ref)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if(!stencil_ref)
+ return;
+
+ if(memcmp(&llvmpipe->stencil_ref, stencil_ref, sizeof *stencil_ref) == 0)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ memcpy(&llvmpipe->stencil_ref, stencil_ref, sizeof *stencil_ref);
+
+ /* not sure. want new flag? */
+ llvmpipe->dirty |= LP_NEW_DEPTH_STENCIL_ALPHA;
+}
+
+static void
+llvmpipe_set_sample_mask(struct pipe_context *pipe,
+ unsigned sample_mask)
+{
+}
+
+void
+llvmpipe_init_blend_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_blend_state = llvmpipe_create_blend_state;
+ llvmpipe->pipe.bind_blend_state = llvmpipe_bind_blend_state;
+ llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state;
+
+ llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state;
+ llvmpipe->pipe.bind_depth_stencil_alpha_state = llvmpipe_bind_depth_stencil_state;
+ llvmpipe->pipe.delete_depth_stencil_alpha_state = llvmpipe_delete_depth_stencil_state;
+
+ llvmpipe->pipe.set_blend_color = llvmpipe_set_blend_color;
+
+ llvmpipe->pipe.set_stencil_ref = llvmpipe_set_stencil_ref;
+ llvmpipe->pipe.set_sample_mask = llvmpipe_set_sample_mask;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_clip.c b/src/gallium/drivers/llvmpipe/lp_state_clip.c
new file mode 100644
index 0000000000..32ae079cc1
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_clip.c
@@ -0,0 +1,94 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+#include "lp_context.h"
+#include "lp_state.h"
+#include "draw/draw_context.h"
+
+
+static void
+llvmpipe_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ /* pass the clip state to the draw module */
+ draw_set_clip_state(llvmpipe->draw, clip);
+}
+
+
+static void
+llvmpipe_set_viewport_state(struct pipe_context *pipe,
+ const struct pipe_viewport_state *viewport)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ /* pass the viewport info to the draw module */
+ draw_set_viewport_state(llvmpipe->draw, viewport);
+
+ llvmpipe->viewport = *viewport; /* struct copy */
+ llvmpipe->dirty |= LP_NEW_VIEWPORT;
+}
+
+
+static void
+llvmpipe_set_scissor_state(struct pipe_context *pipe,
+ const struct pipe_scissor_state *scissor)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ draw_flush(llvmpipe->draw);
+
+ llvmpipe->scissor = *scissor; /* struct copy */
+ llvmpipe->dirty |= LP_NEW_SCISSOR;
+}
+
+
+static void
+llvmpipe_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ draw_flush(llvmpipe->draw);
+
+ llvmpipe->poly_stipple = *stipple; /* struct copy */
+ llvmpipe->dirty |= LP_NEW_STIPPLE;
+}
+
+
+
+void
+llvmpipe_init_clip_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.set_clip_state = llvmpipe_set_clip_state;
+ llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple;
+ llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state;
+ llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
new file mode 100644
index 0000000000..d20a5218d4
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -0,0 +1,198 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_private.h"
+#include "lp_context.h"
+#include "lp_screen.h"
+#include "lp_setup.h"
+#include "lp_state.h"
+
+
+
+/**
+ * The vertex info describes how to convert the post-transformed vertices
+ * (simple float[][4]) used by the 'draw' module into vertices for
+ * rasterization.
+ *
+ * This function validates the vertex layout.
+ */
+static void
+compute_vertex_info(struct llvmpipe_context *llvmpipe)
+{
+ const struct lp_fragment_shader *lpfs = llvmpipe->fs;
+ struct vertex_info *vinfo = &llvmpipe->vertex_info;
+ struct lp_shader_input *inputs = llvmpipe->inputs;
+ unsigned vs_index;
+ uint i;
+
+ /*
+ * Match FS inputs against VS outputs, emitting the necessary attributes.
+ */
+
+ vinfo->num_attribs = 0;
+
+ vs_index = draw_find_shader_output(llvmpipe->draw,
+ TGSI_SEMANTIC_POSITION,
+ 0);
+
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
+
+ for (i = 0; i < lpfs->info.num_inputs; i++) {
+ /*
+ * Search for each input in current vs output:
+ */
+
+ vs_index = draw_find_shader_output(llvmpipe->draw,
+ lpfs->info.input_semantic_name[i],
+ lpfs->info.input_semantic_index[i]);
+
+ /* This can be pre-computed, except for flatshade:
+ */
+ inputs[i].usage_mask = lpfs->info.input_usage_mask[i];
+
+ switch (lpfs->info.input_interpolate[i]) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ inputs[i].interp = LP_INTERP_CONSTANT;
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ inputs[i].interp = LP_INTERP_LINEAR;
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ inputs[i].interp = LP_INTERP_PERSPECTIVE;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (lpfs->info.input_semantic_name[i]) {
+ case TGSI_SEMANTIC_FACE:
+ inputs[i].interp = LP_INTERP_FACING;
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ /* Position was already emitted above
+ */
+ inputs[i].interp = LP_INTERP_POSITION;
+ inputs[i].src_index = 0;
+ continue;
+ case TGSI_SEMANTIC_COLOR:
+ /* Colors are linearly inputs[i].interpolated in the fragment shader
+ * even when flatshading is active. This just tells the
+ * setup module to use coefficients with ddx==0 and
+ * ddy==0.
+ */
+ if (llvmpipe->rasterizer->flatshade)
+ inputs[i].interp = LP_INTERP_CONSTANT;
+ break;
+
+ default:
+ break;
+ }
+
+ /*
+ * Emit the requested fs attribute for all but position.
+ */
+
+ inputs[i].src_index = vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
+ }
+ llvmpipe->num_inputs = lpfs->info.num_inputs;
+
+ draw_compute_vertex_size(vinfo);
+
+ lp_setup_set_vertex_info(llvmpipe->setup, vinfo);
+
+ lp_setup_set_fs_inputs(llvmpipe->setup,
+ inputs,
+ lpfs->info.num_inputs);
+}
+
+
+/**
+ * Handle state changes.
+ * Called just prior to drawing anything (pipe::draw_arrays(), etc).
+ *
+ * Hopefully this will remain quite simple, otherwise need to pull in
+ * something like the state tracker mechanism.
+ */
+void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
+{
+ struct llvmpipe_screen *lp_screen = llvmpipe_screen(llvmpipe->pipe.screen);
+
+ /* Check for updated textures.
+ */
+ if (llvmpipe->tex_timestamp != lp_screen->timestamp) {
+ llvmpipe->tex_timestamp = lp_screen->timestamp;
+ llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
+ }
+
+ if (llvmpipe->dirty & (LP_NEW_RASTERIZER |
+ LP_NEW_FS |
+ LP_NEW_VS))
+ compute_vertex_info( llvmpipe );
+
+ if (llvmpipe->dirty & (LP_NEW_FS |
+ LP_NEW_BLEND |
+ LP_NEW_SCISSOR |
+ LP_NEW_DEPTH_STENCIL_ALPHA |
+ LP_NEW_RASTERIZER |
+ LP_NEW_SAMPLER |
+ LP_NEW_SAMPLER_VIEW |
+ LP_NEW_QUERY))
+ llvmpipe_update_fs( llvmpipe );
+
+ if (llvmpipe->dirty & LP_NEW_BLEND_COLOR)
+ lp_setup_set_blend_color(llvmpipe->setup,
+ &llvmpipe->blend_color);
+
+ if (llvmpipe->dirty & LP_NEW_SCISSOR)
+ lp_setup_set_scissor(llvmpipe->setup, &llvmpipe->scissor);
+
+ if (llvmpipe->dirty & LP_NEW_DEPTH_STENCIL_ALPHA) {
+ lp_setup_set_alpha_ref_value(llvmpipe->setup,
+ llvmpipe->depth_stencil->alpha.ref_value);
+ lp_setup_set_stencil_ref_values(llvmpipe->setup,
+ llvmpipe->stencil_ref.ref_value);
+ }
+
+ if (llvmpipe->dirty & LP_NEW_CONSTANTS)
+ lp_setup_set_fs_constants(llvmpipe->setup,
+ llvmpipe->constants[PIPE_SHADER_FRAGMENT][0]);
+
+ if (llvmpipe->dirty & LP_NEW_SAMPLER_VIEW)
+ lp_setup_set_fragment_sampler_views(llvmpipe->setup,
+ llvmpipe->num_fragment_sampler_views,
+ llvmpipe->fragment_sampler_views);
+
+ llvmpipe->dirty = 0;
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
new file mode 100644
index 0000000000..65115052cd
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -0,0 +1,1322 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Code generate the whole fragment pipeline.
+ *
+ * The fragment pipeline consists of the following stages:
+ * - triangle edge in/out testing
+ * - scissor test
+ * - stipple (TBI)
+ * - early depth test
+ * - fragment shader
+ * - alpha test
+ * - depth/stencil test
+ * - blending
+ *
+ * This file has only the glue to assemble the fragment pipeline. The actual
+ * plumbing of converting Gallium state into LLVM IR is done elsewhere, in the
+ * lp_bld_*.[ch] files, and in a complete generic and reusable way. Here we
+ * muster the LLVM JIT execution engine to create a function that follows an
+ * established binary interface and that can be called from C directly.
+ *
+ * A big source of complexity here is that we often want to run different
+ * stages with different precisions and data types and precisions. For example,
+ * the fragment shader needs typically to be done in floats, but the
+ * depth/stencil test and blending is better done in the type that most closely
+ * matches the depth/stencil and color buffer respectively.
+ *
+ * Since the width of a SIMD vector register stays the same regardless of the
+ * element type, different types imply different number of elements, so we must
+ * code generate more instances of the stages with larger types to be able to
+ * feed/consume the stages with smaller types.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include <limits.h>
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_pointer.h"
+#include "util/u_format.h"
+#include "util/u_dump.h"
+#include "util/u_string.h"
+#include "util/u_simple_list.h"
+#include "os/os_time.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw/draw_context.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_parse.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_conv.h"
+#include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_logic.h"
+#include "gallivm/lp_bld_tgsi.h"
+#include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_flow.h"
+#include "gallivm/lp_bld_debug.h"
+
+#include "lp_bld_alpha.h"
+#include "lp_bld_blend.h"
+#include "lp_bld_depth.h"
+#include "lp_bld_interp.h"
+#include "lp_context.h"
+#include "lp_debug.h"
+#include "lp_perf.h"
+#include "lp_screen.h"
+#include "lp_setup.h"
+#include "lp_state.h"
+#include "lp_tex_sample.h"
+#include "lp_flush.h"
+
+
+#include <llvm-c/Analysis.h>
+
+
+static unsigned fs_no = 0;
+
+
+/**
+ * Generate the depth /stencil test code.
+ */
+static void
+generate_depth_stencil(LLVMBuilderRef builder,
+ const struct lp_fragment_shader_variant_key *key,
+ struct lp_type src_type,
+ struct lp_build_mask_context *mask,
+ LLVMValueRef stencil_refs[2],
+ LLVMValueRef src,
+ LLVMValueRef dst_ptr,
+ LLVMValueRef facing,
+ LLVMValueRef counter)
+{
+ const struct util_format_description *format_desc;
+ struct lp_type dst_type;
+
+ if (!key->depth.enabled && !key->stencil[0].enabled && !key->stencil[1].enabled)
+ return;
+
+ format_desc = util_format_description(key->zsbuf_format);
+ assert(format_desc);
+
+ /*
+ * Depths are expected to be between 0 and 1, even if they are stored in
+ * floats. Setting these bits here will ensure that the lp_build_conv() call
+ * below won't try to unnecessarily clamp the incoming values.
+ */
+ if(src_type.floating) {
+ src_type.sign = FALSE;
+ src_type.norm = TRUE;
+ }
+ else {
+ assert(!src_type.sign);
+ assert(src_type.norm);
+ }
+
+ /* Pick the depth type. */
+ dst_type = lp_depth_type(format_desc, src_type.width*src_type.length);
+
+ /* FIXME: Cope with a depth test type with a different bit width. */
+ assert(dst_type.width == src_type.width);
+ assert(dst_type.length == src_type.length);
+
+ /* Convert fragment Z from float to integer */
+ lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1);
+
+ dst_ptr = LLVMBuildBitCast(builder,
+ dst_ptr,
+ LLVMPointerType(lp_build_vec_type(dst_type), 0), "");
+ lp_build_depth_stencil_test(builder,
+ &key->depth,
+ key->stencil,
+ dst_type,
+ format_desc,
+ mask,
+ stencil_refs,
+ src,
+ dst_ptr,
+ facing,
+ counter);
+}
+
+
+/**
+ * Generate the code to do inside/outside triangle testing for the
+ * four pixels in a 2x2 quad. This will set the four elements of the
+ * quad mask vector to 0 or ~0.
+ * \param i which quad of the quad group to test, in [0,3]
+ */
+static void
+generate_tri_edge_mask(LLVMBuilderRef builder,
+ unsigned i,
+ LLVMValueRef *mask, /* ivec4, out */
+ LLVMValueRef c0, /* int32 */
+ LLVMValueRef c1, /* int32 */
+ LLVMValueRef c2, /* int32 */
+ LLVMValueRef step0_ptr, /* ivec4 */
+ LLVMValueRef step1_ptr, /* ivec4 */
+ LLVMValueRef step2_ptr) /* ivec4 */
+{
+#define OPTIMIZE_IN_OUT_TEST 0
+#if OPTIMIZE_IN_OUT_TEST
+ struct lp_build_if_state ifctx;
+ LLVMValueRef not_draw_all;
+#endif
+ struct lp_build_flow_context *flow;
+ struct lp_type i32_type;
+ LLVMTypeRef i32vec4_type;
+ LLVMValueRef c0_vec, c1_vec, c2_vec;
+ LLVMValueRef in_out_mask;
+
+ assert(i < 4);
+
+ /* int32 vector type */
+ memset(&i32_type, 0, sizeof i32_type);
+ i32_type.floating = FALSE; /* values are integers */
+ i32_type.sign = TRUE; /* values are signed */
+ i32_type.norm = FALSE; /* values are not normalized */
+ i32_type.width = 32; /* 32-bit int values */
+ i32_type.length = 4; /* 4 elements per vector */
+
+ i32vec4_type = lp_build_int32_vec4_type();
+
+ /*
+ * Use a conditional here to do detailed pixel in/out testing.
+ * We only have to do this if c0 != INT_MIN.
+ */
+ flow = lp_build_flow_create(builder);
+ lp_build_flow_scope_begin(flow);
+
+ {
+#if OPTIMIZE_IN_OUT_TEST
+ /* not_draw_all = (c0 != INT_MIN) */
+ not_draw_all = LLVMBuildICmp(builder,
+ LLVMIntNE,
+ c0,
+ LLVMConstInt(LLVMInt32Type(), INT_MIN, 0),
+ "");
+
+ in_out_mask = lp_build_const_int_vec(i32_type, ~0);
+
+
+ lp_build_flow_scope_declare(flow, &in_out_mask);
+
+ /* if (not_draw_all) {... */
+ lp_build_if(&ifctx, flow, builder, not_draw_all);
+#endif
+ {
+ LLVMValueRef step0_vec, step1_vec, step2_vec;
+ LLVMValueRef m0_vec, m1_vec, m2_vec;
+ LLVMValueRef index, m;
+
+ /* c0_vec = {c0, c0, c0, c0}
+ * Note that we emit this code four times but LLVM optimizes away
+ * three instances of it.
+ */
+ c0_vec = lp_build_broadcast(builder, i32vec4_type, c0);
+ c1_vec = lp_build_broadcast(builder, i32vec4_type, c1);
+ c2_vec = lp_build_broadcast(builder, i32vec4_type, c2);
+ lp_build_name(c0_vec, "edgeconst0vec");
+ lp_build_name(c1_vec, "edgeconst1vec");
+ lp_build_name(c2_vec, "edgeconst2vec");
+
+ /* load step0vec, step1, step2 vec from memory */
+ index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
+ step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
+ step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
+ lp_build_name(step0_vec, "step0vec");
+ lp_build_name(step1_vec, "step1vec");
+ lp_build_name(step2_vec, "step2vec");
+
+ /* m0_vec = step0_ptr[i] > c0_vec */
+ m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
+ m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
+ m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);
+
+ /* in_out_mask = m0_vec & m1_vec & m2_vec */
+ m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
+ in_out_mask = LLVMBuildAnd(builder, m, m2_vec, "");
+ lp_build_name(in_out_mask, "inoutmaskvec");
+ }
+#if OPTIMIZE_IN_OUT_TEST
+ lp_build_endif(&ifctx);
+#endif
+
+ }
+ lp_build_flow_scope_end(flow);
+ lp_build_flow_destroy(flow);
+
+ /* This is the initial alive/dead pixel mask for a quad of four pixels.
+ * It's an int[4] vector with each word set to 0 or ~0.
+ * Words will get cleared when pixels faile the Z test, etc.
+ */
+ *mask = in_out_mask;
+}
+
+
+static LLVMValueRef
+generate_scissor_test(LLVMBuilderRef builder,
+ LLVMValueRef context_ptr,
+ const struct lp_build_interp_soa_context *interp,
+ struct lp_type type)
+{
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMValueRef xpos = interp->pos[0], ypos = interp->pos[1];
+ LLVMValueRef xmin, ymin, xmax, ymax;
+ LLVMValueRef m0, m1, m2, m3, m;
+
+ /* xpos, ypos contain the window coords for the four pixels in the quad */
+ assert(xpos);
+ assert(ypos);
+
+ /* get the current scissor bounds, convert to vectors */
+ xmin = lp_jit_context_scissor_xmin_value(builder, context_ptr);
+ xmin = lp_build_broadcast(builder, vec_type, xmin);
+
+ ymin = lp_jit_context_scissor_ymin_value(builder, context_ptr);
+ ymin = lp_build_broadcast(builder, vec_type, ymin);
+
+ xmax = lp_jit_context_scissor_xmax_value(builder, context_ptr);
+ xmax = lp_build_broadcast(builder, vec_type, xmax);
+
+ ymax = lp_jit_context_scissor_ymax_value(builder, context_ptr);
+ ymax = lp_build_broadcast(builder, vec_type, ymax);
+
+ /* compare the fragment's position coordinates against the scissor bounds */
+ m0 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, xmin);
+ m1 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, ymin);
+ m2 = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, xmax);
+ m3 = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, ymax);
+
+ /* AND all the masks together */
+ m = LLVMBuildAnd(builder, m0, m1, "");
+ m = LLVMBuildAnd(builder, m, m2, "");
+ m = LLVMBuildAnd(builder, m, m3, "");
+
+ lp_build_name(m, "scissormask");
+
+ return m;
+}
+
+
+static LLVMValueRef
+build_int32_vec_const(int value)
+{
+ struct lp_type i32_type;
+
+ memset(&i32_type, 0, sizeof i32_type);
+ i32_type.floating = FALSE; /* values are integers */
+ i32_type.sign = TRUE; /* values are signed */
+ i32_type.norm = FALSE; /* values are not normalized */
+ i32_type.width = 32; /* 32-bit int values */
+ i32_type.length = 4; /* 4 elements per vector */
+ return lp_build_const_int_vec(i32_type, value);
+}
+
+
+
+/**
+ * Generate the fragment shader, depth/stencil test, and alpha tests.
+ * \param i which quad in the tile, in range [0,3]
+ * \param do_tri_test if 1, do triangle edge in/out testing
+ */
+static void
+generate_fs(struct llvmpipe_context *lp,
+ struct lp_fragment_shader *shader,
+ const struct lp_fragment_shader_variant_key *key,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef context_ptr,
+ unsigned i,
+ const struct lp_build_interp_soa_context *interp,
+ struct lp_build_sampler_soa *sampler,
+ LLVMValueRef *pmask,
+ LLVMValueRef (*color)[4],
+ LLVMValueRef depth_ptr,
+ LLVMValueRef facing,
+ unsigned do_tri_test,
+ LLVMValueRef c0,
+ LLVMValueRef c1,
+ LLVMValueRef c2,
+ LLVMValueRef step0_ptr,
+ LLVMValueRef step1_ptr,
+ LLVMValueRef step2_ptr,
+ LLVMValueRef counter)
+{
+ const struct tgsi_token *tokens = shader->base.tokens;
+ LLVMTypeRef vec_type;
+ LLVMValueRef consts_ptr;
+ LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+ LLVMValueRef z = interp->pos[2];
+ LLVMValueRef stencil_refs[2];
+ struct lp_build_flow_context *flow;
+ struct lp_build_mask_context mask;
+ boolean early_depth_stencil_test;
+ unsigned attrib;
+ unsigned chan;
+ unsigned cbuf;
+
+ assert(i < 4);
+
+ stencil_refs[0] = lp_jit_context_stencil_ref_front_value(builder, context_ptr);
+ stencil_refs[1] = lp_jit_context_stencil_ref_back_value(builder, context_ptr);
+
+ vec_type = lp_build_vec_type(type);
+
+ consts_ptr = lp_jit_context_constants(builder, context_ptr);
+
+ flow = lp_build_flow_create(builder);
+
+ memset(outputs, 0, sizeof outputs);
+
+ lp_build_flow_scope_begin(flow);
+
+ /* Declare the color and z variables */
+ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
+ for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+ color[cbuf][chan] = LLVMGetUndef(vec_type);
+ lp_build_flow_scope_declare(flow, &color[cbuf][chan]);
+ }
+ }
+ lp_build_flow_scope_declare(flow, &z);
+
+ /* do triangle edge testing */
+ if (do_tri_test) {
+ generate_tri_edge_mask(builder, i, pmask,
+ c0, c1, c2, step0_ptr, step1_ptr, step2_ptr);
+ }
+ else {
+ *pmask = build_int32_vec_const(~0);
+ }
+
+ /* 'mask' will control execution based on quad's pixel alive/killed state */
+ lp_build_mask_begin(&mask, flow, type, *pmask);
+
+ if (key->scissor) {
+ LLVMValueRef smask =
+ generate_scissor_test(builder, context_ptr, interp, type);
+ lp_build_mask_update(&mask, smask);
+ }
+
+ early_depth_stencil_test =
+ (key->depth.enabled || key->stencil[0].enabled) &&
+ !key->alpha.enabled &&
+ !shader->info.uses_kill &&
+ !shader->info.writes_z;
+
+ if (early_depth_stencil_test)
+ generate_depth_stencil(builder, key,
+ type, &mask,
+ stencil_refs, z, depth_ptr, facing, counter);
+
+ lp_build_tgsi_soa(builder, tokens, type, &mask,
+ consts_ptr, interp->pos, interp->inputs,
+ outputs, sampler, &shader->info);
+
+ /* loop over fragment shader outputs/results */
+ for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
+ for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+ if(outputs[attrib][chan]) {
+ LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
+ lp_build_name(out, "output%u.%u.%c", i, attrib, "xyzw"[chan]);
+
+ switch (shader->info.output_semantic_name[attrib]) {
+ case TGSI_SEMANTIC_COLOR:
+ {
+ unsigned cbuf = shader->info.output_semantic_index[attrib];
+
+ lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);
+
+ /* Alpha test */
+ /* XXX: should the alpha reference value be passed separately? */
+ /* XXX: should only test the final assignment to alpha */
+ if(cbuf == 0 && chan == 3) {
+ LLVMValueRef alpha = out;
+ LLVMValueRef alpha_ref_value;
+ alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
+ alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
+ lp_build_alpha_test(builder, &key->alpha, type,
+ &mask, alpha, alpha_ref_value);
+ }
+
+ color[cbuf][chan] = out;
+ break;
+ }
+
+ case TGSI_SEMANTIC_POSITION:
+ if(chan == 2)
+ z = out;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!early_depth_stencil_test)
+ generate_depth_stencil(builder, key,
+ type, &mask,
+ stencil_refs, z, depth_ptr, facing, counter);
+
+ lp_build_mask_end(&mask);
+
+ lp_build_flow_scope_end(flow);
+
+ lp_build_flow_destroy(flow);
+
+ *pmask = mask.value;
+
+}
+
+
+/**
+ * Generate color blending and color output.
+ * \param rt the render target index (to index blend, colormask state)
+ * \param type the pixel color type
+ * \param context_ptr pointer to the runtime JIT context
+ * \param mask execution mask (active fragment/pixel mask)
+ * \param src colors from the fragment shader
+ * \param dst_ptr the destination color buffer pointer
+ */
+static void
+generate_blend(const struct pipe_blend_state *blend,
+ unsigned rt,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef context_ptr,
+ LLVMValueRef mask,
+ LLVMValueRef *src,
+ LLVMValueRef dst_ptr)
+{
+ struct lp_build_context bld;
+ struct lp_build_flow_context *flow;
+ struct lp_build_mask_context mask_ctx;
+ LLVMTypeRef vec_type;
+ LLVMValueRef const_ptr;
+ LLVMValueRef con[4];
+ LLVMValueRef dst[4];
+ LLVMValueRef res[4];
+ unsigned chan;
+
+ lp_build_context_init(&bld, builder, type);
+
+ flow = lp_build_flow_create(builder);
+
+ /* we'll use this mask context to skip blending if all pixels are dead */
+ lp_build_mask_begin(&mask_ctx, flow, type, mask);
+
+ vec_type = lp_build_vec_type(type);
+
+ const_ptr = lp_jit_context_blend_color(builder, context_ptr);
+ const_ptr = LLVMBuildBitCast(builder, const_ptr,
+ LLVMPointerType(vec_type, 0), "");
+
+ /* load constant blend color and colors from the dest color buffer */
+ for(chan = 0; chan < 4; ++chan) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+ con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
+
+ dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
+
+ lp_build_name(con[chan], "con.%c", "rgba"[chan]);
+ lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
+ }
+
+ /* do blend */
+ lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);
+
+ /* store results to color buffer */
+ for(chan = 0; chan < 4; ++chan) {
+ if(blend->rt[rt].colormask & (1 << chan)) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+ lp_build_name(res[chan], "res.%c", "rgba"[chan]);
+ res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
+ LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
+ }
+ }
+
+ lp_build_mask_end(&mask_ctx);
+ lp_build_flow_destroy(flow);
+}
+
+
+/**
+ * Generate the runtime callable function for the whole fragment pipeline.
+ * Note that the function which we generate operates on a block of 16
+ * pixels at at time. The block contains 2x2 quads. Each quad contains
+ * 2x2 pixels.
+ */
+static void
+generate_fragment(struct llvmpipe_context *lp,
+ struct lp_fragment_shader *shader,
+ struct lp_fragment_shader_variant *variant,
+ unsigned do_tri_test)
+{
+ struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
+ const struct lp_fragment_shader_variant_key *key = &variant->key;
+ char func_name[256];
+ struct lp_type fs_type;
+ struct lp_type blend_type;
+ LLVMTypeRef fs_elem_type;
+ LLVMTypeRef fs_int_vec_type;
+ LLVMTypeRef blend_vec_type;
+ LLVMTypeRef arg_types[16];
+ LLVMTypeRef func_type;
+ LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type();
+ LLVMValueRef context_ptr;
+ LLVMValueRef x;
+ LLVMValueRef y;
+ LLVMValueRef a0_ptr;
+ LLVMValueRef dadx_ptr;
+ LLVMValueRef dady_ptr;
+ LLVMValueRef color_ptr_ptr;
+ LLVMValueRef depth_ptr;
+ LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr, counter = NULL;
+ LLVMBasicBlockRef block;
+ LLVMBuilderRef builder;
+ struct lp_build_sampler_soa *sampler;
+ struct lp_build_interp_soa_context interp;
+ LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef blend_mask;
+ LLVMValueRef function;
+ LLVMValueRef facing;
+ unsigned num_fs;
+ unsigned i;
+ unsigned chan;
+ unsigned cbuf;
+
+
+ /* TODO: actually pick these based on the fs and color buffer
+ * characteristics. */
+
+ memset(&fs_type, 0, sizeof fs_type);
+ fs_type.floating = TRUE; /* floating point values */
+ fs_type.sign = TRUE; /* values are signed */
+ fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
+ fs_type.width = 32; /* 32-bit float */
+ fs_type.length = 4; /* 4 elements per vector */
+ num_fs = 4; /* number of quads per block */
+
+ memset(&blend_type, 0, sizeof blend_type);
+ blend_type.floating = FALSE; /* values are integers */
+ blend_type.sign = FALSE; /* values are unsigned */
+ blend_type.norm = TRUE; /* values are in [0,1] or [-1,1] */
+ blend_type.width = 8; /* 8-bit ubyte values */
+ blend_type.length = 16; /* 16 elements per vector */
+
+ /*
+ * Generate the function prototype. Any change here must be reflected in
+ * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa.
+ */
+
+ fs_elem_type = lp_build_elem_type(fs_type);
+ fs_int_vec_type = lp_build_int_vec_type(fs_type);
+
+ blend_vec_type = lp_build_vec_type(blend_type);
+
+ util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
+ shader->no, variant->no, do_tri_test ? "edge" : "whole");
+
+ arg_types[0] = screen->context_ptr_type; /* context */
+ arg_types[1] = LLVMInt32Type(); /* x */
+ arg_types[2] = LLVMInt32Type(); /* y */
+ arg_types[3] = LLVMFloatType(); /* facing */
+ arg_types[4] = LLVMPointerType(fs_elem_type, 0); /* a0 */
+ arg_types[5] = LLVMPointerType(fs_elem_type, 0); /* dadx */
+ arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */
+ arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */
+ arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
+ arg_types[9] = LLVMInt32Type(); /* c0 */
+ arg_types[10] = LLVMInt32Type(); /* c1 */
+ arg_types[11] = LLVMInt32Type(); /* c2 */
+ /* Note: the step arrays are built as int32[16] but we interpret
+ * them here as int32_vec4[4].
+ */
+ arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step0 */
+ arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step1 */
+ arg_types[14] = LLVMPointerType(int32_vec4_type, 0);/* step2 */
+ arg_types[15] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */
+
+ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+
+ function = LLVMAddFunction(screen->module, func_name, func_type);
+ LLVMSetFunctionCallConv(function, LLVMCCallConv);
+
+ variant->function[do_tri_test] = function;
+
+
+ /* XXX: need to propagate noalias down into color param now we are
+ * passing a pointer-to-pointer?
+ */
+ for(i = 0; i < Elements(arg_types); ++i)
+ if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
+ LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);
+
+ context_ptr = LLVMGetParam(function, 0);
+ x = LLVMGetParam(function, 1);
+ y = LLVMGetParam(function, 2);
+ facing = LLVMGetParam(function, 3);
+ a0_ptr = LLVMGetParam(function, 4);
+ dadx_ptr = LLVMGetParam(function, 5);
+ dady_ptr = LLVMGetParam(function, 6);
+ color_ptr_ptr = LLVMGetParam(function, 7);
+ depth_ptr = LLVMGetParam(function, 8);
+ c0 = LLVMGetParam(function, 9);
+ c1 = LLVMGetParam(function, 10);
+ c2 = LLVMGetParam(function, 11);
+ step0_ptr = LLVMGetParam(function, 12);
+ step1_ptr = LLVMGetParam(function, 13);
+ step2_ptr = LLVMGetParam(function, 14);
+
+ lp_build_name(context_ptr, "context");
+ lp_build_name(x, "x");
+ lp_build_name(y, "y");
+ lp_build_name(a0_ptr, "a0");
+ lp_build_name(dadx_ptr, "dadx");
+ lp_build_name(dady_ptr, "dady");
+ lp_build_name(color_ptr_ptr, "color_ptr_ptr");
+ lp_build_name(depth_ptr, "depth");
+ lp_build_name(c0, "c0");
+ lp_build_name(c1, "c1");
+ lp_build_name(c2, "c2");
+ lp_build_name(step0_ptr, "step0");
+ lp_build_name(step1_ptr, "step1");
+ lp_build_name(step2_ptr, "step2");
+
+ if (key->occlusion_count) {
+ counter = LLVMGetParam(function, 15);
+ lp_build_name(counter, "counter");
+ }
+
+ /*
+ * Function body
+ */
+
+ block = LLVMAppendBasicBlock(function, "entry");
+ builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ /*
+ * The shader input interpolation info is not explicitely baked in the
+ * shader key, but everything it derives from (TGSI, and flatshade) is
+ * already included in the shader key.
+ */
+ lp_build_interp_soa_init(&interp,
+ lp->num_inputs,
+ lp->inputs,
+ builder, fs_type,
+ a0_ptr, dadx_ptr, dady_ptr,
+ x, y);
+
+ /* code generated texture sampling */
+ sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
+
+ /* loop over quads in the block */
+ for(i = 0; i < num_fs; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS];
+ LLVMValueRef depth_ptr_i;
+
+ if(i != 0)
+ lp_build_interp_soa_update(&interp, i);
+
+ depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");
+
+ generate_fs(lp, shader, key,
+ builder,
+ fs_type,
+ context_ptr,
+ i,
+ &interp,
+ sampler,
+ &fs_mask[i], /* output */
+ out_color,
+ depth_ptr_i,
+ facing,
+ do_tri_test,
+ c0, c1, c2,
+ step0_ptr, step1_ptr, step2_ptr, counter);
+
+ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
+ for(chan = 0; chan < NUM_CHANNELS; ++chan)
+ fs_out_color[cbuf][chan][i] = out_color[cbuf][chan];
+ }
+
+ sampler->destroy(sampler);
+
+ /* Loop over color outputs / color buffers to do blending.
+ */
+ for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
+ LLVMValueRef color_ptr;
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0);
+ LLVMValueRef blend_in_color[NUM_CHANNELS];
+ unsigned rt;
+
+ /*
+ * Convert the fs's output color and mask to fit to the blending type.
+ */
+ for(chan = 0; chan < NUM_CHANNELS; ++chan) {
+ lp_build_conv(builder, fs_type, blend_type,
+ fs_out_color[cbuf][chan], num_fs,
+ &blend_in_color[chan], 1);
+ lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
+ }
+
+ lp_build_conv_mask(builder, fs_type, blend_type,
+ fs_mask, num_fs,
+ &blend_mask, 1);
+
+ color_ptr = LLVMBuildLoad(builder,
+ LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
+ "");
+ lp_build_name(color_ptr, "color_ptr%d", cbuf);
+
+ /* which blend/colormask state to use */
+ rt = key->blend.independent_blend_enable ? cbuf : 0;
+
+ /*
+ * Blending.
+ */
+ generate_blend(&key->blend,
+ rt,
+ builder,
+ blend_type,
+ context_ptr,
+ blend_mask,
+ blend_in_color,
+ color_ptr);
+ }
+
+ LLVMBuildRetVoid(builder);
+
+ LLVMDisposeBuilder(builder);
+
+
+ /* Verify the LLVM IR. If invalid, dump and abort */
+#ifdef DEBUG
+ if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) {
+ if (1)
+ lp_debug_dump_value(function);
+ abort();
+ }
+#endif
+
+ /* Apply optimizations to LLVM IR */
+ if (1)
+ LLVMRunFunctionPassManager(screen->pass, function);
+
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ /* Print the LLVM IR to stderr */
+ lp_debug_dump_value(function);
+ debug_printf("\n");
+ }
+
+ /*
+ * Translate the LLVM IR into machine code.
+ */
+ {
+ void *f = LLVMGetPointerToGlobal(screen->engine, function);
+
+ variant->jit_function[do_tri_test] = (lp_jit_frag_func)pointer_to_func(f);
+
+ if (gallivm_debug & GALLIVM_DEBUG_ASM) {
+ lp_disassemble(f);
+ }
+ }
+}
+
+
+static void
+dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
+{
+ unsigned i;
+
+ debug_printf("fs variant %p:\n", (void *) key);
+
+ if (key->depth.enabled) {
+ debug_printf("depth.format = %s\n", util_format_name(key->zsbuf_format));
+ debug_printf("depth.func = %s\n", util_dump_func(key->depth.func, TRUE));
+ debug_printf("depth.writemask = %u\n", key->depth.writemask);
+ }
+
+ for (i = 0; i < 2; ++i) {
+ if (key->stencil[i].enabled) {
+ debug_printf("stencil[%u].func = %s\n", i, util_dump_func(key->stencil[i].func, TRUE));
+ debug_printf("stencil[%u].fail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].fail_op, TRUE));
+ debug_printf("stencil[%u].zpass_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zpass_op, TRUE));
+ debug_printf("stencil[%u].zfail_op = %s\n", i, util_dump_stencil_op(key->stencil[i].zfail_op, TRUE));
+ debug_printf("stencil[%u].valuemask = 0x%x\n", i, key->stencil[i].valuemask);
+ debug_printf("stencil[%u].writemask = 0x%x\n", i, key->stencil[i].writemask);
+ }
+ }
+
+ if (key->alpha.enabled) {
+ debug_printf("alpha.func = %s\n", util_dump_func(key->alpha.func, TRUE));
+ debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
+ }
+
+ if (key->blend.logicop_enable) {
+ debug_printf("blend.logicop_func = %s\n", util_dump_logicop(key->blend.logicop_func, TRUE));
+ }
+ else if (key->blend.rt[0].blend_enable) {
+ debug_printf("blend.rgb_func = %s\n", util_dump_blend_func (key->blend.rt[0].rgb_func, TRUE));
+ debug_printf("blend.rgb_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_src_factor, TRUE));
+ debug_printf("blend.rgb_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].rgb_dst_factor, TRUE));
+ debug_printf("blend.alpha_func = %s\n", util_dump_blend_func (key->blend.rt[0].alpha_func, TRUE));
+ debug_printf("blend.alpha_src_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_src_factor, TRUE));
+ debug_printf("blend.alpha_dst_factor = %s\n", util_dump_blend_factor(key->blend.rt[0].alpha_dst_factor, TRUE));
+ }
+ debug_printf("blend.colormask = 0x%x\n", key->blend.rt[0].colormask);
+ for (i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
+ if (key->sampler[i].format) {
+ debug_printf("sampler[%u] = \n", i);
+ debug_printf(" .format = %s\n",
+ util_format_name(key->sampler[i].format));
+ debug_printf(" .target = %s\n",
+ util_dump_tex_target(key->sampler[i].target, TRUE));
+ debug_printf(" .pot = %u %u %u\n",
+ key->sampler[i].pot_width,
+ key->sampler[i].pot_height,
+ key->sampler[i].pot_depth);
+ debug_printf(" .wrap = %s %s %s\n",
+ util_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
+ util_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
+ util_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
+ debug_printf(" .min_img_filter = %s\n",
+ util_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
+ debug_printf(" .min_mip_filter = %s\n",
+ util_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
+ debug_printf(" .mag_img_filter = %s\n",
+ util_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
+ if (key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
+ debug_printf(" .compare_func = %s\n", util_dump_func(key->sampler[i].compare_func, TRUE));
+ debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords);
+ }
+ }
+}
+
+
+
+static struct lp_fragment_shader_variant *
+generate_variant(struct llvmpipe_context *lp,
+ struct lp_fragment_shader *shader,
+ const struct lp_fragment_shader_variant_key *key)
+{
+ struct lp_fragment_shader_variant *variant;
+
+ variant = CALLOC_STRUCT(lp_fragment_shader_variant);
+ if(!variant)
+ return NULL;
+
+ variant->shader = shader;
+ variant->list_item_global.base = variant;
+ variant->list_item_local.base = variant;
+ variant->no = shader->variants_created++;
+
+ memcpy(&variant->key, key, sizeof *key);
+
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ debug_printf("llvmpipe: Creating fragment shader #%u variant #%u:\n",
+ shader->no, variant->no);
+ tgsi_dump(shader->base.tokens, 0);
+ dump_fs_variant_key(key);
+ }
+
+ generate_fragment(lp, shader, variant, RAST_WHOLE);
+ generate_fragment(lp, shader, variant, RAST_EDGE_TEST);
+
+ /* TODO: most of these can be relaxed, in particular the colormask */
+ variant->opaque =
+ !key->blend.logicop_enable &&
+ !key->blend.rt[0].blend_enable &&
+ key->blend.rt[0].colormask == 0xf &&
+ !key->stencil[0].enabled &&
+ !key->alpha.enabled &&
+ !key->depth.enabled &&
+ !key->scissor &&
+ !shader->info.uses_kill
+ ? TRUE : FALSE;
+
+ return variant;
+}
+
+
+static void *
+llvmpipe_create_fs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct lp_fragment_shader *shader;
+
+ shader = CALLOC_STRUCT(lp_fragment_shader);
+ if (!shader)
+ return NULL;
+
+ shader->no = fs_no++;
+ make_empty_list(&shader->variants);
+
+ /* get/save the summary info for this shader */
+ tgsi_scan_shader(templ->tokens, &shader->info);
+
+ /* we need to keep a local copy of the tokens */
+ shader->base.tokens = tgsi_dup_tokens(templ->tokens);
+
+ if (LP_DEBUG & DEBUG_TGSI) {
+ unsigned attrib;
+ debug_printf("llvmpipe: Create fragment shader #%u %p:\n", shader->no, (void *) shader);
+ tgsi_dump(templ->tokens, 0);
+ debug_printf("usage masks:\n");
+ for (attrib = 0; attrib < shader->info.num_inputs; ++attrib) {
+ unsigned usage_mask = shader->info.input_usage_mask[attrib];
+ debug_printf(" IN[%u].%s%s%s%s\n",
+ attrib,
+ usage_mask & TGSI_WRITEMASK_X ? "x" : "",
+ usage_mask & TGSI_WRITEMASK_Y ? "y" : "",
+ usage_mask & TGSI_WRITEMASK_Z ? "z" : "",
+ usage_mask & TGSI_WRITEMASK_W ? "w" : "");
+ }
+ debug_printf("\n");
+ }
+
+ return shader;
+}
+
+
+static void
+llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ if (llvmpipe->fs == fs)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ llvmpipe->fs = fs;
+
+ llvmpipe->dirty |= LP_NEW_FS;
+}
+
+static void
+remove_shader_variant(struct llvmpipe_context *lp,
+ struct lp_fragment_shader_variant *variant)
+{
+ struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
+ unsigned i;
+
+ if (gallivm_debug & GALLIVM_DEBUG_IR) {
+ debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached #%u v total cached #%u\n",
+ variant->shader->no, variant->no, variant->shader->variants_created,
+ variant->shader->variants_cached, lp->nr_fs_variants);
+ }
+ for (i = 0; i < Elements(variant->function); i++) {
+ if (variant->function[i]) {
+ if (variant->jit_function[i])
+ LLVMFreeMachineCodeForFunction(screen->engine,
+ variant->function[i]);
+ LLVMDeleteFunction(variant->function[i]);
+ }
+ }
+ remove_from_list(&variant->list_item_local);
+ variant->shader->variants_cached--;
+ remove_from_list(&variant->list_item_global);
+ lp->nr_fs_variants--;
+ FREE(variant);
+}
+
+static void
+llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ struct pipe_fence_handle *fence = NULL;
+ struct lp_fragment_shader *shader = fs;
+ struct lp_fs_variant_list_item *li;
+
+ assert(fs != llvmpipe->fs);
+ (void) llvmpipe;
+
+ /*
+ * XXX: we need to flush the context until we have some sort of reference
+ * counting in fragment shaders as they may still be binned
+ * Flushing alone might not sufficient we need to wait on it too.
+ */
+
+ llvmpipe_flush(pipe, 0, &fence);
+
+ if (fence) {
+ pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_reference(pipe->screen, &fence, NULL);
+ }
+
+ li = first_elem(&shader->variants);
+ while(!at_end(&shader->variants, li)) {
+ struct lp_fs_variant_list_item *next = next_elem(li);
+ remove_shader_variant(llvmpipe, li->base);
+ li = next;
+ }
+
+ assert(shader->variants_cached == 0);
+ FREE((void *) shader->base.tokens);
+ FREE(shader);
+}
+
+
+
+static void
+llvmpipe_set_constant_buffer(struct pipe_context *pipe,
+ uint shader, uint index,
+ struct pipe_resource *constants)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ unsigned size = constants ? constants->width0 : 0;
+ const void *data = constants ? llvmpipe_resource_data(constants) : NULL;
+
+ assert(shader < PIPE_SHADER_TYPES);
+ assert(index < PIPE_MAX_CONSTANT_BUFFERS);
+
+ if(llvmpipe->constants[shader][index] == constants)
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ /* note: reference counting */
+ pipe_resource_reference(&llvmpipe->constants[shader][index], constants);
+
+ if(shader == PIPE_SHADER_VERTEX ||
+ shader == PIPE_SHADER_GEOMETRY) {
+ draw_set_mapped_constant_buffer(llvmpipe->draw, shader,
+ index, data, size);
+ }
+
+ llvmpipe->dirty |= LP_NEW_CONSTANTS;
+}
+
+
+/**
+ * Return the blend factor equivalent to a destination alpha of one.
+ */
+static INLINE unsigned
+force_dst_alpha_one(unsigned factor, boolean alpha)
+{
+ switch(factor) {
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return PIPE_BLENDFACTOR_ONE;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return PIPE_BLENDFACTOR_ZERO;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ return PIPE_BLENDFACTOR_ZERO;
+ }
+
+ if (alpha) {
+ switch(factor) {
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return PIPE_BLENDFACTOR_ONE;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return PIPE_BLENDFACTOR_ZERO;
+ }
+ }
+
+ return factor;
+}
+
+
+/**
+ * We need to generate several variants of the fragment pipeline to match
+ * all the combinations of the contributing state atoms.
+ *
+ * TODO: there is actually no reason to tie this to context state -- the
+ * generated code could be cached globally in the screen.
+ */
+static void
+make_variant_key(struct llvmpipe_context *lp,
+ struct lp_fragment_shader *shader,
+ struct lp_fragment_shader_variant_key *key)
+{
+ unsigned i;
+
+ memset(key, 0, sizeof *key);
+
+ if (lp->framebuffer.zsbuf) {
+ if (lp->depth_stencil->depth.enabled) {
+ key->zsbuf_format = lp->framebuffer.zsbuf->format;
+ memcpy(&key->depth, &lp->depth_stencil->depth, sizeof key->depth);
+ }
+ if (lp->depth_stencil->stencil[0].enabled) {
+ key->zsbuf_format = lp->framebuffer.zsbuf->format;
+ memcpy(&key->stencil, &lp->depth_stencil->stencil, sizeof key->stencil);
+ }
+ }
+
+ key->alpha.enabled = lp->depth_stencil->alpha.enabled;
+ if(key->alpha.enabled)
+ key->alpha.func = lp->depth_stencil->alpha.func;
+ /* alpha.ref_value is passed in jit_context */
+
+ key->flatshade = lp->rasterizer->flatshade;
+ key->scissor = lp->rasterizer->scissor;
+ if (lp->active_query_count) {
+ key->occlusion_count = TRUE;
+ }
+
+ if (lp->framebuffer.nr_cbufs) {
+ memcpy(&key->blend, lp->blend, sizeof key->blend);
+ }
+
+ key->nr_cbufs = lp->framebuffer.nr_cbufs;
+ for (i = 0; i < lp->framebuffer.nr_cbufs; i++) {
+ struct pipe_rt_blend_state *blend_rt = &key->blend.rt[i];
+ const struct util_format_description *format_desc;
+ unsigned chan;
+
+ format_desc = util_format_description(lp->framebuffer.cbufs[i]->format);
+ assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
+ format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
+
+ blend_rt->colormask = lp->blend->rt[i].colormask;
+
+ /* mask out color channels not present in the color buffer.
+ * Should be simple to incorporate per-cbuf writemasks:
+ */
+ for(chan = 0; chan < 4; ++chan) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+
+ if(swizzle > UTIL_FORMAT_SWIZZLE_W)
+ blend_rt->colormask &= ~(1 << chan);
+ }
+
+ /*
+ * Our swizzled render tiles always have an alpha channel, but the linear
+ * render target format often does not, so force here the dst alpha to be
+ * one.
+ *
+ * This is not a mere optimization. Wrong results will be produced if the
+ * dst alpha is used, the dst format does not have alpha, and the previous
+ * rendering was not flushed from the swizzled to linear buffer. For
+ * example, NonPowTwo DCT.
+ *
+ * TODO: This should be generalized to all channels for better
+ * performance, but only alpha causes correctness issues.
+ */
+ if (format_desc->swizzle[3] > UTIL_FORMAT_SWIZZLE_W) {
+ blend_rt->rgb_src_factor = force_dst_alpha_one(blend_rt->rgb_src_factor, FALSE);
+ blend_rt->rgb_dst_factor = force_dst_alpha_one(blend_rt->rgb_dst_factor, FALSE);
+ blend_rt->alpha_src_factor = force_dst_alpha_one(blend_rt->alpha_src_factor, TRUE);
+ blend_rt->alpha_dst_factor = force_dst_alpha_one(blend_rt->alpha_dst_factor, TRUE);
+ }
+ }
+
+ for(i = 0; i < PIPE_MAX_SAMPLERS; ++i)
+ if(shader->info.file_mask[TGSI_FILE_SAMPLER] & (1 << i))
+ lp_sampler_static_state(&key->sampler[i], lp->fragment_sampler_views[i], lp->sampler[i]);
+}
+
+/**
+ * Update fragment state. This is called just prior to drawing
+ * something when some fragment-related state has changed.
+ */
+void
+llvmpipe_update_fs(struct llvmpipe_context *lp)
+{
+ struct lp_fragment_shader *shader = lp->fs;
+ struct lp_fragment_shader_variant_key key;
+ struct lp_fragment_shader_variant *variant = NULL;
+ struct lp_fs_variant_list_item *li;
+
+ make_variant_key(lp, shader, &key);
+
+ li = first_elem(&shader->variants);
+ while(!at_end(&shader->variants, li)) {
+ if(memcmp(&li->base->key, &key, sizeof key) == 0) {
+ variant = li->base;
+ break;
+ }
+ li = next_elem(li);
+ }
+
+ if (variant) {
+ move_to_head(&lp->fs_variants_list, &variant->list_item_global);
+ }
+ else {
+ int64_t t0, t1;
+ int64_t dt;
+ unsigned i;
+ if (lp->nr_fs_variants >= LP_MAX_SHADER_VARIANTS) {
+ struct pipe_context *pipe = &lp->pipe;
+ struct pipe_fence_handle *fence = NULL;
+
+ /*
+ * XXX: we need to flush the context until we have some sort of reference
+ * counting in fragment shaders as they may still be binned
+ * Flushing alone might not be sufficient we need to wait on it too.
+ */
+ llvmpipe_flush(pipe, 0, &fence);
+
+ if (fence) {
+ pipe->screen->fence_finish(pipe->screen, fence, 0);
+ pipe->screen->fence_reference(pipe->screen, &fence, NULL);
+ }
+ for (i = 0; i < LP_MAX_SHADER_VARIANTS / 4; i++) {
+ struct lp_fs_variant_list_item *item = last_elem(&lp->fs_variants_list);
+ remove_shader_variant(lp, item->base);
+ }
+ }
+ t0 = os_time_get();
+
+ variant = generate_variant(lp, shader, &key);
+
+ t1 = os_time_get();
+ dt = t1 - t0;
+ LP_COUNT_ADD(llvm_compile_time, dt);
+ LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */
+
+ if (variant) {
+ insert_at_head(&shader->variants, &variant->list_item_local);
+ insert_at_head(&lp->fs_variants_list, &variant->list_item_global);
+ lp->nr_fs_variants++;
+ shader->variants_cached++;
+ }
+ }
+
+ lp_setup_set_fs_variant(lp->setup, variant);
+}
+
+
+
+void
+llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_fs_state = llvmpipe_create_fs_state;
+ llvmpipe->pipe.bind_fs_state = llvmpipe_bind_fs_state;
+ llvmpipe->pipe.delete_fs_state = llvmpipe_delete_fs_state;
+
+ llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h
new file mode 100644
index 0000000000..593cd4de6b
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -0,0 +1,107 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_STATE_FS_H_
+#define LP_STATE_FS_H_
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h" /* for tgsi_shader_info */
+#include "gallivm/lp_bld_sample.h" /* for struct lp_sampler_static_state */
+
+
+struct tgsi_token;
+struct lp_fragment_shader;
+
+
+/** Indexes into jit_function[] array */
+#define RAST_WHOLE 0
+#define RAST_EDGE_TEST 1
+
+
+struct lp_fragment_shader_variant_key
+{
+ struct pipe_depth_state depth;
+ struct pipe_stencil_state stencil[2];
+ struct pipe_alpha_state alpha;
+ struct pipe_blend_state blend;
+ enum pipe_format zsbuf_format;
+ unsigned nr_cbufs:8;
+ unsigned flatshade:1;
+ unsigned scissor:1;
+ unsigned occlusion_count:1;
+
+ struct {
+ ubyte colormask;
+ } cbuf_blend[PIPE_MAX_COLOR_BUFS];
+
+ struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS];
+};
+
+struct lp_fs_variant_list_item
+{
+ struct lp_fragment_shader_variant *base;
+ struct lp_fs_variant_list_item *next, *prev;
+};
+
+struct lp_fragment_shader_variant
+{
+ struct lp_fragment_shader_variant_key key;
+
+ boolean opaque;
+
+ LLVMValueRef function[2];
+
+ lp_jit_frag_func jit_function[2];
+
+ struct lp_fs_variant_list_item list_item_global, list_item_local;
+ struct lp_fragment_shader *shader;
+
+ /* For debugging/profiling purposes */
+ unsigned no;
+};
+
+
+/** Subclass of pipe_shader_state */
+struct lp_fragment_shader
+{
+ struct pipe_shader_state base;
+
+ struct tgsi_shader_info info;
+
+ struct lp_fs_variant_list_item variants;
+
+ /* For debugging/profiling purposes */
+ unsigned no;
+ unsigned variants_created;
+ unsigned variants_cached;
+};
+
+
+#endif /* LP_STATE_FS_H_ */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_gs.c b/src/gallium/drivers/llvmpipe/lp_state_gs.c
new file mode 100644
index 0000000000..1ba6f10821
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_gs.c
@@ -0,0 +1,112 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "lp_context.h"
+#include "lp_state.h"
+#include "lp_texture.h"
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "draw/draw_context.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_parse.h"
+
+
+static void *
+llvmpipe_create_gs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ struct lp_geometry_shader *state;
+
+ state = CALLOC_STRUCT(lp_geometry_shader);
+ if (state == NULL )
+ goto fail;
+
+ /* debug */
+ if (0)
+ tgsi_dump(templ->tokens, 0);
+
+ /* copy shader tokens, the ones passed in will go away.
+ */
+ state->shader.tokens = tgsi_dup_tokens(templ->tokens);
+ if (state->shader.tokens == NULL)
+ goto fail;
+
+ state->draw_data = draw_create_geometry_shader(llvmpipe->draw, templ);
+ if (state->draw_data == NULL)
+ goto fail;
+
+ return state;
+
+fail:
+ if (state) {
+ FREE( (void *)state->shader.tokens );
+ FREE( state->draw_data );
+ FREE( state );
+ }
+ return NULL;
+}
+
+
+static void
+llvmpipe_bind_gs_state(struct pipe_context *pipe, void *gs)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ llvmpipe->gs = (struct lp_geometry_shader *)gs;
+
+ draw_bind_geometry_shader(llvmpipe->draw,
+ (llvmpipe->gs ? llvmpipe->gs->draw_data : NULL));
+
+ llvmpipe->dirty |= LP_NEW_GS;
+}
+
+
+static void
+llvmpipe_delete_gs_state(struct pipe_context *pipe, void *gs)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ struct lp_geometry_shader *state =
+ (struct lp_geometry_shader *)gs;
+
+ draw_delete_geometry_shader(llvmpipe->draw,
+ (state) ? state->draw_data : 0);
+ FREE(state);
+}
+
+
+void
+llvmpipe_init_gs_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_gs_state = llvmpipe_create_gs_state;
+ llvmpipe->pipe.bind_gs_state = llvmpipe_bind_gs_state;
+ llvmpipe->pipe.delete_gs_state = llvmpipe_delete_gs_state;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
new file mode 100644
index 0000000000..afd3e0b21c
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
@@ -0,0 +1,97 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "lp_context.h"
+#include "lp_state.h"
+#include "lp_setup.h"
+#include "draw/draw_context.h"
+
+
+
+static void *
+llvmpipe_create_rasterizer_state(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *rast)
+{
+ /* We do nothing special with rasterizer state.
+ * The CSO handle is just a pointer to a pipe_rasterizer_state object.
+ */
+ return mem_dup(rast, sizeof(*rast));
+}
+
+
+
+static void
+llvmpipe_bind_rasterizer_state(struct pipe_context *pipe, void *handle)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ const struct pipe_rasterizer_state *rasterizer =
+ (const struct pipe_rasterizer_state *) handle;
+
+ if (llvmpipe->rasterizer == rasterizer)
+ return;
+
+ /* pass-through to draw module */
+ draw_set_rasterizer_state(llvmpipe->draw, rasterizer, handle);
+
+ llvmpipe->rasterizer = rasterizer;
+
+ /* Note: we can immediately set the triangle state here and
+ * not worry about binning because we handle culling during
+ * triangle setup, not when rasterizing the bins.
+ */
+ if (llvmpipe->rasterizer) {
+ lp_setup_set_triangle_state( llvmpipe->setup,
+ llvmpipe->rasterizer->cull_face,
+ llvmpipe->rasterizer->front_ccw,
+ llvmpipe->rasterizer->scissor,
+ llvmpipe->rasterizer->gl_rasterization_rules);
+ lp_setup_set_flatshade_first( llvmpipe->setup,
+ llvmpipe->rasterizer->flatshade_first);
+ }
+
+ llvmpipe->dirty |= LP_NEW_RASTERIZER;
+}
+
+
+static void
+llvmpipe_delete_rasterizer_state(struct pipe_context *pipe,
+ void *rasterizer)
+{
+ FREE( rasterizer );
+}
+
+
+
+void
+llvmpipe_init_rasterizer_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_rasterizer_state = llvmpipe_create_rasterizer_state;
+ llvmpipe->pipe.bind_rasterizer_state = llvmpipe_bind_rasterizer_state;
+ llvmpipe->pipe.delete_rasterizer_state = llvmpipe_delete_rasterizer_state;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
new file mode 100644
index 0000000000..e94065fb6a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -0,0 +1,231 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors:
+ * Brian Paul
+ */
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+#include "draw/draw_context.h"
+
+#include "lp_context.h"
+#include "lp_context.h"
+#include "lp_state.h"
+#include "draw/draw_context.h"
+
+
+
+static void *
+llvmpipe_create_sampler_state(struct pipe_context *pipe,
+ const struct pipe_sampler_state *sampler)
+{
+ return mem_dup(sampler, sizeof(*sampler));
+}
+
+
+static void
+llvmpipe_bind_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ unsigned i;
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num == llvmpipe->num_samplers &&
+ !memcmp(llvmpipe->sampler, sampler, num * sizeof(void *)))
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ for (i = 0; i < num; ++i)
+ llvmpipe->sampler[i] = sampler[i];
+ for (i = num; i < PIPE_MAX_SAMPLERS; ++i)
+ llvmpipe->sampler[i] = NULL;
+
+ llvmpipe->num_samplers = num;
+
+ llvmpipe->dirty |= LP_NEW_SAMPLER;
+}
+
+
+static void
+llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe,
+ unsigned num_samplers,
+ void **samplers)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ unsigned i;
+
+ assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num_samplers == llvmpipe->num_vertex_samplers &&
+ !memcmp(llvmpipe->vertex_samplers, samplers, num_samplers * sizeof(void *)))
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ for (i = 0; i < num_samplers; ++i)
+ llvmpipe->vertex_samplers[i] = samplers[i];
+ for (i = num_samplers; i < PIPE_MAX_VERTEX_SAMPLERS; ++i)
+ llvmpipe->vertex_samplers[i] = NULL;
+
+ llvmpipe->num_vertex_samplers = num_samplers;
+
+ llvmpipe->dirty |= LP_NEW_SAMPLER;
+}
+
+
+static void
+llvmpipe_bind_geometry_sampler_states(struct pipe_context *pipe,
+ unsigned num, void **sampler)
+{
+ /* XXX: implementation missing */
+}
+
+static void
+llvmpipe_set_fragment_sampler_views(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_sampler_view **views)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ uint i;
+
+ assert(num <= PIPE_MAX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num == llvmpipe->num_fragment_sampler_views &&
+ !memcmp(llvmpipe->fragment_sampler_views, views, num * sizeof(struct pipe_sampler_view *)))
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ struct pipe_sampler_view *view = i < num ? views[i] : NULL;
+
+ pipe_sampler_view_reference(&llvmpipe->fragment_sampler_views[i], view);
+ }
+
+ llvmpipe->num_fragment_sampler_views = num;
+
+ llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
+}
+
+
+static void
+llvmpipe_set_vertex_sampler_views(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_sampler_view **views)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ uint i;
+
+ assert(num <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num == llvmpipe->num_vertex_sampler_views &&
+ !memcmp(llvmpipe->vertex_sampler_views, views, num * sizeof(struct pipe_sampler_view *))) {
+ return;
+ }
+
+ draw_flush(llvmpipe->draw);
+
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ struct pipe_sampler_view *view = i < num ? views[i] : NULL;
+
+ pipe_sampler_view_reference(&llvmpipe->vertex_sampler_views[i], view);
+ }
+
+ llvmpipe->num_vertex_sampler_views = num;
+
+ llvmpipe->dirty |= LP_NEW_SAMPLER_VIEW;
+}
+
+
+static void
+llvmpipe_set_geometry_sampler_views(struct pipe_context *pipe,
+ unsigned num,
+ struct pipe_sampler_view **views)
+{
+ /*XXX: implementation missing */
+}
+
+static struct pipe_sampler_view *
+llvmpipe_create_sampler_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ)
+{
+ struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
+
+ if (view) {
+ *view = *templ;
+ view->reference.count = 1;
+ view->texture = NULL;
+ pipe_resource_reference(&view->texture, texture);
+ view->context = pipe;
+ }
+
+ return view;
+}
+
+
+static void
+llvmpipe_sampler_view_destroy(struct pipe_context *pipe,
+ struct pipe_sampler_view *view)
+{
+ pipe_resource_reference(&view->texture, NULL);
+ FREE(view);
+}
+
+
+static void
+llvmpipe_delete_sampler_state(struct pipe_context *pipe,
+ void *sampler)
+{
+ FREE( sampler );
+}
+
+
+void
+llvmpipe_init_sampler_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state;
+
+ llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states;
+ llvmpipe->pipe.bind_vertex_sampler_states = llvmpipe_bind_vertex_sampler_states;
+ llvmpipe->pipe.bind_geometry_sampler_states = llvmpipe_bind_geometry_sampler_states;
+ llvmpipe->pipe.set_fragment_sampler_views = llvmpipe_set_fragment_sampler_views;
+ llvmpipe->pipe.set_vertex_sampler_views = llvmpipe_set_vertex_sampler_views;
+ llvmpipe->pipe.set_geometry_sampler_views = llvmpipe_set_geometry_sampler_views;
+ llvmpipe->pipe.create_sampler_view = llvmpipe_create_sampler_view;
+ llvmpipe->pipe.sampler_view_destroy = llvmpipe_sampler_view_destroy;
+ llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_so.c b/src/gallium/drivers/llvmpipe/lp_state_so.c
new file mode 100644
index 0000000000..4c64a5b142
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_so.c
@@ -0,0 +1,138 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "lp_context.h"
+#include "lp_state.h"
+#include "lp_texture.h"
+
+#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+
+
+static void *
+llvmpipe_create_stream_output_state(struct pipe_context *pipe,
+ const struct pipe_stream_output_state *templ)
+{
+ struct lp_so_state *so;
+ so = (struct lp_so_state *) CALLOC_STRUCT(lp_so_state);
+
+ if (so) {
+ so->base.num_outputs = templ->num_outputs;
+ so->base.stride = templ->stride;
+ memcpy(so->base.output_buffer,
+ templ->output_buffer,
+ sizeof(int) * templ->num_outputs);
+ memcpy(so->base.register_index,
+ templ->register_index,
+ sizeof(int) * templ->num_outputs);
+ memcpy(so->base.register_mask,
+ templ->register_mask,
+ sizeof(ubyte) * templ->num_outputs);
+ }
+ return so;
+}
+
+static void
+llvmpipe_bind_stream_output_state(struct pipe_context *pipe,
+ void *so)
+{
+ struct llvmpipe_context *lp = llvmpipe_context(pipe);
+ struct lp_so_state *lp_so = (struct lp_so_state *) so;
+
+ lp->so = lp_so;
+
+ lp->dirty |= LP_NEW_SO;
+
+ if (lp_so)
+ draw_set_so_state(lp->draw, &lp_so->base);
+}
+
+static void
+llvmpipe_delete_stream_output_state(struct pipe_context *pipe, void *so)
+{
+ FREE( so );
+}
+
+static void
+llvmpipe_set_stream_output_buffers(struct pipe_context *pipe,
+ struct pipe_resource **buffers,
+ int *offsets,
+ int num_buffers)
+{
+ struct llvmpipe_context *lp = llvmpipe_context(pipe);
+ int i;
+ void *map_buffers[PIPE_MAX_SO_BUFFERS];
+
+ assert(num_buffers <= PIPE_MAX_SO_BUFFERS);
+ if (num_buffers > PIPE_MAX_SO_BUFFERS)
+ num_buffers = PIPE_MAX_SO_BUFFERS;
+
+ lp->dirty |= LP_NEW_SO_BUFFERS;
+
+ for (i = 0; i < num_buffers; ++i) {
+ void *mapped;
+ struct llvmpipe_resource *res = llvmpipe_resource(buffers[i]);
+
+ if (!res) {
+ /* the whole call is invalid, bail out */
+ lp->so_target.num_buffers = 0;
+ draw_set_mapped_so_buffers(lp->draw, 0, 0);
+ return;
+ }
+
+ lp->so_target.buffer[i] = res;
+ lp->so_target.offset[i] = offsets[i];
+ lp->so_target.so_count[i] = 0;
+
+ mapped = res->data;
+ if (offsets[i] >= 0)
+ map_buffers[i] = ((char*)mapped) + offsets[i];
+ else {
+ /* this is a buffer append */
+ assert(!"appending not implemented");
+ map_buffers[i] = mapped;
+ }
+ }
+ lp->so_target.num_buffers = num_buffers;
+
+ draw_set_mapped_so_buffers(lp->draw, map_buffers, num_buffers);
+}
+
+void
+llvmpipe_init_so_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_stream_output_state =
+ llvmpipe_create_stream_output_state;
+ llvmpipe->pipe.bind_stream_output_state =
+ llvmpipe_bind_stream_output_state;
+ llvmpipe->pipe.delete_stream_output_state =
+ llvmpipe_delete_stream_output_state;
+
+ llvmpipe->pipe.set_stream_output_buffers =
+ llvmpipe_set_stream_output_buffers;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c
new file mode 100644
index 0000000000..4b135aaf8b
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c
@@ -0,0 +1,83 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_framebuffer.h"
+#include "util/u_surface.h"
+#include "lp_context.h"
+#include "lp_scene.h"
+#include "lp_state.h"
+#include "lp_setup.h"
+
+#include "draw/draw_context.h"
+
+#include "util/u_format.h"
+
+
+/**
+ * Set the framebuffer surface info: color buffers, zbuffer, stencil buffer.
+ */
+void
+llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct llvmpipe_context *lp = llvmpipe_context(pipe);
+
+ boolean changed = !util_framebuffer_state_equal(&lp->framebuffer, fb);
+
+ assert(fb->width <= LP_MAX_WIDTH);
+ assert(fb->height <= LP_MAX_HEIGHT);
+
+ if (changed) {
+
+ util_copy_framebuffer_state(&lp->framebuffer, fb);
+
+ /* Tell draw module how deep the Z/depth buffer is */
+ if (lp->framebuffer.zsbuf) {
+ int depth_bits;
+ double mrd;
+ depth_bits = util_format_get_component_bits(lp->framebuffer.zsbuf->format,
+ UTIL_FORMAT_COLORSPACE_ZS,
+ 0);
+ if (depth_bits > 16) {
+ mrd = 0.0000001;
+ }
+ else {
+ mrd = 0.00002;
+ }
+ draw_set_mrd(lp->draw, mrd);
+ }
+
+ lp_setup_bind_framebuffer( lp->setup, &lp->framebuffer );
+
+ lp->dirty |= LP_NEW_FRAMEBUFFER;
+ }
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vertex.c b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
new file mode 100644
index 0000000000..113f13db01
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_vertex.c
@@ -0,0 +1,101 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "lp_context.h"
+#include "lp_state.h"
+
+#include "draw/draw_context.h"
+
+
+static void *
+llvmpipe_create_vertex_elements_state(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_element *attribs)
+{
+ struct lp_velems_state *velems;
+ assert(count <= PIPE_MAX_ATTRIBS);
+ velems = (struct lp_velems_state *) MALLOC(sizeof(struct lp_velems_state));
+ if (velems) {
+ velems->count = count;
+ memcpy(velems->velem, attribs, sizeof(*attribs) * count);
+ }
+ return velems;
+}
+
+static void
+llvmpipe_bind_vertex_elements_state(struct pipe_context *pipe,
+ void *velems)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ struct lp_velems_state *lp_velems = (struct lp_velems_state *) velems;
+
+ llvmpipe->velems = lp_velems;
+
+ llvmpipe->dirty |= LP_NEW_VERTEX;
+
+ if (velems)
+ draw_set_vertex_elements(llvmpipe->draw, lp_velems->count, lp_velems->velem);
+}
+
+static void
+llvmpipe_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
+{
+ FREE( velems );
+}
+
+static void
+llvmpipe_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned count,
+ const struct pipe_vertex_buffer *buffers)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ assert(count <= PIPE_MAX_ATTRIBS);
+
+ memcpy(llvmpipe->vertex_buffer, buffers, count * sizeof(buffers[0]));
+ llvmpipe->num_vertex_buffers = count;
+
+ llvmpipe->dirty |= LP_NEW_VERTEX;
+
+ draw_set_vertex_buffers(llvmpipe->draw, count, buffers);
+}
+
+
+
+void
+llvmpipe_init_vertex_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_vertex_elements_state = llvmpipe_create_vertex_elements_state;
+ llvmpipe->pipe.bind_vertex_elements_state = llvmpipe_bind_vertex_elements_state;
+ llvmpipe->pipe.delete_vertex_elements_state = llvmpipe_delete_vertex_elements_state;
+
+ llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c
new file mode 100644
index 0000000000..f2d8808990
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c
@@ -0,0 +1,118 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+
+#include "lp_context.h"
+#include "lp_debug.h"
+#include "lp_state.h"
+
+
+static void *
+llvmpipe_create_vs_state(struct pipe_context *pipe,
+ const struct pipe_shader_state *templ)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ struct lp_vertex_shader *state;
+
+ state = CALLOC_STRUCT(lp_vertex_shader);
+ if (state == NULL )
+ goto fail;
+
+ /* copy shader tokens, the ones passed in will go away.
+ */
+ state->shader.tokens = tgsi_dup_tokens(templ->tokens);
+ if (state->shader.tokens == NULL)
+ goto fail;
+
+ state->draw_data = draw_create_vertex_shader(llvmpipe->draw, templ);
+ if (state->draw_data == NULL)
+ goto fail;
+
+ if (LP_DEBUG & DEBUG_TGSI) {
+ debug_printf("llvmpipe: Create vertex shader %p:\n", (void *) state);
+ tgsi_dump(templ->tokens, 0);
+ }
+
+ return state;
+
+fail:
+ if (state) {
+ FREE( (void *)state->shader.tokens );
+ FREE( state->draw_data );
+ FREE( state );
+ }
+ return NULL;
+}
+
+
+static void
+llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ const struct lp_vertex_shader *vs = (const struct lp_vertex_shader *)_vs;
+
+ if (llvmpipe->vs == vs)
+ return;
+
+ draw_bind_vertex_shader(llvmpipe->draw,
+ vs ? vs->draw_data : NULL);
+
+ llvmpipe->vs = vs;
+
+ llvmpipe->dirty |= LP_NEW_VS;
+}
+
+
+static void
+llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+
+ struct lp_vertex_shader *state =
+ (struct lp_vertex_shader *)vs;
+
+ draw_delete_vertex_shader(llvmpipe->draw, state->draw_data);
+ FREE( (void *)state->shader.tokens );
+ FREE( state );
+}
+
+
+
+void
+llvmpipe_init_vs_funcs(struct llvmpipe_context *llvmpipe)
+{
+ llvmpipe->pipe.create_vs_state = llvmpipe_create_vs_state;
+ llvmpipe->pipe.bind_vs_state = llvmpipe_bind_vs_state;
+ llvmpipe->pipe.delete_vs_state = llvmpipe_delete_vs_state;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_surface.c b/src/gallium/drivers/llvmpipe/lp_surface.c
new file mode 100644
index 0000000000..76b3fce1fa
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_surface.c
@@ -0,0 +1,158 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_rect.h"
+#include "util/u_surface.h"
+#include "lp_context.h"
+#include "lp_flush.h"
+#include "lp_limits.h"
+#include "lp_surface.h"
+#include "lp_texture.h"
+
+
+/**
+ * Adjust x, y, width, height to lie on tile bounds.
+ */
+static void
+adjust_to_tile_bounds(unsigned x, unsigned y, unsigned width, unsigned height,
+ unsigned *x_tile, unsigned *y_tile,
+ unsigned *w_tile, unsigned *h_tile)
+{
+ *x_tile = x & ~(TILE_SIZE - 1);
+ *y_tile = y & ~(TILE_SIZE - 1);
+ *w_tile = ((x + width + TILE_SIZE - 1) & ~(TILE_SIZE - 1)) - *x_tile;
+ *h_tile = ((y + height + TILE_SIZE - 1) & ~(TILE_SIZE - 1)) - *y_tile;
+}
+
+
+
+static void
+lp_resource_copy(struct pipe_context *pipe,
+ struct pipe_resource *dst, struct pipe_subresource subdst,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src, struct pipe_subresource subsrc,
+ unsigned srcx, unsigned srcy, unsigned srcz,
+ unsigned width, unsigned height)
+{
+ /* XXX what about the dstz/srcz parameters - zslice wasn't used... */
+ struct llvmpipe_resource *src_tex = llvmpipe_resource(src);
+ struct llvmpipe_resource *dst_tex = llvmpipe_resource(dst);
+ const enum pipe_format format = src_tex->base.format;
+
+ llvmpipe_flush_resource(pipe,
+ dst, subdst.face, subdst.level,
+ 0, /* flush_flags */
+ FALSE, /* read_only */
+ FALSE, /* cpu_access */
+ FALSE); /* do_not_block */
+
+ llvmpipe_flush_resource(pipe,
+ src, subsrc.face, subsrc.level,
+ 0, /* flush_flags */
+ TRUE, /* read_only */
+ FALSE, /* cpu_access */
+ FALSE); /* do_not_block */
+
+ /*
+ printf("surface copy from %u to %u: %u,%u to %u,%u %u x %u\n",
+ src_tex->id, dst_tex->id,
+ srcx, srcy, dstx, dsty, width, height);
+ */
+
+ /* set src tiles to linear layout */
+ {
+ unsigned tx, ty, tw, th;
+ unsigned x, y;
+
+ adjust_to_tile_bounds(srcx, srcy, width, height, &tx, &ty, &tw, &th);
+
+ for (y = 0; y < th; y += TILE_SIZE) {
+ for (x = 0; x < tw; x += TILE_SIZE) {
+ (void) llvmpipe_get_texture_tile_linear(src_tex,
+ subsrc.face, subsrc.level,
+ LP_TEX_USAGE_READ,
+ tx + x, ty + y);
+ }
+ }
+ }
+
+ /* set dst tiles to linear layout */
+ {
+ unsigned tx, ty, tw, th;
+ unsigned x, y;
+ enum lp_texture_usage usage;
+
+ /* XXX for the tiles which are completely contained by the
+ * dest rectangle, we could set the usage mode to WRITE_ALL.
+ * Just test for the case of replacing the whole dest region for now.
+ */
+ if (width == dst_tex->base.width0 && height == dst_tex->base.height0)
+ usage = LP_TEX_USAGE_WRITE_ALL;
+ else
+ usage = LP_TEX_USAGE_READ_WRITE;
+
+ adjust_to_tile_bounds(dstx, dsty, width, height, &tx, &ty, &tw, &th);
+
+ for (y = 0; y < th; y += TILE_SIZE) {
+ for (x = 0; x < tw; x += TILE_SIZE) {
+ (void) llvmpipe_get_texture_tile_linear(dst_tex,
+ subdst.face, subdst.level,
+ usage,
+ tx + x, ty + y);
+ }
+ }
+ }
+
+ /* copy */
+ {
+ const ubyte *src_linear_ptr
+ = llvmpipe_get_texture_image_address(src_tex, subsrc.face,
+ subsrc.level,
+ LP_TEX_LAYOUT_LINEAR);
+ ubyte *dst_linear_ptr
+ = llvmpipe_get_texture_image_address(dst_tex, subdst.face,
+ subdst.level,
+ LP_TEX_LAYOUT_LINEAR);
+
+ util_copy_rect(dst_linear_ptr, format,
+ llvmpipe_resource_stride(&dst_tex->base, subdst.level),
+ dstx, dsty,
+ width, height,
+ src_linear_ptr,
+ llvmpipe_resource_stride(&src_tex->base, subsrc.level),
+ srcx, srcy);
+ }
+}
+
+
+void
+llvmpipe_init_surface_functions(struct llvmpipe_context *lp)
+{
+ lp->pipe.resource_copy_region = lp_resource_copy;
+ lp->pipe.clear_render_target = util_clear_render_target;
+ lp->pipe.clear_depth_stencil = util_clear_depth_stencil;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_surface.h b/src/gallium/drivers/llvmpipe/lp_surface.h
new file mode 100644
index 0000000000..b1b896ebd9
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_surface.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef LP_SURFACE_H
+#define LP_SURFACE_H
+
+
+struct llvmpipe_context;
+
+
+extern void
+llvmpipe_init_surface_functions(struct llvmpipe_context *lp);
+
+
+#endif /* LP_SURFACE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
new file mode 100644
index 0000000000..90422e4258
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test.h
@@ -0,0 +1,144 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Shared testing code.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_TEST_H
+#define LP_TEST_H
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <float.h>
+
+#include "gallivm/lp_bld.h"
+#include <llvm-c/Analysis.h>
+#include <llvm-c/ExecutionEngine.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/BitWriter.h>
+#include <llvm-c/Transforms/Scalar.h>
+
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_dump.h"
+
+#include "gallivm/lp_bld_type.h"
+
+
+#define LP_TEST_NUM_SAMPLES 32
+
+
+void
+write_tsv_header(FILE *fp);
+
+
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n);
+
+boolean
+test_single(unsigned verbose, FILE *fp);
+
+boolean
+test_all(unsigned verbose, FILE *fp);
+
+
+#if defined(PIPE_CC_MSVC)
+
+unsigned __int64 __rdtsc();
+#pragma intrinsic(__rdtsc)
+#define rdtsc() __rdtsc()
+
+#elif defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
+
+static INLINE uint64_t
+rdtsc(void)
+{
+ uint32_t hi, lo;
+ __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
+ return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+#else
+
+#define rdtsc() 0
+
+#endif
+
+
+
+float
+random_float(void);
+
+
+void
+dump_type(FILE *fp, struct lp_type type);
+
+
+double
+read_elem(struct lp_type type, const void *src, unsigned index);
+
+
+void
+write_elem(struct lp_type type, void *dst, unsigned index, double src);
+
+
+void
+random_elem(struct lp_type type, void *dst, unsigned index);
+
+
+void
+read_vec(struct lp_type type, const void *src, double *dst);
+
+
+void
+write_vec(struct lp_type type, void *dst, const double *src);
+
+
+void
+random_vec(struct lp_type type, void *dst);
+
+
+boolean
+compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps);
+
+
+boolean
+compare_vec(struct lp_type type, const void *res, const void *ref);
+
+
+void
+dump_vec(FILE *fp, struct lp_type type, const void *src);
+
+
+#endif /* !LP_TEST_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
new file mode 100644
index 0000000000..0c95555655
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -0,0 +1,905 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Unit tests for blend LLVM IR generation
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ *
+ * Blend computation code derived from code written by
+ * @author Brian Paul <brian@vmware.com>
+ */
+
+
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_debug.h"
+#include "lp_bld_blend.h"
+#include "lp_test.h"
+
+
+enum vector_mode
+{
+ AoS = 0,
+ SoA = 1
+};
+
+
+typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);
+
+/** cast wrapper */
+static blend_test_ptr_t
+voidptr_to_blend_test_ptr_t(void *p)
+{
+ union {
+ void *v;
+ blend_test_ptr_t f;
+ } u;
+ u.v = p;
+ return u.f;
+}
+
+
+
+void
+write_tsv_header(FILE *fp)
+{
+ fprintf(fp,
+ "result\t"
+ "cycles_per_channel\t"
+ "mode\t"
+ "type\t"
+ "sep_func\t"
+ "sep_src_factor\t"
+ "sep_dst_factor\t"
+ "rgb_func\t"
+ "rgb_src_factor\t"
+ "rgb_dst_factor\t"
+ "alpha_func\t"
+ "alpha_src_factor\t"
+ "alpha_dst_factor\n");
+
+ fflush(fp);
+}
+
+
+static void
+write_tsv_row(FILE *fp,
+ const struct pipe_blend_state *blend,
+ enum vector_mode mode,
+ struct lp_type type,
+ double cycles,
+ boolean success)
+{
+ fprintf(fp, "%s\t", success ? "pass" : "fail");
+
+ if (mode == AoS) {
+ fprintf(fp, "%.1f\t", cycles / type.length);
+ fprintf(fp, "aos\t");
+ }
+
+ if (mode == SoA) {
+ fprintf(fp, "%.1f\t", cycles / (4 * type.length));
+ fprintf(fp, "soa\t");
+ }
+
+ fprintf(fp, "%s%u%sx%u\t",
+ type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
+ type.width,
+ type.norm ? "n" : "",
+ type.length);
+
+ fprintf(fp,
+ "%s\t%s\t%s\t",
+ blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
+ blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
+ blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");
+
+ fprintf(fp,
+ "%s\t%s\t%s\t%s\t%s\t%s\n",
+ util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
+ util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
+ util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
+ util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
+ util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
+ util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
+
+ fflush(fp);
+}
+
+
+static void
+dump_blend_type(FILE *fp,
+ const struct pipe_blend_state *blend,
+ enum vector_mode mode,
+ struct lp_type type)
+{
+ fprintf(fp, "%s", mode ? "soa" : "aos");
+
+ fprintf(fp, " type=%s%u%sx%u",
+ type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
+ type.width,
+ type.norm ? "n" : "",
+ type.length);
+
+ fprintf(fp,
+ " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
+ "rgb_func", util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
+ "rgb_src_factor", util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
+ "rgb_dst_factor", util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
+ "alpha_func", util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
+ "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
+ "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
+
+ fprintf(fp, " ...\n");
+ fflush(fp);
+}
+
+
+static LLVMValueRef
+add_blend_test(LLVMModuleRef module,
+ const struct pipe_blend_state *blend,
+ enum vector_mode mode,
+ struct lp_type type)
+{
+ LLVMTypeRef vec_type;
+ LLVMTypeRef args[4];
+ LLVMValueRef func;
+ LLVMValueRef src_ptr;
+ LLVMValueRef dst_ptr;
+ LLVMValueRef const_ptr;
+ LLVMValueRef res_ptr;
+ LLVMBasicBlockRef block;
+ LLVMBuilderRef builder;
+ const unsigned rt = 0;
+
+ vec_type = lp_build_vec_type(type);
+
+ args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
+ func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
+ LLVMSetFunctionCallConv(func, LLVMCCallConv);
+ src_ptr = LLVMGetParam(func, 0);
+ dst_ptr = LLVMGetParam(func, 1);
+ const_ptr = LLVMGetParam(func, 2);
+ res_ptr = LLVMGetParam(func, 3);
+
+ block = LLVMAppendBasicBlock(func, "entry");
+ builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ if (mode == AoS) {
+ LLVMValueRef src;
+ LLVMValueRef dst;
+ LLVMValueRef con;
+ LLVMValueRef res;
+
+ src = LLVMBuildLoad(builder, src_ptr, "src");
+ dst = LLVMBuildLoad(builder, dst_ptr, "dst");
+ con = LLVMBuildLoad(builder, const_ptr, "const");
+
+ res = lp_build_blend_aos(builder, blend, type, rt, src, dst, con, 3);
+
+ lp_build_name(res, "res");
+
+ LLVMBuildStore(builder, res, res_ptr);
+ }
+
+ if (mode == SoA) {
+ LLVMValueRef src[4];
+ LLVMValueRef dst[4];
+ LLVMValueRef con[4];
+ LLVMValueRef res[4];
+ unsigned i;
+
+ for(i = 0; i < 4; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
+ dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
+ con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
+ lp_build_name(src[i], "src.%c", "rgba"[i]);
+ lp_build_name(con[i], "con.%c", "rgba"[i]);
+ lp_build_name(dst[i], "dst.%c", "rgba"[i]);
+ }
+
+ lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);
+
+ for(i = 0; i < 4; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ lp_build_name(res[i], "res.%c", "rgba"[i]);
+ LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
+ }
+ }
+
+ LLVMBuildRetVoid(builder);;
+
+ LLVMDisposeBuilder(builder);
+ return func;
+}
+
+
+/** Add and limit result to ceiling of 1.0 */
+#define ADD_SAT(R, A, B) \
+do { \
+ R = (A) + (B); if (R > 1.0f) R = 1.0f; \
+} while (0)
+
+/** Subtract and limit result to floor of 0.0 */
+#define SUB_SAT(R, A, B) \
+do { \
+ R = (A) - (B); if (R < 0.0f) R = 0.0f; \
+} while (0)
+
+
+static void
+compute_blend_ref_term(unsigned rgb_factor,
+ unsigned alpha_factor,
+ const double *factor,
+ const double *src,
+ const double *dst,
+ const double *con,
+ double *term)
+{
+ double temp;
+
+ switch (rgb_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ term[0] = factor[0]; /* R */
+ term[1] = factor[1]; /* G */
+ term[2] = factor[2]; /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ term[0] = factor[0] * src[0]; /* R */
+ term[1] = factor[1] * src[1]; /* G */
+ term[2] = factor[2] * src[2]; /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ term[0] = factor[0] * src[3]; /* R */
+ term[1] = factor[1] * src[3]; /* G */
+ term[2] = factor[2] * src[3]; /* B */
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ term[0] = factor[0] * dst[0]; /* R */
+ term[1] = factor[1] * dst[1]; /* G */
+ term[2] = factor[2] * dst[2]; /* B */
+ break;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term[0] = factor[0] * dst[3]; /* R */
+ term[1] = factor[1] * dst[3]; /* G */
+ term[2] = factor[2] * dst[3]; /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ temp = MIN2(src[3], 1.0f - dst[3]);
+ term[0] = factor[0] * temp; /* R */
+ term[1] = factor[1] * temp; /* G */
+ term[2] = factor[2] * temp; /* B */
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ term[0] = factor[0] * con[0]; /* R */
+ term[1] = factor[1] * con[1]; /* G */
+ term[2] = factor[2] * con[2]; /* B */
+ break;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term[0] = factor[0] * con[3]; /* R */
+ term[1] = factor[1] * con[3]; /* G */
+ term[2] = factor[2] * con[3]; /* B */
+ break;
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ assert(0); /* to do */
+ break;
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ assert(0); /* to do */
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ term[0] = 0.0f; /* R */
+ term[1] = 0.0f; /* G */
+ term[2] = 0.0f; /* B */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ term[0] = factor[0] * (1.0f - src[0]); /* R */
+ term[1] = factor[1] * (1.0f - src[1]); /* G */
+ term[2] = factor[2] * (1.0f - src[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ term[0] = factor[0] * (1.0f - src[3]); /* R */
+ term[1] = factor[1] * (1.0f - src[3]); /* G */
+ term[2] = factor[2] * (1.0f - src[3]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ term[0] = factor[0] * (1.0f - dst[3]); /* R */
+ term[1] = factor[1] * (1.0f - dst[3]); /* G */
+ term[2] = factor[2] * (1.0f - dst[3]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ term[0] = factor[0] * (1.0f - dst[0]); /* R */
+ term[1] = factor[1] * (1.0f - dst[1]); /* G */
+ term[2] = factor[2] * (1.0f - dst[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ term[0] = factor[0] * (1.0f - con[0]); /* R */
+ term[1] = factor[1] * (1.0f - con[1]); /* G */
+ term[2] = factor[2] * (1.0f - con[2]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ term[0] = factor[0] * (1.0f - con[3]); /* R */
+ term[1] = factor[1] * (1.0f - con[3]); /* G */
+ term[2] = factor[2] * (1.0f - con[3]); /* B */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ assert(0); /* to do */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ assert(0); /* to do */
+ break;
+ default:
+ assert(0);
+ }
+
+ /*
+ * Compute src/first term A
+ */
+ switch (alpha_factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ term[3] = factor[3]; /* A */
+ break;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ term[3] = factor[3] * src[3]; /* A */
+ break;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ term[3] = factor[3] * dst[3]; /* A */
+ break;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ term[3] = src[3]; /* A */
+ break;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ term[3] = factor[3] * con[3]; /* A */
+ break;
+ case PIPE_BLENDFACTOR_ZERO:
+ term[3] = 0.0f; /* A */
+ break;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ term[3] = factor[3] * (1.0f - src[3]); /* A */
+ break;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ term[3] = factor[3] * (1.0f - dst[3]); /* A */
+ break;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ term[3] = factor[3] * (1.0f - con[3]);
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+static void
+compute_blend_ref(const struct pipe_blend_state *blend,
+ const double *src,
+ const double *dst,
+ const double *con,
+ double *res)
+{
+ double src_term[4];
+ double dst_term[4];
+
+ compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
+ src, src, dst, con, src_term);
+ compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
+ dst, src, dst, con, dst_term);
+
+ /*
+ * Combine RGB terms
+ */
+ switch (blend->rt[0].rgb_func) {
+ case PIPE_BLEND_ADD:
+ ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */
+ ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */
+ ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */
+ SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */
+ SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */
+ break;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */
+ SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */
+ SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */
+ break;
+ case PIPE_BLEND_MIN:
+ res[0] = MIN2(src_term[0], dst_term[0]); /* R */
+ res[1] = MIN2(src_term[1], dst_term[1]); /* G */
+ res[2] = MIN2(src_term[2], dst_term[2]); /* B */
+ break;
+ case PIPE_BLEND_MAX:
+ res[0] = MAX2(src_term[0], dst_term[0]); /* R */
+ res[1] = MAX2(src_term[1], dst_term[1]); /* G */
+ res[2] = MAX2(src_term[2], dst_term[2]); /* B */
+ break;
+ default:
+ assert(0);
+ }
+
+ /*
+ * Combine A terms
+ */
+ switch (blend->rt[0].alpha_func) {
+ case PIPE_BLEND_ADD:
+ ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */
+ break;
+ case PIPE_BLEND_SUBTRACT:
+ SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */
+ break;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */
+ break;
+ case PIPE_BLEND_MIN:
+ res[3] = MIN2(src_term[3], dst_term[3]); /* A */
+ break;
+ case PIPE_BLEND_MAX:
+ res[3] = MAX2(src_term[3], dst_term[3]); /* A */
+ break;
+ default:
+ assert(0);
+ }
+}
+
+
+PIPE_ALIGN_STACK
+static boolean
+test_one(unsigned verbose,
+ FILE *fp,
+ const struct pipe_blend_state *blend,
+ enum vector_mode mode,
+ struct lp_type type)
+{
+ LLVMModuleRef module = NULL;
+ LLVMValueRef func = NULL;
+ LLVMExecutionEngineRef engine = NULL;
+ LLVMModuleProviderRef provider = NULL;
+ LLVMPassManagerRef pass = NULL;
+ char *error = NULL;
+ blend_test_ptr_t blend_test_ptr;
+ boolean success;
+ const unsigned n = LP_TEST_NUM_SAMPLES;
+ int64_t cycles[LP_TEST_NUM_SAMPLES];
+ double cycles_avg = 0.0;
+ unsigned i, j;
+ void *code;
+
+ if(verbose >= 1)
+ dump_blend_type(stdout, blend, mode, type);
+
+ module = LLVMModuleCreateWithName("test");
+
+ func = add_blend_test(module, blend, mode, type);
+
+ if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
+ LLVMDumpModule(module);
+ abort();
+ }
+ LLVMDisposeMessage(error);
+
+ provider = LLVMCreateModuleProviderForExistingModule(module);
+ if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
+ if(verbose < 1)
+ dump_blend_type(stderr, blend, mode, type);
+ fprintf(stderr, "%s\n", error);
+ LLVMDisposeMessage(error);
+ abort();
+ }
+
+#if 0
+ pass = LLVMCreatePassManager();
+ LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+ /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+ * but there are more on SVN. */
+ LLVMAddConstantPropagationPass(pass);
+ LLVMAddInstructionCombiningPass(pass);
+ LLVMAddPromoteMemoryToRegisterPass(pass);
+ LLVMAddGVNPass(pass);
+ LLVMAddCFGSimplificationPass(pass);
+ LLVMRunPassManager(pass, module);
+#else
+ (void)pass;
+#endif
+
+ if(verbose >= 2)
+ LLVMDumpModule(module);
+
+ code = LLVMGetPointerToGlobal(engine, func);
+ blend_test_ptr = voidptr_to_blend_test_ptr_t(code);
+
+ if(verbose >= 2)
+ lp_disassemble(code);
+
+ success = TRUE;
+ for(i = 0; i < n && success; ++i) {
+ if(mode == AoS) {
+ PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
+ int64_t start_counter = 0;
+ int64_t end_counter = 0;
+
+ random_vec(type, src);
+ random_vec(type, dst);
+ random_vec(type, con);
+
+ {
+ double fsrc[LP_MAX_VECTOR_LENGTH];
+ double fdst[LP_MAX_VECTOR_LENGTH];
+ double fcon[LP_MAX_VECTOR_LENGTH];
+ double fref[LP_MAX_VECTOR_LENGTH];
+
+ read_vec(type, src, fsrc);
+ read_vec(type, dst, fdst);
+ read_vec(type, con, fcon);
+
+ for(j = 0; j < type.length; j += 4)
+ compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
+
+ write_vec(type, ref, fref);
+ }
+
+ start_counter = rdtsc();
+ blend_test_ptr(src, dst, con, res);
+ end_counter = rdtsc();
+
+ cycles[i] = end_counter - start_counter;
+
+ if(!compare_vec(type, res, ref)) {
+ success = FALSE;
+
+ if(verbose < 1)
+ dump_blend_type(stderr, blend, mode, type);
+ fprintf(stderr, "MISMATCH\n");
+
+ fprintf(stderr, " Src: ");
+ dump_vec(stderr, type, src);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Dst: ");
+ dump_vec(stderr, type, dst);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Con: ");
+ dump_vec(stderr, type, con);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Res: ");
+ dump_vec(stderr, type, res);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Ref: ");
+ dump_vec(stderr, type, ref);
+ fprintf(stderr, "\n");
+ }
+ }
+
+ if(mode == SoA) {
+ const unsigned stride = type.length*type.width/8;
+ PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
+ PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
+ int64_t start_counter = 0;
+ int64_t end_counter = 0;
+ boolean mismatch;
+
+ for(j = 0; j < 4; ++j) {
+ random_vec(type, src + j*stride);
+ random_vec(type, dst + j*stride);
+ random_vec(type, con + j*stride);
+ }
+
+ {
+ double fsrc[4];
+ double fdst[4];
+ double fcon[4];
+ double fref[4];
+ unsigned k;
+
+ for(k = 0; k < type.length; ++k) {
+ for(j = 0; j < 4; ++j) {
+ fsrc[j] = read_elem(type, src + j*stride, k);
+ fdst[j] = read_elem(type, dst + j*stride, k);
+ fcon[j] = read_elem(type, con + j*stride, k);
+ }
+
+ compute_blend_ref(blend, fsrc, fdst, fcon, fref);
+
+ for(j = 0; j < 4; ++j)
+ write_elem(type, ref + j*stride, k, fref[j]);
+ }
+ }
+
+ start_counter = rdtsc();
+ blend_test_ptr(src, dst, con, res);
+ end_counter = rdtsc();
+
+ cycles[i] = end_counter - start_counter;
+
+ mismatch = FALSE;
+ for (j = 0; j < 4; ++j)
+ if(!compare_vec(type, res + j*stride, ref + j*stride))
+ mismatch = TRUE;
+
+ if (mismatch) {
+ success = FALSE;
+
+ if(verbose < 1)
+ dump_blend_type(stderr, blend, mode, type);
+ fprintf(stderr, "MISMATCH\n");
+ for(j = 0; j < 4; ++j) {
+ char channel = "RGBA"[j];
+ fprintf(stderr, " Src%c: ", channel);
+ dump_vec(stderr, type, src + j*stride);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Dst%c: ", channel);
+ dump_vec(stderr, type, dst + j*stride);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Con%c: ", channel);
+ dump_vec(stderr, type, con + j*stride);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Res%c: ", channel);
+ dump_vec(stderr, type, res + j*stride);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Ref%c: ", channel);
+ dump_vec(stderr, type, ref + j*stride);
+ fprintf(stderr, "\n");
+ }
+ }
+ }
+ }
+
+ /*
+ * Unfortunately the output of cycle counter is not very reliable as it comes
+ * -- sometimes we get outliers (due IRQs perhaps?) which are
+ * better removed to avoid random or biased data.
+ */
+ {
+ double sum = 0.0, sum2 = 0.0;
+ double avg, std;
+ unsigned m;
+
+ for(i = 0; i < n; ++i) {
+ sum += cycles[i];
+ sum2 += cycles[i]*cycles[i];
+ }
+
+ avg = sum/n;
+ std = sqrtf((sum2 - n*avg*avg)/n);
+
+ m = 0;
+ sum = 0.0;
+ for(i = 0; i < n; ++i) {
+ if(fabs(cycles[i] - avg) <= 4.0*std) {
+ sum += cycles[i];
+ ++m;
+ }
+ }
+
+ cycles_avg = sum/m;
+
+ }
+
+ if(fp)
+ write_tsv_row(fp, blend, mode, type, cycles_avg, success);
+
+ if (!success) {
+ if(verbose < 2)
+ LLVMDumpModule(module);
+ LLVMWriteBitcodeToFile(module, "blend.bc");
+ fprintf(stderr, "blend.bc written\n");
+ fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
+ abort();
+ }
+
+ LLVMFreeMachineCodeForFunction(engine, func);
+
+ LLVMDisposeExecutionEngine(engine);
+ if(pass)
+ LLVMDisposePassManager(pass);
+
+ return success;
+}
+
+
+const unsigned
+blend_factors[] = {
+ PIPE_BLENDFACTOR_ZERO,
+ PIPE_BLENDFACTOR_ONE,
+ PIPE_BLENDFACTOR_SRC_COLOR,
+ PIPE_BLENDFACTOR_SRC_ALPHA,
+ PIPE_BLENDFACTOR_DST_COLOR,
+ PIPE_BLENDFACTOR_DST_ALPHA,
+ PIPE_BLENDFACTOR_CONST_COLOR,
+ PIPE_BLENDFACTOR_CONST_ALPHA,
+#if 0
+ PIPE_BLENDFACTOR_SRC1_COLOR,
+ PIPE_BLENDFACTOR_SRC1_ALPHA,
+#endif
+ PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
+ PIPE_BLENDFACTOR_INV_SRC_COLOR,
+ PIPE_BLENDFACTOR_INV_SRC_ALPHA,
+ PIPE_BLENDFACTOR_INV_DST_COLOR,
+ PIPE_BLENDFACTOR_INV_DST_ALPHA,
+ PIPE_BLENDFACTOR_INV_CONST_COLOR,
+ PIPE_BLENDFACTOR_INV_CONST_ALPHA,
+#if 0
+ PIPE_BLENDFACTOR_INV_SRC1_COLOR,
+ PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
+#endif
+};
+
+
+const unsigned
+blend_funcs[] = {
+ PIPE_BLEND_ADD,
+ PIPE_BLEND_SUBTRACT,
+ PIPE_BLEND_REVERSE_SUBTRACT,
+ PIPE_BLEND_MIN,
+ PIPE_BLEND_MAX
+};
+
+
+const struct lp_type blend_types[] = {
+ /* float, fixed, sign, norm, width, len */
+ { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */
+ { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */
+};
+
+
+const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
+const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
+const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
+
+
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+ const unsigned *rgb_func;
+ const unsigned *rgb_src_factor;
+ const unsigned *rgb_dst_factor;
+ const unsigned *alpha_func;
+ const unsigned *alpha_src_factor;
+ const unsigned *alpha_dst_factor;
+ struct pipe_blend_state blend;
+ enum vector_mode mode;
+ const struct lp_type *type;
+ boolean success = TRUE;
+
+ for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
+ for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
+ for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
+ for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
+ for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
+ for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
+ for(mode = 0; mode < 2; ++mode) {
+ for(type = blend_types; type < &blend_types[num_types]; ++type) {
+
+ if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
+ continue;
+
+ memset(&blend, 0, sizeof blend);
+ blend.rt[0].blend_enable = 1;
+ blend.rt[0].rgb_func = *rgb_func;
+ blend.rt[0].rgb_src_factor = *rgb_src_factor;
+ blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
+ blend.rt[0].alpha_func = *alpha_func;
+ blend.rt[0].alpha_src_factor = *alpha_src_factor;
+ blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
+ blend.rt[0].colormask = PIPE_MASK_RGBA;
+
+ if(!test_one(verbose, fp, &blend, mode, *type))
+ success = FALSE;
+
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return success;
+}
+
+
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+ const unsigned *rgb_func;
+ const unsigned *rgb_src_factor;
+ const unsigned *rgb_dst_factor;
+ const unsigned *alpha_func;
+ const unsigned *alpha_src_factor;
+ const unsigned *alpha_dst_factor;
+ struct pipe_blend_state blend;
+ enum vector_mode mode;
+ const struct lp_type *type;
+ unsigned long i;
+ boolean success = TRUE;
+
+ for(i = 0; i < n; ++i) {
+ rgb_func = &blend_funcs[rand() % num_funcs];
+ alpha_func = &blend_funcs[rand() % num_funcs];
+ rgb_src_factor = &blend_factors[rand() % num_factors];
+ alpha_src_factor = &blend_factors[rand() % num_factors];
+
+ do {
+ rgb_dst_factor = &blend_factors[rand() % num_factors];
+ } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
+
+ do {
+ alpha_dst_factor = &blend_factors[rand() % num_factors];
+ } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
+
+ mode = rand() & 1;
+
+ type = &blend_types[rand() % num_types];
+
+ memset(&blend, 0, sizeof blend);
+ blend.rt[0].blend_enable = 1;
+ blend.rt[0].rgb_func = *rgb_func;
+ blend.rt[0].rgb_src_factor = *rgb_src_factor;
+ blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
+ blend.rt[0].alpha_func = *alpha_func;
+ blend.rt[0].alpha_src_factor = *alpha_src_factor;
+ blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
+ blend.rt[0].colormask = PIPE_MASK_RGBA;
+
+ if(!test_one(verbose, fp, &blend, mode, *type))
+ success = FALSE;
+ }
+
+ return success;
+}
+
+
+boolean
+test_single(unsigned verbose, FILE *fp)
+{
+ printf("no test_single()");
+ return TRUE;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
new file mode 100644
index 0000000000..9b02f436c5
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -0,0 +1,453 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Unit tests for type conversion.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_pointer.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_conv.h"
+#include "gallivm/lp_bld_debug.h"
+#include "lp_test.h"
+
+
+typedef void (*conv_test_ptr_t)(const void *src, const void *dst);
+
+
+void
+write_tsv_header(FILE *fp)
+{
+ fprintf(fp,
+ "result\t"
+ "cycles_per_channel\t"
+ "src_type\t"
+ "dst_type\n");
+
+ fflush(fp);
+}
+
+
+static void
+write_tsv_row(FILE *fp,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ double cycles,
+ boolean success)
+{
+ fprintf(fp, "%s\t", success ? "pass" : "fail");
+
+ fprintf(fp, "%.1f\t", cycles / MAX2(src_type.length, dst_type.length));
+
+ dump_type(fp, src_type);
+ fprintf(fp, "\t");
+
+ dump_type(fp, dst_type);
+ fprintf(fp, "\n");
+
+ fflush(fp);
+}
+
+
+static void
+dump_conv_types(FILE *fp,
+ struct lp_type src_type,
+ struct lp_type dst_type)
+{
+ fprintf(fp, "src_type=");
+ dump_type(fp, src_type);
+
+ fprintf(fp, " dst_type=");
+ dump_type(fp, dst_type);
+
+ fprintf(fp, " ...\n");
+ fflush(fp);
+}
+
+
+static LLVMValueRef
+add_conv_test(LLVMModuleRef module,
+ struct lp_type src_type, unsigned num_srcs,
+ struct lp_type dst_type, unsigned num_dsts)
+{
+ LLVMTypeRef args[2];
+ LLVMValueRef func;
+ LLVMValueRef src_ptr;
+ LLVMValueRef dst_ptr;
+ LLVMBasicBlockRef block;
+ LLVMBuilderRef builder;
+ LLVMValueRef src[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef dst[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+
+ args[0] = LLVMPointerType(lp_build_vec_type(src_type), 0);
+ args[1] = LLVMPointerType(lp_build_vec_type(dst_type), 0);
+
+ func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
+ LLVMSetFunctionCallConv(func, LLVMCCallConv);
+ src_ptr = LLVMGetParam(func, 0);
+ dst_ptr = LLVMGetParam(func, 1);
+
+ block = LLVMAppendBasicBlock(func, "entry");
+ builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ for(i = 0; i < num_srcs; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef ptr = LLVMBuildGEP(builder, src_ptr, &index, 1, "");
+ src[i] = LLVMBuildLoad(builder, ptr, "");
+ }
+
+ lp_build_conv(builder, src_type, dst_type, src, num_srcs, dst, num_dsts);
+
+ for(i = 0; i < num_dsts; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef ptr = LLVMBuildGEP(builder, dst_ptr, &index, 1, "");
+ LLVMBuildStore(builder, dst[i], ptr);
+ }
+
+ LLVMBuildRetVoid(builder);;
+
+ LLVMDisposeBuilder(builder);
+ return func;
+}
+
+
+PIPE_ALIGN_STACK
+static boolean
+test_one(unsigned verbose,
+ FILE *fp,
+ struct lp_type src_type,
+ struct lp_type dst_type)
+{
+ LLVMModuleRef module = NULL;
+ LLVMValueRef func = NULL;
+ LLVMExecutionEngineRef engine = NULL;
+ LLVMModuleProviderRef provider = NULL;
+ LLVMPassManagerRef pass = NULL;
+ char *error = NULL;
+ conv_test_ptr_t conv_test_ptr;
+ boolean success;
+ const unsigned n = LP_TEST_NUM_SAMPLES;
+ int64_t cycles[LP_TEST_NUM_SAMPLES];
+ double cycles_avg = 0.0;
+ unsigned num_srcs;
+ unsigned num_dsts;
+ double eps;
+ unsigned i, j;
+ void *code;
+
+ if(verbose >= 1)
+ dump_conv_types(stdout, src_type, dst_type);
+
+ if(src_type.length > dst_type.length) {
+ num_srcs = 1;
+ num_dsts = src_type.length/dst_type.length;
+ }
+ else {
+ num_dsts = 1;
+ num_srcs = dst_type.length/src_type.length;
+ }
+
+ assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+ /* We must not loose or gain channels. Only precision */
+ assert(src_type.length * num_srcs == dst_type.length * num_dsts);
+
+ eps = MAX2(lp_const_eps(src_type), lp_const_eps(dst_type));
+
+ module = LLVMModuleCreateWithName("test");
+
+ func = add_conv_test(module, src_type, num_srcs, dst_type, num_dsts);
+
+ if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
+ LLVMDumpModule(module);
+ abort();
+ }
+ LLVMDisposeMessage(error);
+
+ provider = LLVMCreateModuleProviderForExistingModule(module);
+ if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
+ if(verbose < 1)
+ dump_conv_types(stderr, src_type, dst_type);
+ fprintf(stderr, "%s\n", error);
+ LLVMDisposeMessage(error);
+ abort();
+ }
+
+#if 0
+ pass = LLVMCreatePassManager();
+ LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+ /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+ * but there are more on SVN. */
+ LLVMAddConstantPropagationPass(pass);
+ LLVMAddInstructionCombiningPass(pass);
+ LLVMAddPromoteMemoryToRegisterPass(pass);
+ LLVMAddGVNPass(pass);
+ LLVMAddCFGSimplificationPass(pass);
+ LLVMRunPassManager(pass, module);
+#else
+ (void)pass;
+#endif
+
+ if(verbose >= 2)
+ LLVMDumpModule(module);
+
+ code = LLVMGetPointerToGlobal(engine, func);
+ conv_test_ptr = (conv_test_ptr_t)pointer_to_func(code);
+
+ if(verbose >= 2)
+ lp_disassemble(code);
+
+ success = TRUE;
+ for(i = 0; i < n && success; ++i) {
+ unsigned src_stride = src_type.length*src_type.width/8;
+ unsigned dst_stride = dst_type.length*dst_type.width/8;
+ PIPE_ALIGN_VAR(16) uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+ PIPE_ALIGN_VAR(16) uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+ double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+ uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+ int64_t start_counter = 0;
+ int64_t end_counter = 0;
+
+ for(j = 0; j < num_srcs; ++j) {
+ random_vec(src_type, src + j*src_stride);
+ read_vec(src_type, src + j*src_stride, fref + j*src_type.length);
+ }
+
+ for(j = 0; j < num_dsts; ++j) {
+ write_vec(dst_type, ref + j*dst_stride, fref + j*dst_type.length);
+ }
+
+ start_counter = rdtsc();
+ conv_test_ptr(src, dst);
+ end_counter = rdtsc();
+
+ cycles[i] = end_counter - start_counter;
+
+ for(j = 0; j < num_dsts; ++j) {
+ if(!compare_vec_with_eps(dst_type, dst + j*dst_stride, ref + j*dst_stride, eps))
+ success = FALSE;
+ }
+
+ if (!success || verbose >= 3) {
+ if(verbose < 1)
+ dump_conv_types(stderr, src_type, dst_type);
+ if (success) {
+ fprintf(stderr, "PASS\n");
+ }
+ else {
+ fprintf(stderr, "MISMATCH\n");
+ }
+
+ for(j = 0; j < num_srcs; ++j) {
+ fprintf(stderr, " Src%u: ", j);
+ dump_vec(stderr, src_type, src + j*src_stride);
+ fprintf(stderr, "\n");
+ }
+
+#if 1
+ fprintf(stderr, " Ref: ");
+ for(j = 0; j < src_type.length*num_srcs; ++j)
+ fprintf(stderr, " %f", fref[j]);
+ fprintf(stderr, "\n");
+#endif
+
+ for(j = 0; j < num_dsts; ++j) {
+ fprintf(stderr, " Dst%u: ", j);
+ dump_vec(stderr, dst_type, dst + j*dst_stride);
+ fprintf(stderr, "\n");
+
+ fprintf(stderr, " Ref%u: ", j);
+ dump_vec(stderr, dst_type, ref + j*dst_stride);
+ fprintf(stderr, "\n");
+ }
+ }
+ }
+
+ /*
+ * Unfortunately the output of cycle counter is not very reliable as it comes
+ * -- sometimes we get outliers (due IRQs perhaps?) which are
+ * better removed to avoid random or biased data.
+ */
+ {
+ double sum = 0.0, sum2 = 0.0;
+ double avg, std;
+ unsigned m;
+
+ for(i = 0; i < n; ++i) {
+ sum += cycles[i];
+ sum2 += cycles[i]*cycles[i];
+ }
+
+ avg = sum/n;
+ std = sqrtf((sum2 - n*avg*avg)/n);
+
+ m = 0;
+ sum = 0.0;
+ for(i = 0; i < n; ++i) {
+ if(fabs(cycles[i] - avg) <= 4.0*std) {
+ sum += cycles[i];
+ ++m;
+ }
+ }
+
+ cycles_avg = sum/m;
+
+ }
+
+ if(fp)
+ write_tsv_row(fp, src_type, dst_type, cycles_avg, success);
+
+ if (!success) {
+ static boolean firsttime = TRUE;
+ if(firsttime) {
+ if(verbose < 2)
+ LLVMDumpModule(module);
+ LLVMWriteBitcodeToFile(module, "conv.bc");
+ fprintf(stderr, "conv.bc written\n");
+ fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n");
+ firsttime = FALSE;
+ /* abort(); */
+ }
+ }
+
+ LLVMFreeMachineCodeForFunction(engine, func);
+
+ LLVMDisposeExecutionEngine(engine);
+ if(pass)
+ LLVMDisposePassManager(pass);
+
+ return success;
+}
+
+
+const struct lp_type conv_types[] = {
+ /* float, fixed, sign, norm, width, len */
+
+ { TRUE, FALSE, TRUE, TRUE, 32, 4 },
+ { TRUE, FALSE, TRUE, FALSE, 32, 4 },
+ { TRUE, FALSE, FALSE, TRUE, 32, 4 },
+ { TRUE, FALSE, FALSE, FALSE, 32, 4 },
+
+ /* TODO: test fixed formats too */
+
+ { FALSE, FALSE, TRUE, TRUE, 16, 8 },
+ { FALSE, FALSE, TRUE, FALSE, 16, 8 },
+ { FALSE, FALSE, FALSE, TRUE, 16, 8 },
+ { FALSE, FALSE, FALSE, FALSE, 16, 8 },
+
+ { FALSE, FALSE, TRUE, TRUE, 32, 4 },
+ { FALSE, FALSE, TRUE, FALSE, 32, 4 },
+ { FALSE, FALSE, FALSE, TRUE, 32, 4 },
+ { FALSE, FALSE, FALSE, FALSE, 32, 4 },
+
+ { FALSE, FALSE, TRUE, TRUE, 16, 8 },
+ { FALSE, FALSE, TRUE, FALSE, 16, 8 },
+ { FALSE, FALSE, FALSE, TRUE, 16, 8 },
+ { FALSE, FALSE, FALSE, FALSE, 16, 8 },
+
+ { FALSE, FALSE, TRUE, TRUE, 8, 16 },
+ { FALSE, FALSE, TRUE, FALSE, 8, 16 },
+ { FALSE, FALSE, FALSE, TRUE, 8, 16 },
+ { FALSE, FALSE, FALSE, FALSE, 8, 16 },
+};
+
+
+const unsigned num_types = sizeof(conv_types)/sizeof(conv_types[0]);
+
+
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+ const struct lp_type *src_type;
+ const struct lp_type *dst_type;
+ boolean success = TRUE;
+
+ for(src_type = conv_types; src_type < &conv_types[num_types]; ++src_type) {
+ for(dst_type = conv_types; dst_type < &conv_types[num_types]; ++dst_type) {
+
+ if(src_type == dst_type)
+ continue;
+
+ if(src_type->norm != dst_type->norm)
+ continue;
+
+ if(!test_one(verbose, fp, *src_type, *dst_type))
+ success = FALSE;
+
+ }
+ }
+
+ return success;
+}
+
+
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+ const struct lp_type *src_type;
+ const struct lp_type *dst_type;
+ unsigned long i;
+ boolean success = TRUE;
+
+ for(i = 0; i < n; ++i) {
+ src_type = &conv_types[rand() % num_types];
+
+ do {
+ dst_type = &conv_types[rand() % num_types];
+ } while (src_type == dst_type || src_type->norm != dst_type->norm);
+
+ if(!test_one(verbose, fp, *src_type, *dst_type))
+ success = FALSE;
+ }
+
+ return success;
+}
+
+
+boolean
+test_single(unsigned verbose, FILE *fp)
+{
+ /* float, fixed, sign, norm, width, len */
+ struct lp_type f32x4_type =
+ { TRUE, FALSE, TRUE, TRUE, 32, 4 };
+ struct lp_type ub8x4_type =
+ { FALSE, FALSE, FALSE, TRUE, 8, 16 };
+
+ boolean success;
+
+ success = test_one(verbose, fp, f32x4_type, ub8x4_type);
+
+ return success;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
new file mode 100644
index 0000000000..8b6dc1c7f5
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -0,0 +1,279 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <float.h>
+
+#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_init.h"
+#include <llvm-c/Analysis.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/Transforms/Scalar.h>
+
+#include "util/u_memory.h"
+#include "util/u_pointer.h"
+#include "util/u_format.h"
+#include "util/u_format_tests.h"
+#include "util/u_format_s3tc.h"
+
+#include "gallivm/lp_bld_format.h"
+#include "lp_test.h"
+
+
+void
+write_tsv_header(FILE *fp)
+{
+ fprintf(fp,
+ "result\t"
+ "format\n");
+
+ fflush(fp);
+}
+
+
+static void
+write_tsv_row(FILE *fp,
+ const struct util_format_description *desc,
+ boolean success)
+{
+ fprintf(fp, "%s\t", success ? "pass" : "fail");
+
+ fprintf(fp, "%s\n", desc->name);
+
+ fflush(fp);
+}
+
+
+typedef void
+(*fetch_ptr_t)(float *, const void *packed,
+ unsigned i, unsigned j);
+
+
+static LLVMValueRef
+add_fetch_rgba_test(LLVMModuleRef lp_build_module,
+ const struct util_format_description *desc)
+{
+ LLVMTypeRef args[4];
+ LLVMValueRef func;
+ LLVMValueRef packed_ptr;
+ LLVMValueRef rgba_ptr;
+ LLVMValueRef i;
+ LLVMValueRef j;
+ LLVMBasicBlockRef block;
+ LLVMBuilderRef builder;
+ LLVMValueRef rgba;
+
+ args[0] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
+ args[1] = LLVMPointerType(LLVMInt8Type(), 0);
+ args[3] = args[2] = LLVMInt32Type();
+
+ func = LLVMAddFunction(lp_build_module, "fetch", LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0));
+ LLVMSetFunctionCallConv(func, LLVMCCallConv);
+ rgba_ptr = LLVMGetParam(func, 0);
+ packed_ptr = LLVMGetParam(func, 1);
+ i = LLVMGetParam(func, 2);
+ j = LLVMGetParam(func, 3);
+
+ block = LLVMAppendBasicBlock(func, "entry");
+ builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ rgba = lp_build_fetch_rgba_aos(builder, desc, packed_ptr, i, j);
+
+ LLVMBuildStore(builder, rgba, rgba_ptr);
+
+ LLVMBuildRetVoid(builder);
+
+ LLVMDisposeBuilder(builder);
+ return func;
+}
+
+
+PIPE_ALIGN_STACK
+static boolean
+test_format(unsigned verbose, FILE *fp,
+ const struct util_format_description *desc,
+ const struct util_format_test_case *test)
+{
+ LLVMValueRef fetch = NULL;
+ LLVMPassManagerRef pass = NULL;
+ fetch_ptr_t fetch_ptr;
+ PIPE_ALIGN_VAR(16) float unpacked[4];
+ boolean success;
+ unsigned i, j, k;
+
+ fetch = add_fetch_rgba_test(lp_build_module, desc);
+
+ if (LLVMVerifyFunction(fetch, LLVMPrintMessageAction)) {
+ LLVMDumpValue(fetch);
+ abort();
+ }
+
+#if 0
+ pass = LLVMCreatePassManager();
+ LLVMAddTargetData(LLVMGetExecutionEngineTargetData(lp_build_engine), pass);
+ /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+ * but there are more on SVN. */
+ LLVMAddConstantPropagationPass(pass);
+ LLVMAddInstructionCombiningPass(pass);
+ LLVMAddPromoteMemoryToRegisterPass(pass);
+ LLVMAddGVNPass(pass);
+ LLVMAddCFGSimplificationPass(pass);
+ LLVMRunPassManager(pass, lp_build_module);
+#else
+ (void)pass;
+#endif
+
+ fetch_ptr = (fetch_ptr_t)pointer_to_func(LLVMGetPointerToGlobal(lp_build_engine, fetch));
+
+ for (i = 0; i < desc->block.height; ++i) {
+ for (j = 0; j < desc->block.width; ++j) {
+
+ memset(unpacked, 0, sizeof unpacked);
+
+ fetch_ptr(unpacked, test->packed, j, i);
+
+ success = TRUE;
+ for(k = 0; k < 4; ++k)
+ if (fabs((float)test->unpacked[i][j][k] - unpacked[k]) > FLT_EPSILON)
+ success = FALSE;
+
+ if (!success) {
+ printf("FAILED\n");
+ printf(" Packed: %02x %02x %02x %02x\n",
+ test->packed[0], test->packed[1], test->packed[2], test->packed[3]);
+ printf(" Unpacked (%u,%u): %f %f %f %f obtained\n",
+ j, i,
+ unpacked[0], unpacked[1], unpacked[2], unpacked[3]);
+ printf(" %f %f %f %f expected\n",
+ test->unpacked[i][j][0],
+ test->unpacked[i][j][1],
+ test->unpacked[i][j][2],
+ test->unpacked[i][j][3]);
+ }
+ }
+ }
+
+ if (!success)
+ LLVMDumpValue(fetch);
+
+ LLVMFreeMachineCodeForFunction(lp_build_engine, fetch);
+ LLVMDeleteFunction(fetch);
+
+ if(pass)
+ LLVMDisposePassManager(pass);
+
+ if(fp)
+ write_tsv_row(fp, desc, success);
+
+ return success;
+}
+
+
+
+static boolean
+test_one(unsigned verbose, FILE *fp,
+ const struct util_format_description *format_desc)
+{
+ unsigned i;
+ boolean first = TRUE;
+ boolean success = TRUE;
+
+ for (i = 0; i < util_format_nr_test_cases; ++i) {
+ const struct util_format_test_case *test = &util_format_test_cases[i];
+
+ if (test->format == format_desc->format) {
+
+ if (first) {
+ printf("Testing %s ...\n",
+ format_desc->name);
+ first = FALSE;
+ }
+
+ if (!test_format(verbose, fp, format_desc, test)) {
+ success = FALSE;
+ }
+ }
+ }
+
+ return success;
+}
+
+
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+ enum pipe_format format;
+ boolean success = TRUE;
+
+ util_format_s3tc_init();
+
+ for (format = 1; format < PIPE_FORMAT_COUNT; ++format) {
+ const struct util_format_description *format_desc;
+
+ format_desc = util_format_description(format);
+ if (!format_desc) {
+ continue;
+ }
+
+ /*
+ * TODO: test more
+ */
+
+ if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ continue;
+ }
+
+ if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC &&
+ !util_format_s3tc_enabled) {
+ continue;
+ }
+
+ if (!test_one(verbose, fp, format_desc)) {
+ success = FALSE;
+ }
+ }
+
+ return success;
+}
+
+
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+ return test_all(verbose, fp);
+}
+
+
+boolean
+test_single(unsigned verbose, FILE *fp)
+{
+ printf("no test_single()");
+ return TRUE;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
new file mode 100644
index 0000000000..7bbbc61d4c
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -0,0 +1,408 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Shared testing code.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_cpu_detect.h"
+
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_init.h"
+#include "lp_test.h"
+
+
+#ifdef PIPE_CC_MSVC
+static INLINE double
+round(double x)
+{
+ if (x >= 0.0)
+ return floor(x + 0.5);
+ else
+ return ceil(x - 0.5);
+}
+#endif
+
+
+void
+dump_type(FILE *fp,
+ struct lp_type type)
+{
+ fprintf(fp, "%s%s%u%sx%u",
+ type.sign ? (type.floating || type.fixed ? "" : "s") : "u",
+ type.floating ? "f" : (type.fixed ? "h" : "i"),
+ type.width,
+ type.norm ? "n" : "",
+ type.length);
+}
+
+
+double
+read_elem(struct lp_type type, const void *src, unsigned index)
+{
+ double scale = lp_const_scale(type);
+ double value;
+ assert(index < type.length);
+ if (type.floating) {
+ switch(type.width) {
+ case 32:
+ value = *((const float *)src + index);
+ break;
+ case 64:
+ value = *((const double *)src + index);
+ break;
+ default:
+ assert(0);
+ return 0.0;
+ }
+ }
+ else {
+ if(type.sign) {
+ switch(type.width) {
+ case 8:
+ value = *((const int8_t *)src + index);
+ break;
+ case 16:
+ value = *((const int16_t *)src + index);
+ break;
+ case 32:
+ value = *((const int32_t *)src + index);
+ break;
+ case 64:
+ value = *((const int64_t *)src + index);
+ break;
+ default:
+ assert(0);
+ return 0.0;
+ }
+ }
+ else {
+ switch(type.width) {
+ case 8:
+ value = *((const uint8_t *)src + index);
+ break;
+ case 16:
+ value = *((const uint16_t *)src + index);
+ break;
+ case 32:
+ value = *((const uint32_t *)src + index);
+ break;
+ case 64:
+ value = *((const uint64_t *)src + index);
+ break;
+ default:
+ assert(0);
+ return 0.0;
+ }
+ }
+ }
+ return value/scale;
+}
+
+
+void
+write_elem(struct lp_type type, void *dst, unsigned index, double value)
+{
+ assert(index < type.length);
+ if(!type.sign && value < 0.0)
+ value = 0.0;
+ if(type.norm && value < -1.0)
+ value = -1.0;
+ if(type.norm && value > 1.0)
+ value = 1.0;
+ if (type.floating) {
+ switch(type.width) {
+ case 32:
+ *((float *)dst + index) = (float)(value);
+ break;
+ case 64:
+ *((double *)dst + index) = value;
+ break;
+ default:
+ assert(0);
+ }
+ }
+ else {
+ double scale = lp_const_scale(type);
+ value = round(value*scale);
+ if(type.sign) {
+ long long lvalue = (long long)value;
+ lvalue = MIN2(lvalue, ((long long)1 << (type.width - 1)) - 1);
+ switch(type.width) {
+ case 8:
+ *((int8_t *)dst + index) = (int8_t)lvalue;
+ break;
+ case 16:
+ *((int16_t *)dst + index) = (int16_t)lvalue;
+ break;
+ case 32:
+ *((int32_t *)dst + index) = (int32_t)lvalue;
+ break;
+ case 64:
+ *((int64_t *)dst + index) = (int64_t)lvalue;
+ break;
+ default:
+ assert(0);
+ }
+ }
+ else {
+ unsigned long long lvalue = (long long)value;
+ lvalue = MIN2(lvalue, ((unsigned long long)1 << type.width) - 1);
+ switch(type.width) {
+ case 8:
+ *((uint8_t *)dst + index) = (uint8_t)lvalue;
+ break;
+ case 16:
+ *((uint16_t *)dst + index) = (uint16_t)lvalue;
+ break;
+ case 32:
+ *((uint32_t *)dst + index) = (uint32_t)lvalue;
+ break;
+ case 64:
+ *((uint64_t *)dst + index) = (uint64_t)lvalue;
+ break;
+ default:
+ assert(0);
+ }
+ }
+ }
+}
+
+
+void
+random_elem(struct lp_type type, void *dst, unsigned index)
+{
+ double value;
+ assert(index < type.length);
+ value = (double)rand()/(double)RAND_MAX;
+ if(!type.norm) {
+ unsigned long long mask;
+ if (type.floating)
+ mask = ~(unsigned long long)0;
+ else if (type.fixed)
+ mask = ((unsigned long long)1 << (type.width / 2)) - 1;
+ else if (type.sign)
+ mask = ((unsigned long long)1 << (type.width - 1)) - 1;
+ else
+ mask = ((unsigned long long)1 << type.width) - 1;
+ value += (double)(mask & rand());
+ }
+ if(!type.sign)
+ if(rand() & 1)
+ value = -value;
+ write_elem(type, dst, index, value);
+}
+
+
+void
+read_vec(struct lp_type type, const void *src, double *dst)
+{
+ unsigned i;
+ for (i = 0; i < type.length; ++i)
+ dst[i] = read_elem(type, src, i);
+}
+
+
+void
+write_vec(struct lp_type type, void *dst, const double *src)
+{
+ unsigned i;
+ for (i = 0; i < type.length; ++i)
+ write_elem(type, dst, i, src[i]);
+}
+
+
+float
+random_float(void)
+{
+ return (float)((double)rand()/(double)RAND_MAX);
+}
+
+
+void
+random_vec(struct lp_type type, void *dst)
+{
+ unsigned i;
+ for (i = 0; i < type.length; ++i)
+ random_elem(type, dst, i);
+}
+
+
+boolean
+compare_vec_with_eps(struct lp_type type, const void *res, const void *ref, double eps)
+{
+ unsigned i;
+ for (i = 0; i < type.length; ++i) {
+ double res_elem = read_elem(type, res, i);
+ double ref_elem = read_elem(type, ref, i);
+ double delta = fabs(res_elem - ref_elem);
+ if(delta >= 2.0*eps)
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+boolean
+compare_vec(struct lp_type type, const void *res, const void *ref)
+{
+ double eps = lp_const_eps(type);
+ return compare_vec_with_eps(type, res, ref, eps);
+}
+
+
+void
+dump_vec(FILE *fp, struct lp_type type, const void *src)
+{
+ unsigned i;
+ for (i = 0; i < type.length; ++i) {
+ if(i)
+ fprintf(fp, " ");
+ if (type.floating) {
+ double value;
+ switch(type.width) {
+ case 32:
+ value = *((const float *)src + i);
+ break;
+ case 64:
+ value = *((const double *)src + i);
+ break;
+ default:
+ assert(0);
+ value = 0.0;
+ }
+ fprintf(fp, "%f", value);
+ }
+ else {
+ if(type.sign && !type.norm) {
+ long long value;
+ const char *format;
+ switch(type.width) {
+ case 8:
+ value = *((const int8_t *)src + i);
+ format = "%3lli";
+ break;
+ case 16:
+ value = *((const int16_t *)src + i);
+ format = "%5lli";
+ break;
+ case 32:
+ value = *((const int32_t *)src + i);
+ format = "%10lli";
+ break;
+ case 64:
+ value = *((const int64_t *)src + i);
+ format = "%20lli";
+ break;
+ default:
+ assert(0);
+ value = 0.0;
+ format = "?";
+ }
+ fprintf(fp, format, value);
+ }
+ else {
+ unsigned long long value;
+ const char *format;
+ switch(type.width) {
+ case 8:
+ value = *((const uint8_t *)src + i);
+ format = type.norm ? "%2x" : "%4llu";
+ break;
+ case 16:
+ value = *((const uint16_t *)src + i);
+ format = type.norm ? "%4x" : "%6llx";
+ break;
+ case 32:
+ value = *((const uint32_t *)src + i);
+ format = type.norm ? "%8x" : "%11llx";
+ break;
+ case 64:
+ value = *((const uint64_t *)src + i);
+ format = type.norm ? "%16x" : "%21llx";
+ break;
+ default:
+ assert(0);
+ value = 0.0;
+ format = "?";
+ }
+ fprintf(fp, format, value);
+ }
+ }
+ }
+}
+
+
+int main(int argc, char **argv)
+{
+ unsigned verbose = 0;
+ FILE *fp = NULL;
+ unsigned long n = 1000;
+ unsigned i;
+ boolean success;
+ boolean single = FALSE;
+
+ for(i = 1; i < argc; ++i) {
+ if(strcmp(argv[i], "-v") == 0)
+ ++verbose;
+ else if(strcmp(argv[i], "-s") == 0)
+ single = TRUE;
+ else if(strcmp(argv[i], "-o") == 0)
+ fp = fopen(argv[++i], "wt");
+ else
+ n = atoi(argv[i]);
+ }
+
+ lp_build_init();
+
+ util_cpu_detect();
+
+ if(fp) {
+ /* Warm up the caches */
+ test_some(0, NULL, 100);
+
+ write_tsv_header(fp);
+ }
+
+ if (single)
+ success = test_single(verbose, fp);
+ else if (n)
+ success = test_some(verbose, fp, n);
+ else
+ success = test_all(verbose, fp);
+
+ if(fp)
+ fclose(fp);
+
+ return success ? 0 : 1;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_printf.c b/src/gallium/drivers/llvmpipe/lp_test_printf.c
new file mode 100644
index 0000000000..21df83f9d8
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_printf.c
@@ -0,0 +1,175 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "util/u_pointer.h"
+#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_printf.h"
+
+#include <llvm-c/Analysis.h>
+#include <llvm-c/ExecutionEngine.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/Transforms/Scalar.h>
+
+#include "lp_test.h"
+
+
+struct printf_test_case {
+ int foo;
+};
+
+void
+write_tsv_header(FILE *fp)
+{
+ fprintf(fp,
+ "result\t"
+ "format\n");
+
+ fflush(fp);
+}
+
+
+
+typedef void (*test_printf_t)(int i);
+
+
+static LLVMValueRef
+add_printf_test(LLVMModuleRef module)
+{
+ LLVMTypeRef args[1] = { LLVMIntType(32) };
+ LLVMValueRef func = LLVMAddFunction(module, "test_printf", LLVMFunctionType(LLVMVoidType(), args, 1, 0));
+ LLVMBuilderRef builder = LLVMCreateBuilder();
+ LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry");
+
+ LLVMSetFunctionCallConv(func, LLVMCCallConv);
+
+ LLVMPositionBuilderAtEnd(builder, block);
+ lp_build_printf(builder, "hello, world\n");
+ lp_build_printf(builder, "print 5 6: %d %d\n", LLVMConstInt(LLVMInt32Type(), 5, 0),
+ LLVMConstInt(LLVMInt32Type(), 6, 0));
+ LLVMBuildRetVoid(builder);
+ LLVMDisposeBuilder(builder);
+ return func;
+}
+
+
+PIPE_ALIGN_STACK
+static boolean
+test_printf(unsigned verbose, FILE *fp, const struct printf_test_case *testcase)
+{
+ LLVMModuleRef module = NULL;
+ LLVMValueRef test = NULL;
+ LLVMExecutionEngineRef engine = NULL;
+ LLVMModuleProviderRef provider = NULL;
+ LLVMPassManagerRef pass = NULL;
+ char *error = NULL;
+ test_printf_t test_printf;
+ float unpacked[4];
+ unsigned packed;
+ boolean success = TRUE;
+ void *code;
+
+ module = LLVMModuleCreateWithName("test");
+
+ test = add_printf_test(module);
+
+ if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
+ LLVMDumpModule(module);
+ abort();
+ }
+ LLVMDisposeMessage(error);
+
+ provider = LLVMCreateModuleProviderForExistingModule(module);
+ if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
+ fprintf(stderr, "%s\n", error);
+ LLVMDisposeMessage(error);
+ abort();
+ }
+
+#if 0
+ pass = LLVMCreatePassManager();
+ LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+ /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+ * but there are more on SVN. */
+ LLVMAddConstantPropagationPass(pass);
+ LLVMAddInstructionCombiningPass(pass);
+ LLVMAddPromoteMemoryToRegisterPass(pass);
+ LLVMAddGVNPass(pass);
+ LLVMAddCFGSimplificationPass(pass);
+ LLVMRunPassManager(pass, module);
+#else
+ (void)pass;
+#endif
+
+ code = LLVMGetPointerToGlobal(engine, test);
+ test_printf = (test_printf_t)pointer_to_func(code);
+
+ memset(unpacked, 0, sizeof unpacked);
+ packed = 0;
+
+
+ // LLVMDumpModule(module);
+
+ test_printf(0);
+
+ LLVMFreeMachineCodeForFunction(engine, test);
+
+ LLVMDisposeExecutionEngine(engine);
+ if(pass)
+ LLVMDisposePassManager(pass);
+
+ return success;
+}
+
+
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+ boolean success = TRUE;
+
+ test_printf(verbose, fp, NULL);
+
+ return success;
+}
+
+
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+ return test_all(verbose, fp);
+}
+
+
+boolean
+test_single(unsigned verbose, FILE *fp)
+{
+ printf("no test_single()");
+ return TRUE;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_sincos.c b/src/gallium/drivers/llvmpipe/lp_test_sincos.c
new file mode 100644
index 0000000000..c7a903a025
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_test_sincos.c
@@ -0,0 +1,207 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_printf.h"
+#include "gallivm/lp_bld_arit.h"
+
+#include <llvm-c/Analysis.h>
+#include <llvm-c/ExecutionEngine.h>
+#include <llvm-c/Target.h>
+#include <llvm-c/Transforms/Scalar.h>
+
+#include "lp_test.h"
+
+
+void
+write_tsv_header(FILE *fp)
+{
+ fprintf(fp,
+ "result\t"
+ "format\n");
+
+ fflush(fp);
+}
+
+
+#ifdef PIPE_ARCH_SSE
+
+#define USE_SSE2
+#include "sse_mathfun.h"
+
+typedef __m128 (*test_sincos_t)(__m128);
+
+static LLVMValueRef
+add_sincos_test(LLVMModuleRef module, boolean sin)
+{
+ LLVMTypeRef v4sf = LLVMVectorType(LLVMFloatType(), 4);
+ LLVMTypeRef args[1] = { v4sf };
+ LLVMValueRef func = LLVMAddFunction(module, "sincos", LLVMFunctionType(v4sf, args, 1, 0));
+ LLVMValueRef arg1 = LLVMGetParam(func, 0);
+ LLVMBuilderRef builder = LLVMCreateBuilder();
+ LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry");
+ LLVMValueRef ret;
+ struct lp_build_context bld;
+
+ bld.builder = builder;
+ bld.type.floating = 1;
+ bld.type.width = 32;
+ bld.type.length = 4;
+
+ LLVMSetFunctionCallConv(func, LLVMCCallConv);
+
+ LLVMPositionBuilderAtEnd(builder, block);
+ ret = sin ? lp_build_sin(&bld, arg1) : lp_build_cos(&bld, arg1);
+ LLVMBuildRet(builder, ret);
+ LLVMDisposeBuilder(builder);
+ return func;
+}
+
+static void
+printv(char* string, v4sf value)
+{
+ v4sf v = value;
+ uint32_t *p = (uint32_t *) &v;
+ float *f = (float *)&v;
+ printf("%s: %f(%x) %f(%x) %f(%x) %f(%x)\n", string,
+ f[0], p[0], f[1], p[1], f[2], p[2], f[3], p[3]);
+}
+
+PIPE_ALIGN_STACK
+static boolean
+test_sincos(unsigned verbose, FILE *fp)
+{
+ LLVMModuleRef module = NULL;
+ LLVMValueRef test_sin = NULL, test_cos = NULL;
+ LLVMExecutionEngineRef engine = NULL;
+ LLVMModuleProviderRef provider = NULL;
+ LLVMPassManagerRef pass = NULL;
+ char *error = NULL;
+ test_sincos_t sin_func;
+ test_sincos_t cos_func;
+ float unpacked[4];
+ unsigned packed;
+ boolean success = TRUE;
+
+ module = LLVMModuleCreateWithName("test");
+
+ test_sin = add_sincos_test(module, TRUE);
+ test_cos = add_sincos_test(module, FALSE);
+
+ if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
+ printf("LLVMVerifyModule: %s\n", error);
+ LLVMDumpModule(module);
+ abort();
+ }
+ LLVMDisposeMessage(error);
+
+ provider = LLVMCreateModuleProviderForExistingModule(module);
+ if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
+ fprintf(stderr, "%s\n", error);
+ LLVMDisposeMessage(error);
+ abort();
+ }
+
+#if 0
+ pass = LLVMCreatePassManager();
+ LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+ /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
+ * but there are more on SVN. */
+ LLVMAddConstantPropagationPass(pass);
+ LLVMAddInstructionCombiningPass(pass);
+ LLVMAddPromoteMemoryToRegisterPass(pass);
+ LLVMAddGVNPass(pass);
+ LLVMAddCFGSimplificationPass(pass);
+ LLVMRunPassManager(pass, module);
+#else
+ (void)pass;
+#endif
+
+ sin_func = (test_sincos_t)LLVMGetPointerToGlobal(engine, test_sin);
+ cos_func = (test_sincos_t)LLVMGetPointerToGlobal(engine, test_cos);
+
+ memset(unpacked, 0, sizeof unpacked);
+ packed = 0;
+
+
+ // LLVMDumpModule(module);
+ {
+ v4sf src = {3.14159/4.0, -3.14159/4.0, 1.0, -1.0};
+ printv("ref ",sin_ps(src));
+ printv("llvm", sin_func(src));
+ printv("ref ",cos_ps(src));
+ printv("llvm",cos_func(src));
+ }
+
+ LLVMFreeMachineCodeForFunction(engine, test_sin);
+ LLVMFreeMachineCodeForFunction(engine, test_cos);
+
+ LLVMDisposeExecutionEngine(engine);
+ if(pass)
+ LLVMDisposePassManager(pass);
+
+ return success;
+}
+
+#else /* !PIPE_ARCH_SSE */
+
+static boolean
+test_sincos(unsigned verbose, FILE *fp)
+{
+ return TRUE;
+}
+
+#endif /* !PIPE_ARCH_SSE */
+
+
+boolean
+test_all(unsigned verbose, FILE *fp)
+{
+ boolean success = TRUE;
+
+ test_sincos(verbose, fp);
+
+ return success;
+}
+
+
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+ return test_all(verbose, fp);
+}
+
+boolean
+test_single(unsigned verbose, FILE *fp)
+{
+ printf("no test_single()");
+ return TRUE;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
new file mode 100644
index 0000000000..65208dd5d5
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
@@ -0,0 +1,219 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Texture sampling code generation
+ *
+ * This file is nothing more than ugly glue between three largely independent
+ * entities:
+ * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa)
+ * - texture sampling code generation (i.e., lp_build_sample_soa)
+ * - LLVM pipe driver
+ *
+ * All interesting code is in the functions mentioned above. There is really
+ * nothing to see here.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_type.h"
+#include "gallivm/lp_bld_sample.h"
+#include "gallivm/lp_bld_tgsi.h"
+#include "lp_jit.h"
+#include "lp_tex_sample.h"
+
+
+/**
+ * This provides the bridge between the sampler state store in
+ * lp_jit_context and lp_jit_texture and the sampler code
+ * generator. It provides the texture layout information required by
+ * the texture sampler code generator in terms of the state stored in
+ * lp_jit_context and lp_jit_texture in runtime.
+ */
+struct llvmpipe_sampler_dynamic_state
+{
+ struct lp_sampler_dynamic_state base;
+
+ const struct lp_sampler_static_state *static_state;
+
+ LLVMValueRef context_ptr;
+};
+
+
+/**
+ * This is the bridge between our sampler and the TGSI translator.
+ */
+struct lp_llvm_sampler_soa
+{
+ struct lp_build_sampler_soa base;
+
+ struct llvmpipe_sampler_dynamic_state dynamic_state;
+};
+
+
+/**
+ * Fetch the specified member of the lp_jit_texture structure.
+ * \param emit_load if TRUE, emit the LLVM load instruction to actually
+ * fetch the field's value. Otherwise, just emit the
+ * GEP code to address the field.
+ *
+ * @sa http://llvm.org/docs/GetElementPtr.html
+ */
+static LLVMValueRef
+lp_llvm_texture_member(const struct lp_sampler_dynamic_state *base,
+ LLVMBuilderRef builder,
+ unsigned unit,
+ unsigned member_index,
+ const char *member_name,
+ boolean emit_load)
+{
+ struct llvmpipe_sampler_dynamic_state *state =
+ (struct llvmpipe_sampler_dynamic_state *)base;
+ LLVMValueRef indices[4];
+ LLVMValueRef ptr;
+ LLVMValueRef res;
+
+ assert(unit < PIPE_MAX_SAMPLERS);
+
+ /* context[0] */
+ indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ /* context[0].textures */
+ indices[1] = LLVMConstInt(LLVMInt32Type(), LP_JIT_CTX_TEXTURES, 0);
+ /* context[0].textures[unit] */
+ indices[2] = LLVMConstInt(LLVMInt32Type(), unit, 0);
+ /* context[0].textures[unit].member */
+ indices[3] = LLVMConstInt(LLVMInt32Type(), member_index, 0);
+
+ ptr = LLVMBuildGEP(builder, state->context_ptr, indices, Elements(indices), "");
+
+ if (emit_load)
+ res = LLVMBuildLoad(builder, ptr, "");
+ else
+ res = ptr;
+
+ lp_build_name(res, "context.texture%u.%s", unit, member_name);
+
+ return res;
+}
+
+
+/**
+ * Helper macro to instantiate the functions that generate the code to
+ * fetch the members of lp_jit_texture to fulfill the sampler code
+ * generator requests.
+ *
+ * This complexity is the price we have to pay to keep the texture
+ * sampler code generator a reusable module without dependencies to
+ * llvmpipe internals.
+ */
+#define LP_LLVM_TEXTURE_MEMBER(_name, _index, _emit_load) \
+ static LLVMValueRef \
+ lp_llvm_texture_##_name( const struct lp_sampler_dynamic_state *base, \
+ LLVMBuilderRef builder, \
+ unsigned unit) \
+ { \
+ return lp_llvm_texture_member(base, builder, unit, _index, #_name, _emit_load ); \
+ }
+
+
+LP_LLVM_TEXTURE_MEMBER(width, LP_JIT_TEXTURE_WIDTH, TRUE)
+LP_LLVM_TEXTURE_MEMBER(height, LP_JIT_TEXTURE_HEIGHT, TRUE)
+LP_LLVM_TEXTURE_MEMBER(depth, LP_JIT_TEXTURE_DEPTH, TRUE)
+LP_LLVM_TEXTURE_MEMBER(last_level, LP_JIT_TEXTURE_LAST_LEVEL, TRUE)
+LP_LLVM_TEXTURE_MEMBER(row_stride, LP_JIT_TEXTURE_ROW_STRIDE, FALSE)
+LP_LLVM_TEXTURE_MEMBER(img_stride, LP_JIT_TEXTURE_IMG_STRIDE, FALSE)
+LP_LLVM_TEXTURE_MEMBER(data_ptr, LP_JIT_TEXTURE_DATA, FALSE)
+
+
+static void
+lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
+{
+ FREE(sampler);
+}
+
+
+/**
+ * Fetch filtered values from texture.
+ * The 'texel' parameter returns four vectors corresponding to R, G, B, A.
+ */
+static void
+lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
+ LLVMBuilderRef builder,
+ struct lp_type type,
+ unsigned unit,
+ unsigned num_coords,
+ const LLVMValueRef *coords,
+ const LLVMValueRef *ddx,
+ const LLVMValueRef *ddy,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
+ LLVMValueRef *texel)
+{
+ struct lp_llvm_sampler_soa *sampler = (struct lp_llvm_sampler_soa *)base;
+
+ assert(unit < PIPE_MAX_SAMPLERS);
+
+ lp_build_sample_soa(builder,
+ &sampler->dynamic_state.static_state[unit],
+ &sampler->dynamic_state.base,
+ type,
+ unit,
+ num_coords, coords,
+ ddx, ddy,
+ lod_bias, explicit_lod,
+ texel);
+}
+
+
+struct lp_build_sampler_soa *
+lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state,
+ LLVMValueRef context_ptr)
+{
+ struct lp_llvm_sampler_soa *sampler;
+
+ sampler = CALLOC_STRUCT(lp_llvm_sampler_soa);
+ if(!sampler)
+ return NULL;
+
+ sampler->base.destroy = lp_llvm_sampler_soa_destroy;
+ sampler->base.emit_fetch_texel = lp_llvm_sampler_soa_emit_fetch_texel;
+ sampler->dynamic_state.base.width = lp_llvm_texture_width;
+ sampler->dynamic_state.base.height = lp_llvm_texture_height;
+ sampler->dynamic_state.base.depth = lp_llvm_texture_depth;
+ sampler->dynamic_state.base.last_level = lp_llvm_texture_last_level;
+ sampler->dynamic_state.base.row_stride = lp_llvm_texture_row_stride;
+ sampler->dynamic_state.base.img_stride = lp_llvm_texture_img_stride;
+ sampler->dynamic_state.base.data_ptr = lp_llvm_texture_data_ptr;
+ sampler->dynamic_state.static_state = static_state;
+ sampler->dynamic_state.context_ptr = context_ptr;
+
+ return &sampler->base;
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
new file mode 100644
index 0000000000..1228a831f3
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -0,0 +1,48 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_TEX_SAMPLE_H
+#define LP_TEX_SAMPLE_H
+
+
+#include "gallivm/lp_bld.h"
+
+
+struct lp_sampler_static_state;
+
+
+/**
+ * Pure-LLVM texture sampling code generator.
+ *
+ * @param context_ptr LLVM value with the pointer to the struct lp_jit_context.
+ */
+struct lp_build_sampler_soa *
+lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key,
+ LLVMValueRef context_ptr);
+
+
+#endif /* LP_TEX_SAMPLE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
new file mode 100644
index 0000000000..0d526ead89
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -0,0 +1,1288 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Michel Dänzer <michel@tungstengraphics.com>
+ */
+
+#include <stdio.h>
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+#include "util/u_transfer.h"
+
+#include "lp_context.h"
+#include "lp_flush.h"
+#include "lp_screen.h"
+#include "lp_tile_image.h"
+#include "lp_texture.h"
+#include "lp_setup.h"
+
+#include "state_tracker/sw_winsys.h"
+
+
+#ifdef DEBUG
+static struct llvmpipe_resource resource_list;
+#endif
+
+
+static INLINE boolean
+resource_is_texture(const struct pipe_resource *resource)
+{
+ switch (resource->target) {
+ case PIPE_BUFFER:
+ return FALSE;
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_3D:
+ case PIPE_TEXTURE_CUBE:
+ return TRUE;
+ default:
+ assert(0);
+ return FALSE;
+ }
+}
+
+
+
+/**
+ * Allocate storage for llvmpipe_texture::layout array.
+ * The number of elements is width_in_tiles * height_in_tiles.
+ */
+static enum lp_texture_layout *
+alloc_layout_array(unsigned num_slices, unsigned width, unsigned height)
+{
+ const unsigned tx = align(width, TILE_SIZE) / TILE_SIZE;
+ const unsigned ty = align(height, TILE_SIZE) / TILE_SIZE;
+
+ assert(num_slices * tx * ty > 0);
+ assert(LP_TEX_LAYOUT_NONE == 0); /* calloc'ing LP_TEX_LAYOUT_NONE here */
+
+ return (enum lp_texture_layout *)
+ CALLOC(num_slices * tx * ty, sizeof(enum lp_texture_layout));
+}
+
+
+
+/**
+ * Conventional allocation path for non-display textures:
+ * Just compute row strides here. Storage is allocated on demand later.
+ */
+static boolean
+llvmpipe_texture_layout(struct llvmpipe_screen *screen,
+ struct llvmpipe_resource *lpr)
+{
+ struct pipe_resource *pt = &lpr->base;
+ unsigned level;
+ unsigned width = pt->width0;
+ unsigned height = pt->height0;
+ unsigned depth = pt->depth0;
+
+ assert(LP_MAX_TEXTURE_2D_LEVELS <= LP_MAX_TEXTURE_LEVELS);
+ assert(LP_MAX_TEXTURE_3D_LEVELS <= LP_MAX_TEXTURE_LEVELS);
+
+ for (level = 0; level <= pt->last_level; level++) {
+
+ /* Row stride and image stride (for linear layout) */
+ {
+ unsigned alignment, nblocksx, nblocksy, block_size;
+
+ /* For non-compressed formats we need to align the texture size
+ * to the tile size to facilitate render-to-texture.
+ */
+ if (util_format_is_compressed(pt->format))
+ alignment = 1;
+ else
+ alignment = TILE_SIZE;
+
+ nblocksx = util_format_get_nblocksx(pt->format,
+ align(width, alignment));
+ nblocksy = util_format_get_nblocksy(pt->format,
+ align(height, alignment));
+ block_size = util_format_get_blocksize(pt->format);
+
+ lpr->row_stride[level] = align(nblocksx * block_size, 16);
+
+ lpr->img_stride[level] = lpr->row_stride[level] * nblocksy;
+ }
+
+ /* Size of the image in tiles (for tiled layout) */
+ {
+ const unsigned width_t = align(width, TILE_SIZE) / TILE_SIZE;
+ const unsigned height_t = align(height, TILE_SIZE) / TILE_SIZE;
+ lpr->tiles_per_row[level] = width_t;
+ lpr->tiles_per_image[level] = width_t * height_t;
+ }
+
+ /* Number of 3D image slices or cube faces */
+ {
+ unsigned num_slices;
+
+ if (lpr->base.target == PIPE_TEXTURE_CUBE)
+ num_slices = 6;
+ else if (lpr->base.target == PIPE_TEXTURE_3D)
+ num_slices = depth;
+ else
+ num_slices = 1;
+
+ lpr->num_slices_faces[level] = num_slices;
+
+ lpr->layout[level] = alloc_layout_array(num_slices, width, height);
+ }
+
+ /* Compute size of next mipmap level */
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ depth = u_minify(depth, 1);
+ }
+
+ return TRUE;
+}
+
+
+
+static boolean
+llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
+ struct llvmpipe_resource *lpr)
+{
+ struct sw_winsys *winsys = screen->winsys;
+
+ /* Round up the surface size to a multiple of the tile size to
+ * avoid tile clipping.
+ */
+ const unsigned width = align(lpr->base.width0, TILE_SIZE);
+ const unsigned height = align(lpr->base.height0, TILE_SIZE);
+ const unsigned width_t = align(width, TILE_SIZE) / TILE_SIZE;
+ const unsigned height_t = align(height, TILE_SIZE) / TILE_SIZE;
+
+ lpr->tiles_per_row[0] = width_t;
+ lpr->tiles_per_image[0] = width_t * height_t;
+ lpr->num_slices_faces[0] = 1;
+ lpr->img_stride[0] = 0;
+
+ lpr->layout[0] = alloc_layout_array(1, width, height);
+ //lpr->layout[0][0] = LP_TEX_LAYOUT_LINEAR;
+
+ lpr->dt = winsys->displaytarget_create(winsys,
+ lpr->base.bind,
+ lpr->base.format,
+ width, height,
+ 16,
+ &lpr->row_stride[0] );
+
+ return lpr->dt != NULL;
+}
+
+
+static struct pipe_resource *
+llvmpipe_resource_create(struct pipe_screen *_screen,
+ const struct pipe_resource *templat)
+{
+ static unsigned id_counter = 0;
+ struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
+ struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource);
+ if (!lpr)
+ return NULL;
+
+ lpr->base = *templat;
+ pipe_reference_init(&lpr->base.reference, 1);
+ lpr->base.screen = &screen->base;
+
+ /* assert(lpr->base.bind); */
+
+ if (resource_is_texture(&lpr->base)) {
+ if (lpr->base.bind & PIPE_BIND_DISPLAY_TARGET) {
+ /* displayable surface */
+ if (!llvmpipe_displaytarget_layout(screen, lpr))
+ goto fail;
+ assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE);
+ }
+ else {
+ /* texture map */
+ if (!llvmpipe_texture_layout(screen, lpr))
+ goto fail;
+ assert(lpr->layout[0][0] == LP_TEX_LAYOUT_NONE);
+ }
+ assert(lpr->layout[0]);
+ }
+ else {
+ /* other data (vertex buffer, const buffer, etc) */
+ const enum pipe_format format = templat->format;
+ const uint w = templat->width0 / util_format_get_blockheight(format);
+ const uint h = templat->height0 / util_format_get_blockwidth(format);
+ const uint d = templat->depth0;
+ const uint bpp = util_format_get_blocksize(format);
+ const uint bytes = w * h * d * bpp;
+ lpr->data = align_malloc(bytes, 16);
+ if (!lpr->data)
+ goto fail;
+ }
+
+ lpr->id = id_counter++;
+
+#ifdef DEBUG
+ insert_at_tail(&resource_list, lpr);
+#endif
+
+ return &lpr->base;
+
+ fail:
+ FREE(lpr);
+ return NULL;
+}
+
+
+static void
+llvmpipe_resource_destroy(struct pipe_screen *pscreen,
+ struct pipe_resource *pt)
+{
+ struct llvmpipe_screen *screen = llvmpipe_screen(pscreen);
+ struct llvmpipe_resource *lpr = llvmpipe_resource(pt);
+
+ if (lpr->dt) {
+ /* display target */
+ struct sw_winsys *winsys = screen->winsys;
+ winsys->displaytarget_destroy(winsys, lpr->dt);
+
+ if (lpr->tiled[0].data) {
+ align_free(lpr->tiled[0].data);
+ lpr->tiled[0].data = NULL;
+ }
+
+ FREE(lpr->layout[0]);
+ }
+ else if (resource_is_texture(pt)) {
+ /* regular texture */
+ uint level;
+
+ /* free linear image data */
+ for (level = 0; level < Elements(lpr->linear); level++) {
+ if (lpr->linear[level].data) {
+ align_free(lpr->linear[level].data);
+ lpr->linear[level].data = NULL;
+ }
+ }
+
+ /* free tiled image data */
+ for (level = 0; level < Elements(lpr->tiled); level++) {
+ if (lpr->tiled[level].data) {
+ align_free(lpr->tiled[level].data);
+ lpr->tiled[level].data = NULL;
+ }
+ }
+
+ /* free layout flag arrays */
+ for (level = 0; level < Elements(lpr->tiled); level++) {
+ FREE(lpr->layout[level]);
+ lpr->layout[level] = NULL;
+ }
+ }
+ else if (!lpr->userBuffer) {
+ assert(lpr->data);
+ align_free(lpr->data);
+ }
+
+#ifdef DEBUG
+ if (lpr->next)
+ remove_from_list(lpr);
+#endif
+
+ FREE(lpr);
+}
+
+
+/**
+ * Map a resource for read/write.
+ */
+void *
+llvmpipe_resource_map(struct pipe_resource *resource,
+ unsigned face,
+ unsigned level,
+ unsigned zslice,
+ enum lp_texture_usage tex_usage,
+ enum lp_texture_layout layout)
+{
+ struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
+ uint8_t *map;
+
+ assert(face < 6);
+ assert(level < LP_MAX_TEXTURE_LEVELS);
+
+ assert(tex_usage == LP_TEX_USAGE_READ ||
+ tex_usage == LP_TEX_USAGE_READ_WRITE ||
+ tex_usage == LP_TEX_USAGE_WRITE_ALL);
+
+ assert(layout == LP_TEX_LAYOUT_NONE ||
+ layout == LP_TEX_LAYOUT_TILED ||
+ layout == LP_TEX_LAYOUT_LINEAR);
+
+ if (lpr->dt) {
+ /* display target */
+ struct llvmpipe_screen *screen = llvmpipe_screen(resource->screen);
+ struct sw_winsys *winsys = screen->winsys;
+ unsigned dt_usage;
+ uint8_t *map2;
+
+ if (tex_usage == LP_TEX_USAGE_READ) {
+ dt_usage = PIPE_TRANSFER_READ;
+ }
+ else {
+ dt_usage = PIPE_TRANSFER_READ_WRITE;
+ }
+
+ assert(face == 0);
+ assert(level == 0);
+ assert(zslice == 0);
+
+ /* FIXME: keep map count? */
+ map = winsys->displaytarget_map(winsys, lpr->dt, dt_usage);
+
+ /* install this linear image in texture data structure */
+ lpr->linear[level].data = map;
+
+ /* make sure tiled data gets converted to linear data */
+ map2 = llvmpipe_get_texture_image(lpr, 0, 0, tex_usage, layout);
+ if (layout == LP_TEX_LAYOUT_LINEAR)
+ assert(map == map2);
+
+ return map2;
+ }
+ else if (resource_is_texture(resource)) {
+ /* regular texture */
+ if (resource->target != PIPE_TEXTURE_CUBE) {
+ assert(face == 0);
+ }
+ if (resource->target != PIPE_TEXTURE_3D) {
+ assert(zslice == 0);
+ }
+
+ map = llvmpipe_get_texture_image(lpr, face + zslice, level,
+ tex_usage, layout);
+ assert(map);
+ return map;
+ }
+ else {
+ return lpr->data;
+ }
+}
+
+
+/**
+ * Unmap a resource.
+ */
+void
+llvmpipe_resource_unmap(struct pipe_resource *resource,
+ unsigned face,
+ unsigned level,
+ unsigned zslice)
+{
+ struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
+
+ if (lpr->dt) {
+ /* display target */
+ struct llvmpipe_screen *lp_screen = llvmpipe_screen(resource->screen);
+ struct sw_winsys *winsys = lp_screen->winsys;
+
+ assert(face == 0);
+ assert(level == 0);
+ assert(zslice == 0);
+
+ /* make sure linear image is up to date */
+ (void) llvmpipe_get_texture_image(lpr, face + zslice, level,
+ LP_TEX_USAGE_READ,
+ LP_TEX_LAYOUT_LINEAR);
+
+ winsys->displaytarget_unmap(winsys, lpr->dt);
+ }
+}
+
+
+void *
+llvmpipe_resource_data(struct pipe_resource *resource)
+{
+ struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
+
+ assert(!resource_is_texture(resource));
+
+ return lpr->data;
+}
+
+
+static struct pipe_resource *
+llvmpipe_resource_from_handle(struct pipe_screen *screen,
+ const struct pipe_resource *template,
+ struct winsys_handle *whandle)
+{
+ struct sw_winsys *winsys = llvmpipe_screen(screen)->winsys;
+ struct llvmpipe_resource *lpr = CALLOC_STRUCT(llvmpipe_resource);
+ if (!lpr)
+ return NULL;
+
+ lpr->base = *template;
+ pipe_reference_init(&lpr->base.reference, 1);
+ lpr->base.screen = screen;
+
+ lpr->dt = winsys->displaytarget_from_handle(winsys,
+ template,
+ whandle,
+ &lpr->row_stride[0]);
+ if (!lpr->dt)
+ goto fail;
+
+ return &lpr->base;
+
+ fail:
+ FREE(lpr);
+ return NULL;
+}
+
+
+static boolean
+llvmpipe_resource_get_handle(struct pipe_screen *screen,
+ struct pipe_resource *pt,
+ struct winsys_handle *whandle)
+{
+ struct sw_winsys *winsys = llvmpipe_screen(screen)->winsys;
+ struct llvmpipe_resource *lpr = llvmpipe_resource(pt);
+
+ assert(lpr->dt);
+ if (!lpr->dt)
+ return FALSE;
+
+ return winsys->displaytarget_get_handle(winsys, lpr->dt, whandle);
+}
+
+
+static struct pipe_surface *
+llvmpipe_get_tex_surface(struct pipe_screen *screen,
+ struct pipe_resource *pt,
+ unsigned face, unsigned level, unsigned zslice,
+ unsigned usage)
+{
+ struct pipe_surface *ps;
+
+ assert(level <= pt->last_level);
+
+ ps = CALLOC_STRUCT(pipe_surface);
+ if (ps) {
+ pipe_reference_init(&ps->reference, 1);
+ pipe_resource_reference(&ps->texture, pt);
+ ps->format = pt->format;
+ ps->width = u_minify(pt->width0, level);
+ ps->height = u_minify(pt->height0, level);
+ ps->usage = usage;
+
+ ps->face = face;
+ ps->level = level;
+ ps->zslice = zslice;
+ }
+ return ps;
+}
+
+
+static void
+llvmpipe_tex_surface_destroy(struct pipe_surface *surf)
+{
+ /* Effectively do the texture_update work here - if texture images
+ * needed post-processing to put them into hardware layout, this is
+ * where it would happen. For llvmpipe, nothing to do.
+ */
+ assert(surf->texture);
+ pipe_resource_reference(&surf->texture, NULL);
+ FREE(surf);
+}
+
+
+static struct pipe_transfer *
+llvmpipe_get_transfer(struct pipe_context *pipe,
+ struct pipe_resource *resource,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box)
+{
+ struct llvmpipe_resource *lprex = llvmpipe_resource(resource);
+ struct llvmpipe_transfer *lpr;
+
+ assert(resource);
+ assert(sr.level <= resource->last_level);
+
+ /*
+ * Transfers, like other pipe operations, must happen in order, so flush the
+ * context if necessary.
+ */
+ if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+ boolean read_only = !(usage & PIPE_TRANSFER_WRITE);
+ boolean do_not_block = !!(usage & PIPE_TRANSFER_DONTBLOCK);
+ if (!llvmpipe_flush_resource(pipe, resource,
+ sr.face, sr.level,
+ 0, /* flush_flags */
+ read_only,
+ TRUE, /* cpu_access */
+ do_not_block)) {
+ /*
+ * It would have blocked, but state tracker requested no to.
+ */
+ assert(do_not_block);
+ return NULL;
+ }
+ }
+
+ lpr = CALLOC_STRUCT(llvmpipe_transfer);
+ if (lpr) {
+ struct pipe_transfer *pt = &lpr->base;
+ pipe_resource_reference(&pt->resource, resource);
+ pt->box = *box;
+ pt->sr = sr;
+ pt->stride = lprex->row_stride[sr.level];
+ pt->slice_stride = lprex->img_stride[sr.level];
+ pt->usage = usage;
+
+ return pt;
+ }
+ return NULL;
+}
+
+
+static void
+llvmpipe_transfer_destroy(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
+{
+ /* Effectively do the texture_update work here - if texture images
+ * needed post-processing to put them into hardware layout, this is
+ * where it would happen. For llvmpipe, nothing to do.
+ */
+ assert (transfer->resource);
+ pipe_resource_reference(&transfer->resource, NULL);
+ FREE(transfer);
+}
+
+
+static void *
+llvmpipe_transfer_map( struct pipe_context *pipe,
+ struct pipe_transfer *transfer )
+{
+ struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
+ ubyte *map;
+ struct llvmpipe_resource *lpr;
+ enum pipe_format format;
+ enum lp_texture_usage tex_usage;
+ const char *mode;
+
+ assert(transfer->sr.face < 6);
+ assert(transfer->sr.level < LP_MAX_TEXTURE_LEVELS);
+
+ /*
+ printf("tex_transfer_map(%d, %d %d x %d of %d x %d, usage %d )\n",
+ transfer->x, transfer->y, transfer->width, transfer->height,
+ transfer->texture->width0,
+ transfer->texture->height0,
+ transfer->usage);
+ */
+
+ if (transfer->usage == PIPE_TRANSFER_READ) {
+ tex_usage = LP_TEX_USAGE_READ;
+ mode = "read";
+ }
+ else {
+ tex_usage = LP_TEX_USAGE_READ_WRITE;
+ mode = "read/write";
+ }
+
+ if (0) {
+ struct llvmpipe_resource *lpr = llvmpipe_resource(transfer->resource);
+ printf("transfer map tex %u mode %s\n", lpr->id, mode);
+ }
+
+
+ assert(transfer->resource);
+ lpr = llvmpipe_resource(transfer->resource);
+ format = lpr->base.format;
+
+ map = llvmpipe_resource_map(transfer->resource,
+ transfer->sr.face,
+ transfer->sr.level,
+ transfer->box.z,
+ tex_usage, LP_TEX_LAYOUT_LINEAR);
+
+
+ /* May want to do different things here depending on read/write nature
+ * of the map:
+ */
+ if (transfer->usage & PIPE_TRANSFER_WRITE) {
+ /* Do something to notify sharing contexts of a texture change.
+ */
+ screen->timestamp++;
+ }
+
+ map +=
+ transfer->box.y / util_format_get_blockheight(format) * transfer->stride +
+ transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
+
+ return map;
+}
+
+
+static void
+llvmpipe_transfer_unmap(struct pipe_context *pipe,
+ struct pipe_transfer *transfer)
+{
+ assert(transfer->resource);
+
+ llvmpipe_resource_unmap(transfer->resource,
+ transfer->sr.face,
+ transfer->sr.level,
+ transfer->box.z);
+}
+
+static unsigned int
+llvmpipe_is_resource_referenced( struct pipe_context *pipe,
+ struct pipe_resource *presource,
+ unsigned face, unsigned level)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
+
+ if (presource->target == PIPE_BUFFER)
+ return PIPE_UNREFERENCED;
+
+ return lp_setup_is_resource_referenced(llvmpipe->setup, presource);
+}
+
+
+
+/**
+ * Create buffer which wraps user-space data.
+ */
+static struct pipe_resource *
+llvmpipe_user_buffer_create(struct pipe_screen *screen,
+ void *ptr,
+ unsigned bytes,
+ unsigned bind_flags)
+{
+ struct llvmpipe_resource *buffer;
+
+ buffer = CALLOC_STRUCT(llvmpipe_resource);
+ if(!buffer)
+ return NULL;
+
+ pipe_reference_init(&buffer->base.reference, 1);
+ buffer->base.screen = screen;
+ buffer->base.format = PIPE_FORMAT_R8_UNORM; /* ?? */
+ buffer->base.bind = bind_flags;
+ buffer->base.usage = PIPE_USAGE_IMMUTABLE;
+ buffer->base.flags = 0;
+ buffer->base.width0 = bytes;
+ buffer->base.height0 = 1;
+ buffer->base.depth0 = 1;
+ buffer->userBuffer = TRUE;
+ buffer->data = ptr;
+
+ return &buffer->base;
+}
+
+
+/**
+ * Compute size (in bytes) need to store a texture image / mipmap level,
+ * for just one cube face or one 3D texture slice
+ */
+static unsigned
+tex_image_face_size(const struct llvmpipe_resource *lpr, unsigned level,
+ enum lp_texture_layout layout)
+{
+ const unsigned width = u_minify(lpr->base.width0, level);
+ const unsigned height = u_minify(lpr->base.height0, level);
+
+ assert(layout == LP_TEX_LAYOUT_TILED ||
+ layout == LP_TEX_LAYOUT_LINEAR);
+
+ if (layout == LP_TEX_LAYOUT_TILED) {
+ /* for tiled layout, force a 32bpp format */
+ const enum pipe_format format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ const unsigned block_size = util_format_get_blocksize(format);
+ const unsigned nblocksy =
+ util_format_get_nblocksy(format, align(height, TILE_SIZE));
+ const unsigned nblocksx =
+ util_format_get_nblocksx(format, align(width, TILE_SIZE));
+ const unsigned buffer_size = block_size * nblocksy * nblocksx;
+ return buffer_size;
+ }
+ else {
+ /* we already computed this */
+ return lpr->img_stride[level];
+ }
+}
+
+
+/**
+ * Compute size (in bytes) need to store a texture image / mipmap level,
+ * including all cube faces or 3D image slices
+ */
+static unsigned
+tex_image_size(const struct llvmpipe_resource *lpr, unsigned level,
+ enum lp_texture_layout layout)
+{
+ const unsigned buf_size = tex_image_face_size(lpr, level, layout);
+ return buf_size * lpr->num_slices_faces[level];
+}
+
+
+/**
+ * This function encapsulates some complicated logic for determining
+ * how to convert a tile of image data from linear layout to tiled
+ * layout, or vice versa.
+ * \param cur_layout the current tile layout
+ * \param target_layout the desired tile layout
+ * \param usage how the tile will be accessed (R/W vs. read-only, etc)
+ * \param new_layout_return returns the new layout mode
+ * \param convert_return returns TRUE if image conversion is needed
+ */
+static void
+layout_logic(enum lp_texture_layout cur_layout,
+ enum lp_texture_layout target_layout,
+ enum lp_texture_usage usage,
+ enum lp_texture_layout *new_layout_return,
+ boolean *convert)
+{
+ enum lp_texture_layout other_layout, new_layout;
+
+ *convert = FALSE;
+
+ new_layout = 99; /* debug check */
+
+ if (target_layout == LP_TEX_LAYOUT_LINEAR) {
+ other_layout = LP_TEX_LAYOUT_TILED;
+ }
+ else {
+ assert(target_layout == LP_TEX_LAYOUT_TILED);
+ other_layout = LP_TEX_LAYOUT_LINEAR;
+ }
+
+ new_layout = target_layout; /* may get changed below */
+
+ if (cur_layout == LP_TEX_LAYOUT_BOTH) {
+ if (usage == LP_TEX_USAGE_READ) {
+ new_layout = LP_TEX_LAYOUT_BOTH;
+ }
+ }
+ else if (cur_layout == other_layout) {
+ if (usage != LP_TEX_USAGE_WRITE_ALL) {
+ /* need to convert tiled data to linear or vice versa */
+ *convert = TRUE;
+
+ if (usage == LP_TEX_USAGE_READ)
+ new_layout = LP_TEX_LAYOUT_BOTH;
+ }
+ }
+ else {
+ assert(cur_layout == LP_TEX_LAYOUT_NONE ||
+ cur_layout == target_layout);
+ }
+
+ assert(new_layout == LP_TEX_LAYOUT_BOTH ||
+ new_layout == target_layout);
+
+ *new_layout_return = new_layout;
+}
+
+
+/**
+ * Return pointer to a 2D texture image/face/slice.
+ * No tiled/linear conversion is done.
+ */
+ubyte *
+llvmpipe_get_texture_image_address(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ enum lp_texture_layout layout)
+{
+ struct llvmpipe_texture_image *img;
+ unsigned offset;
+
+ if (layout == LP_TEX_LAYOUT_LINEAR) {
+ img = &lpr->linear[level];
+ }
+ else {
+ assert (layout == LP_TEX_LAYOUT_TILED);
+ img = &lpr->tiled[level];
+ }
+
+ if (face_slice > 0)
+ offset = face_slice * tex_image_face_size(lpr, level, layout);
+ else
+ offset = 0;
+
+ return (ubyte *) img->data + offset;
+}
+
+
+static INLINE enum lp_texture_layout
+llvmpipe_get_texture_tile_layout(const struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ unsigned x, unsigned y)
+{
+ uint i;
+ assert(resource_is_texture(&lpr->base));
+ assert(x < lpr->tiles_per_row[level]);
+ i = face_slice * lpr->tiles_per_image[level]
+ + y * lpr->tiles_per_row[level] + x;
+ return lpr->layout[level][i];
+}
+
+
+static INLINE void
+llvmpipe_set_texture_tile_layout(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ unsigned x, unsigned y,
+ enum lp_texture_layout layout)
+{
+ uint i;
+ assert(resource_is_texture(&lpr->base));
+ assert(x < lpr->tiles_per_row[level]);
+ i = face_slice * lpr->tiles_per_image[level]
+ + y * lpr->tiles_per_row[level] + x;
+ lpr->layout[level][i] = layout;
+}
+
+
+/**
+ * Set the layout mode for all tiles in a particular image.
+ */
+static INLINE void
+llvmpipe_set_texture_image_layout(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ unsigned width_t, unsigned height_t,
+ enum lp_texture_layout layout)
+{
+ const unsigned start = face_slice * lpr->tiles_per_image[level];
+ unsigned i;
+
+ for (i = 0; i < width_t * height_t; i++) {
+ lpr->layout[level][start + i] = layout;
+ }
+}
+
+
+/**
+ * Allocate storage for a linear or tile texture image (all cube
+ * faces and all 3D slices.
+ */
+static void
+alloc_image_data(struct llvmpipe_resource *lpr, unsigned level,
+ enum lp_texture_layout layout)
+{
+ if (lpr->dt)
+ assert(level == 0);
+
+ if (layout == LP_TEX_LAYOUT_TILED) {
+ /* tiled data is stored in regular memory */
+ uint buffer_size = tex_image_size(lpr, level, layout);
+ lpr->tiled[level].data = align_malloc(buffer_size, 16);
+ }
+ else {
+ assert(layout == LP_TEX_LAYOUT_LINEAR);
+ if (lpr->dt) {
+ /* we get the linear memory from the winsys */
+ struct llvmpipe_screen *screen = llvmpipe_screen(lpr->base.screen);
+ struct sw_winsys *winsys = screen->winsys;
+
+ lpr->linear[0].data =
+ winsys->displaytarget_map(winsys, lpr->dt,
+ PIPE_TRANSFER_READ_WRITE);
+ }
+ else {
+ /* not a display target - allocate regular memory */
+ uint buffer_size = tex_image_size(lpr, level, LP_TEX_LAYOUT_LINEAR);
+ lpr->linear[level].data = align_malloc(buffer_size, 16);
+ }
+ }
+}
+
+
+
+/**
+ * Return pointer to texture image data (either linear or tiled layout)
+ * for a particular cube face or 3D texture slice.
+ *
+ * \param face_slice the cube face or 3D slice of interest
+ * \param usage one of LP_TEX_USAGE_READ/WRITE_ALL/READ_WRITE
+ * \param layout either LP_TEX_LAYOUT_LINEAR or _TILED or _NONE
+ */
+void *
+llvmpipe_get_texture_image(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ enum lp_texture_usage usage,
+ enum lp_texture_layout layout)
+{
+ /*
+ * 'target' refers to the image which we're retrieving (either in
+ * tiled or linear layout).
+ * 'other' refers to the same image but in the other layout. (it may
+ * or may not exist.
+ */
+ struct llvmpipe_texture_image *target_img;
+ struct llvmpipe_texture_image *other_img;
+ void *target_data;
+ void *other_data;
+ const unsigned width = u_minify(lpr->base.width0, level);
+ const unsigned height = u_minify(lpr->base.height0, level);
+ const unsigned width_t = align(width, TILE_SIZE) / TILE_SIZE;
+ const unsigned height_t = align(height, TILE_SIZE) / TILE_SIZE;
+ enum lp_texture_layout other_layout;
+ boolean only_allocate;
+
+ assert(layout == LP_TEX_LAYOUT_NONE ||
+ layout == LP_TEX_LAYOUT_TILED ||
+ layout == LP_TEX_LAYOUT_LINEAR);
+
+ assert(usage == LP_TEX_USAGE_READ ||
+ usage == LP_TEX_USAGE_READ_WRITE ||
+ usage == LP_TEX_USAGE_WRITE_ALL);
+
+ /* check for the special case of layout == LP_TEX_LAYOUT_NONE */
+ if (layout == LP_TEX_LAYOUT_NONE) {
+ only_allocate = TRUE;
+ layout = LP_TEX_LAYOUT_TILED;
+ }
+ else {
+ only_allocate = FALSE;
+ }
+
+ if (lpr->dt) {
+ assert(lpr->linear[level].data);
+ }
+
+ /* which is target? which is other? */
+ if (layout == LP_TEX_LAYOUT_LINEAR) {
+ target_img = &lpr->linear[level];
+ other_img = &lpr->tiled[level];
+ other_layout = LP_TEX_LAYOUT_TILED;
+ }
+ else {
+ target_img = &lpr->tiled[level];
+ other_img = &lpr->linear[level];
+ other_layout = LP_TEX_LAYOUT_LINEAR;
+ }
+
+ target_data = target_img->data;
+ other_data = other_img->data;
+
+ if (!target_data) {
+ /* allocate memory for the target image now */
+ alloc_image_data(lpr, level, layout);
+ target_data = target_img->data;
+ }
+
+ if (face_slice > 0) {
+ unsigned target_offset, other_offset;
+
+ target_offset = face_slice * tex_image_face_size(lpr, level, layout);
+ other_offset = face_slice * tex_image_face_size(lpr, level, other_layout);
+ if (target_data) {
+ target_data = (uint8_t *) target_data + target_offset;
+ }
+ if (other_data) {
+ other_data = (uint8_t *) other_data + other_offset;
+ }
+ }
+
+ if (only_allocate) {
+ /* Just allocating tiled memory. Don't initialize it from the
+ * linear data if it exists.
+ */
+ return target_data;
+ }
+
+ if (other_data) {
+ /* may need to convert other data to the requested layout */
+ enum lp_texture_layout new_layout;
+ unsigned x, y;
+
+ /* loop over all image tiles, doing layout conversion where needed */
+ for (y = 0; y < height_t; y++) {
+ for (x = 0; x < width_t; x++) {
+ enum lp_texture_layout cur_layout =
+ llvmpipe_get_texture_tile_layout(lpr, face_slice, level, x, y);
+ boolean convert;
+
+ layout_logic(cur_layout, layout, usage, &new_layout, &convert);
+
+ if (convert) {
+ if (layout == LP_TEX_LAYOUT_TILED) {
+ lp_linear_to_tiled(other_data, target_data,
+ x * TILE_SIZE, y * TILE_SIZE,
+ TILE_SIZE, TILE_SIZE,
+ lpr->base.format,
+ lpr->row_stride[level],
+ lpr->tiles_per_row[level]);
+ }
+ else {
+ assert(layout == LP_TEX_LAYOUT_LINEAR);
+ lp_tiled_to_linear(other_data, target_data,
+ x * TILE_SIZE, y * TILE_SIZE,
+ TILE_SIZE, TILE_SIZE,
+ lpr->base.format,
+ lpr->row_stride[level],
+ lpr->tiles_per_row[level]);
+ }
+ }
+
+ if (new_layout != cur_layout)
+ llvmpipe_set_texture_tile_layout(lpr, face_slice, level, x, y,
+ new_layout);
+ }
+ }
+ }
+ else {
+ /* no other data */
+ llvmpipe_set_texture_image_layout(lpr, face_slice, level,
+ width_t, height_t, layout);
+ }
+
+ assert(target_data);
+
+ return target_data;
+}
+
+
+/**
+ * Return pointer to start of a texture image (1D, 2D, 3D, CUBE).
+ * All cube faces and 3D slices will be converted to the requested
+ * layout if needed.
+ * This is typically used when we're about to sample from a texture.
+ */
+void *
+llvmpipe_get_texture_image_all(struct llvmpipe_resource *lpr,
+ unsigned level,
+ enum lp_texture_usage usage,
+ enum lp_texture_layout layout)
+{
+ const int slices = lpr->num_slices_faces[level];
+ int slice;
+ void *map = NULL;
+
+ assert(slices > 0);
+
+ for (slice = slices - 1; slice >= 0; slice--) {
+ map = llvmpipe_get_texture_image(lpr, slice, level, usage, layout);
+ }
+
+ return map;
+}
+
+
+/**
+ * Get pointer to a linear image (not the tile!) where the tile at (x,y)
+ * is known to be in linear layout.
+ * Conversion from tiled to linear will be done if necessary.
+ * \return pointer to start of image/face (not the tile)
+ */
+ubyte *
+llvmpipe_get_texture_tile_linear(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ enum lp_texture_usage usage,
+ unsigned x, unsigned y)
+{
+ struct llvmpipe_texture_image *linear_img = &lpr->linear[level];
+ enum lp_texture_layout cur_layout, new_layout;
+ const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE;
+ boolean convert;
+ uint8_t *tiled_image, *linear_image;
+
+ assert(resource_is_texture(&lpr->base));
+ assert(x % TILE_SIZE == 0);
+ assert(y % TILE_SIZE == 0);
+
+ if (!linear_img->data) {
+ /* allocate memory for the linear image now */
+ alloc_image_data(lpr, level, LP_TEX_LAYOUT_LINEAR);
+ }
+
+ /* compute address of the slice/face of the image that contains the tile */
+ tiled_image = llvmpipe_get_texture_image_address(lpr, face_slice, level,
+ LP_TEX_LAYOUT_TILED);
+ linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level,
+ LP_TEX_LAYOUT_LINEAR);
+
+ /* get current tile layout and determine if data conversion is needed */
+ cur_layout = llvmpipe_get_texture_tile_layout(lpr, face_slice, level, tx, ty);
+
+ layout_logic(cur_layout, LP_TEX_LAYOUT_LINEAR, usage,
+ &new_layout, &convert);
+
+ if (convert) {
+ lp_tiled_to_linear(tiled_image, linear_image,
+ x, y, TILE_SIZE, TILE_SIZE, lpr->base.format,
+ lpr->row_stride[level],
+ lpr->tiles_per_row[level]);
+ }
+
+ if (new_layout != cur_layout)
+ llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty, new_layout);
+
+ return linear_image;
+}
+
+
+/**
+ * Get pointer to tiled data for rendering.
+ * \return pointer to the tiled data at the given tile position
+ */
+ubyte *
+llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ enum lp_texture_usage usage,
+ unsigned x, unsigned y)
+{
+ struct llvmpipe_texture_image *tiled_img = &lpr->tiled[level];
+ enum lp_texture_layout cur_layout, new_layout;
+ const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE;
+ boolean convert;
+ uint8_t *tiled_image, *linear_image;
+ unsigned tile_offset;
+
+ assert(x % TILE_SIZE == 0);
+ assert(y % TILE_SIZE == 0);
+
+ if (!tiled_img->data) {
+ /* allocate memory for the tiled image now */
+ alloc_image_data(lpr, level, LP_TEX_LAYOUT_TILED);
+ }
+
+ /* compute address of the slice/face of the image that contains the tile */
+ tiled_image = llvmpipe_get_texture_image_address(lpr, face_slice, level,
+ LP_TEX_LAYOUT_TILED);
+ linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level,
+ LP_TEX_LAYOUT_LINEAR);
+
+ /* get current tile layout and see if we need to convert the data */
+ cur_layout = llvmpipe_get_texture_tile_layout(lpr, face_slice, level, tx, ty);
+
+ layout_logic(cur_layout, LP_TEX_LAYOUT_TILED, usage, &new_layout, &convert);
+ if (convert) {
+ lp_linear_to_tiled(linear_image, tiled_image,
+ x, y, TILE_SIZE, TILE_SIZE, lpr->base.format,
+ lpr->row_stride[level],
+ lpr->tiles_per_row[level]);
+ }
+
+ if (new_layout != cur_layout)
+ llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty, new_layout);
+
+ /* compute, return address of the 64x64 tile */
+ tile_offset = (ty * lpr->tiles_per_row[level] + tx)
+ * TILE_SIZE * TILE_SIZE * 4;
+
+ return (ubyte *) tiled_image + tile_offset;
+}
+
+
+/**
+ * Return size of resource in bytes
+ */
+unsigned
+llvmpipe_resource_size(const struct pipe_resource *resource)
+{
+ const struct llvmpipe_resource *lpr = llvmpipe_resource_const(resource);
+ unsigned lvl, size = 0;
+
+ for (lvl = 0; lvl <= lpr->base.last_level; lvl++) {
+ if (lpr->linear[lvl].data)
+ size += tex_image_size(lpr, lvl, LP_TEX_LAYOUT_LINEAR);
+
+ if (lpr->tiled[lvl].data)
+ size += tex_image_size(lpr, lvl, LP_TEX_LAYOUT_TILED);
+ }
+
+ return size;
+}
+
+
+#ifdef DEBUG
+void
+llvmpipe_print_resources(void)
+{
+ struct llvmpipe_resource *lpr;
+ unsigned n = 0, total = 0;
+
+ debug_printf("LLVMPIPE: current resources:\n");
+ foreach(lpr, &resource_list) {
+ unsigned size = llvmpipe_resource_size(&lpr->base);
+ debug_printf("resource %u at %p, size %ux%ux%u: %u bytes, refcount %u\n",
+ lpr->id, (void *) lpr,
+ lpr->base.width0, lpr->base.height0, lpr->base.depth0,
+ size, lpr->base.reference.count);
+ total += size;
+ n++;
+ }
+ debug_printf("LLVMPIPE: total size of %u resources: %u\n", n, total);
+}
+#endif
+
+
+void
+llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen)
+{
+#ifdef DEBUG
+ /* init linked list for tracking resources */
+ {
+ static boolean first_call = TRUE;
+ if (first_call) {
+ memset(&resource_list, 0, sizeof(resource_list));
+ make_empty_list(&resource_list);
+ first_call = FALSE;
+ }
+ }
+#endif
+
+ screen->resource_create = llvmpipe_resource_create;
+ screen->resource_destroy = llvmpipe_resource_destroy;
+ screen->resource_from_handle = llvmpipe_resource_from_handle;
+ screen->resource_get_handle = llvmpipe_resource_get_handle;
+ screen->user_buffer_create = llvmpipe_user_buffer_create;
+
+ screen->get_tex_surface = llvmpipe_get_tex_surface;
+ screen->tex_surface_destroy = llvmpipe_tex_surface_destroy;
+}
+
+
+void
+llvmpipe_init_context_resource_funcs(struct pipe_context *pipe)
+{
+ pipe->get_transfer = llvmpipe_get_transfer;
+ pipe->transfer_destroy = llvmpipe_transfer_destroy;
+ pipe->transfer_map = llvmpipe_transfer_map;
+ pipe->transfer_unmap = llvmpipe_transfer_unmap;
+ pipe->is_resource_referenced = llvmpipe_is_resource_referenced;
+
+ pipe->transfer_flush_region = u_default_transfer_flush_region;
+ pipe->transfer_inline_write = u_default_transfer_inline_write;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h
new file mode 100644
index 0000000000..503b6a19a8
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_texture.h
@@ -0,0 +1,237 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_TEXTURE_H
+#define LP_TEXTURE_H
+
+
+#include "pipe/p_state.h"
+#include "util/u_debug.h"
+#include "lp_limits.h"
+
+
+enum lp_texture_usage
+{
+ LP_TEX_USAGE_READ = 100,
+ LP_TEX_USAGE_READ_WRITE,
+ LP_TEX_USAGE_WRITE_ALL
+};
+
+
+/** Per-tile layout mode */
+enum lp_texture_layout
+{
+ LP_TEX_LAYOUT_NONE = 0, /**< no layout for the tile data yet */
+ LP_TEX_LAYOUT_TILED, /**< the tile data is in tiled layout */
+ LP_TEX_LAYOUT_LINEAR, /**< the tile data is in linear layout */
+ LP_TEX_LAYOUT_BOTH /**< the tile data is in both modes */
+};
+
+
+struct pipe_context;
+struct pipe_screen;
+struct llvmpipe_context;
+
+struct sw_displaytarget;
+
+
+/**
+ * We keep one or two copies of the texture image data: one in a simple
+ * linear layout (for texture sampling) and another in a tiled layout (for
+ * render targets). We keep track of whether each image tile is linear
+ * or tiled on a per-tile basis.
+ */
+
+
+/** A 1D/2D/3D image, one mipmap level */
+struct llvmpipe_texture_image
+{
+ void *data;
+};
+
+
+/**
+ * llvmpipe subclass of pipe_resource. A texture, drawing surface,
+ * vertex buffer, const buffer, etc.
+ * Textures are stored differently than othere types of objects such as
+ * vertex buffers and const buffers.
+ * The former are tiled and have per-tile layout flags.
+ * The later are simple malloc'd blocks of memory.
+ */
+struct llvmpipe_resource
+{
+ struct pipe_resource base;
+
+ /** Row stride in bytes */
+ unsigned row_stride[LP_MAX_TEXTURE_LEVELS];
+ /** Image stride (for cube maps or 3D textures) in bytes */
+ unsigned img_stride[LP_MAX_TEXTURE_LEVELS];
+ unsigned tiles_per_row[LP_MAX_TEXTURE_LEVELS];
+ unsigned tiles_per_image[LP_MAX_TEXTURE_LEVELS];
+ /** Number of 3D slices or cube faces per level */
+ unsigned num_slices_faces[LP_MAX_TEXTURE_LEVELS];
+
+ /**
+ * Display target, for textures with the PIPE_BIND_DISPLAY_TARGET
+ * usage.
+ */
+ struct sw_displaytarget *dt;
+
+ /**
+ * Malloc'ed data for regular textures, or a mapping to dt above.
+ */
+ struct llvmpipe_texture_image tiled[LP_MAX_TEXTURE_LEVELS];
+ struct llvmpipe_texture_image linear[LP_MAX_TEXTURE_LEVELS];
+
+ /**
+ * Data for non-texture resources.
+ */
+ void *data;
+
+ /** array [level][face or slice][tile_y][tile_x] of layout values) */
+ enum lp_texture_layout *layout[LP_MAX_TEXTURE_LEVELS];
+
+ boolean userBuffer; /** Is this a user-space buffer? */
+ unsigned timestamp;
+
+ unsigned id; /**< temporary, for debugging */
+
+#ifdef DEBUG
+ /** for linked list */
+ struct llvmpipe_resource *prev, *next;
+#endif
+};
+
+
+struct llvmpipe_transfer
+{
+ struct pipe_transfer base;
+
+ unsigned long offset;
+};
+
+
+/** cast wrappers */
+static INLINE struct llvmpipe_resource *
+llvmpipe_resource(struct pipe_resource *pt)
+{
+ return (struct llvmpipe_resource *) pt;
+}
+
+
+static INLINE const struct llvmpipe_resource *
+llvmpipe_resource_const(const struct pipe_resource *pt)
+{
+ return (const struct llvmpipe_resource *) pt;
+}
+
+
+static INLINE struct llvmpipe_transfer *
+llvmpipe_transfer(struct pipe_transfer *pt)
+{
+ return (struct llvmpipe_transfer *) pt;
+}
+
+
+void llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen);
+void llvmpipe_init_context_resource_funcs(struct pipe_context *pipe);
+
+static INLINE unsigned
+llvmpipe_resource_stride(struct pipe_resource *resource,
+ unsigned level)
+{
+ struct llvmpipe_resource *lpr = llvmpipe_resource(resource);
+ assert(level < LP_MAX_TEXTURE_2D_LEVELS);
+ return lpr->row_stride[level];
+}
+
+
+void *
+llvmpipe_resource_map(struct pipe_resource *resource,
+ unsigned face_slice,
+ unsigned level,
+ unsigned zslice,
+ enum lp_texture_usage tex_usage,
+ enum lp_texture_layout layout);
+
+void
+llvmpipe_resource_unmap(struct pipe_resource *resource,
+ unsigned face_slice,
+ unsigned level,
+ unsigned zslice);
+
+
+void *
+llvmpipe_resource_data(struct pipe_resource *resource);
+
+
+unsigned
+llvmpipe_resource_size(const struct pipe_resource *resource);
+
+
+ubyte *
+llvmpipe_get_texture_image_address(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ enum lp_texture_layout layout);
+
+void *
+llvmpipe_get_texture_image(struct llvmpipe_resource *resource,
+ unsigned face_slice, unsigned level,
+ enum lp_texture_usage usage,
+ enum lp_texture_layout layout);
+
+void *
+llvmpipe_get_texture_image_all(struct llvmpipe_resource *lpr,
+ unsigned level,
+ enum lp_texture_usage usage,
+ enum lp_texture_layout layout);
+
+ubyte *
+llvmpipe_get_texture_tile_linear(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ enum lp_texture_usage usage,
+ unsigned x, unsigned y);
+
+ubyte *
+llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr,
+ unsigned face_slice, unsigned level,
+ enum lp_texture_usage usage,
+ unsigned x, unsigned y);
+
+
+
+extern void
+llvmpipe_print_resources(void);
+
+
+extern void
+llvmpipe_init_screen_texture_funcs(struct pipe_screen *screen);
+
+extern void
+llvmpipe_init_context_texture_funcs(struct pipe_context *pipe);
+
+#endif /* LP_TEXTURE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c
new file mode 100644
index 0000000000..2b63992dd7
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c
@@ -0,0 +1,328 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * Code to convert images from tiled to linear and back.
+ * XXX there are quite a few assumptions about color and z/stencil being
+ * 32bpp.
+ */
+
+
+#include "util/u_format.h"
+#include "lp_tile_soa.h"
+#include "lp_tile_image.h"
+
+
+#define BYTES_PER_TILE (TILE_SIZE * TILE_SIZE * 4)
+
+
+/**
+ * Untile a 4x4 block of 32-bit words (all contiguous) to linear layout
+ * at dst, with dst_stride words between rows.
+ */
+static void
+untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride)
+{
+ uint32_t *d0 = dst;
+ uint32_t *d1 = d0 + dst_stride;
+ uint32_t *d2 = d1 + dst_stride;
+ uint32_t *d3 = d2 + dst_stride;
+
+ d0[0] = src[0]; d0[1] = src[1]; d0[2] = src[4]; d0[3] = src[5];
+ d1[0] = src[2]; d1[1] = src[3]; d1[2] = src[6]; d1[3] = src[7];
+ d2[0] = src[8]; d2[1] = src[9]; d2[2] = src[12]; d2[3] = src[13];
+ d3[0] = src[10]; d3[1] = src[11]; d3[2] = src[14]; d3[3] = src[15];
+}
+
+
+
+/**
+ * Untile a 4x4 block of 16-bit words (all contiguous) to linear layout
+ * at dst, with dst_stride words between rows.
+ */
+static void
+untile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned dst_stride)
+{
+ uint16_t *d0 = dst;
+ uint16_t *d1 = d0 + dst_stride;
+ uint16_t *d2 = d1 + dst_stride;
+ uint16_t *d3 = d2 + dst_stride;
+
+ d0[0] = src[0]; d0[1] = src[1]; d0[2] = src[4]; d0[3] = src[5];
+ d1[0] = src[2]; d1[1] = src[3]; d1[2] = src[6]; d1[3] = src[7];
+ d2[0] = src[8]; d2[1] = src[9]; d2[2] = src[12]; d2[3] = src[13];
+ d3[0] = src[10]; d3[1] = src[11]; d3[2] = src[14]; d3[3] = src[15];
+}
+
+
+
+/**
+ * Convert a 4x4 rect of 32-bit words from a linear layout into tiled
+ * layout (in which all 16 words are contiguous).
+ */
+static void
+tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride)
+{
+ const uint32_t *s0 = src;
+ const uint32_t *s1 = s0 + src_stride;
+ const uint32_t *s2 = s1 + src_stride;
+ const uint32_t *s3 = s2 + src_stride;
+
+ dst[0] = s0[0]; dst[1] = s0[1]; dst[4] = s0[2]; dst[5] = s0[3];
+ dst[2] = s1[0]; dst[3] = s1[1]; dst[6] = s1[2]; dst[7] = s1[3];
+ dst[8] = s2[0]; dst[9] = s2[1]; dst[12] = s2[2]; dst[13] = s2[3];
+ dst[10] = s3[0]; dst[11] = s3[1]; dst[14] = s3[2]; dst[15] = s3[3];
+}
+
+
+
+/**
+ * Convert a 4x4 rect of 16-bit words from a linear layout into tiled
+ * layout (in which all 16 words are contiguous).
+ */
+static void
+tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride)
+{
+ const uint16_t *s0 = src;
+ const uint16_t *s1 = s0 + src_stride;
+ const uint16_t *s2 = s1 + src_stride;
+ const uint16_t *s3 = s2 + src_stride;
+
+ dst[0] = s0[0]; dst[1] = s0[1]; dst[4] = s0[2]; dst[5] = s0[3];
+ dst[2] = s1[0]; dst[3] = s1[1]; dst[6] = s1[2]; dst[7] = s1[3];
+ dst[8] = s2[0]; dst[9] = s2[1]; dst[12] = s2[2]; dst[13] = s2[3];
+ dst[10] = s3[0]; dst[11] = s3[1]; dst[14] = s3[2]; dst[15] = s3[3];
+}
+
+
+
+/**
+ * Convert a tiled image into a linear image.
+ * \param dst_stride dest row stride in bytes
+ */
+void
+lp_tiled_to_linear(const void *src, void *dst,
+ unsigned x, unsigned y,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned dst_stride,
+ unsigned tiles_per_row)
+{
+ assert(x % TILE_SIZE == 0);
+ assert(y % TILE_SIZE == 0);
+ /*assert(width % TILE_SIZE == 0);
+ assert(height % TILE_SIZE == 0);*/
+
+ /* Note that Z/stencil surfaces use a different tiling size than
+ * color surfaces.
+ */
+ if (util_format_is_depth_or_stencil(format)) {
+ const uint bpp = util_format_get_blocksize(format);
+ const uint src_stride = dst_stride * TILE_VECTOR_WIDTH;
+ const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
+ const uint tiles_per_row = src_stride / (tile_w * tile_h * bpp);
+
+ dst_stride /= bpp; /* convert from bytes to words */
+
+ if (bpp == 4) {
+ const uint32_t *src32 = (const uint32_t *) src;
+ uint32_t *dst32 = (uint32_t *) dst;
+ uint i, j;
+
+ for (j = 0; j < height; j += tile_h) {
+ for (i = 0; i < width; i += tile_w) {
+ /* compute offsets in 32-bit words */
+ uint ii = i + x, jj = j + y;
+ uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
+ * (tile_w * tile_h);
+ uint dst_offset = jj * dst_stride + ii;
+ untile_4_4_uint32(src32 + src_offset,
+ dst32 + dst_offset,
+ dst_stride);
+ }
+ }
+ }
+ else {
+ const uint16_t *src16 = (const uint16_t *) src;
+ uint16_t *dst16 = (uint16_t *) dst;
+ uint i, j;
+
+ assert(bpp == 2);
+
+ for (j = 0; j < height; j += tile_h) {
+ for (i = 0; i < width; i += tile_w) {
+ /* compute offsets in 16-bit words */
+ uint ii = i + x, jj = j + y;
+ uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
+ * (tile_w * tile_h);
+ uint dst_offset = jj * dst_stride + ii;
+ untile_4_4_uint16(src16 + src_offset,
+ dst16 + dst_offset,
+ dst_stride);
+ }
+ }
+ }
+ }
+ else {
+ /* color image */
+ const uint bpp = 4;
+ const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
+ const uint bytes_per_tile = tile_w * tile_h * bpp;
+ uint i, j;
+
+ for (j = 0; j < height; j += tile_h) {
+ for (i = 0; i < width; i += tile_w) {
+ uint ii = i + x, jj = j + y;
+ uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w);
+ uint byte_offset = tile_offset * bytes_per_tile;
+ const uint8_t *src_tile = (uint8_t *) src + byte_offset;
+
+ lp_tile_unswizzle_4ub(format,
+ src_tile,
+ dst, dst_stride,
+ ii, jj, tile_w, tile_h);
+ }
+ }
+ }
+}
+
+
+/**
+ * Convert a linear image into a tiled image.
+ * \param src_stride source row stride in bytes
+ */
+void
+lp_linear_to_tiled(const void *src, void *dst,
+ unsigned x, unsigned y,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned src_stride,
+ unsigned tiles_per_row)
+{
+ assert(x % TILE_SIZE == 0);
+ assert(y % TILE_SIZE == 0);
+ /*
+ assert(width % TILE_SIZE == 0);
+ assert(height % TILE_SIZE == 0);
+ */
+
+ if (util_format_is_depth_or_stencil(format)) {
+ const uint bpp = util_format_get_blocksize(format);
+ const uint dst_stride = src_stride * TILE_VECTOR_WIDTH;
+ const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
+ const uint tiles_per_row = dst_stride / (tile_w * tile_h * bpp);
+
+ src_stride /= bpp; /* convert from bytes to words */
+
+ if (bpp == 4) {
+ const uint32_t *src32 = (const uint32_t *) src;
+ uint32_t *dst32 = (uint32_t *) dst;
+ uint i, j;
+
+ for (j = 0; j < height; j += tile_h) {
+ for (i = 0; i < width; i += tile_w) {
+ /* compute offsets in 32-bit words */
+ uint ii = i + x, jj = j + y;
+ uint src_offset = jj * src_stride + ii;
+ uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
+ * (tile_w * tile_h);
+ tile_4_4_uint32(src32 + src_offset,
+ dst32 + dst_offset,
+ src_stride);
+ }
+ }
+ }
+ else {
+ const uint16_t *src16 = (const uint16_t *) src;
+ uint16_t *dst16 = (uint16_t *) dst;
+ uint i, j;
+
+ assert(bpp == 2);
+
+ for (j = 0; j < height; j += tile_h) {
+ for (i = 0; i < width; i += tile_w) {
+ /* compute offsets in 16-bit words */
+ uint ii = i + x, jj = j + y;
+ uint src_offset = jj * src_stride + ii;
+ uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
+ * (tile_w * tile_h);
+ tile_4_4_uint16(src16 + src_offset,
+ dst16 + dst_offset,
+ src_stride);
+ }
+ }
+ }
+ }
+ else {
+ const uint bpp = 4;
+ const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
+ const uint bytes_per_tile = tile_w * tile_h * bpp;
+ uint i, j;
+
+ for (j = 0; j < height; j += TILE_SIZE) {
+ for (i = 0; i < width; i += TILE_SIZE) {
+ uint ii = i + x, jj = j + y;
+ uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w);
+ uint byte_offset = tile_offset * bytes_per_tile;
+ uint8_t *dst_tile = (uint8_t *) dst + byte_offset;
+
+ lp_tile_swizzle_4ub(format,
+ dst_tile,
+ src, src_stride,
+ ii, jj, tile_w, tile_h);
+ }
+ }
+ }
+}
+
+
+/**
+ * For testing only.
+ */
+void
+test_tiled_linear_conversion(void *data,
+ enum pipe_format format,
+ unsigned width, unsigned height,
+ unsigned stride)
+{
+ /* size in tiles */
+ unsigned wt = (width + TILE_SIZE - 1) / TILE_SIZE;
+ unsigned ht = (height + TILE_SIZE - 1) / TILE_SIZE;
+
+ uint8_t *tiled = malloc(wt * ht * TILE_SIZE * TILE_SIZE * 4);
+
+ /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/
+
+ lp_linear_to_tiled(data, tiled, 0, 0, width, height, format,
+ stride, wt);
+
+ lp_tiled_to_linear(tiled, data, 0, 0, width, height, format,
+ stride, wt);
+
+ free(tiled);
+}
+
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.h b/src/gallium/drivers/llvmpipe/lp_tile_image.h
new file mode 100644
index 0000000000..8de8efc6c1
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tile_image.h
@@ -0,0 +1,57 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_TILE_IMAGE_H
+#define LP_TILE_IMAGE_H
+
+
+void
+lp_tiled_to_linear(const void *src, void *dst,
+ unsigned x, unsigned y,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned dst_stride,
+ unsigned tiles_per_row);
+
+
+void
+lp_linear_to_tiled(const void *src, void *dst,
+ unsigned x, unsigned y,
+ unsigned width, unsigned height,
+ enum pipe_format format,
+ unsigned src_stride,
+ unsigned tiles_per_row);
+
+
+void
+test_tiled_linear_conversion(void *data,
+ enum pipe_format format,
+ unsigned width, unsigned height,
+ unsigned stride);
+
+
+#endif /* LP_TILE_IMAGE_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
new file mode 100644
index 0000000000..07f71b8411
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
@@ -0,0 +1,97 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef LP_TILE_SOA_H
+#define LP_TILE_SOA_H
+
+#include "pipe/p_compiler.h"
+#include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */
+#include "lp_limits.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+struct pipe_transfer;
+
+
+#define TILE_VECTOR_HEIGHT 4
+#define TILE_VECTOR_WIDTH 4
+
+extern const unsigned char
+tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH];
+
+#define TILE_C_STRIDE (TILE_VECTOR_HEIGHT * TILE_VECTOR_WIDTH) //16
+#define TILE_X_STRIDE (NUM_CHANNELS * TILE_C_STRIDE) //64
+#define TILE_Y_STRIDE (TILE_VECTOR_HEIGHT * TILE_SIZE * NUM_CHANNELS) //1024
+
+
+#ifdef DEBUG
+extern unsigned lp_tile_unswizzle_count;
+extern unsigned lp_tile_swizzle_count;
+#endif
+
+
+/**
+ * Return offset of the given pixel (and color channel) from the start
+ * of a tile, in bytes.
+ */
+static INLINE unsigned
+tile_pixel_offset(unsigned x, unsigned y, unsigned c)
+{
+ unsigned ix = (x / TILE_VECTOR_WIDTH) * TILE_X_STRIDE;
+ unsigned iy = (y / TILE_VECTOR_HEIGHT) * TILE_Y_STRIDE;
+ unsigned offset = iy + ix + c * TILE_C_STRIDE +
+ tile_offset[y % TILE_VECTOR_HEIGHT][x % TILE_VECTOR_WIDTH];
+ return offset;
+}
+
+
+#define TILE_PIXEL(_p, _x, _y, _c) ((_p)[tile_pixel_offset(_x, _y, _c)])
+
+
+void
+lp_tile_swizzle_4ub(enum pipe_format format,
+ uint8_t *dst,
+ const void *src, unsigned src_stride,
+ unsigned x, unsigned y, unsigned w, unsigned h);
+
+
+void
+lp_tile_unswizzle_4ub(enum pipe_format format,
+ const uint8_t *src,
+ void *dst, unsigned dst_stride,
+ unsigned x, unsigned y, unsigned w, unsigned h);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
new file mode 100644
index 0000000000..5ab63cbac6
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -0,0 +1,403 @@
+#!/usr/bin/env python
+
+'''
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Pixel format accessor functions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+'''
+
+
+import sys
+import os.path
+
+sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '../../auxiliary/util'))
+
+from u_format_pack import *
+
+
+def is_format_supported(format):
+ '''Determines whether we actually have the plumbing necessary to generate the
+ to read/write to/from this format.'''
+
+ # FIXME: Ideally we would support any format combination here.
+
+ if format.layout != PLAIN:
+ return False
+
+ for i in range(4):
+ channel = format.channels[i]
+ if channel.type not in (VOID, UNSIGNED, SIGNED, FLOAT):
+ return False
+ if channel.type == FLOAT and channel.size not in (16, 32 ,64):
+ return False
+
+ if format.colorspace not in ('rgb', 'srgb'):
+ return False
+
+ return True
+
+
+def generate_format_read(format, dst_channel, dst_native_type, dst_suffix):
+ '''Generate the function to read pixels from a particular format'''
+
+ name = format.short_name()
+
+ src_native_type = native_type(format)
+
+ print 'static void'
+ print 'lp_tile_%s_swizzle_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type)
+ print '{'
+ print ' unsigned x, y;'
+ print ' const uint8_t *src_row = src + y0*src_stride;'
+ print ' for (y = 0; y < h; ++y) {'
+ print ' const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride())
+ print ' for (x = 0; x < w; ++x) {'
+
+ names = ['']*4
+ if format.colorspace in ('rgb', 'srgb'):
+ for i in range(4):
+ swizzle = format.swizzles[i]
+ if swizzle < 4:
+ names[swizzle] += 'rgba'[i]
+ elif format.colorspace == 'zs':
+ swizzle = format.swizzles[0]
+ if swizzle < 4:
+ names[swizzle] = 'z'
+ else:
+ assert False
+ else:
+ assert False
+
+ if format.layout == PLAIN:
+ if not format.is_array():
+ print ' %s pixel = *src_pixel++;' % src_native_type
+ shift = 0;
+ for i in range(4):
+ src_channel = format.channels[i]
+ width = src_channel.size
+ if names[i]:
+ value = 'pixel'
+ mask = (1 << width) - 1
+ if shift:
+ value = '(%s >> %u)' % (value, shift)
+ if shift + width < format.block_size():
+ value = '(%s & 0x%x)' % (value, mask)
+ value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False)
+ print ' %s %s = %s;' % (dst_native_type, names[i], value)
+ shift += width
+ else:
+ for i in range(4):
+ if names[i]:
+ print ' %s %s;' % (dst_native_type, names[i])
+ for i in range(4):
+ src_channel = format.channels[i]
+ if names[i]:
+ value = '(*src_pixel++)'
+ value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False)
+ print ' %s = %s;' % (names[i], value)
+ elif src_channel.size:
+ print ' ++src_pixel;'
+ else:
+ assert False
+
+ for i in range(4):
+ if format.colorspace in ('rgb', 'srgb'):
+ swizzle = format.swizzles[i]
+ if swizzle < 4:
+ value = names[swizzle]
+ elif swizzle == SWIZZLE_0:
+ value = '0'
+ elif swizzle == SWIZZLE_1:
+ value = get_one(dst_channel)
+ else:
+ assert False
+ elif format.colorspace == 'zs':
+ if i < 3:
+ value = 'z'
+ else:
+ value = get_one(dst_channel)
+ else:
+ assert False
+ print ' TILE_PIXEL(dst, x, y, %u) = %s; /* %s */' % (i, value, 'rgba'[i])
+
+ print ' }'
+ print ' src_row += src_stride;'
+ print ' }'
+ print '}'
+ print
+
+
+def pack_rgba(format, src_channel, r, g, b, a):
+ """Return an expression for packing r, g, b, a into a pixel of the
+ given format. Ex: '(b << 24) | (g << 16) | (r << 8) | (a << 0)'
+ """
+ assert format.colorspace in ('rgb', 'srgb')
+ inv_swizzle = format.inv_swizzles()
+ shift = 0
+ expr = None
+ for i in range(4):
+ # choose r, g, b, or a depending on the inverse swizzle term
+ if inv_swizzle[i] == 0:
+ value = r
+ elif inv_swizzle[i] == 1:
+ value = g
+ elif inv_swizzle[i] == 2:
+ value = b
+ elif inv_swizzle[i] == 3:
+ value = a
+ else:
+ value = None
+
+ if value:
+ dst_channel = format.channels[i]
+ dst_native_type = native_type(format)
+ value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False)
+ term = "((%s) << %d)" % (value, shift)
+ if expr:
+ expr = expr + " | " + term
+ else:
+ expr = term
+
+ width = format.channels[i].size
+ shift = shift + width
+ return expr
+
+
+def emit_unrolled_unswizzle_code(format, src_channel):
+ '''Emit code for writing a block based on unrolled loops.
+ This is considerably faster than the TILE_PIXEL-based code below.
+ '''
+ dst_native_type = 'uint%u_t' % format.block_size()
+ print ' const unsigned dstpix_stride = dst_stride / %d;' % format.stride()
+ print ' %s *dstpix = (%s *) dst;' % (dst_native_type, dst_native_type)
+ print ' unsigned int qx, qy, i;'
+ print
+ print ' for (qy = 0; qy < h; qy += TILE_VECTOR_HEIGHT) {'
+ print ' const unsigned py = y0 + qy;'
+ print ' for (qx = 0; qx < w; qx += TILE_VECTOR_WIDTH) {'
+ print ' const unsigned px = x0 + qx;'
+ print ' const uint8_t *r = src + 0 * TILE_C_STRIDE;'
+ print ' const uint8_t *g = src + 1 * TILE_C_STRIDE;'
+ print ' const uint8_t *b = src + 2 * TILE_C_STRIDE;'
+ print ' const uint8_t *a = src + 3 * TILE_C_STRIDE;'
+ print ' (void) r; (void) g; (void) b; (void) a; /* silence warnings */'
+ print ' for (i = 0; i < TILE_C_STRIDE; i += 2) {'
+ print ' const uint32_t pixel0 = %s;' % pack_rgba(format, src_channel, "r[i+0]", "g[i+0]", "b[i+0]", "a[i+0]")
+ print ' const uint32_t pixel1 = %s;' % pack_rgba(format, src_channel, "r[i+1]", "g[i+1]", "b[i+1]", "a[i+1]")
+ print ' const unsigned offset = (py + tile_y_offset[i]) * dstpix_stride + (px + tile_x_offset[i]);'
+ print ' dstpix[offset + 0] = pixel0;'
+ print ' dstpix[offset + 1] = pixel1;'
+ print ' }'
+ print ' src += TILE_X_STRIDE;'
+ print ' }'
+ print ' }'
+
+
+def emit_tile_pixel_unswizzle_code(format, src_channel):
+ '''Emit code for writing a block based on the TILE_PIXEL macro.'''
+ dst_native_type = native_type(format)
+
+ inv_swizzle = format.inv_swizzles()
+
+ print ' unsigned x, y;'
+ print ' uint8_t *dst_row = dst + y0*dst_stride;'
+ print ' for (y = 0; y < h; ++y) {'
+ print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride())
+ print ' for (x = 0; x < w; ++x) {'
+
+ if format.layout == PLAIN:
+ if not format.is_array():
+ print ' %s pixel = 0;' % dst_native_type
+ shift = 0;
+ for i in range(4):
+ dst_channel = format.channels[i]
+ width = dst_channel.size
+ if inv_swizzle[i] is not None:
+ value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i]
+ value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False)
+ if shift:
+ value = '(%s << %u)' % (value, shift)
+ print ' pixel |= %s;' % value
+ shift += width
+ print ' *dst_pixel++ = pixel;'
+ else:
+ for i in range(4):
+ dst_channel = format.channels[i]
+ if inv_swizzle[i] is not None:
+ value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i]
+ value = conversion_expr(src_channel, dst_channel, dst_native_type, value, clamp=False)
+ print ' *dst_pixel++ = %s;' % value
+ elif dst_channel.size:
+ print ' ++dst_pixel;'
+ else:
+ assert False
+
+ print ' }'
+ print ' dst_row += dst_stride;'
+ print ' }'
+
+
+def generate_format_write(format, src_channel, src_native_type, src_suffix):
+ '''Generate the function to write pixels to a particular format'''
+
+ name = format.short_name()
+
+ print 'static void'
+ print 'lp_tile_%s_unswizzle_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type)
+ print '{'
+ if format.layout == PLAIN \
+ and format.colorspace == 'rgb' \
+ and format.block_size() <= 32 \
+ and format.is_pot() \
+ and not format.is_mixed() \
+ and (format.channels[0].type == UNSIGNED \
+ or format.channels[1].type == UNSIGNED):
+ emit_unrolled_unswizzle_code(format, src_channel)
+ else:
+ emit_tile_pixel_unswizzle_code(format, src_channel)
+ print '}'
+ print
+
+
+def generate_swizzle(formats, dst_channel, dst_native_type, dst_suffix):
+ '''Generate the dispatch function to read pixels from any format'''
+
+ for format in formats:
+ if is_format_supported(format):
+ generate_format_read(format, dst_channel, dst_native_type, dst_suffix)
+
+ print 'void'
+ print 'lp_tile_swizzle_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type)
+ print '{'
+ print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type
+ print '#ifdef DEBUG'
+ print ' lp_tile_swizzle_count += 1;'
+ print '#endif'
+ print ' switch(format) {'
+ for format in formats:
+ if is_format_supported(format):
+ print ' case %s:' % format.name
+ print ' func = &lp_tile_%s_swizzle_%s;' % (format.short_name(), dst_suffix)
+ print ' break;'
+ print ' default:'
+ print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));'
+ print ' return;'
+ print ' }'
+ print ' func(dst, (const uint8_t *)src, src_stride, x, y, w, h);'
+ print '}'
+ print
+
+
+def generate_unswizzle(formats, src_channel, src_native_type, src_suffix):
+ '''Generate the dispatch function to write pixels to any format'''
+
+ for format in formats:
+ if is_format_supported(format):
+ generate_format_write(format, src_channel, src_native_type, src_suffix)
+
+ print 'void'
+ print 'lp_tile_unswizzle_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type)
+
+ print '{'
+ print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type
+ print '#ifdef DEBUG'
+ print ' lp_tile_unswizzle_count += 1;'
+ print '#endif'
+ print ' switch(format) {'
+ for format in formats:
+ if is_format_supported(format):
+ print ' case %s:' % format.name
+ print ' func = &lp_tile_%s_unswizzle_%s;' % (format.short_name(), src_suffix)
+ print ' break;'
+ print ' default:'
+ print ' debug_printf("%s: unsupported format %s\\n", __FUNCTION__, util_format_name(format));'
+ print ' return;'
+ print ' }'
+ print ' func(src, (uint8_t *)dst, dst_stride, x, y, w, h);'
+ print '}'
+ print
+
+
+def main():
+ formats = []
+ for arg in sys.argv[1:]:
+ formats.extend(parse(arg))
+
+ print '/* This file is autogenerated by lp_tile_soa.py from u_format.csv. Do not edit directly. */'
+ print
+ # This will print the copyright message on the top of this file
+ print __doc__.strip()
+ print
+ print '#include "pipe/p_compiler.h"'
+ print '#include "util/u_format.h"'
+ print '#include "util/u_math.h"'
+ print '#include "util/u_half.h"'
+ print '#include "lp_tile_soa.h"'
+ print
+ print '#ifdef DEBUG'
+ print 'unsigned lp_tile_unswizzle_count = 0;'
+ print 'unsigned lp_tile_swizzle_count = 0;'
+ print '#endif'
+ print
+ print 'const unsigned char'
+ print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {'
+ print ' { 0, 1, 4, 5},'
+ print ' { 2, 3, 6, 7},'
+ print ' { 8, 9, 12, 13},'
+ print ' { 10, 11, 14, 15}'
+ print '};'
+ print
+ print '/* Note: these lookup tables could be replaced with some'
+ print ' * bit-twiddling code, but this is a little faster.'
+ print ' */'
+ print 'static unsigned tile_x_offset[TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT] = {'
+ print ' 0, 1, 0, 1, 2, 3, 2, 3,'
+ print ' 0, 1, 0, 1, 2, 3, 2, 3'
+ print '};'
+ print
+ print 'static unsigned tile_y_offset[TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT] = {'
+ print ' 0, 0, 1, 1, 0, 0, 1, 1,'
+ print ' 2, 2, 3, 3, 2, 2, 3, 3'
+ print '};'
+ print
+
+ channel = Channel(UNSIGNED, True, 8)
+ native_type = 'uint8_t'
+ suffix = '4ub'
+
+ generate_swizzle(formats, channel, native_type, suffix)
+ generate_unswizzle(formats, channel, native_type, suffix)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/src/gallium/drivers/llvmpipe/sse_mathfun.h b/src/gallium/drivers/llvmpipe/sse_mathfun.h
new file mode 100644
index 0000000000..8ac2064b7b
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/sse_mathfun.h
@@ -0,0 +1,773 @@
+/* SIMD (SSE1+MMX or SSE2) implementation of sin, cos, exp and log
+
+ Inspired by Intel Approximate Math library, and based on the
+ corresponding algorithms of the cephes math library
+
+ The default is to use the SSE1 version. If you define USE_SSE2 the
+ the SSE2 intrinsics will be used in place of the MMX intrinsics. Do
+ not expect any significant performance improvement with SSE2.
+*/
+
+/* Copyright (C) 2007 Julien Pommier
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ (this is the zlib license)
+*/
+
+#include <xmmintrin.h>
+
+/* yes I know, the top of this file is quite ugly */
+
+#ifdef _MSC_VER /* visual c++ */
+# define ALIGN16_BEG __declspec(align(16))
+# define ALIGN16_END
+#else /* gcc or icc */
+# define ALIGN16_BEG
+# define ALIGN16_END __attribute__((aligned(16)))
+#endif
+
+/* __m128 is ugly to write */
+typedef __m128 v4sf; // vector of 4 float (sse1)
+
+#ifdef USE_SSE2
+# include <emmintrin.h>
+typedef __m128i v4si; // vector of 4 int (sse2)
+#else
+typedef __m64 v2si; // vector of 2 int (mmx)
+#endif
+
+/* declare some SSE constants -- why can't I figure a better way to do that? */
+#define _PS_CONST(Name, Val) \
+ static const ALIGN16_BEG float _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
+#define _PI32_CONST(Name, Val) \
+ static const ALIGN16_BEG int _pi32_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
+#define _PS_CONST_TYPE(Name, Type, Val) \
+ static const ALIGN16_BEG Type _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
+
+_PS_CONST(1 , 1.0f);
+_PS_CONST(0p5, 0.5f);
+/* the smallest non denormalized float number */
+_PS_CONST_TYPE(min_norm_pos, int, 0x00800000);
+_PS_CONST_TYPE(mant_mask, int, 0x7f800000);
+_PS_CONST_TYPE(inv_mant_mask, int, ~0x7f800000);
+
+_PS_CONST_TYPE(sign_mask, int, 0x80000000);
+_PS_CONST_TYPE(inv_sign_mask, int, ~0x80000000);
+
+_PI32_CONST(1, 1);
+_PI32_CONST(inv1, ~1);
+_PI32_CONST(2, 2);
+_PI32_CONST(4, 4);
+_PI32_CONST(0x7f, 0x7f);
+
+_PS_CONST(cephes_SQRTHF, 0.707106781186547524);
+_PS_CONST(cephes_log_p0, 7.0376836292E-2);
+_PS_CONST(cephes_log_p1, - 1.1514610310E-1);
+_PS_CONST(cephes_log_p2, 1.1676998740E-1);
+_PS_CONST(cephes_log_p3, - 1.2420140846E-1);
+_PS_CONST(cephes_log_p4, + 1.4249322787E-1);
+_PS_CONST(cephes_log_p5, - 1.6668057665E-1);
+_PS_CONST(cephes_log_p6, + 2.0000714765E-1);
+_PS_CONST(cephes_log_p7, - 2.4999993993E-1);
+_PS_CONST(cephes_log_p8, + 3.3333331174E-1);
+_PS_CONST(cephes_log_q1, -2.12194440e-4);
+_PS_CONST(cephes_log_q2, 0.693359375);
+
+v4sf log_ps(v4sf x);
+v4sf exp_ps(v4sf x);
+v4sf sin_ps(v4sf x);
+v4sf cos_ps(v4sf x);
+void sincos_ps(v4sf x, v4sf *s, v4sf *c);
+
+#if defined (__MINGW32__)
+
+/* the ugly part below: many versions of gcc used to be completely buggy with respect to some intrinsics
+ The movehl_ps is fixed in mingw 3.4.5, but I found out that all the _mm_cmp* intrinsics were completely
+ broken on my mingw gcc 3.4.5 ...
+
+ Note that the bug on _mm_cmp* does occur only at -O0 optimization level
+*/
+
+inline __m128 my_movehl_ps(__m128 a, const __m128 b) {
+ asm (
+ "movhlps %2,%0\n\t"
+ : "=x" (a)
+ : "0" (a), "x"(b)
+ );
+ return a; }
+#warning "redefined _mm_movehl_ps (see gcc bug 21179)"
+#define _mm_movehl_ps my_movehl_ps
+
+inline __m128 my_cmplt_ps(__m128 a, const __m128 b) {
+ asm (
+ "cmpltps %2,%0\n\t"
+ : "=x" (a)
+ : "0" (a), "x"(b)
+ );
+ return a;
+ }
+inline __m128 my_cmpgt_ps(__m128 a, const __m128 b) {
+ asm (
+ "cmpnleps %2,%0\n\t"
+ : "=x" (a)
+ : "0" (a), "x"(b)
+ );
+ return a;
+}
+inline __m128 my_cmpeq_ps(__m128 a, const __m128 b) {
+ asm (
+ "cmpeqps %2,%0\n\t"
+ : "=x" (a)
+ : "0" (a), "x"(b)
+ );
+ return a;
+}
+#warning "redefined _mm_cmpxx_ps functions..."
+#define _mm_cmplt_ps my_cmplt_ps
+#define _mm_cmpgt_ps my_cmpgt_ps
+#define _mm_cmpeq_ps my_cmpeq_ps
+#endif
+
+#ifndef USE_SSE2
+typedef union xmm_mm_union {
+ __m128 xmm;
+ __m64 mm[2];
+} xmm_mm_union;
+
+#define COPY_XMM_TO_MM(xmm_, mm0_, mm1_) { \
+ xmm_mm_union u; u.xmm = xmm_; \
+ mm0_ = u.mm[0]; \
+ mm1_ = u.mm[1]; \
+}
+
+#define COPY_MM_TO_XMM(mm0_, mm1_, xmm_) { \
+ xmm_mm_union u; u.mm[0]=mm0_; u.mm[1]=mm1_; xmm_ = u.xmm; \
+ }
+
+#endif // USE_SSE2
+
+/* natural logarithm computed for 4 simultaneous float
+ return NaN for x <= 0
+*/
+v4sf log_ps(v4sf x) {
+#ifdef USE_SSE2
+ v4si emm0;
+#else
+ v2si mm0, mm1;
+#endif
+ v4sf one = *(v4sf*)_ps_1;
+
+ v4sf invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
+ v4sf e, mask, tmp, z, y;
+
+ x = _mm_max_ps(x, *(v4sf*)_ps_min_norm_pos); /* cut off denormalized stuff */
+
+#ifndef USE_SSE2
+ /* part 1: x = frexpf(x, &e); */
+ COPY_XMM_TO_MM(x, mm0, mm1);
+ mm0 = _mm_srli_pi32(mm0, 23);
+ mm1 = _mm_srli_pi32(mm1, 23);
+#else
+ emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
+#endif
+ /* keep only the fractional part */
+ x = _mm_and_ps(x, *(v4sf*)_ps_inv_mant_mask);
+ x = _mm_or_ps(x, *(v4sf*)_ps_0p5);
+
+#ifndef USE_SSE2
+ /* now e=mm0:mm1 contain the really base-2 exponent */
+ mm0 = _mm_sub_pi32(mm0, *(v2si*)_pi32_0x7f);
+ mm1 = _mm_sub_pi32(mm1, *(v2si*)_pi32_0x7f);
+ e = _mm_cvtpi32x2_ps(mm0, mm1);
+ _mm_empty(); /* bye bye mmx */
+#else
+ emm0 = _mm_sub_epi32(emm0, *(v4si*)_pi32_0x7f);
+ e = _mm_cvtepi32_ps(emm0);
+#endif
+
+ e = _mm_add_ps(e, one);
+
+ /* part2:
+ if( x < SQRTHF ) {
+ e -= 1;
+ x = x + x - 1.0;
+ } else { x = x - 1.0; }
+ */
+
+ mask = _mm_cmplt_ps(x, *(v4sf*)_ps_cephes_SQRTHF);
+ tmp = _mm_and_ps(x, mask);
+ x = _mm_sub_ps(x, one);
+ e = _mm_sub_ps(e, _mm_and_ps(one, mask));
+ x = _mm_add_ps(x, tmp);
+
+
+ z = _mm_mul_ps(x,x);
+
+ y = *(v4sf*)_ps_cephes_log_p0;
+ y = _mm_mul_ps(y, x);
+ y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p1);
+ y = _mm_mul_ps(y, x);
+ y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p2);
+ y = _mm_mul_ps(y, x);
+ y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p3);
+ y = _mm_mul_ps(y, x);
+ y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p4);
+ y = _mm_mul_ps(y, x);
+ y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p5);
+ y = _mm_mul_ps(y, x);
+ y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p6);
+ y = _mm_mul_ps(y, x);
+ y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p7);
+ y = _mm_mul_ps(y, x);
+ y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p8);
+ y = _mm_mul_ps(y, x);
+
+ y = _mm_mul_ps(y, z);
+
+
+ tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q1);
+ y = _mm_add_ps(y, tmp);
+
+
+ tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
+ y = _mm_sub_ps(y, tmp);
+
+ tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q2);
+ x = _mm_add_ps(x, y);
+ x = _mm_add_ps(x, tmp);
+ x = _mm_or_ps(x, invalid_mask); // negative arg will be NAN
+ return x;
+}
+
+_PS_CONST(exp_hi, 88.3762626647949f);
+_PS_CONST(exp_lo, -88.3762626647949f);
+
+_PS_CONST(cephes_LOG2EF, 1.44269504088896341);
+_PS_CONST(cephes_exp_C1, 0.693359375);
+_PS_CONST(cephes_exp_C2, -2.12194440e-4);
+
+_PS_CONST(cephes_exp_p0, 1.9875691500E-4);
+_PS_CONST(cephes_exp_p1, 1.3981999507E-3);
+_PS_CONST(cephes_exp_p2, 8.3334519073E-3);
+_PS_CONST(cephes_exp_p3, 4.1665795894E-2);
+_PS_CONST(cephes_exp_p4, 1.6666665459E-1);
+_PS_CONST(cephes_exp_p5, 5.0000001201E-1);
+
+v4sf exp_ps(v4sf x) {
+ v4sf tmp = _mm_setzero_ps(), fx;
+#ifdef USE_SSE2
+ v4si emm0;
+#else
+ v2si mm0, mm1;
+#endif
+ v4sf one = *(v4sf*)_ps_1;
+ v4sf mask, z, y, pow2n;
+
+ x = _mm_min_ps(x, *(v4sf*)_ps_exp_hi);
+ x = _mm_max_ps(x, *(v4sf*)_ps_exp_lo);
+
+ /* express exp(x) as exp(g + n*log(2)) */
+ fx = _mm_mul_ps(x, *(v4sf*)_ps_cephes_LOG2EF);
+ fx = _mm_add_ps(fx, *(v4sf*)_ps_0p5);
+
+ /* how to perform a floorf with SSE: just below */
+#ifndef USE_SSE2
+ /* step 1 : cast to int */
+ tmp = _mm_movehl_ps(tmp, fx);
+ mm0 = _mm_cvttps_pi32(fx);
+ mm1 = _mm_cvttps_pi32(tmp);
+ /* step 2 : cast back to float */
+ tmp = _mm_cvtpi32x2_ps(mm0, mm1);
+#else
+ emm0 = _mm_cvttps_epi32(fx);
+ tmp = _mm_cvtepi32_ps(emm0);
+#endif
+ /* if greater, substract 1 */
+ mask = _mm_cmpgt_ps(tmp, fx);
+ mask = _mm_and_ps(mask, one);
+ fx = _mm_sub_ps(tmp, mask);
+
+ tmp = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C1);
+ z = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C2);
+ x = _mm_sub_ps(x, tmp);
+ x = _mm_sub_ps(x, z);
+
+ z = _mm_mul_ps(x,x);
+
+ y = *(v4sf*)_ps_cephes_exp_p0;
+ y = _mm_mul_ps(y, x);
+ y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p1);
+ y = _mm_mul_ps(y, x);
+ y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p2);
+ y = _mm_mul_ps(y, x);
+ y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p3);
+ y = _mm_mul_ps(y, x);
+ y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p4);
+ y = _mm_mul_ps(y, x);
+ y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p5);
+ y = _mm_mul_ps(y, z);
+ y = _mm_add_ps(y, x);
+ y = _mm_add_ps(y, one);
+
+ /* build 2^n */
+#ifndef USE_SSE2
+ z = _mm_movehl_ps(z, fx);
+ mm0 = _mm_cvttps_pi32(fx);
+ mm1 = _mm_cvttps_pi32(z);
+ mm0 = _mm_add_pi32(mm0, *(v2si*)_pi32_0x7f);
+ mm1 = _mm_add_pi32(mm1, *(v2si*)_pi32_0x7f);
+ mm0 = _mm_slli_pi32(mm0, 23);
+ mm1 = _mm_slli_pi32(mm1, 23);
+
+ COPY_MM_TO_XMM(mm0, mm1, pow2n);
+ _mm_empty();
+#else
+ emm0 = _mm_cvttps_epi32(fx);
+ emm0 = _mm_add_epi32(emm0, *(v4si*)_pi32_0x7f);
+ emm0 = _mm_slli_epi32(emm0, 23);
+ pow2n = _mm_castsi128_ps(emm0);
+#endif
+ y = _mm_mul_ps(y, pow2n);
+ return y;
+}
+
+_PS_CONST(minus_cephes_DP1, -0.78515625);
+_PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4);
+_PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8);
+_PS_CONST(sincof_p0, -1.9515295891E-4);
+_PS_CONST(sincof_p1, 8.3321608736E-3);
+_PS_CONST(sincof_p2, -1.6666654611E-1);
+_PS_CONST(coscof_p0, 2.443315711809948E-005);
+_PS_CONST(coscof_p1, -1.388731625493765E-003);
+_PS_CONST(coscof_p2, 4.166664568298827E-002);
+_PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
+
+
+/* evaluation of 4 sines at onces, using only SSE1+MMX intrinsics so
+ it runs also on old athlons XPs and the pentium III of your grand
+ mother.
+
+ The code is the exact rewriting of the cephes sinf function.
+ Precision is excellent as long as x < 8192 (I did not bother to
+ take into account the special handling they have for greater values
+ -- it does not return garbage for arguments over 8192, though, but
+ the extra precision is missing).
+
+ Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
+ surprising but correct result.
+
+ Performance is also surprisingly good, 1.33 times faster than the
+ macos vsinf SSE2 function, and 1.5 times faster than the
+ __vrs4_sinf of amd's ACML (which is only available in 64 bits). Not
+ too bad for an SSE1 function (with no special tuning) !
+ However the latter libraries probably have a much better handling of NaN,
+ Inf, denormalized and other special arguments..
+
+ On my core 1 duo, the execution of this function takes approximately 95 cycles.
+
+ From what I have observed on the experiments with Intel AMath lib, switching to an
+ SSE2 version would improve the perf by only 10%.
+
+ Since it is based on SSE intrinsics, it has to be compiled at -O2 to
+ deliver full speed.
+*/
+v4sf sin_ps(v4sf x) { // any x
+ v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
+
+#ifdef USE_SSE2
+ v4si emm0, emm2;
+#else
+ v2si mm0, mm1, mm2, mm3;
+#endif
+ v4sf swap_sign_bit, poly_mask, z, tmp, y2;
+
+ sign_bit = x;
+ /* take the absolute value */
+ x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
+ /* extract the sign bit (upper one) */
+ sign_bit = _mm_and_ps(sign_bit, *(v4sf*)_ps_sign_mask);
+
+ /* scale by 4/Pi */
+ y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
+
+ //printf("plop:"); print4(y);
+#ifdef USE_SSE2
+ /* store the integer part of y in mm0 */
+ emm2 = _mm_cvttps_epi32(y);
+ /* j=(j+1) & (~1) (see the cephes sources) */
+ emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
+ emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
+ y = _mm_cvtepi32_ps(emm2);
+ /* get the swap sign flag */
+ emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
+ emm0 = _mm_slli_epi32(emm0, 29);
+ /* get the polynom selection mask
+ there is one polynom for 0 <= x <= Pi/4
+ and another one for Pi/4<x<=Pi/2
+
+ Both branches will be computed.
+ */
+ emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
+ emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+
+ swap_sign_bit = _mm_castsi128_ps(emm0);
+ poly_mask = _mm_castsi128_ps(emm2);
+ sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
+#else
+ /* store the integer part of y in mm0:mm1 */
+ xmm2 = _mm_movehl_ps(xmm2, y);
+ mm2 = _mm_cvttps_pi32(y);
+ mm3 = _mm_cvttps_pi32(xmm2);
+ /* j=(j+1) & (~1) (see the cephes sources) */
+ mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
+ mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
+ mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
+ mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
+ y = _mm_cvtpi32x2_ps(mm2, mm3);
+ /* get the swap sign flag */
+ mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
+ mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
+ mm0 = _mm_slli_pi32(mm0, 29);
+ mm1 = _mm_slli_pi32(mm1, 29);
+ /* get the polynom selection mask */
+ mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
+ mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
+ mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
+ mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
+
+ COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit);
+ COPY_MM_TO_XMM(mm2, mm3, poly_mask);
+ sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
+ _mm_empty(); /* good-bye mmx */
+#endif
+
+ /* The magic pass: "Extended precision modular arithmetic"
+ x = ((x - y * DP1) - y * DP2) - y * DP3; */
+ xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
+ xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
+ xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
+ xmm1 = _mm_mul_ps(y, xmm1);
+ xmm2 = _mm_mul_ps(y, xmm2);
+ xmm3 = _mm_mul_ps(y, xmm3);
+ x = _mm_add_ps(x, xmm1);
+ x = _mm_add_ps(x, xmm2);
+ x = _mm_add_ps(x, xmm3);
+
+ /* Evaluate the first polynom (0 <= x <= Pi/4) */
+ y = *(v4sf*)_ps_coscof_p0;
+ z = _mm_mul_ps(x,x);
+
+ y = _mm_mul_ps(y, z);
+ y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
+ y = _mm_mul_ps(y, z);
+ y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
+ y = _mm_mul_ps(y, z);
+ y = _mm_mul_ps(y, z);
+ tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
+ y = _mm_sub_ps(y, tmp);
+ y = _mm_add_ps(y, *(v4sf*)_ps_1);
+
+ /* Evaluate the second polynom (Pi/4 <= x <= 0) */
+
+ y2 = *(v4sf*)_ps_sincof_p0;
+ y2 = _mm_mul_ps(y2, z);
+ y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
+ y2 = _mm_mul_ps(y2, z);
+ y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
+ y2 = _mm_mul_ps(y2, z);
+ y2 = _mm_mul_ps(y2, x);
+ y2 = _mm_add_ps(y2, x);
+
+ /* select the correct result from the two polynoms */
+ xmm3 = poly_mask;
+ y2 = _mm_and_ps(xmm3, y2); //, xmm3);
+ y = _mm_andnot_ps(xmm3, y);
+ y = _mm_add_ps(y,y2);
+ /* update the sign */
+ y = _mm_xor_ps(y, sign_bit);
+
+ return y;
+}
+
+/* almost the same as sin_ps */
+v4sf cos_ps(v4sf x) { // any x
+ v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
+#ifdef USE_SSE2
+ v4si emm0, emm2;
+#else
+ v2si mm0, mm1, mm2, mm3;
+#endif
+ v4sf sign_bit, poly_mask, z, tmp, y2;
+
+ /* take the absolute value */
+ x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
+
+ /* scale by 4/Pi */
+ y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
+
+#ifdef USE_SSE2
+ /* store the integer part of y in mm0 */
+ emm2 = _mm_cvttps_epi32(y);
+ /* j=(j+1) & (~1) (see the cephes sources) */
+ emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
+ emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
+ y = _mm_cvtepi32_ps(emm2);
+
+ emm2 = _mm_sub_epi32(emm2, *(v4si*)_pi32_2);
+
+ /* get the swap sign flag */
+ emm0 = _mm_andnot_si128(emm2, *(v4si*)_pi32_4);
+ emm0 = _mm_slli_epi32(emm0, 29);
+ /* get the polynom selection mask */
+ emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
+ emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+
+ sign_bit = _mm_castsi128_ps(emm0);
+ poly_mask = _mm_castsi128_ps(emm2);
+#else
+ /* store the integer part of y in mm0:mm1 */
+ xmm2 = _mm_movehl_ps(xmm2, y);
+ mm2 = _mm_cvttps_pi32(y);
+ mm3 = _mm_cvttps_pi32(xmm2);
+
+ /* j=(j+1) & (~1) (see the cephes sources) */
+ mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
+ mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
+ mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
+ mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
+
+ y = _mm_cvtpi32x2_ps(mm2, mm3);
+
+
+ mm2 = _mm_sub_pi32(mm2, *(v2si*)_pi32_2);
+ mm3 = _mm_sub_pi32(mm3, *(v2si*)_pi32_2);
+
+ /* get the swap sign flag in mm0:mm1 and the
+ polynom selection mask in mm2:mm3 */
+
+ mm0 = _mm_andnot_si64(mm2, *(v2si*)_pi32_4);
+ mm1 = _mm_andnot_si64(mm3, *(v2si*)_pi32_4);
+ mm0 = _mm_slli_pi32(mm0, 29);
+ mm1 = _mm_slli_pi32(mm1, 29);
+
+ mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
+ mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
+
+ mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
+ mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
+
+ COPY_MM_TO_XMM(mm0, mm1, sign_bit);
+ COPY_MM_TO_XMM(mm2, mm3, poly_mask);
+ _mm_empty(); /* good-bye mmx */
+#endif
+ /* The magic pass: "Extended precision modular arithmetic"
+ x = ((x - y * DP1) - y * DP2) - y * DP3; */
+ xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
+ xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
+ xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
+ xmm1 = _mm_mul_ps(y, xmm1);
+ xmm2 = _mm_mul_ps(y, xmm2);
+ xmm3 = _mm_mul_ps(y, xmm3);
+ x = _mm_add_ps(x, xmm1);
+ x = _mm_add_ps(x, xmm2);
+ x = _mm_add_ps(x, xmm3);
+
+ /* Evaluate the first polynom (0 <= x <= Pi/4) */
+ y = *(v4sf*)_ps_coscof_p0;
+ z = _mm_mul_ps(x,x);
+
+ y = _mm_mul_ps(y, z);
+ y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
+ y = _mm_mul_ps(y, z);
+ y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
+ y = _mm_mul_ps(y, z);
+ y = _mm_mul_ps(y, z);
+ tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
+ y = _mm_sub_ps(y, tmp);
+ y = _mm_add_ps(y, *(v4sf*)_ps_1);
+
+ /* Evaluate the second polynom (Pi/4 <= x <= 0) */
+
+ y2 = *(v4sf*)_ps_sincof_p0;
+ y2 = _mm_mul_ps(y2, z);
+ y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
+ y2 = _mm_mul_ps(y2, z);
+ y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
+ y2 = _mm_mul_ps(y2, z);
+ y2 = _mm_mul_ps(y2, x);
+ y2 = _mm_add_ps(y2, x);
+
+ /* select the correct result from the two polynoms */
+ xmm3 = poly_mask;
+ y2 = _mm_and_ps(xmm3, y2); //, xmm3);
+ y = _mm_andnot_ps(xmm3, y);
+ y = _mm_add_ps(y,y2);
+ /* update the sign */
+ y = _mm_xor_ps(y, sign_bit);
+
+ return y;
+}
+
+/* since sin_ps and cos_ps are almost identical, sincos_ps could replace both of them..
+ it is almost as fast, and gives you a free cosine with your sine */
+void sincos_ps(v4sf x, v4sf *s, v4sf *c) {
+ v4sf xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y;
+#ifdef USE_SSE2
+ v4si emm0, emm2, emm4;
+#else
+ v2si mm0, mm1, mm2, mm3, mm4, mm5;
+#endif
+ v4sf swap_sign_bit_sin, poly_mask, z, tmp, y2, ysin1, ysin2;
+ v4sf sign_bit_cos;
+
+ sign_bit_sin = x;
+ /* take the absolute value */
+ x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
+ /* extract the sign bit (upper one) */
+ sign_bit_sin = _mm_and_ps(sign_bit_sin, *(v4sf*)_ps_sign_mask);
+
+ /* scale by 4/Pi */
+ y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
+
+#ifdef USE_SSE2
+ /* store the integer part of y in emm2 */
+ emm2 = _mm_cvttps_epi32(y);
+
+ /* j=(j+1) & (~1) (see the cephes sources) */
+ emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
+ emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
+ y = _mm_cvtepi32_ps(emm2);
+
+ emm4 = emm2;
+
+ /* get the swap sign flag for the sine */
+ emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
+ emm0 = _mm_slli_epi32(emm0, 29);
+ swap_sign_bit_sin = _mm_castsi128_ps(emm0);
+
+ /* get the polynom selection mask for the sine*/
+ emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
+ emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+ poly_mask = _mm_castsi128_ps(emm2);
+#else
+ /* store the integer part of y in mm2:mm3 */
+ xmm3 = _mm_movehl_ps(xmm3, y);
+ mm2 = _mm_cvttps_pi32(y);
+ mm3 = _mm_cvttps_pi32(xmm3);
+
+ /* j=(j+1) & (~1) (see the cephes sources) */
+ mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
+ mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
+ mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
+ mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
+
+ y = _mm_cvtpi32x2_ps(mm2, mm3);
+
+ mm4 = mm2;
+ mm5 = mm3;
+
+ /* get the swap sign flag for the sine */
+ mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
+ mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
+ mm0 = _mm_slli_pi32(mm0, 29);
+ mm1 = _mm_slli_pi32(mm1, 29);
+
+ COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit_sin);
+
+ /* get the polynom selection mask for the sine */
+
+ mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
+ mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
+ mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
+ mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
+
+ COPY_MM_TO_XMM(mm2, mm3, poly_mask);
+#endif
+
+ /* The magic pass: "Extended precision modular arithmetic"
+ x = ((x - y * DP1) - y * DP2) - y * DP3; */
+ xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
+ xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
+ xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
+ xmm1 = _mm_mul_ps(y, xmm1);
+ xmm2 = _mm_mul_ps(y, xmm2);
+ xmm3 = _mm_mul_ps(y, xmm3);
+ x = _mm_add_ps(x, xmm1);
+ x = _mm_add_ps(x, xmm2);
+ x = _mm_add_ps(x, xmm3);
+
+#ifdef USE_SSE2
+ emm4 = _mm_sub_epi32(emm4, *(v4si*)_pi32_2);
+ emm4 = _mm_andnot_si128(emm4, *(v4si*)_pi32_4);
+ emm4 = _mm_slli_epi32(emm4, 29);
+ sign_bit_cos = _mm_castsi128_ps(emm4);
+#else
+ /* get the sign flag for the cosine */
+ mm4 = _mm_sub_pi32(mm4, *(v2si*)_pi32_2);
+ mm5 = _mm_sub_pi32(mm5, *(v2si*)_pi32_2);
+ mm4 = _mm_andnot_si64(mm4, *(v2si*)_pi32_4);
+ mm5 = _mm_andnot_si64(mm5, *(v2si*)_pi32_4);
+ mm4 = _mm_slli_pi32(mm4, 29);
+ mm5 = _mm_slli_pi32(mm5, 29);
+ COPY_MM_TO_XMM(mm4, mm5, sign_bit_cos);
+ _mm_empty(); /* good-bye mmx */
+#endif
+
+ sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin);
+
+
+ /* Evaluate the first polynom (0 <= x <= Pi/4) */
+ z = _mm_mul_ps(x,x);
+ y = *(v4sf*)_ps_coscof_p0;
+
+ y = _mm_mul_ps(y, z);
+ y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
+ y = _mm_mul_ps(y, z);
+ y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
+ y = _mm_mul_ps(y, z);
+ y = _mm_mul_ps(y, z);
+ tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
+ y = _mm_sub_ps(y, tmp);
+ y = _mm_add_ps(y, *(v4sf*)_ps_1);
+
+ /* Evaluate the second polynom (Pi/4 <= x <= 0) */
+
+ y2 = *(v4sf*)_ps_sincof_p0;
+ y2 = _mm_mul_ps(y2, z);
+ y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
+ y2 = _mm_mul_ps(y2, z);
+ y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
+ y2 = _mm_mul_ps(y2, z);
+ y2 = _mm_mul_ps(y2, x);
+ y2 = _mm_add_ps(y2, x);
+
+ /* select the correct result from the two polynoms */
+ xmm3 = poly_mask;
+ ysin2 = _mm_and_ps(xmm3, y2);
+ ysin1 = _mm_andnot_ps(xmm3, y);
+ y2 = _mm_sub_ps(y2,ysin2);
+ y = _mm_sub_ps(y, ysin1);
+
+ xmm1 = _mm_add_ps(ysin1,ysin2);
+ xmm2 = _mm_add_ps(y,y2);
+
+ /* update the sign */
+ *s = _mm_xor_ps(xmm1, sign_bit_sin);
+ *c = _mm_xor_ps(xmm2, sign_bit_cos);
+}
+